e768d30fb6
Backend pipeline:
- POST /api/templates/import (multipart, max 20 MB)
- extract.ts: DOCX→mammoth, PDF→pdf-parse, fallback to OCR via tesseract+poppler-utils
(pdftoppm renders pages to PNG, tesseract reads with rus+eng)
- deepseek.ts: chat completions client with strict JSON response_format
- recognize.ts: structured prompt that produces simplified DocBody (string text),
postprocessor wraps text in TipTap-compatible JSON, validates with zod schema
- prompt enforces placeholder substitution: {{customer.*}}, {{executor.*}},
{{contract.number}}, {{contract.date}}, {{today}}
- error codes: NO_OCR / NO_DEEPSEEK_KEY / UNSUPPORTED_MIME / INVALID_DOC_BODY
Dockerfile: apk add tesseract-ocr (+rus +eng data), poppler-utils, imagemagick
Frontend:
- Templates page: ⤴ Загрузить документ → file picker (.docx,.pdf,.png,.jpg)
- doc type selector (contract/invoice/act/upd)
- import-banner with spinner shows uploading→analyzing stages
- on success navigates to /templates/:id (TemplateEdit) for review
Reuses DEEPSEEK_API_KEY pattern from Hall-planer.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
96 lines
3.7 KiB
TypeScript
96 lines
3.7 KiB
TypeScript
import type { FastifyInstance } from 'fastify';
|
|
import { Prisma } from '@prisma/client';
|
|
import { prisma } from '../../db.js';
|
|
import { getOrganizationId } from '../../lib/org.js';
|
|
import { extractText } from './extract.js';
|
|
import { recognizeTemplate } from './recognize.js';
|
|
|
|
/** Upper bound, in bytes, for an uploaded file (20 MiB); passed as the multipart `fileSize` limit. */
const MAX_BYTES = 20 * 1024 ** 2;
|
|
|
|
/** Document kinds accepted from the optional `docType` form field. */
const DOC_TYPES = ['contract', 'invoice', 'act', 'upd'] as const;
type DocType = (typeof DOC_TYPES)[number];

/** Runtime guard: true when `value` is one of the supported document kinds. */
function isDocType(value: string): value is DocType {
  return (DOC_TYPES as readonly string[]).includes(value);
}

/**
 * Reads a text field from the multipart `fields` bag.
 *
 * @returns the trimmed value, or `null` when the field is missing, is not a
 *   plain `{ value: string }` part (e.g. a repeated field), or is blank.
 */
function readTextField(fields: unknown, key: string): string | null {
  if (typeof fields !== 'object' || fields === null) return null;
  const part = (fields as Record<string, unknown>)[key];
  if (typeof part !== 'object' || part === null) return null;
  const value = (part as { value?: unknown }).value;
  if (typeof value !== 'string') return null;
  const trimmed = value.trim();
  return trimmed === '' ? null : trimmed;
}

/** Subset of a thrown value that the import handler inspects. */
interface ImportFailure {
  code: string | undefined;
  message: string | undefined;
  status: number | undefined;
}

/** Safely pulls `code` / `message` / `status` out of an arbitrary thrown value. */
function describeFailure(thrown: unknown): ImportFailure {
  if (typeof thrown !== 'object' || thrown === null) {
    return {
      code: undefined,
      message: typeof thrown === 'string' ? thrown : undefined,
      status: undefined,
    };
  }
  const { code, message, status } = thrown as Record<string, unknown>;
  return {
    code: typeof code === 'string' ? code : undefined,
    message: typeof message === 'string' ? message : undefined,
    status: typeof status === 'number' ? status : undefined,
  };
}

/**
 * Registers `POST /api/templates/import`.
 *
 * The handler accepts one multipart file (size capped by `MAX_BYTES`), extracts
 * its text via `extractText`, passes the text to `recognizeTemplate`, and stores
 * the result as a new `documentTemplate` row for the caller's organization.
 *
 * Optional form fields:
 * - `name`: template name; falls back to the recognized title, then to an
 *   "Импорт <date>" label.
 * - `docType`: one of `contract | invoice | act | upd`; falls back to the
 *   recognized type when absent or blank.
 *
 * Responses:
 * - 201 `{ template, extractedFrom, textLength }` on success
 * - 400 `no_file` / `invalid_doc_type`
 * - 422 `empty_extracted_text` when fewer than 30 non-blank characters were extracted
 * - 415 / 502 / 503 for errors carrying a known `code` (see the catch block)
 * - 500 `import_failed` for anything else (also logged)
 *
 * @param app Fastify instance; must provide `requireDocPermission`.
 */
export async function templatesImportRoutes(app: FastifyInstance) {
  app.post(
    '/api/templates/import',
    { preHandler: app.requireDocPermission('user') },
    async (req, reply) => {
      const orgId = getOrganizationId(req);

      const file = await req.file({ limits: { fileSize: MAX_BYTES } });
      if (!file) {
        reply.code(400).send({ error: 'no_file' });
        return;
      }

      // Drain the upload before touching anything else so the request stream is fully consumed.
      const buf = await file.toBuffer();
      const filename = file.filename || 'template';
      const mime = file.mimetype || '';

      // Optional name/type arrive as form fields; when missing, the LLM result is used.
      // NOTE(review): presumably only fields parsed by this point are visible here — confirm
      // against the multipart plugin's behaviour for fields sent after the file part.
      const fields: unknown = file.fields ?? {};
      const userName = readTextField(fields, 'name');
      const userDocType = readTextField(fields, 'docType');

      // Reject an unsupported docType up front: it is client input, and checking it here
      // avoids running extraction and the LLM call only to fail (or store garbage) at insert time.
      if (userDocType !== null && !isDocType(userDocType)) {
        reply.code(400).send({
          error: 'invalid_doc_type',
          message: `Неизвестный тип документа. Допустимые значения: ${DOC_TYPES.join(', ')}.`,
        });
        return;
      }

      try {
        // 1) Extract the text
        const extracted = await extractText(buf, mime, filename);
        if (extracted.text.trim().length < 30) {
          reply.code(422).send({
            error: 'empty_extracted_text',
            message: 'Не удалось извлечь читаемый текст. Возможно, скан плохого качества.',
            source: extracted.source,
          });
          return;
        }

        // 2) Recognize the structure via DeepSeek
        const recognized = await recognizeTemplate(extracted.text);
        const docType = userDocType ?? recognized.docType;
        const name =
          userName || recognized.title || `Импорт ${new Date().toLocaleDateString('ru-RU')}`;

        const created = await prisma.documentTemplate.create({
          data: {
            organizationId: orgId,
            docType,
            name,
            body: recognized.docBody as Prisma.InputJsonValue,
          },
        });

        reply.code(201).send({
          template: created,
          extractedFrom: extracted.source,
          textLength: extracted.text.length,
        });
      } catch (e: unknown) {
        const failure = describeFailure(e);

        // Errors tagged with a known code are translated to a specific HTTP status;
        // they are expected failure modes and therefore not logged as errors.
        switch (failure.code) {
          case 'NO_DEEPSEEK_KEY':
            reply.code(503).send({ error: 'no_deepseek_key', message: failure.message });
            return;
          case 'NO_OCR':
            reply.code(503).send({ error: 'no_ocr', message: failure.message });
            return;
          case 'UNSUPPORTED_MIME':
            reply.code(415).send({ error: 'unsupported_mime', message: failure.message });
            return;
          case 'INVALID_DOC_BODY':
            reply.code(502).send({
              error: 'invalid_doc_body',
              message: 'LLM вернула структуру, не прошедшую валидацию. Попробуйте загрузить ещё раз или отредактировать вручную.',
            });
            return;
          case 'DEEPSEEK_HTTP_ERROR':
            reply.code(502).send({
              error: 'deepseek_error',
              message: failure.message,
              status: failure.status ?? 502,
            });
            return;
          default:
            break;
        }

        app.log.error({ err: e }, 'template import failed');
        reply.code(500).send({ error: 'import_failed', message: failure.message ?? 'unknown' });
      }
    },
  );
}
|