feat: import DOCX/PDF/scanned templates via DeepSeek recognition
Backend pipeline:
- POST /api/templates/import (multipart, max 20 MB)
- extract.ts: DOCX→mammoth, PDF→pdf-parse, fallback to OCR via tesseract+poppler-utils
(pdftoppm renders pages to PNG, tesseract reads with rus+eng)
- deepseek.ts: chat completions client with strict JSON response_format
- recognize.ts: structured prompt that produces simplified DocBody (string text),
postprocessor wraps text in TipTap-compatible JSON, validates with zod schema
- prompt enforces placeholder substitution: {{customer.*}}, {{executor.*}},
{{contract.number}}, {{contract.date}}, {{today}}
- error codes: NO_OCR / NO_DEEPSEEK_KEY / UNSUPPORTED_MIME / INVALID_DOC_BODY
Dockerfile: apk add tesseract-ocr (+rus +eng data), poppler-utils, imagemagick
Frontend:
- Templates page: ⤴ Загрузить документ → file picker (.docx,.pdf,.png,.jpg)
- doc type selector (contract/invoice/act/upd)
- import-banner with spinner shows uploading→analyzing stages
- on success navigates to /templates/:id (TemplateEdit) for review
Reuses DEEPSEEK_API_KEY pattern from Hall-planer.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,95 @@
|
||||
import type { FastifyInstance } from 'fastify';
|
||||
import { Prisma } from '@prisma/client';
|
||||
import { prisma } from '../../db.js';
|
||||
import { getOrganizationId } from '../../lib/org.js';
|
||||
import { extractText } from './extract.js';
|
||||
import { recognizeTemplate } from './recognize.js';
|
||||
|
||||
/** Per-request upload size limit passed to `req.file()`: 20 MB. */
const MAX_BYTES = 20 * 1024 * 1024;

/** Document types a caller may request explicitly via the `docType` form field. */
const IMPORT_DOC_TYPES = ['contract', 'invoice', 'act', 'upd'] as const;
type ImportDocType = (typeof IMPORT_DOC_TYPES)[number];

/** Runtime guard: narrows an arbitrary string to one of the supported document types. */
function isImportDocType(value: string): value is ImportDocType {
  return (IMPORT_DOC_TYPES as readonly string[]).includes(value);
}

/**
 * Returns the trimmed value of a text form field, or `null` when the field is
 * missing, is not a string, or is blank.
 */
function readTextField(
  fields: Record<string, { value?: string } | undefined>,
  key: string,
): string | null {
  const raw = fields[key]?.value;
  if (typeof raw !== 'string') return null;
  const trimmed = raw.trim();
  return trimmed.length > 0 ? trimmed : null;
}

/**
 * Registers `POST /api/templates/import`.
 *
 * The handler reads one uploaded file, extracts its text with `extractText`,
 * sends that text to `recognizeTemplate`, and stores the result as a
 * `documentTemplate` row for the caller's organization.
 *
 * Optional form fields:
 * - `name`    — template name; falls back to the recognized title, then to
 *               "Импорт <current date>".
 * - `docType` — one of `contract | invoice | act | upd`; falls back to the
 *               recognized document type when absent or blank.
 *
 * Responses:
 * - 201 `{ template, extractedFrom, textLength }` on success
 * - 400 `no_file` / `invalid_doc_type`
 * - 415 `unsupported_mime`
 * - 422 `empty_extracted_text` (fewer than 30 non-blank characters extracted)
 * - 502 `invalid_doc_body` / `deepseek_error`
 * - 503 `no_deepseek_key` / `no_ocr`
 * - 500 `import_failed` for anything else (also logged)
 *
 * @param app Fastify instance; must provide `requireDocPermission`.
 */
export async function templatesImportRoutes(app: FastifyInstance) {
  app.post(
    '/api/templates/import',
    { preHandler: app.requireDocPermission('user') },
    async (req, reply) => {
      const orgId = getOrganizationId(req);
      const file = await req.file({ limits: { fileSize: MAX_BYTES } });
      if (!file) {
        reply.code(400).send({ error: 'no_file' });
        return;
      }
      const buf = await file.toBuffer();
      const filename = file.filename || 'template';
      const mime = file.mimetype || '';

      // Optional name/type arrive as form fields; when absent the LLM result is used.
      // NOTE(review): `file.fields` presumably only holds fields parsed before the
      // file part — confirm clients send `name`/`docType` ahead of the file.
      const fields = (file.fields ?? {}) as Record<string, { value?: string }>;
      const userName = readTextField(fields, 'name');
      const requestedDocType = readTextField(fields, 'docType');

      // Validate the requested type up front: it is untrusted input that ends up in
      // the database, and rejecting it here avoids a pointless OCR + LLM round trip.
      let userDocType: ImportDocType | null = null;
      if (requestedDocType !== null) {
        if (!isImportDocType(requestedDocType)) {
          reply.code(400).send({
            error: 'invalid_doc_type',
            message: `Недопустимый тип документа. Допустимые значения: ${IMPORT_DOC_TYPES.join(', ')}.`,
          });
          return;
        }
        userDocType = requestedDocType;
      }

      try {
        // 1) Extract the text.
        const extracted = await extractText(buf, mime, filename);
        if (extracted.text.trim().length < 30) {
          reply.code(422).send({
            error: 'empty_extracted_text',
            message: 'Не удалось извлечь читаемый текст. Возможно, скан плохого качества.',
            source: extracted.source,
          });
          return;
        }

        // 2) Recognize the structure via DeepSeek.
        const recognized = await recognizeTemplate(extracted.text);
        const docType = userDocType ?? recognized.docType;
        const name =
          userName || recognized.title || `Импорт ${new Date().toLocaleDateString('ru-RU')}`;

        // 3) Persist the template for the caller's organization.
        const created = await prisma.documentTemplate.create({
          data: {
            organizationId: orgId,
            docType,
            name,
            body: recognized.docBody as Prisma.InputJsonValue,
          },
        });

        reply.code(201).send({
          template: created,
          extractedFrom: extracted.source,
          textLength: extracted.text.length,
        });
      } catch (e) {
        // `e` may be anything (including null/undefined); read the fields defensively.
        const err = (e ?? {}) as { code?: string; message?: string; status?: number };

        switch (err.code) {
          case 'NO_DEEPSEEK_KEY':
            reply.code(503).send({ error: 'no_deepseek_key', message: err.message });
            return;
          case 'NO_OCR':
            reply.code(503).send({ error: 'no_ocr', message: err.message });
            return;
          case 'UNSUPPORTED_MIME':
            reply.code(415).send({ error: 'unsupported_mime', message: err.message });
            return;
          case 'INVALID_DOC_BODY':
            reply.code(502).send({
              error: 'invalid_doc_body',
              message: 'LLM вернула структуру, не прошедшую валидацию. Попробуйте загрузить ещё раз или отредактировать вручную.',
            });
            return;
          case 'DEEPSEEK_HTTP_ERROR':
            // Surface the upstream HTTP status in the payload; the response itself is 502.
            reply.code(502).send({
              error: 'deepseek_error',
              message: err.message,
              status: err.status ?? 502,
            });
            return;
          default:
            // Unrecognized failure: log the original error and answer with a generic 500.
            app.log.error({ err: e }, 'template import failed');
            reply.code(500).send({ error: 'import_failed', message: err.message ?? 'unknown' });
        }
      }
    },
  );
}
|
||||
Reference in New Issue
Block a user