Files
doc-manager/apps/api/src/modules/templates/import.routes.ts
T
admin e768d30fb6 feat: import DOCX/PDF/scanned templates via DeepSeek recognition
Backend pipeline:
- POST /api/templates/import (multipart, max 25 MB)
- extract.ts: DOCX→mammoth, PDF→pdf-parse, fallback to OCR via tesseract+poppler-utils
  (pdftoppm renders pages to PNG, tesseract reads with rus+eng)
- deepseek.ts: chat completions client with strict JSON response_format
- recognize.ts: structured prompt that produces simplified DocBody (string text),
  postprocessor wraps text in TipTap-compatible JSON, validates with zod schema
- prompt enforces placeholder substitution: {{customer.*}}, {{executor.*}},
  {{contract.number}}, {{contract.date}}, {{today}}
- error codes: NO_OCR / NO_DEEPSEEK_KEY / UNSUPPORTED_MIME / INVALID_DOC_BODY

Dockerfile: apk add tesseract-ocr (+rus +eng data), poppler-utils, imagemagick

Frontend:
- Templates page: ⤴ Загрузить документ → file picker (.docx,.pdf,.png,.jpg)
- doc type selector (contract/invoice/act/upd)
- import-banner with spinner shows uploading→analyzing stages
- on success navigates to /templates/:id (TemplateEdit) for review

Reuses DEEPSEEK_API_KEY pattern from Hall-planer.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 11:40:28 +03:00

96 lines
3.7 KiB
TypeScript

import type { FastifyInstance } from 'fastify';
import { Prisma } from '@prisma/client';
import { prisma } from '../../db.js';
import { getOrganizationId } from '../../lib/org.js';
import { extractText } from './extract.js';
import { recognizeTemplate } from './recognize.js';
// Upper bound on the uploaded file size, in bytes; passed as the per-request
// `fileSize` limit to `req.file()` in the import route below.
// NOTE(review): the feature description mentions a 25 MB cap while this value
// is 20 MB — confirm which one is intended.
const MAX_BYTES = 20 * 1024 * 1024; // 20 MB
/** Document types a client may explicitly request for an imported template. */
const IMPORT_DOC_TYPES = ['contract', 'invoice', 'act', 'upd'] as const;
type ImportDocType = (typeof IMPORT_DOC_TYPES)[number];

/** Type guard: true when `value` is one of the supported document types. */
function isImportDocType(value: string): value is ImportDocType {
  return (IMPORT_DOC_TYPES as readonly string[]).includes(value);
}

/**
 * Registers `POST /api/templates/import`.
 *
 * The handler reads one uploaded file from the multipart request, extracts its
 * text via `extractText`, passes the text to `recognizeTemplate`, and stores
 * the result as a new `documentTemplate` row for the caller's organization.
 * The route is guarded by the `app.requireDocPermission('user')` preHandler.
 *
 * Optional form fields (read from `file.fields`):
 * - `name`    — template name; when blank, the recognized title or a dated
 *               default is used instead.
 * - `docType` — one of `contract | invoice | act | upd`; when blank, the type
 *               returned by `recognizeTemplate` is used instead.
 *
 * Responses:
 * - 201 `{ template, extractedFrom, textLength }` on success
 * - 400 `no_file` / `invalid_doc_type`
 * - 415 `unsupported_mime`
 * - 422 `empty_extracted_text` (fewer than 30 non-blank-trimmed characters)
 * - 502 `invalid_doc_body` / `deepseek_error`
 * - 503 `no_deepseek_key` / `no_ocr`
 * - 500 `import_failed` for any other error (also logged)
 *
 * @param app Fastify instance the route is registered on.
 */
export async function templatesImportRoutes(app: FastifyInstance) {
  app.post(
    '/api/templates/import',
    { preHandler: app.requireDocPermission('user') },
    async (req, reply) => {
      const orgId = getOrganizationId(req);
      const file = await req.file({ limits: { fileSize: MAX_BYTES } });
      if (!file) {
        return reply.code(400).send({ error: 'no_file' });
      }

      // Deliberately outside the try/catch below: errors raised while reading
      // the upload propagate to Fastify instead of being reported as
      // `import_failed`. NOTE(review): presumably this is where the multipart
      // plugin signals an over-limit file — confirm against the plugin config.
      const buf = await file.toBuffer();
      const filename = file.filename || 'template';
      const mime = file.mimetype || '';

      // Optional name/type arrive as form fields; when absent the LLM result is used.
      const fields = (file.fields ?? {}) as Record<string, { value?: string }>;
      const userName = typeof fields.name?.value === 'string' ? fields.name.value : null;
      const rawDocType = typeof fields.docType?.value === 'string' ? fields.docType.value.trim() : '';

      // Validate the client-supplied type at runtime instead of trusting a cast:
      // a blank value means "not provided", anything outside the allow-list is
      // rejected before the expensive extraction / recognition steps run.
      // The check happens after the upload has been fully buffered so the file
      // stream is never left unconsumed when replying early.
      let userDocType: ImportDocType | null = null;
      if (rawDocType) {
        if (!isImportDocType(rawDocType)) {
          return reply.code(400).send({
            error: 'invalid_doc_type',
            message: `Неизвестный тип документа. Допустимые значения: ${IMPORT_DOC_TYPES.join(', ')}.`,
          });
        }
        userDocType = rawDocType;
      }

      try {
        // 1) Extract the text
        const extracted = await extractText(buf, mime, filename);
        if (extracted.text.trim().length < 30) {
          return reply.code(422).send({
            error: 'empty_extracted_text',
            message: 'Не удалось извлечь читаемый текст. Возможно, скан плохого качества.',
            source: extracted.source,
          });
        }

        // 2) Recognize the structure via DeepSeek
        const recognized = await recognizeTemplate(extracted.text);
        const docType = userDocType ?? recognized.docType;
        // `||` is intentional here: an empty name or title must fall through to the next candidate.
        const name =
          userName?.trim() || recognized.title || `Импорт ${new Date().toLocaleDateString('ru-RU')}`;

        const created = await prisma.documentTemplate.create({
          data: {
            organizationId: orgId,
            docType,
            name,
            body: recognized.docBody as Prisma.InputJsonValue,
          },
        });

        return reply.code(201).send({
          template: created,
          extractedFrom: extracted.source,
          textLength: extracted.text.length,
        });
      } catch (e) {
        // Anything can be thrown; only read properties off real objects so the
        // error handler itself cannot fail on `null` / `undefined` / primitives.
        const err = (typeof e === 'object' && e !== null ? e : {}) as {
          code?: string;
          message?: string;
          status?: number;
        };

        switch (err.code) {
          case 'NO_DEEPSEEK_KEY':
            return reply.code(503).send({ error: 'no_deepseek_key', message: err.message });
          case 'NO_OCR':
            return reply.code(503).send({ error: 'no_ocr', message: err.message });
          case 'UNSUPPORTED_MIME':
            return reply.code(415).send({ error: 'unsupported_mime', message: err.message });
          case 'INVALID_DOC_BODY':
            return reply.code(502).send({
              error: 'invalid_doc_body',
              message: 'LLM вернула структуру, не прошедшую валидацию. Попробуйте загрузить ещё раз или отредактировать вручную.',
            });
          case 'DEEPSEEK_HTTP_ERROR':
            // The upstream HTTP status is echoed in the body; the response itself is always 502.
            return reply.code(502).send({
              error: 'deepseek_error',
              message: err.message,
              status: err.status ?? 502,
            });
          default:
            // Unrecognized failure: log the original value and report a generic error.
            app.log.error({ err: e }, 'template import failed');
            return reply.code(500).send({ error: 'import_failed', message: err.message ?? 'unknown' });
        }
      }
    },
  );
}