feat: import DOCX/PDF/scanned templates via DeepSeek recognition

Backend pipeline:
- POST /api/templates/import (multipart, max 25 MB)
- extract.ts: DOCX→mammoth, PDF→pdf-parse, fallback to OCR via tesseract+poppler-utils
  (pdftoppm renders pages to PNG, tesseract reads with rus+eng)
- deepseek.ts: chat completions client with strict JSON response_format
- recognize.ts: structured prompt that produces simplified DocBody (string text),
  postprocessor wraps text in TipTap-compatible JSON, validates with zod schema
- prompt enforces placeholder substitution: {{customer.*}}, {{executor.*}},
  {{contract.number}}, {{contract.date}}, {{today}}
- error codes (the frontend matches the lowercase `error` field of the JSON response):
  no_ocr / no_deepseek_key / unsupported_mime / empty_extracted_text / invalid_doc_body

Dockerfile: apk add tesseract-ocr (+rus +eng data), poppler-utils, imagemagick

Frontend:
- Templates page: ⤴ Загрузить документ → file picker (.docx, .pdf, .png, .jpg/.jpeg, .tif/.tiff, other image/*)
- doc type selector (contract/invoice/act/upd)
- import-banner with spinner shows uploading→analyzing stages
- on success navigates to /templates/:id (TemplateEdit) for review

Reuses DEEPSEEK_API_KEY pattern from Hall-planer.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
admin
2026-05-01 11:40:28 +03:00
parent 624d378bb5
commit e768d30fb6
13 changed files with 1114 additions and 7 deletions
+98 -5
View File
@@ -1,8 +1,9 @@
import { useEffect, useState } from 'react';
import { useEffect, useRef, useState } from 'react';
import { Link, useNavigate } from 'react-router-dom';
import { api, type DocBody, type DocType, type DocumentTemplate } from '../api.js';
import { api, ApiError, type DocBody, type DocType, type DocumentTemplate } from '../api.js';
import { Button, EmptyState, Field, Modal, Select } from '../components/ui.js';
import { emptyRich } from '../lib/richtext.js';
import { redirectToLogin } from '../auth.js';
const DOC_TYPE_LABEL: Record<DocType, string> = {
contract: 'Договор', invoice: 'Счёт', act: 'Акт', upd: 'УПД',
@@ -22,10 +23,18 @@ function emptyBody(): DocBody {
};
}
/**
 * State of the "import document" flow, discriminated by `stage`:
 * - `idle`      — nothing in progress;
 * - `uploading` — the request for `filename` has been started;
 * - `analyzing` — a response for `filename` is being read;
 * - `error`     — the last import failed; `message` is shown to the user.
 */
type ImportState =
  | { stage: 'idle' }
  | { stage: 'uploading'; filename: string }
  | { stage: 'analyzing'; filename: string }
  | { stage: 'error'; message: string };
export function TemplatesPage() {
const [items, setItems] = useState<DocumentTemplate[] | null>(null);
const [creating, setCreating] = useState<{ name: string; docType: DocType } | null>(null);
const [importState, setImportState] = useState<ImportState>({ stage: 'idle' });
const [importDocType, setImportDocType] = useState<DocType>('contract');
const [error, setError] = useState<string | null>(null);
const fileInputRef = useRef<HTMLInputElement>(null);
const navigate = useNavigate();
async function load() {
@@ -76,15 +85,99 @@ export function TemplatesPage() {
}
}
/**
 * Uploads `file` together with the currently selected document type to
 * `POST /api/templates/import` and, on success, navigates to the page of the
 * created template.
 *
 * Progress and failures are reported through `setImportState`; the function
 * itself never rejects.
 *
 * @param file File chosen by the user in the hidden file input.
 */
async function importFile(file: File) {
  setImportState({ stage: 'uploading', filename: file.name });
  const fd = new FormData();
  fd.append('docType', importDocType);
  fd.append('file', file);
  try {
    const res = await fetch('/api/templates/import', {
      method: 'POST',
      credentials: 'include',
      body: fd,
    });
    if (res.status === 401) {
      // Not authenticated: leave the import flow and hand over to the login
      // redirect instead of treating the 401 body as an import result.
      setImportState({ stage: 'idle' });
      redirectToLogin();
      return;
    }
    // NOTE(review): fetch resolves once response headers arrive, so if the
    // server answers only after recognition has finished, this stage is
    // visible just while the body is being read — confirm against the backend.
    setImportState({ stage: 'analyzing', filename: file.name });
    const text = await res.text();

    // The body may be empty or not JSON at all (e.g. an HTML error page from
    // a reverse proxy); fall back to an empty object in that case.
    let data: Record<string, unknown> = {};
    if (text) {
      try {
        const parsed: unknown = JSON.parse(text);
        if (typeof parsed === 'object' && parsed !== null) {
          data = parsed as Record<string, unknown>;
        }
      } catch {
        // Keep `data` empty; the HTTP status is reported below.
      }
    }

    if (!res.ok) {
      const knownMessages = new Map<string, string>([
        ['no_deepseek_key', 'DeepSeek API ключ не настроен на сервере.'],
        ['no_ocr', 'OCR (tesseract) недоступен на сервере. Загрузка сканов невозможна.'],
        ['unsupported_mime', 'Неподдерживаемый формат файла. DOCX, PDF, PNG, JPG.'],
        ['empty_extracted_text', 'Не удалось извлечь текст (плохое качество скана).'],
        ['invalid_doc_body', 'LLM вернула невалидную структуру. Попробуйте ещё раз.'],
      ]);
      const code = typeof data.error === 'string' ? data.error : undefined;
      const serverMessage =
        typeof data.message === 'string' && data.message !== '' ? data.message : undefined;
      const msg =
        (code !== undefined ? knownMessages.get(code) : undefined) ??
        serverMessage ??
        // Avoid rendering "undefined (HTTP …)" when the body carries no error code.
        (code !== undefined ? `${code} (HTTP ${res.status})` : `HTTP ${res.status}`);
      setImportState({ stage: 'error', message: msg });
      return;
    }

    const tpl = data.template as DocumentTemplate | undefined;
    if (tpl?.id == null) {
      // A 2xx response without a template cannot be navigated to.
      setImportState({ stage: 'error', message: 'Сервер вернул ответ без шаблона.' });
      return;
    }
    setImportState({ stage: 'idle' });
    navigate(`/templates/${tpl.id}`);
  } catch (e) {
    // Network failures and anything unexpected above end up here.
    setImportState({ stage: 'error', message: String(e) });
  }
}
/**
 * Change handler of the hidden file input: starts an import for the first
 * selected file (if any) and then clears the input's value.
 */
function onPickFile(e: React.ChangeEvent<HTMLInputElement>) {
  const input = e.target;
  const picked = input.files?.[0];
  if (picked) {
    // Fire-and-forget: the import reports its own progress and errors.
    void importFile(picked);
  }
  // Clear the selection so that choosing the same file again fires `change`.
  input.value = '';
}
return (
<main className="content">
<header className="page-head">
<h2>Шаблоны</h2>
<Button variant="primary" onClick={() => setCreating({ name: '', docType: 'contract' })}>
+ Новый шаблон
</Button>
<div style={{ display: 'flex', gap: 8, alignItems: 'center' }}>
<Select
label=""
value={importDocType}
onChange={(v) => setImportDocType(v as DocType)}
options={[
{ value: 'contract', label: 'Договор' },
{ value: 'invoice', label: 'Счёт' },
{ value: 'act', label: 'Акт' },
{ value: 'upd', label: 'УПД' },
]}
/>
<Button onClick={() => fileInputRef.current?.click()} disabled={importState.stage !== 'idle' && importState.stage !== 'error'}>
Загрузить документ
</Button>
<input
ref={fileInputRef}
type="file"
accept=".docx,.pdf,.png,.jpg,.jpeg,.tif,.tiff,application/pdf,image/*"
style={{ display: 'none' }}
onChange={onPickFile}
/>
<Button variant="primary" onClick={() => setCreating({ name: '', docType: 'contract' })}>
+ Новый шаблон
</Button>
</div>
</header>
{importState.stage === 'uploading' || importState.stage === 'analyzing' ? (
<div className="import-banner">
<div className="spinner" />
<span>
{importState.stage === 'uploading' ? 'Загружаю' : 'Распознаю'} «{importState.filename}»
{importState.stage === 'analyzing' ? ' DeepSeek анализирует структуру (10–60 сек).' : ''}
</span>
</div>
) : null}
{importState.stage === 'error' ? (
<div className="error-text">
Импорт не удался: {importState.message}
<Button variant="ghost" onClick={() => setImportState({ stage: 'idle' })}>×</Button>
</div>
) : null}
{error ? <div className="error-text">{error}</div> : null}
{items === null ? (
+17
View File
@@ -275,6 +275,23 @@ body {
.tabs { border-bottom-color: #2a2e35; }
}
/* === import banner === */

/* Horizontal strip: spinner on the left, status text next to it. */
.import-banner {
  display: flex;
  align-items: center;
  gap: 12px;
  padding: 12px 16px;
  margin: 12px 0;
  background: #eff6ff;
  border: 1px solid #bfdbfe;
  color: #1e40af;
  border-radius: 8px;
}

/* 16px ring whose top segment is darker; rotated by the `spin` animation. */
.spinner {
  width: 16px;
  height: 16px;
  border-radius: 50%;
  border: 2px solid #bfdbfe;
  border-top-color: #2563eb;
  animation: spin 0.8s linear infinite;
}

/* One full clockwise turn per animation cycle. */
@keyframes spin {
  to {
    transform: rotate(360deg);
  }
}

/* Dark colour scheme: darker banner surface with lighter text. */
@media (prefers-color-scheme: dark) {
  .import-banner {
    background: #14213d;
    border-color: #1e3a8a;
    color: #93c5fd;
  }
}
/* === inn lookup === */
.inn-lookup { margin-top: 6px; display: flex; flex-direction: column; gap: 4px; }
.inn-lookup__error { font-size: 12px; color: #c0392b; }