From e768d30fb66baabdc4990a706b9932964a46db18 Mon Sep 17 00:00:00 2001 From: admin Date: Fri, 1 May 2026 11:40:28 +0300 Subject: [PATCH] feat: import DOCX/PDF/scanned templates via DeepSeek recognition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Backend pipeline: - POST /api/templates/import (multipart; the route caps a file at 20 MB, the multipart plugin is registered with a 25 MB ceiling) - extract.ts: DOCX→mammoth, PDF→pdf-parse, fallback to OCR via tesseract+poppler-utils (pdftoppm renders pages to PNG, tesseract reads with rus+eng) - deepseek.ts: chat completions client with strict JSON response_format - recognize.ts: structured prompt that produces simplified DocBody (string text), postprocessor wraps text in TipTap-compatible JSON, validates with zod schema - prompt enforces placeholder substitution: {{customer.*}}, {{executor.*}}, {{contract.number}}, {{contract.date}}, {{today}} - error codes: NO_OCR / NO_DEEPSEEK_KEY / UNSUPPORTED_MIME / INVALID_DOC_BODY / DEEPSEEK_HTTP_ERROR / DEEPSEEK_BAD_JSON Dockerfile: apk add tesseract-ocr (+rus +eng data), poppler-utils, imagemagick Frontend: - Templates page: ⤴ Загрузить документ → file picker (.docx,.pdf,.png,.jpg) - doc type selector (contract/invoice/act/upd) - import-banner with spinner shows uploading→analyzing stages - on success navigates to /templates/:id (TemplateEdit) for review Reuses DEEPSEEK_API_KEY pattern from Hall-planer. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- apps/api/.env.example | 6 + apps/api/package.json | 5 + apps/api/src/env.ts | 5 + apps/api/src/modules/templates/deepseek.ts | 48 ++ apps/api/src/modules/templates/extract.ts | 114 ++++ .../src/modules/templates/import.routes.ts | 95 +++ apps/api/src/modules/templates/recognize.ts | 156 +++++ apps/api/src/server.ts | 4 + apps/web/src/pages/Templates.tsx | 103 +++- apps/web/src/styles.css | 17 + docker/Dockerfile.api | 7 +- docker/docker-compose.yml | 3 + package-lock.json | 558 +++++++++++++++++- 13 files changed, 1114 insertions(+), 7 deletions(-) create mode 100644 apps/api/src/modules/templates/deepseek.ts create mode 100644 apps/api/src/modules/templates/extract.ts create mode 100644 apps/api/src/modules/templates/import.routes.ts create mode 100644 apps/api/src/modules/templates/recognize.ts diff --git a/apps/api/.env.example b/apps/api/.env.example index a388b72..42763a0 100644 --- a/apps/api/.env.example +++ b/apps/api/.env.example @@ -35,6 +35,12 @@ DEFAULT_ORGANIZATION_ID=00000000-0000-0000-0000-000000000001 # Бесплатно до 10000 запросов/сутки. DADATA_API_KEY= +# DeepSeek — разбор загруженных шаблонов (DOCX/PDF/скан) в DocBody-структуру. +# Получить ключ: https://platform.deepseek.com/api_keys +DEEPSEEK_API_KEY= +# DEEPSEEK_BASE_URL=https://api.deepseek.com +# DEEPSEEK_MODEL=deepseek-chat + # --- Dev-only --- # Если 1 — пропускает проверку JWT и подсовывает фейкового admin'а. # В production отказывается стартовать с этой переменной. 
diff --git a/apps/api/package.json b/apps/api/package.json index 2dbeee8..44922b6 100644 --- a/apps/api/package.json +++ b/apps/api/package.json @@ -20,15 +20,20 @@ "@fastify/cookie": "^9.4.0", "@fastify/cors": "^9.0.1", "@fastify/helmet": "^11.1.1", + "@fastify/multipart": "^10.0.0", "@prisma/client": "^5.22.0", "fastify": "^4.28.1", "fastify-plugin": "^4.5.1", "jose": "^5.9.6", + "mammoth": "^1.12.0", + "node-tesseract-ocr": "^2.2.1", + "pdf-parse": "^2.4.5", "puppeteer-core": "^24.42.0", "zod": "^3.23.8" }, "devDependencies": { "@types/node": "^20.16.0", + "@types/pdf-parse": "^1.1.5", "embedded-postgres": "^18.3.0-beta.17", "pino-pretty": "^11.3.0", "prisma": "^5.22.0", diff --git a/apps/api/src/env.ts b/apps/api/src/env.ts index 58246dd..5ee2600 100644 --- a/apps/api/src/env.ts +++ b/apps/api/src/env.ts @@ -25,6 +25,11 @@ const EnvSchema = z.object({ // Получить: https://dadata.ru/api/find-party/ (нужен «API-ключ для приложения», не secret) DADATA_API_KEY: z.string().optional(), + // DeepSeek (https://api.deepseek.com) — для разбора документов в DocBody. + DEEPSEEK_API_KEY: z.string().optional(), + DEEPSEEK_BASE_URL: z.string().url().default('https://api.deepseek.com'), + DEEPSEEK_MODEL: z.string().default('deepseek-chat'), + DEFAULT_ORGANIZATION_ID: z.string().uuid().default('00000000-0000-0000-0000-000000000001'), // Только для локальной разработки. В проде — НИКОГДА. Hard-check ниже. 
diff --git a/apps/api/src/modules/templates/deepseek.ts b/apps/api/src/modules/templates/deepseek.ts
new file mode 100644
index 0000000..88b833f
--- /dev/null
+++ b/apps/api/src/modules/templates/deepseek.ts
@@ -0,0 +1,80 @@
+import { env } from '../../env.js';
+
+type ChatMessage = { role: 'system' | 'user' | 'assistant'; content: string };
+
+type DeepSeekResponse = {
+  choices: { message: { content: string } }[];
+};
+
+/**
+ * Sends a chat-completions request to DeepSeek in JSON mode and returns the
+ * parsed JSON content of the first choice.
+ *
+ * The content is only `JSON.parse`d, never validated: `T` is the caller's
+ * assumption about its shape, so callers must validate the result themselves.
+ *
+ * @param messages Conversation to send (system / user / assistant turns).
+ * @param opts `temperature` (default 0.2) and `maxTokens` (default 8000).
+ * @returns The parsed JSON value, typed as `T`.
+ * @throws Error carrying a `code`: `NO_DEEPSEEK_KEY`, `DEEPSEEK_HTTP_ERROR`
+ *   (non-2xx answer, network failure or the 120 s timeout),
+ *   `DEEPSEEK_EMPTY_RESPONSE` or `DEEPSEEK_BAD_JSON`.
+ */
+export async function deepseekJsonChat<T>(
+  messages: ChatMessage[],
+  opts: { temperature?: number; maxTokens?: number } = {},
+): Promise<T> {
+  if (!env.DEEPSEEK_API_KEY) {
+    throw Object.assign(new Error('DeepSeek API ключ не настроен (DEEPSEEK_API_KEY).'), {
+      code: 'NO_DEEPSEEK_KEY',
+    });
+  }
+  // A trailing slash in DEEPSEEK_BASE_URL would otherwise produce `//chat/completions`.
+  const endpoint = `${env.DEEPSEEK_BASE_URL.replace(/\/+$/, '')}/chat/completions`;
+  let res: Response;
+  try {
+    res = await fetch(endpoint, {
+      method: 'POST',
+      headers: {
+        Authorization: `Bearer ${env.DEEPSEEK_API_KEY}`,
+        'Content-Type': 'application/json',
+      },
+      body: JSON.stringify({
+        model: env.DEEPSEEK_MODEL,
+        messages,
+        response_format: { type: 'json_object' },
+        temperature: opts.temperature ?? 0.2,
+        max_tokens: opts.maxTokens ?? 8000,
+      }),
+      signal: AbortSignal.timeout(120_000),
+    });
+  } catch (e) {
+    // fetch rejects on network failures and when the timeout signal fires; tag the
+    // error so callers can tell an upstream outage from a bug of our own.
+    const reason = e instanceof Error ? e.message : String(e);
+    throw Object.assign(new Error(`DeepSeek недоступен: ${reason}`), {
+      code: 'DEEPSEEK_HTTP_ERROR',
+      cause: e,
+    });
+  }
+  if (!res.ok) {
+    const body = await res.text().catch(() => '');
+    throw Object.assign(new Error(`DeepSeek ${res.status}: ${body.slice(0, 300)}`), {
+      code: 'DEEPSEEK_HTTP_ERROR',
+      status: res.status,
+    });
+  }
+  const data = (await res.json()) as DeepSeekResponse;
+  const content = data.choices?.[0]?.message?.content;
+  if (!content) {
+    throw Object.assign(new Error('DeepSeek: пустой ответ'), { code: 'DEEPSEEK_EMPTY_RESPONSE' });
+  }
+  try {
+    return JSON.parse(content) as T;
+  } catch {
+    throw Object.assign(new Error('DeepSeek вернул не валидный JSON'), {
+      code: 'DEEPSEEK_BAD_JSON',
+      raw: content,
+    });
+  }
+}
diff --git a/apps/api/src/modules/templates/extract.ts b/apps/api/src/modules/templates/extract.ts
new file mode 100644
index 0000000..0e9f616
--- /dev/null
+++ b/apps/api/src/modules/templates/extract.ts
@@ -0,0 +1,151 @@
+import { execFile } from 'node:child_process';
+import { mkdtemp, readFile, readdir, rm, writeFile } from 'node:fs/promises';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
+import { promisify } from 'node:util';
+import mammoth from 'mammoth';
+import * as pdfParseNs from 'pdf-parse';
+
+type PdfText = { text: string; numpages: number };
+// pdf-parse 1.x: the module (or its default export) is itself `pdf(buffer)`.
+type PdfParseV1 = (buf: Buffer) => Promise<PdfText>;
+// pdf-parse 2.x (package.json pins ^2.4.5): a `PDFParse` class replaces the function.
+type PdfParseV2 = new (opts: { data: Uint8Array }) => {
+  getText(): Promise<{ text?: string; total?: number; pages?: unknown[] }>;
+  destroy(): Promise<void> | void;
+};
+
+/**
+ * Reads the text layer of a PDF and reports `{ text, numpages }` regardless of
+ * which pdf-parse major is installed (1.x: callable export; 2.x: `PDFParse` class).
+ * The previous `default ?? namespace` normalisation assumed a callable export.
+ * NOTE(review): the 2.x shape used here (`new PDFParse({ data })`, `getText()`
+ * resolving to `text` / `total` / `pages`, `destroy()`) follows the pdf-parse 2.x
+ * README — confirm against the installed version.
+ */
+async function pdfParse(buf: Buffer): Promise<PdfText> {
+  const ns = pdfParseNs as unknown as { PDFParse?: PdfParseV2; default?: unknown };
+  const Parser = ns.PDFParse ?? (ns.default as { PDFParse?: PdfParseV2 } | undefined)?.PDFParse;
+  if (typeof Parser === 'function') {
+    // Pass a copy: the caller reuses `buf` for the OCR fallback, so the parser
+    // must not be able to consume or detach the original bytes.
+    const parser = new Parser({ data: new Uint8Array(buf) });
+    try {
+      const r = await parser.getText();
+      return { text: r.text ?? '', numpages: r.total ?? r.pages?.length ?? 0 };
+    } finally {
+      try {
+        await parser.destroy();
+      } catch {
+        // Best-effort cleanup; a failed destroy must not mask the parse result.
+      }
+    }
+  }
+  const legacy: unknown = typeof ns.default === 'function' ? ns.default : ns;
+  if (typeof legacy !== 'function') {
+    throw new Error('pdf-parse: unsupported module shape (no PDFParse class, no callable export)');
+  }
+  return (legacy as PdfParseV1)(buf);
+}
+
+const execFileP = promisify(execFile);
+
+export type ExtractedSource = 'docx' | 'pdf-text' | 'pdf-ocr' | 'image-ocr';
+export type Extracted = { text: string; source: ExtractedSource; pageCount?: number };
+
+const OCR_LANGS = 'rus+eng';
+
+/**
+ * Extracts readable text from an arbitrary uploaded document.
+ * - DOCX → mammoth (no OCR);
+ * - PDF with a text layer → pdf-parse;
+ * - scanned PDF → pdftoppm to PNG → tesseract;
+ * - PNG/JPG → tesseract directly.
+ *
+ * If tesseract/poppler is missing on the host, non-DOCX input fails with code=NO_OCR.
+ */ +export async function extractText(buf: Buffer, mime: string, filename: string): Promise { + const lower = (mime + ' ' + filename).toLowerCase(); + if (lower.includes('wordprocessingml') || lower.endsWith('.docx')) { + const r = await mammoth.extractRawText({ buffer: buf }); + return { text: cleanup(r.value), source: 'docx' }; + } + if (mime.includes('pdf') || lower.endsWith('.pdf')) { + return extractFromPdf(buf); + } + if (mime.startsWith('image/') || /\.(png|jpe?g|tiff?|bmp)$/i.test(filename)) { + const text = await ocrImageBuffer(buf, '.png'); + return { text: cleanup(text), source: 'image-ocr' }; + } + throw Object.assign(new Error(`Неподдерживаемый формат: ${mime || filename}`), { + code: 'UNSUPPORTED_MIME', + }); +} + +async function extractFromPdf(buf: Buffer): Promise { + const parsed = await pdfParse(buf); + const text = cleanup(parsed.text); + // Эвристика: если текст-слой даёт меньше 60 символов на страницу — это сканированный PDF. + const charsPerPage = parsed.numpages > 0 ? text.length / parsed.numpages : text.length; + if (text.length > 200 && charsPerPage > 60) { + return { text, source: 'pdf-text', pageCount: parsed.numpages }; + } + // OCR pipeline + const ocr = await ocrPdfBuffer(buf); + return { text: cleanup(ocr), source: 'pdf-ocr', pageCount: parsed.numpages }; +} + +async function ocrImageBuffer(buf: Buffer, ext: string): Promise { + const dir = await mkdtemp(join(tmpdir(), 'docmgr-ocr-')); + try { + const inFile = join(dir, `in${ext}`); + await writeFile(inFile, buf); + const { stdout } = await execFileP('tesseract', [inFile, 'stdout', '-l', OCR_LANGS], { + maxBuffer: 32 * 1024 * 1024, + }); + return stdout; + } catch (e) { + if ((e as { code?: string }).code === 'ENOENT') { + throw Object.assign(new Error('Tesseract не установлен в окружении. 
PDF-сканы обработать не получится.'), { + code: 'NO_OCR', + }); + } + throw e; + } finally { + await rm(dir, { recursive: true, force: true }); + } +} + +async function ocrPdfBuffer(buf: Buffer): Promise { + const dir = await mkdtemp(join(tmpdir(), 'docmgr-pdf-')); + try { + const inFile = join(dir, 'in.pdf'); + await writeFile(inFile, buf); + // pdftoppm input.pdf out -png -r 200 → out-1.png, out-2.png ... + try { + await execFileP('pdftoppm', [inFile, join(dir, 'out'), '-png', '-r', '200']); + } catch (e) { + if ((e as { code?: string }).code === 'ENOENT') { + throw Object.assign(new Error('pdftoppm (poppler-utils) не установлен.'), { code: 'NO_OCR' }); + } + throw e; + } + const files = (await readdir(dir)).filter((f) => f.startsWith('out-') && f.endsWith('.png')).sort(); + const parts: string[] = []; + for (const f of files) { + const pageBuf = await readFile(join(dir, f)); + parts.push(await ocrImageBuffer(pageBuf, '.png')); + } + return parts.join('\n\n'); + } finally { + await rm(dir, { recursive: true, force: true }); + } +} + +function cleanup(s: string): string { + return s + .replace(/­/g, '') // soft hyphens + .replace(/[ \t]+\n/g, '\n') + .replace(/\n{3,}/g, '\n\n') + .trim(); +} diff --git a/apps/api/src/modules/templates/import.routes.ts b/apps/api/src/modules/templates/import.routes.ts new file mode 100644 index 0000000..adfb8f1 --- /dev/null +++ b/apps/api/src/modules/templates/import.routes.ts @@ -0,0 +1,95 @@ +import type { FastifyInstance } from 'fastify'; +import { Prisma } from '@prisma/client'; +import { prisma } from '../../db.js'; +import { getOrganizationId } from '../../lib/org.js'; +import { extractText } from './extract.js'; +import { recognizeTemplate } from './recognize.js'; + +const MAX_BYTES = 20 * 1024 * 1024; // 20 MB + +export async function templatesImportRoutes(app: FastifyInstance) { + app.post( + '/api/templates/import', + { preHandler: app.requireDocPermission('user') }, + async (req, reply) => { + const orgId = 
getOrganizationId(req); + const file = await req.file({ limits: { fileSize: MAX_BYTES } }); + if (!file) { + reply.code(400).send({ error: 'no_file' }); + return; + } + const buf = await file.toBuffer(); + const filename = file.filename || 'template'; + const mime = file.mimetype || ''; + + // Опц. имя/тип передаются полями формы; если нет — определит LLM + const fields = (file.fields ?? {}) as Record; + const userName = typeof fields.name?.value === 'string' ? fields.name.value : null; + const userDocType = typeof fields.docType?.value === 'string' ? fields.docType.value : null; + + try { + // 1) Извлекаем текст + const extracted = await extractText(buf, mime, filename); + if (extracted.text.trim().length < 30) { + reply.code(422).send({ + error: 'empty_extracted_text', + message: 'Не удалось извлечь читаемый текст. Возможно, скан плохого качества.', + source: extracted.source, + }); + return; + } + + // 2) Распознаём через DeepSeek + const recognized = await recognizeTemplate(extracted.text); + const docType = (userDocType as 'contract' | 'invoice' | 'act' | 'upd') ?? 
recognized.docType; + const name = userName?.trim() || recognized.title || `Импорт ${new Date().toLocaleDateString('ru-RU')}`; + + const created = await prisma.documentTemplate.create({ + data: { + organizationId: orgId, + docType, + name, + body: recognized.docBody as Prisma.InputJsonValue, + }, + }); + + reply.code(201).send({ + template: created, + extractedFrom: extracted.source, + textLength: extracted.text.length, + }); + } catch (e) { + const err = e as { code?: string; message?: string; status?: number }; + if (err.code === 'NO_DEEPSEEK_KEY') { + reply.code(503).send({ error: 'no_deepseek_key', message: err.message }); + return; + } + if (err.code === 'NO_OCR') { + reply.code(503).send({ error: 'no_ocr', message: err.message }); + return; + } + if (err.code === 'UNSUPPORTED_MIME') { + reply.code(415).send({ error: 'unsupported_mime', message: err.message }); + return; + } + if (err.code === 'INVALID_DOC_BODY') { + reply.code(502).send({ + error: 'invalid_doc_body', + message: 'LLM вернула структуру, не прошедшую валидацию. Попробуйте загрузить ещё раз или отредактировать вручную.', + }); + return; + } + if (err.code === 'DEEPSEEK_HTTP_ERROR') { + reply.code(502).send({ + error: 'deepseek_error', + message: err.message, + status: err.status ?? 502, + }); + return; + } + app.log.error({ err: e }, 'template import failed'); + reply.code(500).send({ error: 'import_failed', message: err.message ?? 'unknown' }); + } + }, + ); +} diff --git a/apps/api/src/modules/templates/recognize.ts b/apps/api/src/modules/templates/recognize.ts new file mode 100644 index 0000000..dc29367 --- /dev/null +++ b/apps/api/src/modules/templates/recognize.ts @@ -0,0 +1,156 @@ +import type { DocType } from '@prisma/client'; +import { DocBody as DocBodySchema } from '@doc-manager/shared'; +import { deepseekJsonChat } from './deepseek.js'; + +// LLM возвращает упрощённую структуру (string text вместо TipTap-JSON). +// Конвертим её в полный DocBody с RichText на нашей стороне. 
+ +type LlmBlock = + | { type: 'heading'; level: 1 | 2 | 3; text: string } + | { type: 'paragraph'; text: string } + | { type: 'party'; role: 'executor' | 'customer'; bind: { kind: 'self' | 'client' } } + | { type: 'services_table'; columns?: string[]; lines?: [] } + | { type: 'totals'; showVat?: boolean; showInWords?: boolean } + | { type: 'terms'; text: string } + | { type: 'signatures'; sides: ('executor' | 'customer')[] } + | { type: 'custom_text'; text: string } + | { type: 'page_break' }; + +type LlmResult = { + docType?: 'contract' | 'invoice' | 'act' | 'upd'; + title?: string; + blocks: LlmBlock[]; +}; + +const SYSTEM_PROMPT = `Ты — парсер юридических документов на русском языке. Получаешь plain-text договора, счёта, акта или УПД и возвращаешь его структуру строго в JSON формате DocBody. + +Цель: создать ШАБЛОН, в котором конкретные данные сторон, номера, даты — заменены на плейсхолдеры. Шаблон потом будет инстансироваться для конкретного клиента и документа. + +Правила замены на плейсхолдеры (заменяй ТОЛЬКО эти сущности, остальной текст оставь как есть): +- Конкретный номер договора/счёта → {{contract.number}} +- Конкретная дата документа → {{contract.date}} +- Текущая дата (на момент рендера) → {{today}} +- Реквизиты исполнителя (наша компания): + {{executor.name}}, {{executor.inn}}, {{executor.kpp}}, {{executor.ogrn}}, {{executor.legalAddress}}, + {{executor.signatoryName}}, {{executor.signatoryPosition}}, + {{executor.bankName}}, {{executor.bankBik}}, {{executor.bankAccount}} +- Реквизиты заказчика (клиента): + {{customer.name}}, {{customer.inn}}, {{customer.kpp}}, {{customer.address}}, + {{customer.email}}, {{customer.phone}}, {{customer.contactPerson}} + +Структура ответа (JSON-объект): +{ + "docType": "contract" | "invoice" | "act" | "upd", + "title": "Договор оказания услуг", + "blocks": [ + { "type": "heading", "level": 1, "text": "Договор оказания услуг № {{contract.number}} от {{contract.date}}" }, + { "type": "paragraph", "text": "..." 
}, + { "type": "party", "role": "executor", "bind": { "kind": "self" } }, + { "type": "party", "role": "customer", "bind": { "kind": "client" } }, + { "type": "services_table", "columns": ["name","qty","unit","price","vat","sum"], "lines": [] }, + { "type": "totals", "showVat": true, "showInWords": true }, + { "type": "terms", "text": "..." }, + { "type": "signatures", "sides": ["executor","customer"] }, + { "type": "custom_text", "text": "..." }, + { "type": "page_break" } + ] +} + +Важные правила: +1. Если в документе встречается реквизитный блок одной из сторон (ИНН/КПП/адрес/банк) — выдай ОДИН блок "party" с правильным role, не добавляй параграф с текстовым перечислением реквизитов. +2. Услуги/работы в табличной форме — НЕ переписывай построчно, ставь блок "services_table" (lines пустой массив, строки пользователь добавит вручную). +3. Если есть итоговые суммы (Итого, в т.ч. НДС, сумма прописью) — ставь блок "totals" со включенными нужными опциями. +4. Подписи в конце — блок "signatures" с указанием каких сторон видно. +5. Нумерованные пункты «1. Предмет договора», «2. Цена и порядок расчётов» и т.п. — каждый раздел как блок "terms" (заголовок раздела можно положить отдельным "heading" level 2). +6. Не выдумывай блоки, которых нет в документе. +7. text внутри блоков — обычная строка (не TipTap JSON), может содержать \\n для новых параграфов внутри блока. +8. Заполняй "title" коротким названием документа. +9. Если не уверен в типе документа — ставь "contract".`; + +function uid(): string { + return Math.random().toString(36).slice(2, 11); +} + +function plainToRich(text: string): unknown { + const lines = text.split(/\r?\n/); + return { + type: 'doc', + content: lines.map((line) => ({ + type: 'paragraph', + content: line ? [{ type: 'text', text: line }] : [], + })), + }; +} + +function llmToDocBody(result: LlmResult): { docBody: unknown; docType: DocType; title: string } { + const docType: DocType = (result.docType as DocType) ?? 
'contract'; + const title = result.title ?? 'Документ'; + const blocks = (result.blocks ?? []).map((b) => { + const id = uid(); + switch (b.type) { + case 'heading': + return { id, type: 'heading', level: clampLevel(b.level), text: plainToRich(b.text) }; + case 'paragraph': + return { id, type: 'paragraph', text: plainToRich(b.text) }; + case 'party': + return { + id, + type: 'party', + role: b.role, + bind: b.bind?.kind === 'self' ? { kind: 'self' as const } : { kind: 'client' as const }, + }; + case 'services_table': + return { + id, + type: 'services_table', + columns: ['name', 'qty', 'unit', 'price', 'vat', 'sum'] as const, + lines: [], + }; + case 'totals': + return { id, type: 'totals', showVat: b.showVat ?? true, showInWords: b.showInWords ?? true }; + case 'terms': + return { id, type: 'terms', text: plainToRich(b.text) }; + case 'signatures': + return { id, type: 'signatures', sides: b.sides?.length ? b.sides : (['executor', 'customer'] as const) }; + case 'custom_text': + return { id, type: 'custom_text', text: plainToRich(b.text) }; + case 'page_break': + return { id, type: 'page_break' }; + default: + return { id, type: 'custom_text', text: plainToRich('') }; + } + }); + const docBody = { version: 1, blocks, vars: {} }; + return { docBody, docType, title }; +} + +function clampLevel(l: number): 1 | 2 | 3 { + if (l === 1 || l === 2 || l === 3) return l; + return 1; +} + +export async function recognizeTemplate(text: string): Promise<{ docBody: unknown; docType: DocType; title: string; raw?: unknown }> { + // Документ может быть длинным — DeepSeek-chat умеет до 64k context, сожмём только если совсем огромный + const trimmed = text.length > 60_000 ? 
text.slice(0, 60_000) + '\n\n…[документ обрезан]' : text; + + const llm = await deepseekJsonChat( + [ + { role: 'system', content: SYSTEM_PROMPT }, + { role: 'user', content: trimmed }, + ], + { temperature: 0.1, maxTokens: 8000 }, + ); + + const built = llmToDocBody(llm); + + // Финальная валидация на нашей стороне + const parsed = DocBodySchema.safeParse(built.docBody); + if (!parsed.success) { + throw Object.assign(new Error('DocBody после LLM не прошёл валидацию'), { + code: 'INVALID_DOC_BODY', + issues: parsed.error.flatten(), + raw: built.docBody, + }); + } + return { ...built, docBody: parsed.data, raw: llm }; +} diff --git a/apps/api/src/server.ts b/apps/api/src/server.ts index 178b9a6..ec1ed9b 100644 --- a/apps/api/src/server.ts +++ b/apps/api/src/server.ts @@ -3,6 +3,7 @@ import Fastify from 'fastify'; import cookie from '@fastify/cookie'; import cors from '@fastify/cors'; import helmet from '@fastify/helmet'; +import multipart from '@fastify/multipart'; import { env } from './env.js'; import authPlugin from './plugins/auth.js'; import { healthRoutes } from './routes/health.js'; @@ -14,6 +15,7 @@ import { servicesRoutes } from './modules/services/routes.js'; import { documentsRoutes } from './modules/documents/routes.js'; import { documentsPdfRoutes } from './modules/documents/pdf.routes.js'; import { templatesRoutes } from './modules/templates/routes.js'; +import { templatesImportRoutes } from './modules/templates/import.routes.js'; import { dadataRoutes } from './modules/dadata/routes.js'; import { shutdownBrowser } from './modules/documents/pdf.js'; import activeOrgPlugin from './plugins/activeOrg.js'; @@ -38,6 +40,7 @@ async function main() { credentials: true, }); await app.register(cookie); + await app.register(multipart, { limits: { fileSize: 25 * 1024 * 1024 } }); await app.register(authPlugin); await app.register(activeOrgPlugin); @@ -50,6 +53,7 @@ async function main() { await app.register(documentsRoutes); await 
app.register(documentsPdfRoutes); await app.register(templatesRoutes); + await app.register(templatesImportRoutes); await app.register(dadataRoutes); app.addHook('onClose', async () => { diff --git a/apps/web/src/pages/Templates.tsx b/apps/web/src/pages/Templates.tsx index e2fc006..a2a56b3 100644 --- a/apps/web/src/pages/Templates.tsx +++ b/apps/web/src/pages/Templates.tsx @@ -1,8 +1,9 @@ -import { useEffect, useState } from 'react'; +import { useEffect, useRef, useState } from 'react'; import { Link, useNavigate } from 'react-router-dom'; -import { api, type DocBody, type DocType, type DocumentTemplate } from '../api.js'; +import { api, ApiError, type DocBody, type DocType, type DocumentTemplate } from '../api.js'; import { Button, EmptyState, Field, Modal, Select } from '../components/ui.js'; import { emptyRich } from '../lib/richtext.js'; +import { redirectToLogin } from '../auth.js'; const DOC_TYPE_LABEL: Record = { contract: 'Договор', invoice: 'Счёт', act: 'Акт', upd: 'УПД', @@ -22,10 +23,18 @@ function emptyBody(): DocBody { }; } +type ImportState = + | { stage: 'idle' } + | { stage: 'uploading' | 'analyzing'; filename: string } + | { stage: 'error'; message: string }; + export function TemplatesPage() { const [items, setItems] = useState(null); const [creating, setCreating] = useState<{ name: string; docType: DocType } | null>(null); + const [importState, setImportState] = useState({ stage: 'idle' }); + const [importDocType, setImportDocType] = useState('contract'); const [error, setError] = useState(null); + const fileInputRef = useRef(null); const navigate = useNavigate(); async function load() { @@ -76,15 +85,99 @@ export function TemplatesPage() { } } + async function importFile(file: File) { + setImportState({ stage: 'uploading', filename: file.name }); + const fd = new FormData(); + fd.append('docType', importDocType); + fd.append('file', file); + try { + const res = await fetch('/api/templates/import', { + method: 'POST', + credentials: 'include', + 
body: fd, + }); + if (res.status === 401) redirectToLogin(); + setImportState({ stage: 'analyzing', filename: file.name }); + const text = await res.text(); + const data = text ? JSON.parse(text) : {}; + if (!res.ok) { + const msg = + data.error === 'no_deepseek_key' + ? 'DeepSeek API ключ не настроен на сервере.' + : data.error === 'no_ocr' + ? 'OCR (tesseract) недоступен на сервере. Загрузка сканов невозможна.' + : data.error === 'unsupported_mime' + ? 'Неподдерживаемый формат файла. DOCX, PDF, PNG, JPG.' + : data.error === 'empty_extracted_text' + ? 'Не удалось извлечь текст (плохое качество скана).' + : data.error === 'invalid_doc_body' + ? 'LLM вернула невалидную структуру. Попробуйте ещё раз.' + : data.message || `${data.error} (HTTP ${res.status})`; + setImportState({ stage: 'error', message: msg }); + return; + } + const tpl: DocumentTemplate = data.template; + setImportState({ stage: 'idle' }); + navigate(`/templates/${tpl.id}`); + } catch (e) { + setImportState({ stage: 'error', message: String(e) }); + } + } + + function onPickFile(e: React.ChangeEvent) { + const f = e.target.files?.[0]; + if (f) void importFile(f); + e.target.value = ''; + } + return (

Шаблоны

- +
+ + +
+ {importState.stage === 'uploading' || importState.stage === 'analyzing' ? ( +
+
+ + {importState.stage === 'uploading' ? 'Загружаю' : 'Распознаю'} «{importState.filename}»… + {importState.stage === 'analyzing' ? ' DeepSeek анализирует структуру (10–60 сек).' : ''} + +
+ ) : null} + {importState.stage === 'error' ? ( +
+ Импорт не удался: {importState.message} + +
+ ) : null} + {error ?
{error}
: null} {items === null ? ( diff --git a/apps/web/src/styles.css b/apps/web/src/styles.css index 38461a9..eeca75c 100644 --- a/apps/web/src/styles.css +++ b/apps/web/src/styles.css @@ -275,6 +275,23 @@ body { .tabs { border-bottom-color: #2a2e35; } } +/* === import banner === */ +.import-banner { + display: flex; align-items: center; gap: 12px; + padding: 12px 16px; margin: 12px 0; + background: #eff6ff; border: 1px solid #bfdbfe; color: #1e40af; + border-radius: 8px; +} +.spinner { + width: 16px; height: 16px; border-radius: 50%; + border: 2px solid #bfdbfe; border-top-color: #2563eb; + animation: spin 0.8s linear infinite; +} +@keyframes spin { to { transform: rotate(360deg); } } +@media (prefers-color-scheme: dark) { + .import-banner { background: #14213d; border-color: #1e3a8a; color: #93c5fd; } +} + /* === inn lookup === */ .inn-lookup { margin-top: 6px; display: flex; flex-direction: column; gap: 4px; } .inn-lookup__error { font-size: 12px; color: #c0392b; } diff --git a/docker/Dockerfile.api b/docker/Dockerfile.api index a49164c..a55d8c7 100644 --- a/docker/Dockerfile.api +++ b/docker/Dockerfile.api @@ -13,7 +13,12 @@ RUN apk add --no-cache \ ca-certificates \ ttf-dejavu \ ttf-liberation \ - font-noto-cjk + font-noto-cjk \ + tesseract-ocr \ + tesseract-ocr-data-rus \ + tesseract-ocr-data-eng \ + poppler-utils \ + imagemagick ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium-browser ENV PUPPETEER_SKIP_DOWNLOAD=true diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index bf01ab7..97e6369 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -42,6 +42,9 @@ services: TOCHKA_JWT_KEY: ${TOCHKA_JWT_KEY:-} TOCHKA_WEBHOOK_SECRET: ${TOCHKA_WEBHOOK_SECRET:-} DADATA_API_KEY: ${DADATA_API_KEY:-} + DEEPSEEK_API_KEY: ${DEEPSEEK_API_KEY:-} + DEEPSEEK_BASE_URL: ${DEEPSEEK_BASE_URL:-https://api.deepseek.com} + DEEPSEEK_MODEL: ${DEEPSEEK_MODEL:-deepseek-chat} DEFAULT_ORGANIZATION_ID: 
${DEFAULT_ORGANIZATION_ID:-00000000-0000-0000-0000-000000000001} DEV_BYPASS_AUTH: "0" expose: diff --git a/package-lock.json b/package-lock.json index 352877d..6e8d856 100644 --- a/package-lock.json +++ b/package-lock.json @@ -26,15 +26,20 @@ "@fastify/cookie": "^9.4.0", "@fastify/cors": "^9.0.1", "@fastify/helmet": "^11.1.1", + "@fastify/multipart": "^10.0.0", "@prisma/client": "^5.22.0", "fastify": "^4.28.1", "fastify-plugin": "^4.5.1", "jose": "^5.9.6", + "mammoth": "^1.12.0", + "node-tesseract-ocr": "^2.2.1", + "pdf-parse": "^2.4.5", "puppeteer-core": "^24.42.0", "zod": "^3.23.8" }, "devDependencies": { "@types/node": "^20.16.0", + "@types/pdf-parse": "^1.1.5", "embedded-postgres": "^18.3.0-beta.17", "pino-pretty": "^11.3.0", "prisma": "^5.22.0", @@ -950,6 +955,12 @@ "fast-uri": "^2.0.0" } }, + "node_modules/@fastify/busboy": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/@fastify/busboy/-/busboy-3.2.0.tgz", + "integrity": "sha512-m9FVDXU3GT2ITSe0UaMA5rU3QkfC/UXtCU8y0gSN/GugTqtVldOBWIB5V6V3sbmenVZUIpU6f+mPEO2+m5iTaA==", + "license": "MIT" + }, "node_modules/@fastify/cookie": { "version": "9.4.0", "resolved": "https://registry.npmjs.org/@fastify/cookie/-/cookie-9.4.0.tgz", @@ -970,6 +981,22 @@ "mnemonist": "0.39.6" } }, + "node_modules/@fastify/deepmerge": { + "version": "3.2.1", + "resolved": "https://registry.npmjs.org/@fastify/deepmerge/-/deepmerge-3.2.1.tgz", + "integrity": "sha512-N5Oqvltoa2r9z1tbx4xjky0oRR60v+T47Ic4J1ukoVQcptLOrIdRnCSdTGmOmajZuHVKlTnfcmrjyqsGEW1ztA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fastify" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/fastify" + } + ], + "license": "MIT" + }, "node_modules/@fastify/error": { "version": "3.4.1", "resolved": "https://registry.npmjs.org/@fastify/error/-/error-3.4.1.tgz", @@ -1004,6 +1031,77 @@ "fast-deep-equal": "^3.1.3" } }, + "node_modules/@fastify/multipart": { + "version": "10.0.0", + "resolved": 
"https://registry.npmjs.org/@fastify/multipart/-/multipart-10.0.0.tgz", + "integrity": "sha512-pUx3Z1QStY7E7kwvDTIvB6P+rF5lzP+iqPgZyJyG3yBJVPvQaZxzDHYbQD89rbY0ciXrMOyGi8ezHDVexLvJDA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fastify" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/fastify" + } + ], + "license": "MIT", + "dependencies": { + "@fastify/busboy": "^3.0.0", + "@fastify/deepmerge": "^3.0.0", + "@fastify/error": "^4.0.0", + "fastify-plugin": "^5.0.0", + "secure-json-parse": "^4.0.0" + } + }, + "node_modules/@fastify/multipart/node_modules/@fastify/error": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/@fastify/error/-/error-4.2.0.tgz", + "integrity": "sha512-RSo3sVDXfHskiBZKBPRgnQTtIqpi/7zhJOEmAxCiBcM7d0uwdGdxLlsCaLzGs8v8NnxIRlfG0N51p5yFaOentQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fastify" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/fastify" + } + ], + "license": "MIT" + }, + "node_modules/@fastify/multipart/node_modules/fastify-plugin": { + "version": "5.1.0", + "resolved": "https://registry.npmjs.org/fastify-plugin/-/fastify-plugin-5.1.0.tgz", + "integrity": "sha512-FAIDA8eovSt5qcDgcBvDuX/v0Cjz0ohGhENZ/wpc3y+oZCY2afZ9Baqql3g/lC+OHRnciQol4ww7tuthOb9idw==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fastify" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/fastify" + } + ], + "license": "MIT" + }, + "node_modules/@fastify/multipart/node_modules/secure-json-parse": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/secure-json-parse/-/secure-json-parse-4.1.0.tgz", + "integrity": "sha512-l4KnYfEyqYJxDwlNVyRfO2E4NTHfMKAWdUuA8J0yve2Dz/E/PdBepY03RvyJpssIpRFwJoCD55wA+mEDs6ByWA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fastify" + }, + { + "type": "opencollective", + "url": 
"https://opencollective.com/fastify" + } + ], + "license": "BSD-3-Clause" + }, "node_modules/@jridgewell/gen-mapping": { "version": "0.3.13", "resolved": "https://registry.npmjs.org/@jridgewell/gen-mapping/-/gen-mapping-0.3.13.tgz", @@ -1054,6 +1152,205 @@ "@jridgewell/sourcemap-codec": "^1.4.14" } }, + "node_modules/@napi-rs/canvas": { + "version": "0.1.80", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas/-/canvas-0.1.80.tgz", + "integrity": "sha512-DxuT1ClnIPts1kQx8FBmkk4BQDTfI5kIzywAaMjQSXfNnra5UFU9PwurXrl+Je3bJ6BGsp/zmshVVFbCmyI+ww==", + "license": "MIT", + "workspaces": [ + "e2e/*" + ], + "engines": { + "node": ">= 10" + }, + "optionalDependencies": { + "@napi-rs/canvas-android-arm64": "0.1.80", + "@napi-rs/canvas-darwin-arm64": "0.1.80", + "@napi-rs/canvas-darwin-x64": "0.1.80", + "@napi-rs/canvas-linux-arm-gnueabihf": "0.1.80", + "@napi-rs/canvas-linux-arm64-gnu": "0.1.80", + "@napi-rs/canvas-linux-arm64-musl": "0.1.80", + "@napi-rs/canvas-linux-riscv64-gnu": "0.1.80", + "@napi-rs/canvas-linux-x64-gnu": "0.1.80", + "@napi-rs/canvas-linux-x64-musl": "0.1.80", + "@napi-rs/canvas-win32-x64-msvc": "0.1.80" + } + }, + "node_modules/@napi-rs/canvas-android-arm64": { + "version": "0.1.80", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-android-arm64/-/canvas-android-arm64-0.1.80.tgz", + "integrity": "sha512-sk7xhN/MoXeuExlggf91pNziBxLPVUqF2CAVnB57KLG/pz7+U5TKG8eXdc3pm0d7Od0WreB6ZKLj37sX9muGOQ==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/canvas-darwin-arm64": { + "version": "0.1.80", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-darwin-arm64/-/canvas-darwin-arm64-0.1.80.tgz", + "integrity": "sha512-O64APRTXRUiAz0P8gErkfEr3lipLJgM6pjATwavZ22ebhjYl/SUbpgM0xcWPQBNMP1n29afAC/Us5PX1vg+JNQ==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 10" + } + 
}, + "node_modules/@napi-rs/canvas-darwin-x64": { + "version": "0.1.80", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-darwin-x64/-/canvas-darwin-x64-0.1.80.tgz", + "integrity": "sha512-FqqSU7qFce0Cp3pwnTjVkKjjOtxMqRe6lmINxpIZYaZNnVI0H5FtsaraZJ36SiTHNjZlUB69/HhxNDT1Aaa9vA==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/canvas-linux-arm-gnueabihf": { + "version": "0.1.80", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-arm-gnueabihf/-/canvas-linux-arm-gnueabihf-0.1.80.tgz", + "integrity": "sha512-eyWz0ddBDQc7/JbAtY4OtZ5SpK8tR4JsCYEZjCE3dI8pqoWUC8oMwYSBGCYfsx2w47cQgQCgMVRVTFiiO38hHQ==", + "cpu": [ + "arm" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/canvas-linux-arm64-gnu": { + "version": "0.1.80", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-arm64-gnu/-/canvas-linux-arm64-gnu-0.1.80.tgz", + "integrity": "sha512-qwA63t8A86bnxhuA/GwOkK3jvb+XTQaTiVML0vAWoHyoZYTjNs7BzoOONDgTnNtr8/yHrq64XXzUoLqDzU+Uuw==", + "cpu": [ + "arm64" + ], + "libc": [ + "glibc" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/canvas-linux-arm64-musl": { + "version": "0.1.80", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-arm64-musl/-/canvas-linux-arm64-musl-0.1.80.tgz", + "integrity": "sha512-1XbCOz/ymhj24lFaIXtWnwv/6eFHXDrjP0jYkc6iHQ9q8oXKzUX1Lc6bu+wuGiLhGh2GS/2JlfORC5ZcXimRcg==", + "cpu": [ + "arm64" + ], + "libc": [ + "musl" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/canvas-linux-riscv64-gnu": { + "version": "0.1.80", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-riscv64-gnu/-/canvas-linux-riscv64-gnu-0.1.80.tgz", + "integrity": 
"sha512-XTzR125w5ZMs0lJcxRlS1K3P5RaZ9RmUsPtd1uGt+EfDyYMu4c6SEROYsxyatbbu/2+lPe7MPHOO/0a0x7L/gw==", + "cpu": [ + "riscv64" + ], + "libc": [ + "glibc" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/canvas-linux-x64-gnu": { + "version": "0.1.80", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-x64-gnu/-/canvas-linux-x64-gnu-0.1.80.tgz", + "integrity": "sha512-BeXAmhKg1kX3UCrJsYbdQd3hIMDH/K6HnP/pG2LuITaXhXBiNdh//TVVVVCBbJzVQaV5gK/4ZOCMrQW9mvuTqA==", + "cpu": [ + "x64" + ], + "libc": [ + "glibc" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/canvas-linux-x64-musl": { + "version": "0.1.80", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-x64-musl/-/canvas-linux-x64-musl-0.1.80.tgz", + "integrity": "sha512-x0XvZWdHbkgdgucJsRxprX/4o4sEed7qo9rCQA9ugiS9qE2QvP0RIiEugtZhfLH3cyI+jIRFJHV4Fuz+1BHHMg==", + "cpu": [ + "x64" + ], + "libc": [ + "musl" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/canvas-win32-x64-msvc": { + "version": "0.1.80", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-win32-x64-msvc/-/canvas-win32-x64-msvc-0.1.80.tgz", + "integrity": "sha512-Z8jPsM6df5V8B1HrCHB05+bDiCxjE9QA//3YrkKIdVDEwn5RKaqOxCJDRJkl48cJbylcrJbW4HxZbTte8juuPg==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 10" + } + }, "node_modules/@pinojs/redact": { "version": "0.4.0", "resolved": "https://registry.npmjs.org/@pinojs/redact/-/redact-0.4.0.tgz", @@ -1634,6 +1931,16 @@ "undici-types": "~6.21.0" } }, + "node_modules/@types/pdf-parse": { + "version": "1.1.5", + "resolved": "https://registry.npmjs.org/@types/pdf-parse/-/pdf-parse-1.1.5.tgz", + "integrity": 
"sha512-kBfrSXsloMnUJOKi25s3+hRmkycHfLK6A09eRGqF/N8BkQoPUmaCr+q8Cli5FnfohEz/rsv82zAiPz/LXtOGhA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/node": "*" + } + }, "node_modules/@types/prop-types": { "version": "15.7.15", "resolved": "https://registry.npmjs.org/@types/prop-types/-/prop-types-15.7.15.tgz", @@ -1693,6 +2000,15 @@ "vite": "^4.2.0 || ^5.0.0 || ^6.0.0 || ^7.0.0" } }, + "node_modules/@xmldom/xmldom": { + "version": "0.8.13", + "resolved": "https://registry.npmjs.org/@xmldom/xmldom/-/xmldom-0.8.13.tgz", + "integrity": "sha512-KRYzxepc14G/CEpEGc3Yn+JKaAeT63smlDr+vjB8jRfgTBBI9wRj/nkQEO+ucV8p8I9bfKLWp37uHgFrbntPvw==", + "license": "MIT", + "engines": { + "node": ">=10.0.0" + } + }, "node_modules/abort-controller": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/abort-controller/-/abort-controller-3.0.0.tgz", @@ -1794,6 +2110,15 @@ "url": "https://github.com/chalk/ansi-styles?sponsor=1" } }, + "node_modules/argparse": { + "version": "1.0.10", + "resolved": "https://registry.npmjs.org/argparse/-/argparse-1.0.10.tgz", + "integrity": "sha512-o5Roy6tNG4SL/FOkCAN6RzjiakZS25RLYFrcMttJqbdd8BWrnA+fGz57iN5Pb06pvBGvl5gQ0B48dJlslXvoTg==", + "license": "MIT", + "dependencies": { + "sprintf-js": "~1.0.2" + } + }, "node_modules/ast-types": { "version": "0.13.4", "resolved": "https://registry.npmjs.org/ast-types/-/ast-types-0.13.4.tgz", @@ -1930,7 +2255,6 @@ "version": "1.5.1", "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz", "integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==", - "dev": true, "funding": [ { "type": "github", @@ -1969,6 +2293,12 @@ "node": ">=10.0.0" } }, + "node_modules/bluebird": { + "version": "3.4.7", + "resolved": "https://registry.npmjs.org/bluebird/-/bluebird-3.4.7.tgz", + "integrity": "sha512-iD3898SR7sWVRHbiQv+sHUtHnMvC1o3nW5rAcqnq3uOn07DSAppZYUkIGslDz6gXC7HfunPe7YVBgoEJASPcHA==", + "license": "MIT" + }, "node_modules/browserslist": 
{ "version": "4.28.2", "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.28.2.tgz", @@ -2190,6 +2520,12 @@ "node": ">=6.6.0" } }, + "node_modules/core-util-is": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/core-util-is/-/core-util-is-1.0.3.tgz", + "integrity": "sha512-ZQBvi1DcpJ4GDqanjucZ2Hj3wEO5pZDS89BWbkcrvdxksJorwUDDZamX9ldFkp9aw2lmBDLgkObEA4DWNJ9FYQ==", + "license": "MIT" + }, "node_modules/csstype": { "version": "3.2.3", "resolved": "https://registry.npmjs.org/csstype/-/csstype-3.2.3.tgz", @@ -2253,6 +2589,21 @@ "integrity": "sha512-kRfgp8vWVjBu/fbYCiVFiOqsCk3CrMKEo3WbgGT2NXK2dG7vawWPBljixajVgGK9II8rDO9G0oD0zLt3I1daRg==", "license": "BSD-3-Clause" }, + "node_modules/dingbat-to-unicode": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/dingbat-to-unicode/-/dingbat-to-unicode-1.0.1.tgz", + "integrity": "sha512-98l0sW87ZT58pU4i61wa2OHwxbiYSbuxsCBozaVnYX2iCnr3bLM3fIes1/ej7h1YdOKuKt/MLs706TVnALA65w==", + "license": "BSD-2-Clause" + }, + "node_modules/duck": { + "version": "0.1.12", + "resolved": "https://registry.npmjs.org/duck/-/duck-0.1.12.tgz", + "integrity": "sha512-wkctla1O6VfP89gQ+J/yDesM0S7B7XLXjKGzXxMDVFg7uEn706niAtyYovKbyq1oT9YwDcly721/iUWoc8MVRg==", + "license": "BSD", + "dependencies": { + "underscore": "^1.13.1" + } + }, "node_modules/electron-to-chromium": { "version": "1.5.345", "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.345.tgz", @@ -2777,6 +3128,18 @@ ], "license": "BSD-3-Clause" }, + "node_modules/immediate": { + "version": "3.0.6", + "resolved": "https://registry.npmjs.org/immediate/-/immediate-3.0.6.tgz", + "integrity": "sha512-XXOFtyqDjNDAQxVfYxuF7g9Il/IbWmmlQg2MYKOH8ExIT1qg6xc4zyS3HaEEATgs1btfzxq15ciUiY7gjSXRGQ==", + "license": "MIT" + }, + "node_modules/inherits": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", + "integrity": 
"sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", + "license": "ISC" + }, "node_modules/ip-address": { "version": "10.1.1", "resolved": "https://registry.npmjs.org/ip-address/-/ip-address-10.1.1.tgz", @@ -2804,6 +3167,12 @@ "node": ">=8" } }, + "node_modules/isarray": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/isarray/-/isarray-1.0.0.tgz", + "integrity": "sha512-VLghIWNM6ELQzo7zwmcg0NmTVyWKYjvIeM83yjp0wRDTmUnrM678fQbcKBo6n2CJEF0szoG//ytg+TKla89ALQ==", + "license": "MIT" + }, "node_modules/jose": { "version": "5.10.0", "resolved": "https://registry.npmjs.org/jose/-/jose-5.10.0.tgz", @@ -2870,6 +3239,57 @@ "node": ">=6" } }, + "node_modules/jszip": { + "version": "3.10.1", + "resolved": "https://registry.npmjs.org/jszip/-/jszip-3.10.1.tgz", + "integrity": "sha512-xXDvecyTpGLrqFrvkrUSoxxfJI5AH7U8zxxtVclpsUtMCq4JQ290LY8AW5c7Ggnr/Y/oK+bQMbqK2qmtk3pN4g==", + "license": "(MIT OR GPL-3.0-or-later)", + "dependencies": { + "lie": "~3.3.0", + "pako": "~1.0.2", + "readable-stream": "~2.3.6", + "setimmediate": "^1.0.5" + } + }, + "node_modules/jszip/node_modules/readable-stream": { + "version": "2.3.8", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.8.tgz", + "integrity": "sha512-8p0AUk4XODgIewSi0l8Epjs+EVnWiK7NoDIEGU0HhE7+ZyY8D1IMY7odu5lRrFXGg71L15KG8QrPmum45RTtdA==", + "license": "MIT", + "dependencies": { + "core-util-is": "~1.0.0", + "inherits": "~2.0.3", + "isarray": "~1.0.0", + "process-nextick-args": "~2.0.0", + "safe-buffer": "~5.1.1", + "string_decoder": "~1.1.1", + "util-deprecate": "~1.0.1" + } + }, + "node_modules/jszip/node_modules/safe-buffer": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz", + "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==", + "license": "MIT" + }, + "node_modules/jszip/node_modules/string_decoder": { + "version": "1.1.1", + 
"resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.1.1.tgz", + "integrity": "sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==", + "license": "MIT", + "dependencies": { + "safe-buffer": "~5.1.0" + } + }, + "node_modules/lie": { + "version": "3.3.0", + "resolved": "https://registry.npmjs.org/lie/-/lie-3.3.0.tgz", + "integrity": "sha512-UaiMJzeWRlEujzAuw5LokY1L5ecNQYZKfmyZ9L7wDHb/p5etKaxXhohBcrw0EYby+G/NA52vRSN4N39dxHAIwQ==", + "license": "MIT", + "dependencies": { + "immediate": "~3.0.5" + } + }, "node_modules/light-my-request": { "version": "5.14.0", "resolved": "https://registry.npmjs.org/light-my-request/-/light-my-request-5.14.0.tgz", @@ -2893,6 +3313,17 @@ "loose-envify": "cli.js" } }, + "node_modules/lop": { + "version": "0.4.2", + "resolved": "https://registry.npmjs.org/lop/-/lop-0.4.2.tgz", + "integrity": "sha512-RefILVDQ4DKoRZsJ4Pj22TxE3omDO47yFpkIBoDKzkqPRISs5U1cnAdg/5583YPkWPaLIYHOKRMQSvjFsO26cw==", + "license": "BSD-2-Clause", + "dependencies": { + "duck": "^0.1.12", + "option": "~0.2.1", + "underscore": "^1.13.1" + } + }, "node_modules/lru-cache": { "version": "5.1.1", "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-5.1.1.tgz", @@ -2903,6 +3334,30 @@ "yallist": "^3.0.2" } }, + "node_modules/mammoth": { + "version": "1.12.0", + "resolved": "https://registry.npmjs.org/mammoth/-/mammoth-1.12.0.tgz", + "integrity": "sha512-cwnK1RIcRdDMi2HRx2EXGYlxqIEh0Oo3bLhorgnsVJi2UkbX1+jKxuBNR9PC5+JaX7EkmJxFPmo6mjLpqShI2w==", + "license": "BSD-2-Clause", + "dependencies": { + "@xmldom/xmldom": "^0.8.6", + "argparse": "~1.0.3", + "base64-js": "^1.5.1", + "bluebird": "~3.4.0", + "dingbat-to-unicode": "^1.0.1", + "jszip": "^3.7.1", + "lop": "^0.4.2", + "path-is-absolute": "^1.0.0", + "underscore": "^1.13.1", + "xmlbuilder": "^10.0.0" + }, + "bin": { + "mammoth": "bin/mammoth" + }, + "engines": { + "node": ">=12.0.0" + } + }, "node_modules/minimist": { "version": "1.2.8", "resolved": 
"https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz", @@ -2969,6 +3424,15 @@ "dev": true, "license": "MIT" }, + "node_modules/node-tesseract-ocr": { + "version": "2.2.1", + "resolved": "https://registry.npmjs.org/node-tesseract-ocr/-/node-tesseract-ocr-2.2.1.tgz", + "integrity": "sha512-Q9cD79JGpPNQBxbi1fV+OAsTxYKLpx22sagsxSyKbu1u+t6UarApf5m32uVc8a5QAP1Wk7fIPN0aJFGGEE9DyQ==", + "license": "MIT", + "engines": { + "node": ">=10" + } + }, "node_modules/obliterator": { "version": "2.0.5", "resolved": "https://registry.npmjs.org/obliterator/-/obliterator-2.0.5.tgz", @@ -2993,6 +3457,12 @@ "wrappy": "1" } }, + "node_modules/option": { + "version": "0.2.4", + "resolved": "https://registry.npmjs.org/option/-/option-0.2.4.tgz", + "integrity": "sha512-pkEqbDyl8ou5cpq+VsnQbe/WlEy5qS7xPzMS1U55OCG9KPvwFD46zDbxQIj3egJSFc3D+XhYOPUzz49zQAVy7A==", + "license": "BSD-2-Clause" + }, "node_modules/pac-proxy-agent": { "version": "7.2.0", "resolved": "https://registry.npmjs.org/pac-proxy-agent/-/pac-proxy-agent-7.2.0.tgz", @@ -3025,6 +3495,53 @@ "node": ">= 14" } }, + "node_modules/pako": { + "version": "1.0.11", + "resolved": "https://registry.npmjs.org/pako/-/pako-1.0.11.tgz", + "integrity": "sha512-4hLB8Py4zZce5s4yd9XzopqwVv/yGNhV1Bl8NTmCq1763HeK2+EwVTv+leGeL13Dnh2wfbqowVPXCIO0z4taYw==", + "license": "(MIT AND Zlib)" + }, + "node_modules/path-is-absolute": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz", + "integrity": "sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg==", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/pdf-parse": { + "version": "2.4.5", + "resolved": "https://registry.npmjs.org/pdf-parse/-/pdf-parse-2.4.5.tgz", + "integrity": "sha512-mHU89HGh7v+4u2ubfnevJ03lmPgQ5WU4CxAVmTSh/sxVTEDYd1er/dKS/A6vg77NX47KTEoihq8jZBLr8Cxuwg==", + "license": "Apache-2.0", + "dependencies": { + "@napi-rs/canvas": "0.1.80", + "pdfjs-dist": 
"5.4.296" + }, + "bin": { + "pdf-parse": "bin/cli.mjs" + }, + "engines": { + "node": ">=20.16.0 <21 || >=22.3.0" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/mehmet-kozan" + } + }, + "node_modules/pdfjs-dist": { + "version": "5.4.296", + "resolved": "https://registry.npmjs.org/pdfjs-dist/-/pdfjs-dist-5.4.296.tgz", + "integrity": "sha512-DlOzet0HO7OEnmUmB6wWGJrrdvbyJKftI1bhMitK7O2N8W2gc757yyYBbINy9IDafXAV9wmKr9t7xsTaNKRG5Q==", + "license": "Apache-2.0", + "engines": { + "node": ">=20.16.0 || >=22.3.0" + }, + "optionalDependencies": { + "@napi-rs/canvas": "^0.1.80" + } + }, "node_modules/pend": { "version": "1.2.0", "resolved": "https://registry.npmjs.org/pend/-/pend-1.2.0.tgz", @@ -3316,6 +3833,12 @@ "node": ">= 0.6.0" } }, + "node_modules/process-nextick-args": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-2.0.1.tgz", + "integrity": "sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag==", + "license": "MIT" + }, "node_modules/process-warning": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/process-warning/-/process-warning-3.0.0.tgz", @@ -3683,6 +4206,12 @@ "integrity": "sha512-oeM1lpU/UvhTxw+g3cIfxXHyJRc/uidd3yK1P242gzHds0udQBYzs3y8j4gCCW+ZJ7ad0yctld8RYO+bdurlvw==", "license": "MIT" }, + "node_modules/setimmediate": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/setimmediate/-/setimmediate-1.0.5.tgz", + "integrity": "sha512-MATJdZp8sLqDl/68LfQmbP8zKPLQNV6BIZoIgrscFDQ+RsvK/BxeDQOgyxKKoh0y/8h3BqVFnCqQ/gd+reiIXA==", + "license": "MIT" + }, "node_modules/shell-quote": { "version": "1.8.3", "resolved": "https://registry.npmjs.org/shell-quote/-/shell-quote-1.8.3.tgz", @@ -3772,6 +4301,12 @@ "node": ">= 10.x" } }, + "node_modules/sprintf-js": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.0.3.tgz", + "integrity": 
"sha512-D9cPgkvLlV3t3IzL0D0YLvGA9Ahk4PcvVwUbN0dSGr1aP0Nrt4AEnTUbuGvquEC0mA64Gqt1fzirlRs5ibXx8g==", + "license": "BSD-3-Clause" + }, "node_modules/streamx": { "version": "2.25.0", "resolved": "https://registry.npmjs.org/streamx/-/streamx-2.25.0.tgz", @@ -3994,6 +4529,12 @@ "node": ">=14.17" } }, + "node_modules/underscore": { + "version": "1.13.8", + "resolved": "https://registry.npmjs.org/underscore/-/underscore-1.13.8.tgz", + "integrity": "sha512-DXtD3ZtEQzc7M8m4cXotyHR+FAS18C64asBYY5vqZexfYryNNnDc02W4hKg3rdQuqOYas1jkseX0+nZXjTXnvQ==", + "license": "MIT" + }, "node_modules/undici-types": { "version": "6.21.0", "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz", @@ -4032,6 +4573,12 @@ "browserslist": ">= 4.21.0" } }, + "node_modules/util-deprecate": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", + "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==", + "license": "MIT" + }, "node_modules/vite": { "version": "5.4.21", "resolved": "https://registry.npmjs.org/vite/-/vite-5.4.21.tgz", @@ -4572,6 +5119,15 @@ } } }, + "node_modules/xmlbuilder": { + "version": "10.1.1", + "resolved": "https://registry.npmjs.org/xmlbuilder/-/xmlbuilder-10.1.1.tgz", + "integrity": "sha512-OyzrcFLL/nb6fMGHbiRDuPup9ljBycsdCypwuyg5AAHvyWzGfChJpCXMG88AGTIMFhGZ9RccFN1e6lhg3hkwKg==", + "license": "MIT", + "engines": { + "node": ">=4.0" + } + }, "node_modules/xtend": { "version": "4.0.2", "resolved": "https://registry.npmjs.org/xtend/-/xtend-4.0.2.tgz",