feat: VoiceScript STT+OCR 초기 버전

This commit is contained in:
root
2026-04-20 06:15:35 +09:00
commit ddd51da26e
11 changed files with 2163 additions and 0 deletions

33
app/Dockerfile Normal file
View File

@@ -0,0 +1,33 @@
# Base image for the API container.
FROM python:3.11-slim
# System packages installed with apt; the apt package lists are deleted in the
# same layer so they do not remain in the image.
RUN apt-get update && apt-get install -y \
ffmpeg \
libsndfile1 \
libgomp1 \
libglib2.0-0 \
libsm6 \
libxext6 \
libxrender1 \
libgl1 \
wget \
curl \
&& rm -rf /var/lib/apt/lists/*
WORKDIR /app
COPY requirements.txt .
# PaddlePaddle CPU (AMD64) — compatible with paddleocr 3.x
# NOTE(review): this step pulls from the Tsinghua PyPI mirror — confirm that is intended.
RUN pip install --no-cache-dir paddlepaddle==3.0.0 \
-i https://pypi.tuna.tsinghua.edu.cn/simple
# Remaining packages
RUN pip install --no-cache-dir -r requirements.txt
COPY . .
# Default upload/output directories used by the application.
RUN mkdir -p /data/uploads /data/outputs
EXPOSE 8000
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]

45
app/auth.py Normal file
View File

@@ -0,0 +1,45 @@
import os
from datetime import datetime, timedelta
from fastapi import Depends, HTTPException, status
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
from jose import JWTError, jwt
# JWT signing key, read from JWT_SECRET.
# NOTE(review): when JWT_SECRET is unset a fixed, publicly visible fallback is
# used, so tokens could be forged — confirm deployments always set it.
SECRET_KEY = os.getenv("JWT_SECRET", "fallback-secret-change-this")
# JWT signing algorithm used for both encoding and decoding.
ALGORITHM = "HS256"
# Token lifetime in hours, read from JWT_EXPIRE_HOURS (default 12).
EXPIRE_HOURS = int(os.getenv("JWT_EXPIRE_HOURS", "12"))
# The single account accepted by this module.
# NOTE(review): default credentials apply when the variables are unset.
AUTH_USERNAME = os.getenv("AUTH_USERNAME", "admin")
AUTH_PASSWORD = os.getenv("AUTH_PASSWORD", "changeme1234")
# auto_error=False: a missing Authorization header yields None instead of an
# automatic error, so require_auth can raise its own 401.
bearer = HTTPBearer(auto_error=False)
def authenticate(username: str, password: str) -> bool:
    """Return True when the credentials match the configured account.

    Both values are compared with ``hmac.compare_digest`` so the comparison
    time does not depend on how many leading characters match. Both
    comparisons are always evaluated (no short-circuit) so a wrong username
    is not distinguishable from a wrong password by timing.

    Args:
        username: Login name supplied by the client.
        password: Password supplied by the client.

    Returns:
        True only if both values equal AUTH_USERNAME / AUTH_PASSWORD.
    """
    import hmac  # local import keeps this block self-contained

    # compare_digest only accepts ASCII str, so compare the UTF-8 bytes.
    user_ok = hmac.compare_digest(
        username.encode("utf-8"), AUTH_USERNAME.encode("utf-8")
    )
    pass_ok = hmac.compare_digest(
        password.encode("utf-8"), AUTH_PASSWORD.encode("utf-8")
    )
    return user_ok and pass_ok
def create_access_token(username: str) -> str:
    """Create a signed JWT for *username*.

    The token carries ``sub`` (the username) and ``exp`` (now plus
    EXPIRE_HOURS) and is signed with SECRET_KEY using ALGORITHM.

    Args:
        username: Value stored in the ``sub`` claim.

    Returns:
        The encoded token string.
    """
    from datetime import timezone  # local import: the file only imports datetime/timedelta

    # Use an aware UTC timestamp; datetime.utcnow() returns a naive value and
    # is deprecated since Python 3.12.
    expire = datetime.now(timezone.utc) + timedelta(hours=EXPIRE_HOURS)
    return jwt.encode({"sub": username, "exp": expire}, SECRET_KEY, algorithm=ALGORITHM)
def require_auth(credentials: HTTPAuthorizationCredentials = Depends(bearer)):
    """FastAPI dependency that validates the bearer token.

    Returns:
        The username stored in the token's ``sub`` claim.

    Raises:
        HTTPException: 401 when no credentials were sent, when the token
            cannot be decoded, or when ``sub`` is missing or differs from
            AUTH_USERNAME.
    """
    def _unauthorized(detail):
        # Every rejection shares the same status code and challenge header.
        return HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail=detail,
            headers={"WWW-Authenticate": "Bearer"},
        )

    if credentials is None:
        raise _unauthorized("인증이 필요합니다")

    invalid_token = "토큰이 유효하지 않거나 만료되었습니다"
    try:
        claims = jwt.decode(credentials.credentials, SECRET_KEY, algorithms=[ALGORITHM])
    except JWTError:
        raise _unauthorized(invalid_token)

    subject = claims.get("sub")
    if subject is None or subject != AUTH_USERNAME:
        raise _unauthorized(invalid_token)
    return subject

153
app/main.py Normal file
View File

@@ -0,0 +1,153 @@
import os
import uuid
import time
import glob
import aiofiles
from fastapi import FastAPI, UploadFile, File, HTTPException, Depends, Form, Request
from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse
from auth import authenticate, create_access_token, require_auth
from tasks import celery_app, transcribe_task
from ocr_tasks import ocr_task
# FastAPI application object; the routes below are registered on it.
app = FastAPI(title="VoiceScript API")
# Directories for uploaded inputs and generated outputs (environment-overridable).
UPLOAD_DIR = os.getenv("UPLOAD_DIR", "/data/uploads")
OUTPUT_DIR = os.getenv("OUTPUT_DIR", "/data/outputs")
# Upload size limit in bytes; configured in megabytes via MAX_UPLOAD_MB (default 500).
MAX_UPLOAD_BYTES = int(os.getenv("MAX_UPLOAD_MB", "500")) * 1024 * 1024
# Output retention in seconds; configured in hours via OUTPUT_KEEP_HOURS (default 48).
OUTPUT_KEEP_SECS = int(os.getenv("OUTPUT_KEEP_HOURS", "48")) * 3600
# Both directories are created at import time if they do not exist.
os.makedirs(UPLOAD_DIR, exist_ok=True)
os.makedirs(OUTPUT_DIR, exist_ok=True)
# Lower-case extensions accepted by /api/transcribe and /api/ocr respectively.
AUDIO_EXT = {"mp3","mp4","wav","m4a","ogg","flac","aac","wma","webm","mkv","avi","mov"}
IMAGE_EXT = {"jpg","jpeg","png","bmp","tiff","tif","webp","gif"}
# ── 인증 ──────────────────────────────────────────────────────
@app.post("/api/login")
def login(username: str = Form(...), password: str = Form(...)):
    """Exchange form credentials for a bearer token.

    Returns:
        A dict with ``access_token`` and ``token_type``.

    Raises:
        HTTPException: 401 when the credentials are rejected.
    """
    if authenticate(username, password):
        token = create_access_token(username)
        return {"access_token": token, "token_type": "bearer"}
    raise HTTPException(status_code=401, detail="아이디 또는 비밀번호가 올바르지 않습니다")
@app.get("/api/me")
def me(user: str = Depends(require_auth)):
    """Return the username resolved by the require_auth dependency."""
    return {"username": user}
# ── STT ───────────────────────────────────────────────────────
@app.post("/api/transcribe")
async def transcribe(request: Request, file: UploadFile = File(...),
                     _: str = Depends(require_auth)):
    """Accept an audio/video upload and queue a transcription task.

    The upload is stored under UPLOAD_DIR with a fresh UUID as its base name
    and handed to ``transcribe_task``.

    Returns:
        A dict with the task id, the generated file id and the original filename.

    Raises:
        HTTPException: 400 when the extension is not in AUDIO_EXT. Size
            violations are raised by the helpers called here.
    """
    _check_size(request)

    suffix = _ext(file.filename)
    if suffix not in AUDIO_EXT:
        raise HTTPException(400, f"지원하지 않는 형식. 지원: {', '.join(sorted(AUDIO_EXT))}")

    file_id = str(uuid.uuid4())
    destination = os.path.join(UPLOAD_DIR, f"{file_id}.{suffix}")
    await _save(file, destination)

    job = transcribe_task.delay(file_id, destination)
    return {"task_id": job.id, "file_id": file_id, "filename": file.filename}
# ── OCR ───────────────────────────────────────────────────────
@app.post("/api/ocr")
async def ocr(
    request: Request,
    file: UploadFile = File(...),
    mode: str = Form("text"),
    backend: str = Form("paddle"),
    ollama_model: str = Form("granite3.2-vision"),
    custom_prompt: str = Form(""),
    _: str = Depends(require_auth),
):
    """Accept an image upload and queue an OCR task.

    Unknown ``mode`` values fall back to "text" and unknown ``backend``
    values fall back to "paddle" rather than being rejected.

    Returns:
        A dict with the task id, file id, original filename and the
        effective mode and backend.

    Raises:
        HTTPException: 400 when the extension is not in IMAGE_EXT. Size
            violations are raised by the helpers called here.
    """
    _check_size(request)

    suffix = _ext(file.filename)
    if suffix not in IMAGE_EXT:
        raise HTTPException(400, f"지원하지 않는 형식. 지원: {', '.join(sorted(IMAGE_EXT))}")

    # Normalise the option values to the supported sets.
    mode = mode if mode in ("text", "structure") else "text"
    backend = backend if backend in ("paddle", "ollama") else "paddle"

    file_id = str(uuid.uuid4())
    destination = os.path.join(UPLOAD_DIR, f"{file_id}.{suffix}")
    await _save(file, destination)

    job = ocr_task.delay(file_id, destination, mode, backend, ollama_model, custom_prompt)
    return {
        "task_id": job.id,
        "file_id": file_id,
        "filename": file.filename,
        "mode": mode,
        "backend": backend,
    }
# ── 상태 조회 (celery_app.AsyncResult 사용) ───────────────────
@app.get("/api/status/{task_id}")
def get_status(task_id: str, _: str = Depends(require_auth)):
    """Report the state of a Celery task in a frontend-friendly shape.

    Args:
        task_id: Celery task id returned by the upload endpoints.

    Returns:
        A dict with ``state`` and ``progress``; progress states add
        ``message``, success merges the task's result dict, and failure
        carries the stringified error as ``message``.
    """
    r = celery_app.AsyncResult(task_id)
    # Read the state once so every comparison below sees the same value;
    # re-reading the attribute per branch can observe a state change midway.
    state = r.state
    if state == "PENDING":
        return {"state": "pending", "progress": 0, "message": "대기 중..."}
    if state == "PROGRESS":
        info = r.info
        m = info if isinstance(info, dict) else {}
        return {"state": "progress", "progress": m.get("progress", 0),
                "message": m.get("message", "처리 중...")}
    if state == "SUCCESS":
        result = r.result
        if isinstance(result, dict):
            return {"state": "success", "progress": 100, **result}
        # A non-mapping result cannot be merged with **; expose it under a key.
        return {"state": "success", "progress": 100, "result": result}
    if state == "FAILURE":
        return {"state": "failure", "progress": 0, "message": str(r.info)}
    return {"state": state.lower(), "progress": 0}
# ── 다운로드 ──────────────────────────────────────────────────
@app.get("/api/download/{filename}")
def download(filename: str, _: str = Depends(require_auth)):
    """Serve a generated result file from OUTPUT_DIR.

    Args:
        filename: Bare file name (no directory components).

    Returns:
        A FileResponse; ``.xlsx`` files use the spreadsheet media type and
        everything else is served as ``text/plain``.

    Raises:
        HTTPException: 400 when the name contains ``..`` or a path
            separator, 404 when no regular file with that name exists.
    """
    # Reject both separator styles so the name can never leave OUTPUT_DIR.
    if ".." in filename or "/" in filename or "\\" in filename:
        raise HTTPException(400, "잘못된 파일명")
    path = os.path.join(OUTPUT_DIR, filename)
    # isfile (not exists): a directory must produce 404, not a server error.
    if not os.path.isfile(path):
        raise HTTPException(404, "파일을 찾을 수 없습니다")
    media = ("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
             if filename.endswith(".xlsx") else "text/plain")
    return FileResponse(path, media_type=media, filename=filename)
# ── 결과 파일 정리 ────────────────────────────────────────────
@app.post("/api/cleanup")
def cleanup(_: str = Depends(require_auth)):
    """Delete expired output files on demand and report how many were removed."""
    removed_count = _cleanup_outputs()
    return {"removed": removed_count}
@app.on_event("startup")
async def on_startup():
    """Run the output-file cleanup once when the application starts."""
    # NOTE(review): on_event is believed to be deprecated in recent FastAPI
    # releases in favour of lifespan handlers — confirm before upgrading.
    _cleanup_outputs()
# ── 유틸 ──────────────────────────────────────────────────────
def _check_size(request: Request):
    """Reject a request early when its declared body size exceeds the limit.

    This only inspects the Content-Length header; a request without one
    passes through here.

    Args:
        request: Incoming request whose headers are inspected.

    Raises:
        HTTPException: 400 when Content-Length is not an integer, 413 when
            it exceeds MAX_UPLOAD_BYTES.
    """
    cl = request.headers.get("content-length")
    if not cl:
        return
    try:
        declared = int(cl)
    except ValueError:
        # A malformed header is a client error, not a server failure.
        raise HTTPException(400, "잘못된 Content-Length 헤더") from None
    if declared > MAX_UPLOAD_BYTES:
        raise HTTPException(413, f"파일이 너무 큽니다. 최대 {MAX_UPLOAD_BYTES//1024//1024}MB")
def _cleanup_outputs() -> int:
    """Delete entries in OUTPUT_DIR whose mtime is older than the retention window.

    A retention of 0 seconds disables cleanup. Entries that cannot be
    inspected or removed are skipped silently.

    Returns:
        Number of entries removed.
    """
    if OUTPUT_KEEP_SECS == 0:
        return 0

    threshold = time.time() - OUTPUT_KEEP_SECS
    deleted = 0
    for entry in glob.glob(os.path.join(OUTPUT_DIR, "*")):
        try:
            expired = os.path.getmtime(entry) < threshold
            if expired:
                os.remove(entry)
                deleted += 1
        except Exception:
            # Best effort: one failing entry must not stop the sweep.
            continue
    return deleted
def _ext(fn):
    """Return the lower-cased extension of *fn* without the dot.

    Args:
        fn: File name; may be None or empty when the client sent no name.

    Returns:
        The text after the last ".", lower-cased, or "" when there is no
        name or no dot.
    """
    # The upload's filename is passed straight in and may be missing.
    if not fn or "." not in fn:
        return ""
    return fn.rsplit(".", 1)[-1].lower()
async def _save(file: UploadFile, path: str):
    """Stream an upload to *path* in 1 MiB chunks, enforcing the size limit.

    Args:
        file: Upload to read from.
        path: Destination file path.

    Raises:
        HTTPException: 413 once more than MAX_UPLOAD_BYTES have been read.
        Any error raised while reading or writing is propagated.

    On any failure, including cancellation, the partially written file is
    removed so no orphan is left behind.
    """
    written = 0
    try:
        async with aiofiles.open(path, "wb") as f:
            while chunk := await file.read(1024 * 1024):
                written += len(chunk)
                if written > MAX_UPLOAD_BYTES:
                    raise HTTPException(413, f"파일이 너무 큽니다. 최대 {MAX_UPLOAD_BYTES//1024//1024}MB")
                await f.write(chunk)
    except BaseException:
        # The file is already closed here (the context manager has exited).
        try:
            os.remove(path)
        except OSError:
            pass
        raise
# Serve the frontend from ./static at the site root (html=True).
# NOTE(review): registered after the /api routes — presumably so they take
# precedence over this catch-all mount; confirm.
app.mount("/", StaticFiles(directory="static", html=True), name="static")

288
app/ocr_tasks.py Normal file
View File

@@ -0,0 +1,288 @@
"""
OCR Celery Tasks
- PaddleOCR 3.x compatible (use_gpu/show_log/cls parameters removed, changed result structure handled)
- backend="paddle" → run PaddleOCR locally
- backend="ollama" → call the Ollama Vision API
"""
import os
import base64
import httpx
from celery import Celery
import openpyxl
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
# Redis URL used as both Celery broker and result backend.
REDIS_URL = os.getenv("REDIS_URL", "redis://redis:6379/0")
# Directory where OCR text/Excel outputs are written.
OUTPUT_DIR = os.getenv("OUTPUT_DIR", "/data/outputs")
# Language passed to the PaddleOCR engines.
OCR_LANG = os.getenv("OCR_LANG", "korean")
# Base URL of the Ollama server.
# NOTE(review): the default is a fixed private IP address — confirm it is meant to ship.
OLLAMA_URL = os.getenv("OLLAMA_URL", "http://192.168.0.126:11434")
# Timeout in seconds for the Ollama HTTP request.
OLLAMA_TIMEOUT = int(os.getenv("OLLAMA_TIMEOUT", "180"))
celery_app = Celery("ocr_tasks", broker=REDIS_URL, backend=REDIS_URL)
celery_app.conf.update(
    task_serializer="json",
    result_serializer="json",
    accept_content=["json"],
    task_track_started=True,
    result_expires=3600,
)
# PaddleOCR singletons, created on first use by get_ocr() / get_structure().
_ocr_engine = None
_struct_engine = None
def get_ocr():
    """Return the shared PaddleOCR engine, creating it on first use.

    The import is done lazily so the library is loaded only when an engine
    is actually needed.
    """
    global _ocr_engine
    if _ocr_engine is not None:
        return _ocr_engine

    from paddleocr import PaddleOCR
    print(f"[PaddleOCR] 로딩 (lang={OCR_LANG})")
    # PaddleOCR 3.x: the use_gpu/show_log parameters were removed.
    _ocr_engine = PaddleOCR(use_angle_cls=True, lang=OCR_LANG)
    print("[PaddleOCR] 완료")
    return _ocr_engine
def get_structure():
    """Return the shared PPStructure engine, creating it on first use."""
    global _struct_engine
    if _struct_engine is not None:
        return _struct_engine

    # NOTE(review): PaddleOCR 3.x is believed to ship PPStructureV3 instead of
    # PPStructure — confirm this import works with the installed version.
    from paddleocr import PPStructure
    print("[PPStructure] 로딩")
    _struct_engine = PPStructure(table=True, ocr=True, lang=OCR_LANG)
    print("[PPStructure] 완료")
    return _struct_engine
# ════════════════════════════════════════════════════════════════
# 메인 Task
# ════════════════════════════════════════════════════════════════
@celery_app.task(bind=True, name="tasks.ocr_task", queue="ocr")
def ocr_task(self, file_id, image_path, mode="text",
             backend="paddle", ollama_model="granite3.2-vision", custom_prompt=""):
    """Run OCR on an uploaded image with the selected backend.

    Args:
        file_id: Identifier used to name the output files.
        image_path: Path of the uploaded image; removed when the task ends.
        mode: "text" or "structure".
        backend: "ollama" selects the Ollama backend; anything else uses PaddleOCR.
        ollama_model: Model name passed to Ollama.
        custom_prompt: Optional prompt overriding the built-in one (Ollama only).

    Returns:
        The result dict produced by the selected backend.

    Raises:
        Exception: Any backend failure, re-raised with an "OCR 실패" prefix
            and the original exception attached as its cause.
    """
    self.update_state(state="PROGRESS", meta={"progress": 8, "message": "엔진 준비 중..."})
    try:
        if backend == "ollama":
            return _run_ollama(self, file_id, image_path, mode, ollama_model, custom_prompt)
        return _run_paddle(self, file_id, image_path, mode)
    except Exception as e:
        raise Exception(f"OCR 실패: {str(e)}") from e
    finally:
        # Best-effort removal of the upload on success and on failure alike.
        try:
            os.remove(image_path)
        except (OSError, TypeError):
            pass
# ════════════════════════════════════════════════════════════════
# Ollama 백엔드
# ════════════════════════════════════════════════════════════════
# Built-in prompts per OCR mode; used by _run_ollama when no custom prompt is given.
_OLLAMA_PROMPTS = {
    "text": "이 이미지에서 모든 텍스트를 정확하게 추출해줘. 원본의 줄 구분과 단락 구조를 유지해줘.",
    "structure": "이 이미지를 분석해서 표는 마크다운 표 형식으로, 나머지 텍스트는 원본 구조를 유지하며 추출해줘.",
}
def _run_ollama(task, file_id, image_path, mode, ollama_model, custom_prompt):
    """Run OCR by sending the image to an Ollama vision model.

    The image is base64-encoded and posted to ``/api/chat``. The reply text
    is written to ``<file_id>_ocr.txt``; in "structure" mode any markdown
    tables found in it are also exported to ``<file_id>_tables.xlsx``.

    Returns:
        A result dict with the text, per-line entries, table metadata and
        the output file names.

    Raises:
        Exception: When the server cannot be reached, the request times
            out, or the reply is empty. HTTP status errors from
            ``raise_for_status`` propagate unchanged.
    """
    def report(percent, message):
        task.update_state(state="PROGRESS",
                          meta={"progress": percent, "message": message})

    report(15, f"Ollama ({ollama_model}) 연결 중...")
    with open(image_path, "rb") as image_file:
        image_bytes = image_file.read()
    img_b64 = base64.b64encode(image_bytes).decode()

    # A non-blank custom prompt wins; otherwise use the built-in one for the mode.
    prompt = custom_prompt.strip()
    if not prompt:
        prompt = _OLLAMA_PROMPTS.get(mode, _OLLAMA_PROMPTS["text"])

    report(30, "모델 추론 중...")
    try:
        resp = httpx.post(
            f"{OLLAMA_URL}/api/chat",
            json={
                "model": ollama_model,
                "messages": [{"role": "user", "content": prompt, "images": [img_b64]}],
                "stream": False,
                "options": {"temperature": 0.1},
            },
            timeout=float(OLLAMA_TIMEOUT),
        )
        resp.raise_for_status()
    except httpx.ConnectError:
        raise Exception(f"Ollama 서버 연결 실패 ({OLLAMA_URL})")
    except httpx.TimeoutException:
        raise Exception(f"Ollama 응답 시간 초과 ({OLLAMA_TIMEOUT}초). OLLAMA_TIMEOUT 값을 늘려주세요.")

    report(85, "결과 저장 중...")
    full_text = resp.json().get("message", {}).get("content", "").strip()
    if not full_text:
        raise Exception("Ollama 빈 응답. 모델이 설치되어 있는지 확인하세요.")

    if mode == "structure":
        tables = _parse_md_tables(full_text)
    else:
        tables = []

    os.makedirs(OUTPUT_DIR, exist_ok=True)
    txt_file = f"{file_id}_ocr.txt"
    with open(os.path.join(OUTPUT_DIR, txt_file), "w", encoding="utf-8") as out:
        out.write(f"# OCR 결과 (Ollama / {ollama_model})\n\n{full_text}")

    xlsx_file = None
    if tables:
        xlsx_file = f"{file_id}_tables.xlsx"
        _save_excel(tables, os.path.join(OUTPUT_DIR, xlsx_file))

    tables_html = [_md_table_to_html(t) for t in tables]
    # This backend reports no per-line confidence, so 1.0 is recorded for each line.
    lines = []
    for text_line in full_text.splitlines():
        if text_line.strip():
            lines.append({"text": text_line, "confidence": 1.0, "bbox": []})

    tables_meta = []
    for markup, table in zip(tables_html, tables):
        tables_meta.append({
            "html": markup,
            "rows": len(table),
            "cols": max(len(r) for r in table) if table else 0,
        })

    return {
        "mode": mode, "backend": "ollama", "ollama_model": ollama_model,
        "full_text": full_text, "lines": lines, "line_count": len(lines),
        "txt_file": txt_file,
        "tables": tables_meta,
        "xlsx_file": xlsx_file,
    }
# ════════════════════════════════════════════════════════════════
# PaddleOCR 백엔드
# ════════════════════════════════════════════════════════════════
def _run_paddle(task, file_id, image_path, mode):
    """Load the image with OpenCV and dispatch to the PaddleOCR handler for *mode*.

    Raises:
        ValueError: When OpenCV cannot read the image.
    """
    import cv2

    image = cv2.imread(image_path)
    if image is None:
        raise ValueError("이미지를 읽을 수 없습니다")
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    if mode == "structure":
        return _paddle_structure(task, file_id, image)
    return _paddle_text(task, file_id, image)
def _paddle_text(task, file_id, img):
    """Run plain text recognition and write the text to ``<file_id>_ocr.txt``.

    Two shapes are accepted for the first result entry: a dict with
    ``rec_texts`` / ``rec_scores`` (PaddleOCR 3.x) or a list of
    ``[bbox, (text, conf)]`` items (older releases).

    Returns:
        A result dict with the text, per-line confidence and the text file name.
    """
    task.update_state(state="PROGRESS", meta={"progress": 30, "message": "텍스트 인식 중..."})
    # PaddleOCR 3.x: the cls parameter was removed and the result structure changed.
    result = get_ocr().ocr(img)
    task.update_state(state="PROGRESS", meta={"progress": 80, "message": "결과 정리 중..."})

    # Collect (text, confidence) pairs first, then filter and format them.
    pairs = []
    if result and len(result) > 0:
        first = result[0]
        if isinstance(first, dict):
            pairs = zip(first.get("rec_texts", []), first.get("rec_scores", []))
        elif isinstance(first, list):
            # Legacy format: list of [bbox, (text, conf)].
            collected = []
            for item in first:
                if item and len(item) == 2:
                    _, (text, conf) = item
                    collected.append((text, conf))
            pairs = collected

    lines = [
        {"text": text, "confidence": round(float(conf), 3), "bbox": []}
        for text, conf in pairs
        if text.strip()
    ]

    full_text = "\n".join(entry["text"] for entry in lines)
    txt_file = f"{file_id}_ocr.txt"
    with open(os.path.join(OUTPUT_DIR, txt_file), "w", encoding="utf-8") as out:
        out.write(full_text)

    return {
        "mode": "text", "backend": "paddle",
        "full_text": full_text, "lines": lines,
        "line_count": len(lines), "txt_file": txt_file,
        "tables": [], "xlsx_file": None,
    }
def _paddle_structure(task, file_id, img):
    """Run layout analysis, collecting text blocks and tables.

    Table regions contribute their HTML plus a parsed row list; text, title
    and figure-caption regions contribute their recognised lines. Tables
    are exported to ``<file_id>_tables.xlsx`` and text to ``<file_id>_ocr.txt``.

    Returns:
        A result dict with the text, per-line entries, table metadata and
        the output file names.
    """
    task.update_state(state="PROGRESS", meta={"progress": 20, "message": "레이아웃 분석 중..."})
    regions = get_structure()(img)
    task.update_state(state="PROGRESS", meta={"progress": 60, "message": "표 구조 추출 중..."})

    text_blocks, tables_html, tables_data = [], [], []
    for region in regions:
        kind = region.get("type", "").lower()
        if kind == "table":
            markup = region.get("res", {}).get("html", "")
            if markup:
                tables_html.append(markup)
                tables_data.append(_html_table_to_list(markup))
            continue
        if kind not in ("text", "title", "figure_caption"):
            continue
        for entry in (region.get("res", []) or []):
            if isinstance(entry, (list, tuple)) and len(entry) == 2:
                _, (text, _conf) = entry
                text_blocks.append(text)

    full_text = "\n".join(text_blocks)
    task.update_state(state="PROGRESS", meta={"progress": 80, "message": "Excel 생성 중..."})

    xlsx_file = None
    if tables_data:
        xlsx_file = f"{file_id}_tables.xlsx"
        _save_excel(tables_data, os.path.join(OUTPUT_DIR, xlsx_file))

    txt_file = f"{file_id}_ocr.txt"
    with open(os.path.join(OUTPUT_DIR, txt_file), "w", encoding="utf-8") as out:
        out.write("# 텍스트\n\n" + full_text)

    # Confidence is not carried over from the structure result; 1.0 is recorded.
    lines = [{"text": block_text, "confidence": 1.0, "bbox": []} for block_text in text_blocks]

    tables_meta = []
    for markup, rows in zip(tables_html, tables_data):
        tables_meta.append({
            "html": markup,
            "rows": len(rows),
            "cols": max(len(r) for r in rows) if rows else 0,
        })

    return {
        "mode": "structure", "backend": "paddle",
        "full_text": full_text, "lines": lines,
        "line_count": len(lines), "txt_file": txt_file,
        "tables": tables_meta, "xlsx_file": xlsx_file,
    }
# ════════════════════════════════════════════════════════════════
# 공통 유틸
# ════════════════════════════════════════════════════════════════
def _parse_md_tables(text):
    """Extract markdown tables from *text*.

    A table is a run of lines that start and end with "|". Lines made only
    of "|", "-", ":" and spaces (header separators) are dropped without
    ending the run. Runs with fewer than two remaining rows are discarded.

    Returns:
        A list of tables, each a list of rows, each a list of stripped cell strings.
    """
    tables = []
    pending = []
    # The trailing "" acts as a sentinel that flushes a table ending on the last line.
    for raw in [*text.splitlines(), ""]:
        stripped = raw.strip()
        is_row = stripped.startswith("|") and stripped.endswith("|")
        if not is_row:
            if len(pending) >= 2:
                tables.append(pending)
            pending = []
            continue
        if all(ch in "| -:" for ch in stripped):
            continue
        pending.append([cell.strip() for cell in stripped.strip("|").split("|")])
    return tables
def _md_table_to_html(table):
    """Render a parsed table as an HTML ``<table>`` string.

    The first row is emitted as ``<th>`` cells and the rest as ``<td>``.
    Cell text is HTML-escaped, since it originates from model output and
    must not be interpreted as markup.

    Args:
        table: List of rows, each a list of cell values.

    Returns:
        The HTML string, or "" for an empty table.
    """
    from html import escape  # local import keeps this block self-contained

    if not table:
        return ""
    rendered_rows = []
    for i, row in enumerate(table):
        tag = "th" if i == 0 else "td"
        cells = "".join(f"<{tag}>{escape(str(c))}</{tag}>" for c in row)
        rendered_rows.append(f"<tr>{cells}</tr>")
    return f"<table>{''.join(rendered_rows)}</table>"
def _html_table_to_list(html):
    """Parse an HTML table into a list of rows of stripped cell strings.

    Text inside ``<td>`` / ``<th>`` is collected per cell; a row is kept
    only if it has at least one cell when its ``</tr>`` is reached.
    """
    from html.parser import HTMLParser

    cell_tags = ("td", "th")

    class _TableCollector(HTMLParser):
        def __init__(self):
            super().__init__()
            self.rows = []
            self._current_row = []
            self._buffer = []
            self._inside_cell = False

        def handle_starttag(self, tag, attrs):
            if tag == "tr":
                self._current_row = []
            elif tag in cell_tags:
                self._buffer = []
                self._inside_cell = True

        def handle_endtag(self, tag):
            if tag in cell_tags:
                self._current_row.append("".join(self._buffer).strip())
                self._inside_cell = False
            elif tag == "tr":
                if self._current_row:
                    self.rows.append(self._current_row)

        def handle_data(self, data):
            if self._inside_cell:
                self._buffer.append(data)

    collector = _TableCollector()
    collector.feed(html)
    return collector.rows
def _save_excel(tables, path):
    """Write each table to its own worksheet and save the workbook to *path*.

    Sheets are named "1", "2", ... Every cell gets a thin border and centred,
    wrapped alignment; the first row is styled as a header. Column widths
    follow the longest cell text plus padding, capped at 40. If there are no
    tables, a single empty "Sheet1" is created so the workbook can be saved.
    """
    workbook = openpyxl.Workbook()
    workbook.remove(workbook.active)

    for sheet_no, table in enumerate(tables, 1):
        sheet = workbook.create_sheet(f"{sheet_no}")
        edge = Side(style="thin", color="2A2A33")
        frame = Border(left=edge, right=edge, top=edge, bottom=edge)

        for r_idx, row in enumerate(table, 1):
            is_header = r_idx == 1
            for c_idx, val in enumerate(row, 1):
                cell = sheet.cell(row=r_idx, column=c_idx, value=val)
                cell.border = frame
                cell.alignment = Alignment(horizontal="center",
                                           vertical="center", wrap_text=True)
                if is_header:
                    cell.fill = PatternFill("solid", fgColor="1A1A2E")
                    cell.font = Font(color="00E5A0", bold=True, size=10)
                else:
                    cell.font = Font(size=10)

        for column_cells in sheet.columns:
            widest = max((len(str(c.value or "")) for c in column_cells), default=8)
            letter = column_cells[0].column_letter
            sheet.column_dimensions[letter].width = min(widest + 4, 40)

    if not workbook.sheetnames:
        workbook.create_sheet("Sheet1")
    workbook.save(path)

21
app/requirements.txt Normal file
View File

@@ -0,0 +1,21 @@
fastapi==0.115.0
uvicorn[standard]==0.30.6
python-multipart==0.0.9
celery==5.4.0
redis==5.0.8
faster-whisper==1.0.3
aiofiles==23.2.1
# Authentication (bcrypt removed — credentials are compared directly)
python-jose[cryptography]==3.3.0
# PaddleOCR 3.x
paddleocr>=3.0.0
opencv-python-headless>=4.8.0
# Ollama API calls
httpx>=0.27.0
# Excel output
openpyxl==3.1.2
Pillow>=10.0.0

771
app/static/index.html Normal file
View File

@@ -0,0 +1,771 @@
<!DOCTYPE html>
<html lang="ko">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>VoiceScript — STT & OCR</title>
<link rel="preconnect" href="https://fonts.googleapis.com">
<link href="https://fonts.googleapis.com/css2?family=IBM+Plex+Mono:wght@400;500;600&family=IBM+Plex+Sans+KR:wght@300;400;500;600&display=swap" rel="stylesheet">
<style>
:root{
--bg:#08080a;--surf:#0f0f14;--surf2:#141419;--border:#1c1c24;--border2:#272730;
--accent:#00e5a0;--accent2:#00b37a;--blue:#4da6ff;--purple:#a78bfa;--warn:#ff6b35;
--text:#e4e4f0;--muted:#52526a;
--mono:'IBM Plex Mono',monospace;--sans:'IBM Plex Sans KR',sans-serif;
}
*,*::before,*::after{box-sizing:border-box;margin:0;padding:0}
body{background:var(--bg);color:var(--text);font-family:var(--sans);min-height:100vh;display:flex;flex-direction:column}
/* ── LOGIN ── */
#login-overlay{position:fixed;inset:0;background:var(--bg);display:flex;align-items:center;justify-content:center;z-index:999}
.login-box{width:380px;padding:48px 40px;background:var(--surf);border:1px solid var(--border2);border-radius:6px}
.login-logo{display:flex;align-items:center;gap:12px;margin-bottom:36px}
.login-mark{width:28px;height:28px;background:var(--accent);clip-path:polygon(0 20%,100% 0,100% 80%,0 100%)}
.login-title{font-family:var(--mono);font-size:1rem;font-weight:600;letter-spacing:.08em}
.login-title span{color:var(--accent)}
.field{margin-bottom:16px}
.field label{display:block;font-family:var(--mono);font-size:.65rem;letter-spacing:.12em;color:var(--muted);text-transform:uppercase;margin-bottom:6px}
.field input{width:100%;padding:10px 12px;background:var(--bg);border:1px solid var(--border2);border-radius:3px;color:var(--text);font-family:var(--mono);font-size:.85rem;outline:none;transition:border-color .15s}
.field input:focus{border-color:var(--accent)}
#btn-login{width:100%;margin-top:8px;padding:12px;background:var(--accent);color:#000;border:none;border-radius:3px;font-family:var(--mono);font-size:.82rem;font-weight:600;letter-spacing:.1em;cursor:pointer;transition:all .15s;text-transform:uppercase}
#btn-login:hover{background:#00ffb3}
#login-err{display:none;margin-top:12px;padding:10px 12px;background:rgba(255,107,53,.08);border:1px solid rgba(255,107,53,.3);border-radius:3px;font-family:var(--mono);font-size:.72rem;color:var(--warn)}
/* ── HEADER ── */
header{border-bottom:1px solid var(--border);padding:14px 28px;display:flex;align-items:center;gap:12px;position:sticky;top:0;background:rgba(8,8,10,.92);backdrop-filter:blur(12px);z-index:100}
.logo-mark{width:28px;height:28px;background:var(--accent);clip-path:polygon(0 20%,100% 0,100% 80%,0 100%)}
header h1{font-family:var(--mono);font-size:1rem;font-weight:600;letter-spacing:.08em}
header h1 span{color:var(--accent)}
#user-info{margin-left:auto;display:flex;align-items:center;gap:12px;font-family:var(--mono);font-size:.68rem;color:var(--muted)}
#user-name{color:var(--accent)}
#btn-logout{background:none;border:1px solid var(--border2);color:var(--muted);padding:4px 10px;border-radius:2px;font-family:var(--mono);font-size:.65rem;cursor:pointer;letter-spacing:.08em;transition:all .15s;text-transform:uppercase}
#btn-logout:hover{border-color:var(--warn);color:var(--warn)}
/* ── NAV ── */
.nav-tabs{display:flex;border-bottom:1px solid var(--border);padding:0 28px;background:var(--surf)}
.nav-tab{font-family:var(--mono);font-size:.72rem;letter-spacing:.1em;text-transform:uppercase;padding:14px 20px;background:none;border:none;color:var(--muted);cursor:pointer;border-bottom:2px solid transparent;transition:all .15s;display:flex;align-items:center;gap:8px}
.nav-tab.active{color:var(--accent);border-bottom-color:var(--accent)}
.nav-tab:hover:not(.active){color:var(--text)}
/* ── PAGE / PANEL ── */
.page{display:none;flex:1}
.page.active{display:flex}
.two-panel{display:grid;grid-template-columns:1fr 1fr;width:100%;max-width:1360px;margin:0 auto}
.panel{padding:32px 36px;border-right:1px solid var(--border);min-height:calc(100vh - 120px)}
.panel:last-child{border-right:none}
.panel-title{font-family:var(--mono);font-size:.65rem;letter-spacing:.15em;color:var(--muted);text-transform:uppercase;margin-bottom:24px;display:flex;align-items:center;gap:10px}
.panel-title::after{content:'';flex:1;height:1px;background:var(--border)}
/* ── DROPZONE ── */
.dropzone{border:1px dashed var(--border2);border-radius:4px;padding:44px 24px;text-align:center;cursor:pointer;transition:all .2s;position:relative;background:var(--surf)}
.dropzone:hover,.dropzone.dragover{border-color:var(--accent);background:rgba(0,229,160,.04)}
.dropzone input[type=file]{position:absolute;inset:0;opacity:0;cursor:pointer}
.drop-icon{font-size:2.2rem;margin-bottom:12px;display:block;opacity:.35}
.drop-label{font-size:.9rem;color:var(--muted);line-height:1.7}
.drop-label strong{color:var(--text);font-weight:500}
.drop-formats{margin-top:10px;font-family:var(--mono);font-size:.62rem;color:var(--muted);letter-spacing:.05em}
/* ── FILE INFO ── */
.file-info{display:none;margin-top:14px;padding:12px 14px;background:var(--surf);border:1px solid var(--border2);border-radius:3px;font-family:var(--mono);font-size:.75rem}
.file-info .fname{color:var(--accent);margin-bottom:3px;word-break:break-all}
.file-info .fsize{color:var(--muted)}
/* ── SECTION LABEL ── */
.sec-label{font-family:var(--mono);font-size:.63rem;letter-spacing:.1em;color:var(--muted);text-transform:uppercase;margin-bottom:7px}
/* ── ENGINE SELECTOR ── */
.engine-selector{margin-top:18px}
.engine-btns{display:grid;grid-template-columns:1fr 1fr;gap:8px;margin-top:7px}
.engine-btn{
padding:12px 8px;background:var(--surf);border:1px solid var(--border2);
color:var(--muted);border-radius:4px;font-family:var(--mono);font-size:.72rem;
letter-spacing:.06em;cursor:pointer;transition:all .18s;text-align:center;
display:flex;flex-direction:column;align-items:center;gap:5px;
}
.engine-btn .e-icon{font-size:1.4rem;opacity:.5;transition:opacity .18s}
.engine-btn .e-name{font-weight:600;font-size:.72rem}
.engine-btn .e-desc{font-size:.6rem;color:var(--muted);letter-spacing:.04em;line-height:1.4}
.engine-btn.active[data-engine="paddle"]{background:rgba(0,229,160,.07);border-color:var(--accent2);color:var(--accent)}
.engine-btn.active[data-engine="paddle"] .e-icon{opacity:1}
.engine-btn.active[data-engine="paddle"] .e-desc{color:var(--accent2)}
.engine-btn.active[data-engine="ollama"]{background:rgba(167,139,250,.07);border-color:#7c6cd4;color:var(--purple)}
.engine-btn.active[data-engine="ollama"] .e-icon{opacity:1}
.engine-btn.active[data-engine="ollama"] .e-desc{color:#9b8de6}
/* ── OLLAMA OPTIONS (조건부 표시) ── */
#ollama-options{
margin-top:14px;padding:14px;background:var(--surf2);
border:1px solid #272040;border-radius:4px;
display:none;
}
#ollama-options.visible{display:block}
.ollama-model-grid{display:grid;grid-template-columns:1fr 1fr;gap:8px;margin-top:7px}
.model-card{
padding:10px 12px;background:var(--surf);border:1px solid var(--border2);
border-radius:3px;cursor:pointer;transition:all .15s;
}
.model-card:hover{border-color:#7c6cd4}
.model-card.active{background:rgba(167,139,250,.08);border-color:#7c6cd4}
.mc-name{font-family:var(--mono);font-size:.7rem;color:var(--text);margin-bottom:4px}
.mc-size{font-family:var(--mono);font-size:.6rem;color:var(--muted)}
.mc-tag{font-family:var(--mono);font-size:.55rem;padding:2px 5px;border-radius:2px;margin-top:4px;display:inline-block}
.mc-tag.ocr{background:rgba(0,229,160,.1);color:var(--accent)}
.mc-tag.doc{background:rgba(77,166,255,.1);color:var(--blue)}
.mc-tag.gen{background:rgba(167,139,250,.1);color:var(--purple)}
.mc-tag.best{background:rgba(255,107,53,.1);color:#ff9d6b}
/* ── 커스텀 프롬프트 ── */
#custom-prompt-wrap{margin-top:12px;display:none}
#custom-prompt-wrap.visible{display:block}
textarea.cprompt{
width:100%;background:var(--surf);border:1px solid var(--border2);color:var(--text);
padding:10px 12px;border-radius:3px;font-family:var(--mono);font-size:.72rem;
line-height:1.6;resize:vertical;min-height:72px;outline:none;
}
textarea.cprompt:focus{border-color:#7c6cd4}
.cprompt-toggle{
display:inline-flex;align-items:center;gap:6px;
font-family:var(--mono);font-size:.65rem;color:var(--muted);cursor:pointer;
margin-top:8px;
}
.cprompt-toggle:hover{color:var(--text)}
/* ── OPTIONS ── */
.options{margin-top:14px;display:grid;grid-template-columns:1fr 1fr;gap:10px}
.opt-item label{display:block;font-family:var(--mono);font-size:.63rem;letter-spacing:.1em;color:var(--muted);text-transform:uppercase;margin-bottom:5px}
.opt-item select{width:100%;background:var(--surf);border:1px solid var(--border2);color:var(--text);padding:8px 10px;border-radius:3px;font-family:var(--mono);font-size:.75rem;outline:none;cursor:pointer;appearance:none;-webkit-appearance:none;background-image:url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='10' height='6'%3E%3Cpath d='M0 0l5 6 5-6z' fill='%2352526a'/%3E%3C/svg%3E");background-repeat:no-repeat;background-position:right 10px center}
.opt-item select:focus{border-color:var(--accent)}
/* ── MODE TOGGLE ── */
.mode-toggle{margin-top:14px}
.mode-btns{display:grid;grid-template-columns:1fr 1fr;gap:8px;margin-top:7px}
.mode-btn{padding:9px;background:var(--surf);border:1px solid var(--border2);color:var(--muted);border-radius:3px;font-family:var(--mono);font-size:.7rem;letter-spacing:.07em;cursor:pointer;transition:all .15s;text-align:center;text-transform:uppercase}
.mode-btn.active{background:rgba(0,229,160,.07);border-color:var(--accent2);color:var(--accent)}
#mode-desc{margin-top:6px;font-family:var(--mono);font-size:.62rem;color:var(--muted);line-height:1.6}
/* ── BUTTON ── */
.btn-start{margin-top:16px;width:100%;padding:13px;border:none;border-radius:3px;font-family:var(--mono);font-size:.82rem;font-weight:600;letter-spacing:.1em;cursor:pointer;transition:all .15s;text-transform:uppercase}
.btn-start.green{background:var(--accent);color:#000}
.btn-start.green:hover:not(:disabled){background:#00ffb3;transform:translateY(-1px)}
.btn-start.purple{background:var(--purple);color:#fff}
.btn-start.purple:hover:not(:disabled){background:#c4b5fd;transform:translateY(-1px)}
.btn-start:disabled{background:var(--border2);color:var(--muted);cursor:not-allowed;transform:none}
/* ── PROGRESS ── */
.prog-box{display:none;margin-top:16px}
.prog-header{display:flex;justify-content:space-between;margin-bottom:6px}
.prog-msg{font-family:var(--mono);font-size:.72rem;color:var(--muted)}
.prog-pct{font-family:var(--mono);font-size:.72rem}
.prog-track{height:2px;background:var(--border);border-radius:1px;overflow:hidden}
.prog-fill{height:100%;transition:width .4s ease;width:0%;border-radius:1px}
.waveform{display:flex;align-items:center;justify-content:center;gap:3px;margin-top:16px;height:28px}
.wave-bar{width:3px;border-radius:2px;opacity:.6;animation:wave 1s ease-in-out infinite}
.wave-bar:nth-child(1){animation-delay:0s;height:8px}
.wave-bar:nth-child(2){animation-delay:.1s;height:14px}
.wave-bar:nth-child(3){animation-delay:.2s;height:22px}
.wave-bar:nth-child(4){animation-delay:.3s;height:18px}
.wave-bar:nth-child(5){animation-delay:.4s;height:26px}
.wave-bar:nth-child(6){animation-delay:.3s;height:18px}
.wave-bar:nth-child(7){animation-delay:.2s;height:22px}
.wave-bar:nth-child(8){animation-delay:.1s;height:14px}
.wave-bar:nth-child(9){animation-delay:0s;height:8px}
@keyframes wave{0%,100%{transform:scaleY(.4);opacity:.3}50%{transform:scaleY(1.2);opacity:.9}}
/* ── ERROR ── */
.err-box{display:none;margin-top:12px;padding:10px 12px;background:rgba(255,107,53,.08);border:1px solid rgba(255,107,53,.3);border-radius:3px;font-family:var(--mono);font-size:.72rem;color:var(--warn);white-space:pre-wrap;line-height:1.6}
/* ── RESULT ── */
.result-meta{display:none;flex-wrap:wrap;gap:8px;margin-bottom:14px}
.meta-chip{font-family:var(--mono);font-size:.63rem;padding:4px 9px;border:1px solid var(--border2);border-radius:2px;color:var(--muted);letter-spacing:.04em}
.meta-chip span{color:var(--accent)}
.meta-chip.ollama span{color:var(--purple)}
.result-tabs{display:none;border-bottom:1px solid var(--border);margin-bottom:14px}
.tab-btn{font-family:var(--mono);font-size:.67rem;letter-spacing:.1em;padding:9px 14px;background:none;border:none;color:var(--muted);cursor:pointer;border-bottom:2px solid transparent;transition:all .15s;text-transform:uppercase}
.tab-btn.active{color:var(--accent);border-bottom-color:var(--accent)}
.tab-btn:hover:not(.active){color:var(--text)}
.tab-content{display:none;flex-direction:column;flex:1}
.tab-content.active{display:flex}
.result-textarea{flex:1;min-height:360px;background:var(--surf);border:1px solid var(--border);color:var(--text);padding:16px;border-radius:3px;font-family:var(--mono);font-size:.78rem;line-height:1.8;resize:vertical;outline:none;white-space:pre-wrap}
.segments-list,.lines-list,.segments-list-ocr{flex:1;min-height:360px;overflow-y:auto;background:var(--surf);border:1px solid var(--border);border-radius:3px}
.seg-item{display:grid;grid-template-columns:110px 1fr;border-bottom:1px solid var(--border)}
.seg-item:last-child{border-bottom:none}
.seg-item:hover{background:rgba(255,255,255,.015)}
.seg-time{padding:11px 12px;font-family:var(--mono);font-size:.65rem;color:var(--muted);border-right:1px solid var(--border);white-space:nowrap;line-height:1.6}
.seg-text{padding:11px 14px;font-size:.8rem;line-height:1.6}
.line-item{display:grid;grid-template-columns:60px 1fr;border-bottom:1px solid var(--border)}
.line-item:last-child{border-bottom:none}
.line-conf{padding:9px 10px;font-family:var(--mono);font-size:.62rem;border-right:1px solid var(--border);text-align:center;display:flex;align-items:center;justify-content:center}
.line-conf.high{color:var(--accent)}.line-conf.mid{color:#f0b42a}.line-conf.low{color:var(--warn)}
.line-text{padding:9px 12px;font-size:.8rem;line-height:1.5}
.table-wrapper{overflow-x:auto;margin-bottom:14px;border:1px solid var(--border);border-radius:3px}
.ocr-table{width:100%;border-collapse:collapse;font-size:.78rem;font-family:var(--mono)}
.ocr-table th{background:#1a1a2e;color:var(--accent);padding:8px 12px;text-align:left;border:1px solid var(--border2);font-weight:500}
.ocr-table td{padding:8px 12px;border:1px solid var(--border);line-height:1.5}
.ocr-table tr:nth-child(even) td{background:rgba(255,255,255,.015)}
.table-title{font-family:var(--mono);font-size:.68rem;color:var(--muted);letter-spacing:.08em;padding:10px 12px;background:var(--surf2);border-bottom:1px solid var(--border);text-transform:uppercase}
.result-actions{display:none;gap:8px;margin-top:12px}
.btn-act{flex:1;padding:9px;background:none;border:1px solid var(--border2);color:var(--text);border-radius:3px;font-family:var(--mono);font-size:.68rem;letter-spacing:.08em;cursor:pointer;transition:all .15s;text-transform:uppercase}
.btn-act:hover{border-color:var(--accent);color:var(--accent)}
.btn-act.primary{background:rgba(0,229,160,.07);border-color:var(--accent2);color:var(--accent)}
.btn-act.excel{background:rgba(77,166,255,.07);border-color:#3a7cc4;color:var(--blue)}
.btn-act.excel:hover{background:rgba(77,166,255,.14)}
.empty-state{flex:1;display:flex;flex-direction:column;align-items:center;justify-content:center;gap:10px;color:var(--muted);padding:60px 0}
.empty-icon{font-size:2.2rem;opacity:.18}
.empty-text{font-family:var(--mono);font-size:.68rem;letter-spacing:.1em;text-align:center;line-height:1.9;text-transform:uppercase}
@media(max-width:900px){.two-panel{grid-template-columns:1fr}.panel{border-right:none;border-bottom:1px solid var(--border);min-height:auto;padding:24px 18px}.panel:last-child{border-bottom:none}}
</style>
</head>
<body>
<!-- ════════ LOGIN ════════ -->
<div id="login-overlay">
<div class="login-box">
<div class="login-logo">
<div class="login-mark"></div>
<div class="login-title">Voice<span>Script</span></div>
</div>
<div class="field"><label>아이디</label><input type="text" id="inp-user" placeholder="username" autocomplete="username"></div>
<div class="field"><label>비밀번호</label><input type="password" id="inp-pass" placeholder="password" autocomplete="current-password"></div>
<button id="btn-login">로그인</button>
<div id="login-err"></div>
</div>
</div>
<!-- ════════ HEADER ════════ -->
<header>
<div class="logo-mark"></div>
<h1>Voice<span>Script</span></h1>
<div id="user-info"><span id="user-name"></span><button id="btn-logout">로그아웃</button></div>
</header>
<!-- ════════ NAV ════════ -->
<div class="nav-tabs">
<button class="nav-tab active" data-page="stt"><span>🎙</span> STT 음성변환</button>
<button class="nav-tab" data-page="ocr"><span>🔍</span> OCR 이미지인식</button>
</div>
<!-- ════════════════════════════════════════════════
STT PAGE
════════════════════════════════════════════════ -->
<div class="page active" id="page-stt">
<div class="two-panel">
<section class="panel">
<div class="panel-title">파일 업로드</div>
<div class="dropzone" id="stt-drop">
<input type="file" id="stt-input" accept=".mp3,.mp4,.wav,.m4a,.ogg,.flac,.aac,.wma,.webm,.mkv,.avi,.mov">
<span class="drop-icon">🎵</span>
<div class="drop-label"><strong>드래그하거나 클릭하여 선택</strong><br>음성 또는 영상 파일</div>
<div class="drop-formats">mp3 · wav · m4a · ogg · flac · aac · mp4 · webm · mkv</div>
</div>
<div class="file-info" id="stt-info"><div class="fname" id="stt-fname"></div><div class="fsize" id="stt-fsize"></div></div>
<div class="options">
<div class="opt-item"><label>언어</label>
<select id="stt-lang"><option value="ko">한국어</option><option value="en">English</option><option value="ja">日本語</option><option value="zh">中文</option><option value="">자동 감지</option></select>
</div>
<div class="opt-item"><label>출력</label>
<select id="stt-fmt"><option value="full">전체 텍스트</option><option value="timestamp">타임스탬프</option></select>
</div>
</div>
<button class="btn-start green" id="stt-btn" disabled>변환 시작</button>
<div class="prog-box" id="stt-prog">
<div class="prog-header"><span class="prog-msg" id="stt-pmsg">처리 중...</span><span class="prog-pct" id="stt-ppct" style="color:var(--accent)">0%</span></div>
<div class="prog-track"><div class="prog-fill" id="stt-pfill" style="background:var(--accent)"></div></div>
<div class="waveform"><div class="wave-bar" style="background:var(--accent)"></div><div class="wave-bar" style="background:var(--accent)"></div><div class="wave-bar" style="background:var(--accent)"></div><div class="wave-bar" style="background:var(--accent)"></div><div class="wave-bar" style="background:var(--accent)"></div><div class="wave-bar" style="background:var(--accent)"></div><div class="wave-bar" style="background:var(--accent)"></div><div class="wave-bar" style="background:var(--accent)"></div><div class="wave-bar" style="background:var(--accent)"></div></div>
</div>
<div class="err-box" id="stt-err"></div>
</section>
<section class="panel">
<div class="panel-title">변환 결과</div>
<div class="result-meta" id="stt-meta">
<div class="meta-chip">언어 <span id="stt-mlang"></span></div>
<div class="meta-chip">길이 <span id="stt-mdur"></span></div>
<div class="meta-chip">세그먼트 <span id="stt-msegs"></span></div>
</div>
<div class="result-tabs" id="stt-tabs">
<button class="tab-btn active" data-tab="stt-text">전체 텍스트</button>
<button class="tab-btn" data-tab="stt-segs">타임스탬프</button>
</div>
<div class="tab-content active" id="stt-text">
<div class="empty-state" id="stt-empty"><div class="empty-icon">📝</div><div class="empty-text">파일 업로드 후<br>변환을 시작하면<br>결과가 표시됩니다</div></div>
<textarea class="result-textarea" id="stt-result" style="display:none" readonly></textarea>
</div>
<div class="tab-content" id="stt-segs"><div class="segments-list" id="stt-seglist"></div></div>
<div class="result-actions" id="stt-actions">
<button class="btn-act" id="stt-copy">복사</button>
<button class="btn-act primary" id="stt-dl">TXT 다운로드</button>
<button class="btn-act" id="stt-new">새 파일</button>
</div>
</section>
</div>
</div>
<!-- ════════════════════════════════════════════════
OCR PAGE
════════════════════════════════════════════════ -->
<div class="page" id="page-ocr">
<div class="two-panel">
<!-- 업로드 & 설정 -->
<section class="panel">
<div class="panel-title">이미지 업로드</div>
<div class="dropzone" id="ocr-drop">
<input type="file" id="ocr-input" accept=".jpg,.jpeg,.png,.bmp,.tiff,.tif,.webp,.gif">
<span class="drop-icon">🖼</span>
<div class="drop-label"><strong>드래그하거나 클릭하여 선택</strong><br>이미지 파일</div>
<div class="drop-formats">jpg · png · bmp · tiff · webp · gif</div>
</div>
<div class="file-info" id="ocr-info"><div class="fname" id="ocr-fname"></div><div class="fsize" id="ocr-fsize"></div></div>
<div id="ocr-preview-wrap" style="display:none;margin-top:12px">
<img id="ocr-preview" style="max-width:100%;max-height:180px;border:1px solid var(--border);border-radius:3px;object-fit:contain">
</div>
<!-- ─── OCR 엔진 선택 ─── -->
<div class="engine-selector">
<div class="sec-label">OCR 엔진</div>
<div class="engine-btns">
<button class="engine-btn active" data-engine="paddle">
<span class="e-icon">🐾</span>
<span class="e-name">PaddleOCR</span>
<span class="e-desc">로컬 실행 · 표 구조 분석<br>PP-Structure 지원</span>
</button>
<button class="engine-btn" data-engine="ollama">
<span class="e-icon">🦙</span>
<span class="e-name">Ollama Vision</span>
<span class="e-desc">기존 Ollama 서버 사용<br>자연어 지시 가능</span>
</button>
</div>
</div>
<!-- ─── Ollama 전용 옵션 ─── -->
<div id="ollama-options">
<div class="sec-label" style="margin-bottom:7px">모델 선택</div>
<div class="ollama-model-grid">
<div class="model-card active" data-model="granite3.2-vision">
<div class="mc-name">granite3.2-vision</div>
<div class="mc-size">IBM · ~2GB</div>
<span class="mc-tag doc">문서/표 특화</span>
</div>
<div class="model-card" data-model="deepseek-ocr:3b">
<div class="mc-name">deepseek-ocr:3b</div>
<div class="mc-size">DeepSeek · ~2GB</div>
<span class="mc-tag ocr">OCR 전용</span>
</div>
<div class="model-card" data-model="llama3.2-vision:11b">
<div class="mc-name">llama3.2-vision:11b</div>
<div class="mc-size">Meta · ~8GB</div>
<span class="mc-tag gen">범용 고정확도</span>
</div>
<div class="model-card" data-model="richardyoung/olmocr2:7b-q8">
<div class="mc-name">olmocr2:7b-q8</div>
<div class="mc-size">AllenAI · ~9GB</div>
<span class="mc-tag best">최고 정확도</span>
</div>
</div>
<!-- 커스텀 프롬프트 -->
<div class="cprompt-toggle" id="cprompt-toggle">
<span id="cprompt-arrow"></span> 커스텀 프롬프트 직접 입력
</div>
<div id="custom-prompt-wrap">
<textarea class="cprompt" id="custom-prompt" placeholder="예: 이 영수증에서 품목명과 금액만 표 형식으로 추출해줘"></textarea>
<div style="font-family:var(--mono);font-size:.6rem;color:var(--muted);margin-top:4px">비워두면 인식 모드에 맞는 기본 프롬프트가 사용됩니다</div>
</div>
</div>
<!-- ─── 인식 모드 ─── -->
<div class="mode-toggle">
<div class="sec-label">인식 모드</div>
<div class="mode-btns">
<button class="mode-btn active" data-mode="text">📄 텍스트 추출</button>
<button class="mode-btn" data-mode="structure">📊 표 구조 분석</button>
</div>
<div id="mode-desc" style="margin-top:6px;font-family:var(--mono);font-size:.62rem;color:var(--muted);line-height:1.6">
일반 텍스트와 글자를 인식합니다
</div>
</div>
<!-- ─── PaddleOCR 언어 (Paddle 전용 표시) ─── -->
<div id="paddle-lang-wrap" class="options" style="grid-template-columns:1fr">
<div class="opt-item"><label>OCR 언어</label>
<select id="ocr-lang"><option value="korean">한국어</option><option value="en">English</option><option value="japan">日本語</option><option value="chinese_cht">中文 (繁)</option><option value="ch">中文 (簡)</option></select>
</div>
</div>
<button class="btn-start green" id="ocr-btn" disabled>인식 시작</button>
<div class="prog-box" id="ocr-prog">
<div class="prog-header"><span class="prog-msg" id="ocr-pmsg">처리 중...</span><span class="prog-pct" id="ocr-ppct" style="color:var(--accent)">0%</span></div>
<div class="prog-track"><div class="prog-fill" id="ocr-pfill" style="background:var(--accent)"></div></div>
<div class="waveform" id="ocr-wave" style="display:none">
<div class="wave-bar" style="background:var(--accent)"></div><div class="wave-bar" style="background:var(--accent)"></div><div class="wave-bar" style="background:var(--accent)"></div><div class="wave-bar" style="background:var(--accent)"></div><div class="wave-bar" style="background:var(--accent)"></div><div class="wave-bar" style="background:var(--accent)"></div><div class="wave-bar" style="background:var(--accent)"></div><div class="wave-bar" style="background:var(--accent)"></div><div class="wave-bar" style="background:var(--accent)"></div>
</div>
</div>
<div class="err-box" id="ocr-err"></div>
</section>
<!-- 결과 -->
<section class="panel">
<div class="panel-title">인식 결과</div>
<div class="result-meta" id="ocr-meta">
<div class="meta-chip">줄 수 <span id="ocr-mlines"></span></div>
<div class="meta-chip">모드 <span id="ocr-mmode"></span></div>
<div class="meta-chip" id="ocr-mbackend-chip">엔진 <span id="ocr-mbackend"></span></div>
<div class="meta-chip"><span id="ocr-mtables"></span></div>
</div>
<div class="result-tabs" id="ocr-tabs">
<button class="tab-btn active" data-tab="ocr-text">전체 텍스트</button>
<button class="tab-btn" data-tab="ocr-lines">줄별 신뢰도</button>
<button class="tab-btn" data-tab="ocr-tables">표 뷰어</button>
</div>
<div class="tab-content active" id="ocr-text">
<div class="empty-state" id="ocr-empty"><div class="empty-icon">🔍</div><div class="empty-text">이미지 업로드 후<br>인식을 시작하면<br>결과가 표시됩니다</div></div>
<textarea class="result-textarea" id="ocr-result" style="display:none" readonly></textarea>
</div>
<div class="tab-content" id="ocr-lines"><div class="lines-list" id="ocr-linelist"></div></div>
<div class="tab-content" id="ocr-tables">
<div id="ocr-tablelist" style="overflow-y:auto;max-height:480px"></div>
<div class="empty-state" id="ocr-tableempty"><div class="empty-icon">📊</div><div class="empty-text">표 구조 분석 모드를<br>선택하면 표를<br>추출할 수 있습니다</div></div>
</div>
<div class="result-actions" id="ocr-actions">
<button class="btn-act" id="ocr-copy">복사</button>
<button class="btn-act primary" id="ocr-dl-txt">TXT 저장</button>
<button class="btn-act excel" id="ocr-dl-xlsx" style="display:none">Excel 저장</button>
<button class="btn-act" id="ocr-new">새 파일</button>
</div>
</section>
</div>
</div>
<script>
// ════════════════════════════════════════════════════════════
// STATE
// ════════════════════════════════════════════════════════════
// Bearer token restored from localStorage ('vs_token'); null when nothing is stored.
let token = localStorage.getItem('vs_token') || null;
// STT page state: selected upload, name of the server-side output file, current task id.
let sttFile=null,sttOutputFile=null,sttTaskId=null;
// OCR page state: selected image, TXT/XLSX output names, current task id.
let ocrFile=null,ocrOutputTxt=null,ocrOutputXlsx=null,ocrTaskId=null;
// Current OCR selections sent with each request (backend, mode, Ollama model).
let ocrEngine='paddle', ocrMode='text', ocrModel='granite3.2-vision';
// ════════════════════════════════════════════════════════════
// AUTH
// ════════════════════════════════════════════════════════════
// Validate the stored token via GET /api/me.
// Shows the login overlay when there is no token, the server rejects it,
// or the request/JSON parsing fails; otherwise fills in the user name.
async function checkAuth(){
  if(!token){
    showLogin();
    return;
  }
  try{
    const res=await api('GET','/api/me');
    if(!res.ok){
      showLogin();
      return;
    }
    const me=await res.json();
    document.getElementById('user-name').textContent=me.username;
    hideLogin();
  }catch{
    showLogin();
  }
}
// Toggle the full-screen login overlay.
const showLogin=()=>document.getElementById('login-overlay').style.display='flex';
const hideLogin=()=>document.getElementById('login-overlay').style.display='none';
// Submit the login form on button click or when Enter is pressed in the password field.
document.getElementById('btn-login').addEventListener('click',doLogin);
document.getElementById('inp-pass').addEventListener('keydown',e=>{if(e.key==='Enter')doLogin()});
// POST the credentials to /api/login as form data.
// On success the returned access token is kept in memory and in localStorage
// and the overlay is hidden; on failure a message is shown in #login-err.
async function doLogin(){
  const user=document.getElementById('inp-user').value.trim();
  const pass=document.getElementById('inp-pass').value;
  const errEl=document.getElementById('login-err');
  const fail=msg=>{
    errEl.style.display='block';
    errEl.textContent=msg;
  };
  errEl.style.display='none';
  if(!user||!pass){
    fail('아이디와 비밀번호를 입력하세요');
    return;
  }
  const form=new FormData();
  form.append('username',user);
  form.append('password',pass);
  try{
    const res=await fetch('/api/login',{method:'POST',body:form});
    const data=await res.json();
    if(!res.ok){
      fail(data.detail||'로그인 실패');
      return;
    }
    token=data.access_token;
    localStorage.setItem('vs_token',token);
    document.getElementById('user-name').textContent=user;
    hideLogin();
  }catch{
    // Network error or a non-JSON response body.
    fail('서버 연결 실패');
  }
}
// Logout: forget the token (memory + localStorage), clear the password field
// and bring the login overlay back.
document.getElementById('btn-logout').addEventListener('click',function(){
  token=null;
  localStorage.removeItem('vs_token');
  showLogin();
  document.getElementById('inp-pass').value='';
});
// fetch() wrapper that attaches the current bearer token.
// `body` is optional and is only set on the request when truthy.
const api=(method,url,body)=>{
  const opts={
    method,
    headers:{Authorization:'Bearer '+(token||'')}
  };
  if(body){
    opts.body=body;
  }
  return fetch(url,opts);
};
// ════════════════════════════════════════════════════════════
// NAV TABS
// ════════════════════════════════════════════════════════════
// Top navigation: activate the clicked tab and the page whose id is
// "page-" + the tab's data-page attribute.
document.querySelectorAll('.nav-tab').forEach(tab=>{
  tab.addEventListener('click',()=>{
    for(const other of document.querySelectorAll('.nav-tab')){
      other.classList.remove('active');
    }
    for(const page of document.querySelectorAll('.page')){
      page.classList.remove('active');
    }
    tab.classList.add('active');
    document.getElementById('page-'+tab.dataset.page).classList.add('active');
  });
});
// ════════════════════════════════════════════════════════════
// STT
// ════════════════════════════════════════════════════════════
// STT dropzone: file picker + drag-and-drop both hand the first file to setSttFile.
const sttDrop=document.getElementById('stt-drop');
const sttInput=document.getElementById('stt-input');
sttInput.addEventListener('change',()=>setSttFile(sttInput.files[0]));
sttDrop.addEventListener('dragover',ev=>{
  ev.preventDefault();
  sttDrop.classList.add('dragover');
});
sttDrop.addEventListener('dragleave',()=>sttDrop.classList.remove('dragover'));
sttDrop.addEventListener('drop',ev=>{
  ev.preventDefault();
  sttDrop.classList.remove('dragover');
  setSttFile(ev.dataTransfer.files[0]);
});
// Remember the chosen audio/video file, show its info, enable the start
// button and clear any previous error. Does nothing when no file is given.
function setSttFile(f){
  if(f){
    sttFile=f;
    showFileInfo('stt',f);
    document.getElementById('stt-btn').disabled=false;
    document.getElementById('stt-err').style.display='none';
  }
}
// Start transcription: upload the file to /api/transcribe, then poll the
// returned task id until it succeeds or fails.
// NOTE(review): the #stt-lang / #stt-fmt selections are not part of the
// request built here — confirm whether that is intended.
document.getElementById('stt-btn').addEventListener('click',async()=>{
  if(!sttFile)return;
  const errBox=document.getElementById('stt-err');
  const fail=msg=>{
    errBox.style.display='block';
    errBox.textContent='⚠ '+msg;
    setSttLoading(false);
  };
  errBox.style.display='none';
  setSttLoading(true);
  const form=new FormData();
  form.append('file',sttFile);
  try{
    const res=await api('POST','/api/transcribe',form);
    const body=await res.json();
    if(!res.ok)throw new Error(body.detail||'업로드 실패');
    sttTaskId=body.task_id;
    pollTask(
      sttTaskId,
      st=>setProg('stt',st.progress||0,st.message||'처리 중...',false),
      showSttResult,
      fail
    );
  }catch(e){
    fail(e.message);
  }
});
// Switch the STT panel in/out of its busy state: the start button is
// disabled and the progress box shown (reset to 0%) while `on` is truthy.
function setSttLoading(on){
  const progBox=document.getElementById('stt-prog');
  document.getElementById('stt-btn').disabled=on;
  if(on){
    progBox.style.display='block';
    setProg('stt',0,'준비 중...',false);
  }else{
    progBox.style.display='none';
  }
}
// Render a finished transcription: meta chips, full text, and one row per
// timestamped segment. `d` is the task status payload (reads output_file,
// language, duration, segments, text).
function showSttResult(d){
  const el=id=>document.getElementById(id);
  const segs=d.segments||[];
  sttOutputFile=d.output_file;
  el('stt-mlang').textContent=(d.language||'').toUpperCase();
  el('stt-mdur').textContent=fmtDur(d.duration);
  el('stt-msegs').textContent=segs.length+'개';
  el('stt-meta').style.display='flex';
  el('stt-tabs').style.display='flex';
  el('stt-empty').style.display='none';
  const out=el('stt-result');
  out.style.display='block';
  out.value=d.text||'';
  const list=el('stt-seglist');
  list.innerHTML='';
  for(const seg of segs){
    const item=document.createElement('div');
    item.className='seg-item';
    // Segment text is escaped before being placed into markup.
    item.innerHTML=`<div class="seg-time">${fmtTime(seg.start)}<br>→ ${fmtTime(seg.end)}</div><div class="seg-text">${esc(seg.text)}</div>`;
    list.appendChild(item);
  }
  el('stt-actions').style.display='flex';
  setSttLoading(false);
}
// STT result actions: copy the text, download the generated TXT, or start over.
document.getElementById('stt-copy').addEventListener('click',()=>copyText(document.getElementById('stt-result').value,document.getElementById('stt-copy')));
document.getElementById('stt-dl').addEventListener('click',()=>dlFile(sttOutputFile));
document.getElementById('stt-new').addEventListener('click',resetStt);
// Return the STT page to its initial state: no file, no result, first tab active.
function resetStt(){
  const el=id=>document.getElementById(id);
  sttFile=null;
  sttInput.value='';
  sttOutputFile=null;
  const hidden=['stt-info','stt-prog','stt-err','stt-meta','stt-tabs','stt-result','stt-actions'];
  for(const id of hidden){
    el(id).style.display='none';
  }
  el('stt-btn').disabled=true;
  el('stt-empty').style.display='flex';
  el('stt-result').value='';
  el('stt-seglist').innerHTML='';
  resetTabs('stt-tabs');
}
// ════════════════════════════════════════════════════════════
// OCR — ENGINE SELECTOR
// ════════════════════════════════════════════════════════════
// OCR engine selector: marks the clicked engine active, toggles the
// Ollama-only options vs. the PaddleOCR language picker, and recolours the
// start button.
document.querySelectorAll('.engine-btn').forEach(btn=>{
  btn.addEventListener('click',()=>{
    document.querySelectorAll('.engine-btn').forEach(b=>b.classList.remove('active'));
    btn.classList.add('active');
    ocrEngine=btn.dataset.engine;
    const useOllama=ocrEngine==='ollama';
    const ocrBtn=document.getElementById('ocr-btn');
    document.getElementById('ollama-options').classList.toggle('visible',useOllama);
    document.getElementById('paddle-lang-wrap').style.display=useOllama?'none':'grid';
    ocrBtn.className=useOllama?'btn-start purple':'btn-start green';
    if(useOllama)ocrBtn.style.background='';
    // setOcrLoading() shows #ocr-prog ('block') only while a job is running.
    // Do not re-enable the start button in that case, otherwise switching
    // engines mid-job would allow a second, overlapping submission.
    const busy=document.getElementById('ocr-prog').style.display==='block';
    if(ocrFile&&!busy)ocrBtn.disabled=false;
  });
});
// ─── Ollama 모델 카드 ───────────────────────────────────────
// Ollama model cards: single-select; the chosen card's data-model becomes ocrModel.
document.querySelectorAll('.model-card').forEach(card=>{
  card.addEventListener('click',()=>{
    for(const other of document.querySelectorAll('.model-card')){
      other.classList.remove('active');
    }
    card.classList.add('active');
    ocrModel=card.dataset.model;
  });
});
// ─── 커스텀 프롬프트 토글 ───────────────────────────────────
// Expand/collapse the custom prompt editor and flip the arrow glyph to match.
document.getElementById('cprompt-toggle').addEventListener('click',()=>{
  const isOpen=document.getElementById('custom-prompt-wrap').classList.toggle('visible');
  let glyph;
  if(isOpen){
    glyph='▼';
  }else{
    glyph='▶';
  }
  document.getElementById('cprompt-arrow').textContent=glyph;
});
// ─── 인식 모드 ─────────────────────────────────────────────
// Help text shown under the recognition-mode buttons, keyed by data-mode.
const modeDescs={
  text:'일반 텍스트와 글자를 인식합니다',
  structure:'표 구조를 감지하고 Excel로 저장합니다'
};
// Recognition mode buttons: single-select; updates ocrMode and the help text.
document.querySelectorAll('.mode-btn').forEach(choice=>{
  choice.addEventListener('click',()=>{
    for(const other of document.querySelectorAll('.mode-btn')){
      other.classList.remove('active');
    }
    choice.classList.add('active');
    ocrMode=choice.dataset.mode;
    document.getElementById('mode-desc').textContent=modeDescs[ocrMode]||'';
  });
});
// ════════════════════════════════════════════════════════════
// OCR — FILE & RUN
// ════════════════════════════════════════════════════════════
// OCR dropzone: file picker + drag-and-drop both hand the first file to setOcrFile.
const ocrDrop=document.getElementById('ocr-drop');
const ocrInput=document.getElementById('ocr-input');
ocrInput.addEventListener('change',()=>setOcrFile(ocrInput.files[0]));
ocrDrop.addEventListener('dragover',ev=>{
  ev.preventDefault();
  ocrDrop.classList.add('dragover');
});
ocrDrop.addEventListener('dragleave',()=>ocrDrop.classList.remove('dragover'));
ocrDrop.addEventListener('drop',ev=>{
  ev.preventDefault();
  ocrDrop.classList.remove('dragover');
  setOcrFile(ev.dataTransfer.files[0]);
});
// Remember the chosen image, show its info and a preview, enable the start
// button and clear any previous error. Does nothing when no file is given.
function setOcrFile(f){
  if(!f)return;
  ocrFile=f;
  showFileInfo('ocr',f);
  document.getElementById('ocr-btn').disabled=false;
  document.getElementById('ocr-err').style.display='none';
  const preview=document.getElementById('ocr-preview');
  const wrap=document.getElementById('ocr-preview-wrap');
  const previous=preview.src;
  preview.src=URL.createObjectURL(f);
  // Release the blob URL of the previously previewed file; object URLs stay
  // alive (and keep the file data referenced) until explicitly revoked.
  if(previous&&previous.startsWith('blob:'))URL.revokeObjectURL(previous);
  wrap.style.display='block';
}
// Start OCR: upload the image plus the current mode/backend/model/prompt to
// /api/ocr, then poll the returned task id until it succeeds or fails.
// NOTE(review): the #ocr-lang selection is not part of the request built
// here — confirm whether that is intended.
document.getElementById('ocr-btn').addEventListener('click',async()=>{
  if(!ocrFile)return;
  const errBox=document.getElementById('ocr-err');
  errBox.style.display='none';
  const isOllama=ocrEngine==='ollama';
  const fail=msg=>{
    errBox.style.display='block';
    errBox.textContent='⚠ '+msg;
    setOcrLoading(false,isOllama);
  };
  setOcrLoading(true,isOllama);
  const form=new FormData();
  const fields=[
    ['file',ocrFile],
    ['mode',ocrMode],
    ['backend',ocrEngine],
    ['ollama_model',ocrModel],
    ['custom_prompt',document.getElementById('custom-prompt').value||'']
  ];
  for(const [key,value] of fields){
    form.append(key,value);
  }
  try{
    const res=await api('POST','/api/ocr',form);
    const body=await res.json();
    if(!res.ok)throw new Error(body.detail||'업로드 실패');
    ocrTaskId=body.task_id;
    pollTask(
      ocrTaskId,
      st=>setProg('ocr',st.progress||0,st.message||'처리 중...',isOllama),
      showOcrResult,
      fail
    );
  }catch(e){
    fail(e.message);
  }
});
// Switch the OCR panel in/out of its busy state and tint the progress
// widgets purple for Ollama or accent-green otherwise.
function setOcrLoading(on,isOllama=false){
  const el=id=>document.getElementById(id);
  const tint=isOllama?'var(--purple)':'var(--accent)';
  el('ocr-btn').disabled=on;
  el('ocr-prog').style.display=on?'block':'none';
  el('ocr-wave').style.display=on?'flex':'none';
  el('ocr-pfill').style.background=tint;
  el('ocr-ppct').style.color=tint;
  for(const bar of document.querySelectorAll('#ocr-wave .wave-bar')){
    bar.style.background=tint;
  }
  if(on){
    setProg('ocr',0,'준비 중...',isOllama);
  }
}
// Render a finished OCR job: meta chips, full text, per-line confidence list
// and the extracted tables. `d` is the task status payload (reads txt_file,
// xlsx_file, backend, ollama_model, line_count, mode, full_text, lines, tables).
function showOcrResult(d){
  const el=id=>document.getElementById(id);
  ocrOutputTxt=d.txt_file||null;
  ocrOutputXlsx=d.xlsx_file||null;
  const isOllama=d.backend==='ollama';
  const color=isOllama?'var(--purple)':'var(--accent)';
  const tables=d.tables||[];

  el('ocr-mlines').textContent=(d.line_count||0)+'줄';
  el('ocr-mmode').textContent=d.mode==='structure'?'구조분석':'텍스트';
  el('ocr-mbackend').textContent=isOllama?`Ollama·${d.ollama_model||''}`:'PaddleOCR';
  el('ocr-mbackend').style.color=color;
  el('ocr-mtables').textContent=tables.length+'개';
  el('ocr-meta').style.display='flex';
  el('ocr-tabs').style.display='flex';
  el('ocr-empty').style.display='none';
  el('ocr-result').style.display='block';
  el('ocr-result').value=d.full_text||'';

  // Sync tab button colour with the engine. The handler is assigned via
  // `onclick` so each new result REPLACES the previous handler; using
  // addEventListener here stacked one more listener (with a stale colour)
  // on every button each time a result was shown.
  document.querySelectorAll('#ocr-tabs .tab-btn').forEach(b=>{
    b.style.setProperty('--tw-color',color);
    b.onclick=()=>{if(b.classList.contains('active'))b.style.color=color};
  });

  // Per-line confidence list. Ollama results carry no numeric confidence,
  // so they are labelled "AI" instead of a percentage.
  const lineList=el('ocr-linelist');
  lineList.innerHTML='';
  (d.lines||[]).forEach(line=>{
    const conf=line.confidence||0;
    const cls=conf>=0.9?'high':conf>=0.7?'mid':'low';
    const confLabel=isOllama?'AI':Math.round(conf*100)+'%';
    const row=document.createElement('div');
    row.className='line-item';
    row.innerHTML=`<div class="line-conf ${cls}">${confLabel}</div><div class="line-text">${esc(line.text)}</div>`;
    lineList.appendChild(row);
  });

  // Table viewer.
  const tableList=el('ocr-tablelist');
  const tableEmpty=el('ocr-tableempty');
  tableList.innerHTML='';
  if(tables.length===0){
    tableEmpty.style.display='flex';
  }else{
    tableEmpty.style.display='none';
    tables.forEach((t,i)=>{
      const wrap=document.createElement('div');
      // Separator and row unit added: the index used to run straight into the
      // row count (table 1 with 2 rows rendered as "표 12× ...").
      wrap.innerHTML=`<div class="table-title">표 ${i+1} · ${t.rows||0}행 × ${t.cols||0}열</div><div class="table-wrapper">${toStyledTable(t.html||'')}</div>`;
      tableList.appendChild(wrap);
    });
  }

  el('ocr-actions').style.display='flex';
  el('ocr-dl-xlsx').style.display=ocrOutputXlsx?'inline-flex':'none';
  // setOcrLoading(false, …) already hides the progress box and the waveform.
  setOcrLoading(false,isOllama);
}
// Prepare server-provided table HTML for insertion via innerHTML.
// The markup originates from OCR / vision-model output over user images, so
// it is treated as untrusted: it is parsed into an inert <template>, reduced
// to an allow-list of table/inline tags, stripped of every attribute except
// colspan/rowspan, and only then given the "ocr-table" class.
// Returns the sanitized HTML string.
function toStyledTable(html){
  const ALLOWED_TAGS=new Set(['TABLE','THEAD','TBODY','TFOOT','TR','TH','TD','CAPTION','COLGROUP','COL','BR','B','I','U','EM','STRONG','SPAN','P','DIV','SUB','SUP']);
  const DROPPED_TAGS=new Set(['SCRIPT','STYLE','IFRAME','OBJECT','EMBED','TEMPLATE']);
  const ALLOWED_ATTRS=new Set(['colspan','rowspan']);
  const tpl=document.createElement('template');
  tpl.innerHTML=String(html||'');
  // The NodeList is static; nodes detached by an earlier replacement are
  // simply skipped because they no longer have a parent.
  tpl.content.querySelectorAll('*').forEach(node=>{
    if(!node.parentNode)return;
    if(DROPPED_TAGS.has(node.tagName)){
      node.remove();
      return;
    }
    if(!ALLOWED_TAGS.has(node.tagName)){
      // Keep the visible text of unknown elements, drop the element itself.
      node.replaceWith(document.createTextNode(node.textContent));
      return;
    }
    for(const attr of Array.from(node.attributes)){
      if(!ALLOWED_ATTRS.has(attr.name.toLowerCase()))node.removeAttribute(attr.name);
    }
  });
  tpl.content.querySelectorAll('table').forEach(t=>t.classList.add('ocr-table'));
  return tpl.innerHTML;
}
// OCR result actions: copy the text, download the TXT/XLSX outputs, or start over.
document.getElementById('ocr-copy').addEventListener('click',()=>copyText(document.getElementById('ocr-result').value,document.getElementById('ocr-copy')));
document.getElementById('ocr-dl-txt').addEventListener('click',()=>dlFile(ocrOutputTxt));
document.getElementById('ocr-dl-xlsx').addEventListener('click',()=>dlFile(ocrOutputXlsx));
document.getElementById('ocr-new').addEventListener('click',resetOcr);
// Return the OCR page to its initial state: no file, no preview, no result,
// first result tab active.
function resetOcr(){
  const el=id=>document.getElementById(id);
  ocrFile=null;
  ocrInput.value='';
  ocrOutputTxt=null;
  ocrOutputXlsx=null;
  // Release the preview's blob URL; hiding the wrapper alone would keep the
  // image data referenced until the page is closed.
  const preview=el('ocr-preview');
  if(preview.src&&preview.src.startsWith('blob:'))URL.revokeObjectURL(preview.src);
  preview.removeAttribute('src');
  const hidden=['ocr-info','ocr-preview-wrap','ocr-prog','ocr-wave','ocr-err','ocr-meta','ocr-tabs','ocr-result','ocr-actions'];
  for(const id of hidden){
    el(id).style.display='none';
  }
  el('ocr-btn').disabled=true;
  el('ocr-empty').style.display='flex';
  el('ocr-result').value='';
  el('ocr-linelist').innerHTML='';
  el('ocr-tablelist').innerHTML='';
  resetTabs('ocr-tabs');
}
// ════════════════════════════════════════════════════════════
// RESULT TABS
// ════════════════════════════════════════════════════════════
// Delegated handler for every result tab bar: activates the clicked
// .tab-btn and the tab-content whose id equals the button's data-tab,
// scoped to the enclosing .panel.
document.addEventListener('click',ev=>{
  const clicked=ev.target;
  if(!clicked.classList.contains('tab-btn'))return;
  const bar=clicked.closest('.result-tabs');
  for(const b of bar.querySelectorAll('.tab-btn')){
    b.classList.remove('active');
  }
  clicked.classList.add('active');
  const panel=bar.closest('.panel');
  for(const pane of panel.querySelectorAll('.tab-content')){
    pane.classList.remove('active');
  }
  const target=document.getElementById(clicked.dataset.tab);
  if(target){
    target.classList.add('active');
  }
});
// Make the first tab button and the first tab-content of the tab bar `id`
// active and deactivate all others. No-op when the bar does not exist.
function resetTabs(id){
  const bar=document.getElementById(id);
  if(!bar)return;
  const firstOnly=(node,index)=>node.classList.toggle('active',index===0);
  bar.querySelectorAll('.tab-btn').forEach(firstOnly);
  bar.closest('.panel').querySelectorAll('.tab-content').forEach(firstOnly);
}
// ════════════════════════════════════════════════════════════
// POLLING / UTILS
// ════════════════════════════════════════════════════════════
// Poll /api/status/<taskId> every 1.5 s until the task reaches a final state.
//   onProgress(d) - called with every status payload
//   onSuccess(d)  - called once when d.state === 'success'
//   onError(msg)  - called once on d.state === 'failure', on an expired
//                   session (401), or after repeated request failures
function pollTask(taskId,onProgress,onSuccess,onError){
  // Give up after this many consecutive failed polls instead of polling
  // forever with the UI stuck in its loading state.
  const MAX_FAILURES=5;
  let failures=0;
  let inFlight=false;
  const timer=setInterval(async()=>{
    // Skip this tick if the previous request has not finished yet, so slow
    // responses cannot pile up or arrive out of order.
    if(inFlight)return;
    inFlight=true;
    try{
      const r=await api('GET','/api/status/'+encodeURIComponent(taskId));
      if(r.status===401){
        clearInterval(timer);
        showLogin();
        // Also release the caller's loading state.
        onError('인증이 만료되었습니다. 다시 로그인하세요');
        return;
      }
      const d=await r.json();
      // An error response without a task state is a failed poll, not progress.
      if(!r.ok&&!d.state)throw new Error(d.detail||('HTTP '+r.status));
      failures=0;
      onProgress(d);
      if(d.state==='success'){clearInterval(timer);onSuccess(d)}
      else if(d.state==='failure'){clearInterval(timer);onError(d.message||'실패')}
    }catch(e){
      failures+=1;
      if(failures>=MAX_FAILURES){
        clearInterval(timer);
        onError('상태 확인 실패: '+((e&&e.message)||'서버 연결 실패'));
      }
    }finally{
      inFlight=false;
    }
  },1500);
}
// Download an output file through the authenticated API and hand it to the
// browser as a file named `fn`. Does nothing when `fn` is empty.
async function dlFile(fn){
  if(!fn)return;
  try{
    // Encode the name so characters such as '#', '?' or spaces cannot alter the URL.
    const r=await api('GET','/api/download/'+encodeURIComponent(fn));
    if(r.status===401){showLogin();return}
    if(!r.ok){console.error('download failed: HTTP '+r.status);return}
    const blob=await r.blob();
    const url=URL.createObjectURL(blob);
    const a=document.createElement('a');
    a.href=url;
    a.download=fn;
    a.style.display='none';
    // Attach the anchor before clicking; some browsers ignore clicks on detached links.
    document.body.appendChild(a);
    a.click();
    a.remove();
    // Revoke later: revoking synchronously can cancel the download before it starts.
    setTimeout(()=>URL.revokeObjectURL(url),1000);
  }catch(e){
    console.error('download failed',e);
  }
}
// Update the progress bar, message and percentage label of the widget whose
// element ids start with `prefix` ('stt' or 'ocr'). `purple` is accepted
// but not used by this function.
function setProg(prefix,pct,msg,purple=false){
  const el=suffix=>document.getElementById(prefix+suffix);
  const pctText=pct+'%';
  el('-pfill').style.width=pctText;
  el('-pmsg').textContent=msg;
  el('-ppct').textContent=pctText;
}
// Show the file-info box for prefix `p` ('stt' or 'ocr') with the name and
// human-readable size of file `f`.
function showFileInfo(p,f){
  const el=suffix=>document.getElementById(p+suffix);
  el('-info').style.display='block';
  el('-fname').textContent=f.name;
  el('-fsize').textContent=fmtBytes(f.size);
}
// Format a byte count as "N B", "N.N KB" or "N.N MB" (1024-based).
function fmtBytes(b){
  const KIB=1024;
  const MIB=1048576;
  let text;
  if(b<KIB){
    text=b+' B';
  }else if(b<MIB){
    text=(b/KIB).toFixed(1)+' KB';
  }else{
    text=(b/MIB).toFixed(1)+' MB';
  }
  return text;
}
// Format a duration in seconds as "M분 S초"; falsy input yields an em dash.
function fmtDur(s){
  if(s){
    const mins=Math.floor(s/60);
    const secs=Math.floor(s%60);
    return mins+'분 '+secs+'초';
  }
  return '—';
}
// Format seconds as zero-padded "MM:SS" (minutes are not wrapped at 60).
function fmtTime(s){
  const two=n=>String(n).padStart(2,'0');
  const mins=Math.floor(s/60);
  const secs=Math.floor(s%60);
  return two(mins)+':'+two(secs);
}
// HTML-escape a value for insertion into markup.
// Escapes & < > and both quote characters, so the result is also safe inside
// attribute values. null/undefined become ''; other values (including 0 and
// false) are stringified rather than dropped.
function esc(s){
  if(s===null||s===undefined)return '';
  const ENTITIES={'&':'&amp;','<':'&lt;','>':'&gt;','"':'&quot;',"'":'&#39;'};
  return String(s).replace(/[&<>"']/g,ch=>ENTITIES[ch]);
}
// Copy `text` to the clipboard and briefly show a confirmation on `btn`.
// navigator.clipboard only exists in secure contexts (HTTPS / localhost), so
// when it is unavailable or rejects, fall back to a hidden textarea and
// document.execCommand('copy'). Nothing is shown when copying fails.
async function copyText(text,btn){
  let copied=false;
  try{
    if(navigator.clipboard&&window.isSecureContext){
      await navigator.clipboard.writeText(text);
      copied=true;
    }
  }catch{
    copied=false;
  }
  if(!copied){
    const ta=document.createElement('textarea');
    ta.value=text;
    ta.setAttribute('readonly','');
    ta.style.position='fixed';
    ta.style.opacity='0';
    document.body.appendChild(ta);
    ta.select();
    try{copied=document.execCommand('copy')}catch{copied=false}
    ta.remove();
  }
  if(!copied)return;
  // Remember the real label once, so a second click during the 1.5 s
  // confirmation does not capture '복사됨 ✓' as the text to restore.
  if(btn.dataset.label===undefined)btn.dataset.label=btn.textContent;
  btn.textContent='복사됨 ✓';
  setTimeout(()=>{btn.textContent=btn.dataset.label},1500);
}
// ════════════════════════════════════════════════════════════
// Entry point: validate any stored token and show or hide the login overlay.
checkAuth();
</script>
</body>
</html>

97
app/tasks.py Normal file
View File

@@ -0,0 +1,97 @@
import os
from celery import Celery
# ocr_task를 직접 import해서 worker에 등록
from ocr_tasks import ocr_task # noqa: F401
# Runtime configuration, read once from the environment at import time.
REDIS_URL = os.getenv("REDIS_URL", "redis://redis:6379/0")
MODEL_SIZE = os.getenv("WHISPER_MODEL", "medium")
DEVICE = os.getenv("WHISPER_DEVICE", "cpu")
COMPUTE_TYPE = os.getenv("WHISPER_COMPUTE_TYPE", "int8")
# An empty WHISPER_LANGUAGE becomes None.
LANGUAGE = os.getenv("WHISPER_LANGUAGE", "ko") or None
BEAM_SIZE = int(os.getenv("WHISPER_BEAM_SIZE", "5"))
# An unset or empty WHISPER_INITIAL_PROMPT becomes None.
INITIAL_PROMPT = os.getenv("WHISPER_INITIAL_PROMPT", "") or None
OUTPUT_DIR = os.getenv("OUTPUT_DIR", "/data/outputs")
# Celery application; the same Redis URL serves as broker and result backend.
celery_app = Celery("whisper_tasks", broker=REDIS_URL, backend=REDIS_URL)
celery_app.conf.update(
task_serializer="json",
result_serializer="json",
accept_content=["json"],
task_track_started=True,
result_expires=3600,
)
# Module-level cache for the Whisper model, filled on first use by get_model().
_model = None
def get_model():
    """Return the cached Whisper model, creating it on the first call.

    The model is built from MODEL_SIZE, DEVICE and COMPUTE_TYPE and stored in
    the module-level ``_model`` so later calls reuse the same instance.
    """
    global _model
    if _model is not None:
        return _model

    # Imported lazily so the module can be imported without loading the library.
    from faster_whisper import WhisperModel

    print(f"[Whisper] 로딩: {MODEL_SIZE} / {DEVICE} / {COMPUTE_TYPE}")
    _model = WhisperModel(MODEL_SIZE, device=DEVICE, compute_type=COMPUTE_TYPE)
    print("[Whisper] 로드 완료")
    return _model
@celery_app.task(bind=True, name="tasks.transcribe_task", queue="stt")
def transcribe_task(self, file_id: str, audio_path: str):
    """Transcribe an audio file and write the result to ``OUTPUT_DIR``.

    Progress is reported through ``self.update_state`` with a ``progress``
    percentage and a ``message``.

    Args:
        file_id: Identifier used to name the output file (``<file_id>.txt``).
        audio_path: Path of the audio file to transcribe. The file is removed
            after a successful transcription.

    Returns:
        A dict with ``text``, ``segments`` (start/end/text), ``language``,
        ``duration`` and ``output_file``.

    Raises:
        Exception: Any failure, re-raised with a "변환 실패" prefix and the
            original exception attached as ``__cause__``.
    """
    self.update_state(state="PROGRESS", meta={"progress": 5, "message": "모델 준비 중..."})
    try:
        model = get_model()
        self.update_state(state="PROGRESS", meta={"progress": 15, "message": "오디오 분석 중..."})
        segments_gen, info = model.transcribe(
            audio_path,
            language=LANGUAGE,
            beam_size=BEAM_SIZE,
            initial_prompt=INITIAL_PROMPT,
            vad_filter=True,
            vad_parameters=dict(min_silence_duration_ms=500),
            word_timestamps=False,
        )
        self.update_state(state="PROGRESS", meta={"progress": 30, "message": "텍스트 변환 중..."})

        segments, full_text_parts = [], []
        duration = info.duration
        for seg in segments_gen:
            text = seg.text.strip()
            segments.append({"start": round(seg.start, 2),
                             "end": round(seg.end, 2),
                             "text": text})
            full_text_parts.append(text)
            if duration > 0:
                # Map the position in the audio onto the 30-90 % progress range.
                pct = 30 + int((seg.end / duration) * 60)
                self.update_state(
                    state="PROGRESS",
                    meta={"progress": min(pct, 90),
                          "message": f"변환 중... {seg.end:.0f}s / {duration:.0f}s"},
                )

        full_text = "\n".join(full_text_parts)
        self.update_state(state="PROGRESS", meta={"progress": 95, "message": "파일 저장 중..."})

        os.makedirs(OUTPUT_DIR, exist_ok=True)
        output_filename = f"{file_id}.txt"
        with open(os.path.join(OUTPUT_DIR, output_filename), "w", encoding="utf-8") as f:
            f.write(f"# 변환 결과\n# 언어: {info.language} | 재생 시간: {duration:.1f}\n\n")
            f.write("## 전체 텍스트\n\n" + full_text + "\n\n")
            f.write("## 타임스탬프별 세그먼트\n\n")
            # Start and end are separated by an arrow; previously the two
            # timestamps were written back to back with nothing between them.
            f.writelines(
                f"[{_fmt(seg['start'])} → {_fmt(seg['end'])}] {seg['text']}\n"
                for seg in segments
            )

        # Best-effort cleanup of the uploaded audio; a failed delete must not
        # fail an otherwise successful transcription.
        try:
            os.remove(audio_path)
        except OSError:
            pass

        return {"text": full_text, "segments": segments,
                "language": info.language, "duration": round(duration, 1),
                "output_file": output_filename}
    except Exception as e:
        # Chain the original exception so the real traceback is preserved.
        raise Exception(f"변환 실패: {str(e)}") from e
def _fmt(s):
    """Format a number of seconds as zero-padded ``MM:SS`` (fraction truncated)."""
    total = int(s)
    minutes = total // 60
    seconds = total % 60
    return f"{minutes:02d}:{seconds:02d}"