fix: --pool=solo SIGSEGV 해결 및 전체 설정 정리

2026-04-20 20:39:24 +09:00
commit 248ac1deea
13 changed files with 2979 additions and 0 deletions
--- a/app/Dockerfile
+++ b/app/Dockerfile
@@ -0,0 +1,34 @@
+# Slim Python 3.11 base image.
+FROM python:3.11-slim
+
+# OS-level packages; the apt package lists are deleted in the same layer.
+RUN apt-get update && apt-get install -y \
+    ffmpeg \
+    libsndfile1 \
+    libgomp1 \
+    libglib2.0-0 \
+    libsm6 \
+    libxext6 \
+    libxrender1 \
+    libgl1 \
+    libgles2 \
+    libegl1 \
+    wget \
+    curl \
+    && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /app
+
+COPY requirements.txt .
+
+# PaddlePaddle (CPU build) from the official PyPI index
+RUN pip install --no-cache-dir paddlepaddle==3.0.0
+
+# Remaining Python packages
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY . .
+
+# Directories for uploaded inputs and generated outputs
+RUN mkdir -p /data/uploads /data/outputs
+
+EXPOSE 8000
+
+# Serve the FastAPI application object `app` from main.py on port 8000.
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
--- a/app/auth.py
+++ b/app/auth.py
@@ -0,0 +1,146 @@
+"""
+인증 모듈 — 다중 사용자 JSON 파일 기반
+/data/users.json 에 사용자 정보 저장
+관리자(admin)는 환경변수 AUTH_USERNAME/AUTH_PASSWORD 기준으로 초기화
+"""
+import os, json, threading
+from pathlib import Path
+from datetime import datetime, timedelta
+
+from fastapi import Depends, HTTPException, status
+from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
+from jose import JWTError, jwt
+
+# JWT signing configuration.
+# NOTE(review): when JWT_SECRET is unset the hard-coded fallback below becomes
+# the signing key — confirm every deployment sets the variable.
+SECRET_KEY     = os.getenv("JWT_SECRET", "fallback-secret-change-this")
+ALGORITHM      = "HS256"
+EXPIRE_HOURS   = int(os.getenv("JWT_EXPIRE_HOURS", "12"))
+# Credentials of the built-in admin account (written to the store by init_users).
+# NOTE(review): a default password is used when AUTH_PASSWORD is unset.
+ADMIN_USERNAME = os.getenv("AUTH_USERNAME", "admin")
+ADMIN_PASSWORD = os.getenv("AUTH_PASSWORD", "changeme1234")
+
+# users.json is kept in the parent directory of UPLOAD_DIR (/data by default).
+DATA_DIR   = Path(os.getenv("UPLOAD_DIR", "/data/uploads")).parent
+USERS_FILE = DATA_DIR / "users.json"
+
+# Guards reads/writes of USERS_FILE between threads of this process.
+_lock = threading.Lock()
+# auto_error=False: a missing Authorization header yields None, so
+# require_auth can raise its own 401 response.
+bearer = HTTPBearer(auto_error=False)
+
+
+# ── 파일 I/O ───────────────────────────────────────────────────
+def _load() -> dict:
+    """Read the user store from USERS_FILE.
+
+    Returns:
+        The parsed JSON mapping (username -> user record), or an empty dict
+        when the file does not exist.
+
+    Raises:
+        json.JSONDecodeError: if the file exists but is not valid JSON.
+    """
+    # EAFP: open directly instead of exists() + open(), so a file that
+    # disappears between the two calls cannot raise FileNotFoundError.
+    try:
+        with open(USERS_FILE, "r", encoding="utf-8") as f:
+            return json.load(f)
+    except FileNotFoundError:
+        return {}
+
+def _save(users: dict):
+    """Persist the user store to USERS_FILE.
+
+    The JSON is written to a sibling temporary file first and then moved over
+    the real file with os.replace, so an interrupted write cannot leave a
+    truncated users.json behind.
+
+    Args:
+        users: Mapping of username -> user record to serialise.
+    """
+    USERS_FILE.parent.mkdir(parents=True, exist_ok=True)
+    tmp_path = USERS_FILE.with_name(USERS_FILE.name + ".tmp")
+    with open(tmp_path, "w", encoding="utf-8") as f:
+        json.dump(users, f, ensure_ascii=False, indent=2)
+    os.replace(tmp_path, USERS_FILE)
+
+
+# ── 초기화 (앱 시작 시 1회) ────────────────────────────────────
+def init_users():
+    """Create or overwrite the admin record in the user store.
+
+    The record stored under ADMIN_USERNAME is replaced on every call with the
+    current ADMIN_PASSWORD, role "admin" and both permissions enabled.
+    """
+    admin_record = {
+        "password":    ADMIN_PASSWORD,
+        "role":        "admin",
+        "permissions": {"stt": True, "ocr": True},
+    }
+    with _lock:
+        store = _load()
+        store[ADMIN_USERNAME] = admin_record
+        _save(store)
+
+
+# ── CRUD ──────────────────────────────────────────────────────
+def authenticate(username: str, password: str):
+    """Check a username/password pair against the user store.
+
+    Args:
+        username: Account name to look up.
+        password: Password supplied by the caller.
+
+    Returns:
+        ``{"username": username, **record}`` on success, ``None`` when the
+        user is unknown, has no usable stored password, or the password
+        does not match.
+    """
+    import hmac  # local import: only needed for the constant-time comparison
+
+    with _lock:
+        users = _load()
+    u = users.get(username)
+    if not u:
+        return None
+    stored = u.get("password")
+    if not isinstance(stored, str):
+        # A record without a string password can never authenticate.
+        return None
+    # compare_digest takes time independent of where the inputs differ;
+    # bytes are used because the str form only accepts ASCII.
+    if not hmac.compare_digest(stored.encode("utf-8"), password.encode("utf-8")):
+        return None
+    return {"username": username, **u}
+
+def get_user(username: str):
+    """Return the stored record for ``username``, or None when absent."""
+    with _lock:
+        store = _load()
+        return store.get(username)
+
+def list_users() -> dict:
+    """Return every user record with its "password" field left out."""
+    with _lock:
+        store = _load()
+    public = {}
+    for name, record in store.items():
+        public[name] = {field: value for field, value in record.items()
+                        if field != "password"}
+    return public
+
+def create_user(username: str, password: str, permissions: dict) -> tuple:
+    """Add a new account with role "user".
+
+    Returns:
+        ``(ok, message)`` — ``ok`` is False when the username already exists.
+    """
+    record = {"password": password, "role": "user",
+              "permissions": permissions}
+    with _lock:
+        store = _load()
+        if username in store:
+            return False, "이미 존재하는 사용자입니다"
+        store[username] = record
+        _save(store)
+    return True, "사용자가 생성되었습니다"
+
+def update_user(username: str, permissions: dict, password: str = None) -> tuple:
+    """Replace a user's permissions and, when given, their password.
+
+    The account named ADMIN_USERNAME is never modified.
+
+    Returns:
+        ``(ok, message)`` — ``ok`` is False for the admin account or an
+        unknown username.
+    """
+    if username == ADMIN_USERNAME:
+        return False, "기본 관리자 계정은 수정할 수 없습니다"
+    with _lock:
+        store = _load()
+        if username not in store:
+            return False, "사용자를 찾을 수 없습니다"
+        record = store[username]
+        record["permissions"] = permissions
+        # An empty or None password leaves the stored one untouched.
+        if password:
+            record["password"] = password
+        _save(store)
+    return True, "업데이트되었습니다"
+
+def delete_user(username: str) -> tuple:
+    """Remove a user from the store.
+
+    The account named ADMIN_USERNAME is never deleted.
+
+    Returns:
+        ``(ok, message)`` — ``ok`` is False for the admin account or an
+        unknown username.
+    """
+    if username == ADMIN_USERNAME:
+        return False, "기본 관리자 계정은 삭제할 수 없습니다"
+    with _lock:
+        store = _load()
+        if username not in store:
+            return False, "사용자를 찾을 수 없습니다"
+        store.pop(username)
+        _save(store)
+    return True, "삭제되었습니다"
+
+
+# ── JWT ───────────────────────────────────────────────────────
+def create_access_token(username: str) -> str:
+    """Issue a signed JWT for ``username``.
+
+    The token carries ``sub`` (the username) and ``exp`` (now + EXPIRE_HOURS)
+    and is signed with SECRET_KEY using ALGORITHM.
+    """
+    from datetime import timezone  # local import: the module only imports datetime/timedelta
+
+    # Timezone-aware "now" instead of the naive, deprecated datetime.utcnow().
+    exp = datetime.now(timezone.utc) + timedelta(hours=EXPIRE_HOURS)
+    return jwt.encode({"sub": username, "exp": exp}, SECRET_KEY, algorithm=ALGORITHM)
+
+
+# ── FastAPI 의존성 ────────────────────────────────────────────
+def require_auth(credentials: HTTPAuthorizationCredentials = Depends(bearer)) -> dict:
+    """FastAPI dependency: resolve the bearer token to a user record.
+
+    Returns:
+        ``{"username": ..., **record}`` for the user named in the token's
+        ``sub`` claim.
+
+    Raises:
+        HTTPException: 401 when no credentials were sent, the token cannot be
+            decoded, it has no ``sub`` claim, or the user no longer exists.
+    """
+    if credentials is None:
+        raise HTTPException(401, "인증이 필요합니다",
+                            headers={"WWW-Authenticate": "Bearer"})
+    invalid = HTTPException(401, "토큰이 유효하지 않거나 만료되었습니다",
+                            headers={"WWW-Authenticate": "Bearer"})
+    try:
+        claims = jwt.decode(credentials.credentials, SECRET_KEY, algorithms=[ALGORITHM])
+    except JWTError:
+        raise invalid
+    subject = claims.get("sub")
+    record = get_user(subject) if subject else None
+    if not record:
+        raise invalid
+    return {"username": subject, **record}
+
+def require_admin(user: dict = Depends(require_auth)) -> dict:
+    """FastAPI dependency: pass the user through only if their role is "admin" (403 otherwise)."""
+    if user.get("role") == "admin":
+        return user
+    raise HTTPException(403, "관리자 권한이 필요합니다")
+
+def require_stt(user: dict = Depends(require_auth)) -> dict:
+    """FastAPI dependency: pass the user through only if the "stt" permission is truthy (403 otherwise)."""
+    granted = user.get("permissions", {})
+    if granted.get("stt", False):
+        return user
+    raise HTTPException(403, "STT 사용 권한이 없습니다")
+
+def require_ocr(user: dict = Depends(require_auth)) -> dict:
+    """FastAPI dependency: pass the user through only if the "ocr" permission is truthy (403 otherwise)."""
+    granted = user.get("permissions", {})
+    if granted.get("ocr", False):
+        return user
+    raise HTTPException(403, "OCR 사용 권한이 없습니다")
--- a/app/main.py
+++ b/app/main.py
@@ -0,0 +1,275 @@
+import os, uuid, time, glob, json
+import httpx
+import aiofiles
+from pathlib import Path
+from fastapi import FastAPI, UploadFile, File, HTTPException, Depends, Form, Request
+from fastapi.staticfiles import StaticFiles
+from fastapi.responses import FileResponse
+from pydantic import BaseModel
+
+from auth import (authenticate, create_access_token, init_users,
+                  require_auth, require_admin, require_stt, require_ocr,
+                  list_users, create_user, update_user, delete_user)
+from tasks import celery_app, transcribe_task
+from ocr_tasks import ocr_task
+
+app = FastAPI(title="VoiceScript API")
+
+# Runtime configuration, each value overridable through an environment variable.
+UPLOAD_DIR       = os.getenv("UPLOAD_DIR", "/data/uploads")
+OUTPUT_DIR       = os.getenv("OUTPUT_DIR", "/data/outputs")
+# NOTE(review): the default is a private LAN address — confirm OLLAMA_URL is set per deployment.
+OLLAMA_URL       = os.getenv("OLLAMA_URL", "http://192.168.0.126:11434")
+MAX_UPLOAD_BYTES = int(os.getenv("MAX_UPLOAD_MB", "500")) * 1024 * 1024  # MB -> bytes
+OUTPUT_KEEP_SECS = int(os.getenv("OUTPUT_KEEP_HOURS", "48")) * 3600  # hours -> seconds; 0 disables _cleanup_outputs
+
+# settings.json is kept in the parent directory of UPLOAD_DIR.
+DATA_DIR      = Path(UPLOAD_DIR).parent
+SETTINGS_FILE = DATA_DIR / "settings.json"
+
+# Make sure both working directories exist at import time.
+os.makedirs(UPLOAD_DIR, exist_ok=True)
+os.makedirs(OUTPUT_DIR, exist_ok=True)
+
+# Lower-case file extensions (without the dot) accepted by /api/transcribe and /api/ocr.
+AUDIO_EXT = {"mp3","mp4","wav","m4a","ogg","flac","aac","wma","webm","mkv","avi","mov"}
+IMAGE_EXT  = {"jpg","jpeg","png","bmp","tiff","tif","webp","gif"}
+
+
+# ── 설정 I/O ─────────────────────────────────────────────────
+def _load_settings() -> dict:
+    """Read the Ollama model settings from SETTINGS_FILE.
+
+    Returns:
+        The parsed JSON mapping, or the built-in defaults when the file does
+        not exist.
+
+    Raises:
+        json.JSONDecodeError: if the file exists but is not valid JSON.
+    """
+    # EAFP: open directly instead of exists() + open(), so a file that
+    # disappears between the two calls cannot raise FileNotFoundError.
+    try:
+        with open(SETTINGS_FILE, "r", encoding="utf-8") as f:
+            return json.load(f)
+    except FileNotFoundError:
+        return {"stt_ollama_model": "", "ocr_ollama_model": "granite3.2-vision:latest"}
+
+def _save_settings(data: dict):
+    """Persist the settings mapping to SETTINGS_FILE.
+
+    The JSON is written to a sibling temporary file and then moved into place
+    with os.replace, so an interrupted write cannot leave a truncated
+    settings.json behind.
+
+    Args:
+        data: Settings mapping to serialise.
+    """
+    SETTINGS_FILE.parent.mkdir(parents=True, exist_ok=True)
+    tmp_path = SETTINGS_FILE.with_name(SETTINGS_FILE.name + ".tmp")
+    with open(tmp_path, "w", encoding="utf-8") as f:
+        json.dump(data, f, ensure_ascii=False, indent=2)
+    os.replace(tmp_path, SETTINGS_FILE)
+
+
+# ════════════════════════════════════════════════════════════════
+#  시작 이벤트
+# ════════════════════════════════════════════════════════════════
+@app.on_event("startup")
+async def on_startup():
+    """Startup hook: sync the admin account, then purge expired output files."""
+    for step in (init_users, _cleanup_outputs):
+        step()
+
+
+# ════════════════════════════════════════════════════════════════
+#  인증
+# ════════════════════════════════════════════════════════════════
+@app.post("/api/login")
+def login(username: str = Form(...), password: str = Form(...)):
+    """Exchange form credentials for a bearer token; 401 when they do not match."""
+    if authenticate(username, password):
+        token = create_access_token(username)
+        return {"access_token": token, "token_type": "bearer"}
+    raise HTTPException(401, "아이디 또는 비밀번호가 올바르지 않습니다")
+
+@app.get("/api/me")
+def me(user: dict = Depends(require_auth)):
+    """Return the caller's username, role and permissions."""
+    no_permissions = {"stt": False, "ocr": False}
+    profile = {"username": user["username"]}
+    profile["role"] = user.get("role", "user")
+    profile["permissions"] = user.get("permissions", no_permissions)
+    return profile
+
+
+# ════════════════════════════════════════════════════════════════
+#  STT
+# ════════════════════════════════════════════════════════════════
+@app.post("/api/transcribe")
+async def transcribe(
+    request:      Request,
+    file:         UploadFile = File(...),
+    use_ollama:   str        = Form("false"),
+    ollama_model: str        = Form(""),
+    user: dict = Depends(require_stt),
+):
+    """Store an uploaded audio/video file and queue a speech-to-text task.
+
+    Returns:
+        The Celery task id, the generated file id and the original filename.
+
+    Raises:
+        HTTPException: 413 for oversized uploads, 400 for an extension that
+            is not in AUDIO_EXT.
+    """
+    _check_size(request)
+    ext = _ext(file.filename)
+    if ext not in AUDIO_EXT:
+        raise HTTPException(400, f"지원하지 않는 형식: {', '.join(sorted(AUDIO_EXT))}")
+
+    # The upload is stored under a fresh UUID, keeping only the extension.
+    file_id = str(uuid.uuid4())
+    save_path = os.path.join(UPLOAD_DIR, f"{file_id}.{ext}")
+    await _save(file, save_path)
+
+    ollama_enabled = use_ollama.lower() == "true"
+    model_name = ollama_model
+    # Fall back to the saved setting when post-processing is requested
+    # without an explicit model.
+    if ollama_enabled and not model_name.strip():
+        model_name = _load_settings().get("stt_ollama_model", "")
+
+    task = transcribe_task.delay(file_id, save_path, ollama_enabled, model_name)
+    return {"task_id": task.id, "file_id": file_id, "filename": file.filename}
+
+
+# ════════════════════════════════════════════════════════════════
+#  OCR
+# ════════════════════════════════════════════════════════════════
+@app.post("/api/ocr")
+async def ocr(
+    request:       Request,
+    file:          UploadFile = File(...),
+    mode:          str        = Form("text"),
+    backend:       str        = Form("paddle"),
+    ollama_model:  str        = Form(""),
+    custom_prompt: str        = Form(""),
+    user: dict = Depends(require_ocr),
+):
+    """Store an uploaded image and queue an OCR task.
+
+    Unknown ``mode`` / ``backend`` values silently fall back to "text" and
+    "paddle".
+
+    Returns:
+        The Celery task id, file id, original filename and the effective
+        mode and backend.
+
+    Raises:
+        HTTPException: 413 for oversized uploads, 400 for an extension that
+            is not in IMAGE_EXT.
+    """
+    _check_size(request)
+    ext = _ext(file.filename)
+    if ext not in IMAGE_EXT:
+        raise HTTPException(400, f"지원하지 않는 형식: {', '.join(sorted(IMAGE_EXT))}")
+    mode = mode if mode in ("text", "structure") else "text"
+    backend = backend if backend in ("paddle", "ollama") else "paddle"
+
+    # Fall back to the saved setting when the Ollama backend is chosen
+    # without an explicit model.
+    model_name = ollama_model
+    if backend == "ollama" and not model_name.strip():
+        model_name = _load_settings().get("ocr_ollama_model", "granite3.2-vision:latest")
+
+    file_id = str(uuid.uuid4())
+    save_path = os.path.join(UPLOAD_DIR, f"{file_id}.{ext}")
+    await _save(file, save_path)
+    task = ocr_task.delay(file_id, save_path, mode, backend, model_name, custom_prompt)
+    response = {"task_id": task.id, "file_id": file_id}
+    response.update({"filename": file.filename, "mode": mode, "backend": backend})
+    return response
+
+
+# ════════════════════════════════════════════════════════════════
+#  작업 상태 / 다운로드
+# ════════════════════════════════════════════════════════════════
+@app.get("/api/status/{task_id}")
+def get_status(task_id: str, user: dict = Depends(require_auth)):
+    """Report the state of a Celery task.
+
+    Returns:
+        A dict with ``state`` and ``progress``; pending, progress and failure
+        states add ``message``, and success merges in the task's result dict.
+    """
+    r = celery_app.AsyncResult(task_id)
+    # Read the state once: every comparison then sees the same snapshot
+    # instead of querying the result backend again per branch.
+    state = r.state
+    if state == "PENDING":
+        return {"state": "pending", "progress": 0, "message": "대기 중..."}
+    if state == "PROGRESS":
+        m = r.info
+        # The task may have finished between the two reads, in which case
+        # info is no longer the progress dict.
+        if not isinstance(m, dict):
+            m = {}
+        return {"state": "progress",
+                "progress": m.get("progress", 0),
+                "message": m.get("message", "처리 중...")}
+    if state == "SUCCESS":
+        payload = r.result
+        extra = payload if isinstance(payload, dict) else {}
+        return {"state": "success", "progress": 100, **extra}
+    if state == "FAILURE":
+        return {"state": "failure", "progress": 0, "message": str(r.info)}
+    return {"state": state.lower(), "progress": 0}
+
+@app.get("/api/download/{filename}")
+def download(filename: str, user: dict = Depends(require_auth)):
+    """Send a file from OUTPUT_DIR to the caller.
+
+    Raises:
+        HTTPException: 400 when the name contains "..", "/" or a backslash;
+            404 when no regular file with that name exists in OUTPUT_DIR.
+    """
+    if ".." in filename or "/" in filename or "\\" in filename:
+        raise HTTPException(400, "잘못된 파일명")
+    path = os.path.join(OUTPUT_DIR, filename)
+    # isfile rather than exists: a name such as "." resolves to a directory,
+    # which passes exists() but cannot be served as a file.
+    if not os.path.isfile(path):
+        raise HTTPException(404, "파일을 찾을 수 없습니다")
+    media = ("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
+             if filename.endswith(".xlsx") else "text/plain")
+    return FileResponse(path, media_type=media, filename=filename)
+
+
+# ════════════════════════════════════════════════════════════════
+#  Ollama 모델 목록
+# ════════════════════════════════════════════════════════════════
+@app.get("/api/ollama/models")
+def ollama_models(user: dict = Depends(require_auth)):
+    """List model names from the Ollama server's /api/tags endpoint.
+
+    Any failure is reported in the response body (``connected: False`` plus
+    the error text) rather than raised.
+    """
+    try:
+        resp = httpx.get(f"{OLLAMA_URL}/api/tags", timeout=8.0)
+        resp.raise_for_status()
+        names = []
+        for entry in resp.json().get("models", []):
+            names.append(entry["name"])
+    except Exception as e:
+        return {"models": [], "connected": False, "error": str(e)}
+    return {"models": names, "connected": True}
+
+
+# ════════════════════════════════════════════════════════════════
+#  설정
+# ════════════════════════════════════════════════════════════════
+@app.get("/api/settings")
+def get_settings(user: dict = Depends(require_auth)):
+    """Return the stored Ollama model settings (or the defaults)."""
+    current = _load_settings()
+    return current
+
+@app.post("/api/settings")
+def save_settings_endpoint(
+    stt_ollama_model: str = Form(""),
+    ocr_ollama_model: str = Form(""),
+    user: dict = Depends(require_auth),
+):
+    """Overwrite the stored Ollama model settings with the submitted values.
+
+    NOTE(review): this is gated by require_auth, not require_admin, so any
+    logged-in user can change the settings — confirm that is intended.
+    """
+    data = dict(stt_ollama_model=stt_ollama_model,
+                ocr_ollama_model=ocr_ollama_model)
+    _save_settings(data)
+    return dict(ok=True, settings=data)
+
+
+# ════════════════════════════════════════════════════════════════
+#  관리자 — 사용자 관리
+# ════════════════════════════════════════════════════════════════
+@app.get("/api/admin/users")
+def admin_list_users(user: dict = Depends(require_admin)):
+    """Admin only: return all user records without their passwords."""
+    listing = list_users()
+    return {"users": listing}
+
+@app.post("/api/admin/users")
+def admin_create_user(
+    username:   str  = Form(...),
+    password:   str  = Form(...),
+    perm_stt:   str  = Form("false"),
+    perm_ocr:   str  = Form("false"),
+    user: dict = Depends(require_admin),
+):
+    """Admin only: create a user; 400 with the store's message on failure.
+
+    A permission is granted only when its form field equals "true"
+    (case-insensitive).
+    """
+    perms = {name: flag.lower() == "true"
+             for name, flag in (("stt", perm_stt), ("ocr", perm_ocr))}
+    ok, msg = create_user(username, password, perms)
+    if ok:
+        return {"ok": True, "message": msg}
+    raise HTTPException(400, msg)
+
+@app.put("/api/admin/users/{username}")
+def admin_update_user(
+    username:  str,
+    perm_stt:  str = Form("false"),
+    perm_ocr:  str = Form("false"),
+    password:  str = Form(""),
+    user: dict = Depends(require_admin),
+):
+    """Admin only: replace a user's permissions and optionally their password.
+
+    Both permission fields default to "false", so a request that omits them
+    clears the corresponding permission. An empty password leaves the stored
+    one unchanged.
+    """
+    perms = {name: flag.lower() == "true"
+             for name, flag in (("stt", perm_stt), ("ocr", perm_ocr))}
+    ok, msg = update_user(username, perms, password or None)
+    if ok:
+        return {"ok": True, "message": msg}
+    raise HTTPException(400, msg)
+
+@app.delete("/api/admin/users/{username}")
+def admin_delete_user(username: str, user: dict = Depends(require_admin)):
+    """Admin only: delete a user; 400 with the store's message on failure."""
+    ok, msg = delete_user(username)
+    if ok:
+        return {"ok": True, "message": msg}
+    raise HTTPException(400, msg)
+
+
+# ════════════════════════════════════════════════════════════════
+#  정리
+# ════════════════════════════════════════════════════════════════
+@app.post("/api/cleanup")
+def cleanup(user: dict = Depends(require_auth)):
+    """Purge expired output files and report how many were removed."""
+    removed_count = _cleanup_outputs()
+    return {"removed": removed_count}
+
+
+# ════════════════════════════════════════════════════════════════
+#  유틸
+# ════════════════════════════════════════════════════════════════
+def _check_size(request: Request):
+    """Reject a request early based on its declared Content-Length.
+
+    A request without the header passes; the streamed size is still enforced
+    while the upload is written to disk.
+
+    Raises:
+        HTTPException: 400 when Content-Length is not an integer, 413 when it
+            exceeds MAX_UPLOAD_BYTES.
+    """
+    cl = request.headers.get("content-length")
+    if not cl:
+        return
+    try:
+        declared = int(cl)
+    except ValueError:
+        # A malformed header is a client error, not a server crash.
+        raise HTTPException(400, "잘못된 Content-Length 헤더")
+    if declared > MAX_UPLOAD_BYTES:
+        raise HTTPException(413, f"파일이 너무 큽니다. 최대 {MAX_UPLOAD_BYTES//1024//1024}MB")
+
+def _cleanup_outputs() -> int:
+    """Delete entries in OUTPUT_DIR older than OUTPUT_KEEP_SECS.
+
+    Age is judged by modification time. Cleanup is best-effort: entries that
+    cannot be inspected or removed are skipped.
+
+    Returns:
+        Number of files removed (always 0 when OUTPUT_KEEP_SECS is 0).
+    """
+    if OUTPUT_KEEP_SECS == 0:
+        return 0
+    cutoff = time.time() - OUTPUT_KEEP_SECS
+    removed = 0
+    for f in glob.glob(os.path.join(OUTPUT_DIR, "*")):
+        try:
+            if os.path.getmtime(f) < cutoff:
+                os.remove(f)
+                removed += 1
+        except OSError:
+            # Vanished file, directory, permission problem: skip it, but do
+            # not swallow unrelated exceptions as a bare ``except`` would.
+            continue
+    return removed
+
+def _ext(fn):
+    """Return the lower-cased extension of ``fn`` without the dot.
+
+    Returns "" when ``fn`` is None, empty, or contains no dot, so an upload
+    without a filename is rejected by the caller's extension check instead of
+    raising TypeError here.
+    """
+    if not fn or "." not in fn:
+        return ""
+    return fn.rsplit(".", 1)[-1].lower()
+
+async def _save(file: UploadFile, path: str):
+    """Stream an upload to ``path`` in 1 MiB chunks, enforcing MAX_UPLOAD_BYTES.
+
+    Args:
+        file: The incoming upload.
+        path: Destination path on disk.
+
+    Raises:
+        HTTPException: 413 once more than MAX_UPLOAD_BYTES have been read.
+    """
+    written = 0
+    try:
+        async with aiofiles.open(path, "wb") as f:
+            while chunk := await file.read(1024 * 1024):
+                written += len(chunk)
+                if written > MAX_UPLOAD_BYTES:
+                    raise HTTPException(413, f"파일이 너무 큽니다. 최대 {MAX_UPLOAD_BYTES//1024//1024}MB")
+                await f.write(chunk)
+    except BaseException:
+        # The context manager has already closed the file. Remove the partial
+        # upload on any failure (size limit, I/O error, cancellation) so it
+        # does not accumulate in the upload directory.
+        try:
+            os.remove(path)
+        except OSError:
+            pass
+        raise
+
+# Serve the front-end from ./static at the site root (html=True).
+# NOTE(review): presumably registered last so the /api routes declared above
+# are matched before this catch-all mount — confirm before reordering.
+app.mount("/", StaticFiles(directory="static", html=True), name="static")
--- a/app/ocr_tasks.py
+++ b/app/ocr_tasks.py
@@ -0,0 +1,288 @@
+"""
+OCR Celery Tasks
+- PaddleOCR 3.x 호환 (use_gpu/show_log/cls 파라미터 제거, 결과구조 변경 반영)
+- backend="paddle"  → PaddleOCR 로컬 실행
+- backend="ollama"  → Ollama Vision API 호출
+"""
+import os
+import base64
+
+import httpx
+from celery import Celery
+import openpyxl
+from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
+
+# Runtime configuration, each value overridable through an environment variable.
+REDIS_URL      = os.getenv("REDIS_URL", "redis://redis:6379/0")
+OUTPUT_DIR     = os.getenv("OUTPUT_DIR", "/data/outputs")
+OCR_LANG       = os.getenv("OCR_LANG", "korean")
+# NOTE(review): the default is a private LAN address — confirm OLLAMA_URL is set per deployment.
+OLLAMA_URL     = os.getenv("OLLAMA_URL", "http://192.168.0.126:11434")
+OLLAMA_TIMEOUT = int(os.getenv("OLLAMA_TIMEOUT", "180"))  # seconds, passed to httpx as the request timeout
+
+# Celery application for this module; Redis is both broker and result backend.
+# NOTE(review): tasks.py creates a second Celery app ("whisper_tasks") on the
+# same Redis URL — confirm the worker is started so that both apps' tasks are
+# registered.
+celery_app = Celery("ocr_tasks", broker=REDIS_URL, backend=REDIS_URL)
+celery_app.conf.update(
+    task_serializer="json",
+    result_serializer="json",
+    accept_content=["json"],
+    task_track_started=True,
+    result_expires=3600,
+)
+
+# PaddleOCR singletons, created on first use by get_ocr() / get_structure()
+_ocr_engine    = None
+_struct_engine = None
+
+def get_ocr():
+    """Return the shared PaddleOCR engine, creating it on first use."""
+    global _ocr_engine
+    if _ocr_engine is not None:
+        return _ocr_engine
+    # Imported lazily so the module can be imported without loading PaddleOCR.
+    from paddleocr import PaddleOCR
+    print(f"[PaddleOCR] 로딩 (lang={OCR_LANG})")
+    # PaddleOCR 3.x: the use_gpu/show_log parameters were removed
+    _ocr_engine = PaddleOCR(use_angle_cls=True, lang=OCR_LANG)
+    print("[PaddleOCR] 완료")
+    return _ocr_engine
+
+def get_structure():
+    """Return the shared PPStructure engine, creating it on first use.
+
+    NOTE(review): requirements.txt asks for paddleocr>=3.0.0 — confirm that
+    release still exports ``PPStructure`` with these arguments.
+    """
+    global _struct_engine
+    if _struct_engine is not None:
+        return _struct_engine
+    # Imported lazily so the module can be imported without loading PaddleOCR.
+    from paddleocr import PPStructure
+    print("[PPStructure] 로딩")
+    _struct_engine = PPStructure(table=True, ocr=True, lang=OCR_LANG)
+    print("[PPStructure] 완료")
+    return _struct_engine
+
+
+# ════════════════════════════════════════════════════════════════
+#  메인 Task
+# ════════════════════════════════════════════════════════════════
+@celery_app.task(bind=True, name="tasks.ocr_task", queue="ocr")
+def ocr_task(self, file_id, image_path, mode="text",
+             backend="paddle", ollama_model="granite3.2-vision", custom_prompt=""):
+    """Run OCR on one uploaded image and delete the upload afterwards.
+
+    Args:
+        file_id: Identifier used to name the output files.
+        image_path: Path of the uploaded image; removed on success and failure.
+        mode: "text" or "structure".
+        backend: "ollama" uses the Ollama vision API; anything else runs PaddleOCR.
+        ollama_model: Model name for the Ollama backend.
+        custom_prompt: Optional prompt overriding the built-in one (Ollama only).
+
+    Returns:
+        The result dict produced by the selected backend.
+
+    Raises:
+        Exception: any backend failure, re-raised with an "OCR 실패" prefix and
+            the original exception chained as the cause.
+    """
+    self.update_state(state="PROGRESS", meta={"progress": 8, "message": "엔진 준비 중..."})
+    try:
+        if backend == "ollama":
+            return _run_ollama(self, file_id, image_path, mode, ollama_model, custom_prompt)
+        return _run_paddle(self, file_id, image_path, mode)
+    except Exception as e:
+        raise Exception(f"OCR 실패: {str(e)}") from e
+    finally:
+        # One cleanup path for success and failure; only filesystem errors
+        # are ignored (the upload may already be gone).
+        try:
+            os.remove(image_path)
+        except OSError:
+            pass
+
+
+# ════════════════════════════════════════════════════════════════
+#  Ollama 백엔드
+# ════════════════════════════════════════════════════════════════
+# Default prompts sent to the Ollama model, keyed by OCR mode; used by
+# _run_ollama when the caller supplies no custom prompt.
+_OLLAMA_PROMPTS = {
+    "text":      "이 이미지에서 모든 텍스트를 정확하게 추출해줘. 원본의 줄 구분과 단락 구조를 유지해줘.",
+    "structure": "이 이미지를 분석해서 표는 마크다운 표 형식으로, 나머지 텍스트는 원본 구조를 유지하며 추출해줘.",
+}
+
+def _run_ollama(task, file_id, image_path, mode, ollama_model, custom_prompt):
+    """OCR an image by sending it to an Ollama vision model.
+
+    The image is base64-encoded and posted to ``{OLLAMA_URL}/api/chat``. The
+    reply text is written to ``<file_id>_ocr.txt``; in "structure" mode any
+    markdown tables found in it are also exported to ``<file_id>_tables.xlsx``.
+
+    Returns:
+        Result dict with the text, per-line entries, table metadata and the
+        names of the generated files.
+
+    Raises:
+        Exception: when the server cannot be reached, the request times out,
+            or the model returns empty content.
+    """
+    def report(pct, msg):
+        task.update_state(state="PROGRESS", meta={"progress": pct, "message": msg})
+
+    report(15, f"Ollama ({ollama_model}) 연결 중...")
+    with open(image_path, "rb") as image_fh:
+        img_b64 = base64.b64encode(image_fh.read()).decode()
+    prompt = custom_prompt.strip() or _OLLAMA_PROMPTS.get(mode, _OLLAMA_PROMPTS["text"])
+    report(30, "모델 추론 중...")
+
+    request_body = {
+        "model": ollama_model,
+        "messages": [{"role": "user", "content": prompt, "images": [img_b64]}],
+        "stream": False,
+        "options": {"temperature": 0.1},
+    }
+    try:
+        resp = httpx.post(f"{OLLAMA_URL}/api/chat", json=request_body,
+                          timeout=float(OLLAMA_TIMEOUT))
+        resp.raise_for_status()
+    except httpx.ConnectError:
+        raise Exception(f"Ollama 서버 연결 실패 ({OLLAMA_URL})")
+    except httpx.TimeoutException:
+        raise Exception(f"Ollama 응답 시간 초과 ({OLLAMA_TIMEOUT}초). OLLAMA_TIMEOUT 값을 늘려주세요.")
+
+    report(85, "결과 저장 중...")
+    full_text = resp.json().get("message", {}).get("content", "").strip()
+    if not full_text:
+        raise Exception("Ollama 빈 응답. 모델이 설치되어 있는지 확인하세요.")
+
+    # Tables are only extracted in structure mode.
+    tables = _parse_md_tables(full_text) if mode == "structure" else []
+    os.makedirs(OUTPUT_DIR, exist_ok=True)
+    txt_file = f"{file_id}_ocr.txt"
+    with open(os.path.join(OUTPUT_DIR, txt_file), "w", encoding="utf-8") as out:
+        out.write(f"# OCR 결과 (Ollama / {ollama_model})\n\n{full_text}")
+
+    xlsx_file = None
+    if tables:
+        xlsx_file = f"{file_id}_tables.xlsx"
+        _save_excel(tables, os.path.join(OUTPUT_DIR, xlsx_file))
+
+    table_entries = []
+    for t in tables:
+        table_entries.append({"html": _md_table_to_html(t), "rows": len(t),
+                              "cols": max(len(r) for r in t) if t else 0})
+
+    # The model reports no per-line confidence or boxes, so fixed
+    # placeholders are used.
+    lines = []
+    for raw_line in full_text.splitlines():
+        if raw_line.strip():
+            lines.append({"text": raw_line, "confidence": 1.0, "bbox": []})
+
+    return {
+        "mode": mode, "backend": "ollama", "ollama_model": ollama_model,
+        "full_text": full_text, "lines": lines, "line_count": len(lines),
+        "txt_file": txt_file,
+        "tables": table_entries,
+        "xlsx_file": xlsx_file,
+    }
+
+
+# ════════════════════════════════════════════════════════════════
+#  PaddleOCR 백엔드
+# ════════════════════════════════════════════════════════════════
+def _run_paddle(task, file_id, image_path, mode):
+    """Load the image with OpenCV and dispatch to the PaddleOCR text or structure pipeline.
+
+    Raises:
+        ValueError: when OpenCV cannot read the image.
+    """
+    import cv2
+    img = cv2.imread(image_path)
+    if img is None:
+        raise ValueError("이미지를 읽을 수 없습니다")
+    os.makedirs(OUTPUT_DIR, exist_ok=True)
+    if mode == "structure":
+        return _paddle_structure(task, file_id, img)
+    return _paddle_text(task, file_id, img)
+
+
+def _paddle_text(task, file_id, img):
+    """Run plain-text OCR with PaddleOCR and write ``<file_id>_ocr.txt``.
+
+    Two result layouts are handled for the first result element: a dict with
+    ``rec_texts`` / ``rec_scores`` (PaddleOCR 3.x) and the older list of
+    ``[bbox, (text, confidence)]`` items. Blank texts are dropped.
+
+    Returns:
+        Result dict with the joined text, per-line entries and the output
+        file name; ``tables`` is empty and ``xlsx_file`` is None.
+    """
+    task.update_state(state="PROGRESS", meta={"progress": 30, "message": "텍스트 인식 중..."})
+    # PaddleOCR 3.x: the cls parameter was removed and the result layout changed
+    result = get_ocr().ocr(img)
+    task.update_state(state="PROGRESS", meta={"progress": 80, "message": "결과 정리 중..."})
+
+    lines = []
+
+    def keep(text, conf):
+        if text.strip():
+            lines.append({"text": text,
+                          "confidence": round(float(conf), 3),
+                          "bbox": []})
+
+    first = result[0] if result and len(result) > 0 else None
+    if isinstance(first, dict):
+        # PaddleOCR 3.x layout
+        texts = first.get("rec_texts", [])
+        scores = first.get("rec_scores", [])
+        for text, conf in zip(texts, scores):
+            keep(text, conf)
+    elif isinstance(first, list):
+        # Legacy layout: list of [bbox, (text, conf)]
+        for entry in first:
+            if entry and len(entry) == 2:
+                _, (text, conf) = entry
+                keep(text, conf)
+
+    full_text = "\n".join(entry["text"] for entry in lines)
+    txt_file = f"{file_id}_ocr.txt"
+    with open(os.path.join(OUTPUT_DIR, txt_file), "w", encoding="utf-8") as out:
+        out.write(full_text)
+    return {"mode": "text", "backend": "paddle",
+            "full_text": full_text, "lines": lines,
+            "line_count": len(lines), "txt_file": txt_file,
+            "tables": [], "xlsx_file": None}
+
+
+def _paddle_structure(task, file_id, img):
+    """Run layout/table analysis with PPStructure and write the outputs.
+
+    Table regions contribute their HTML (also parsed into rows for Excel);
+    "text", "title" and "figure_caption" regions contribute text lines.
+    Writes ``<file_id>_ocr.txt`` and, when tables were found,
+    ``<file_id>_tables.xlsx``.
+
+    Returns:
+        Result dict with the joined text, per-line entries, table metadata
+        and the names of the generated files.
+    """
+    task.update_state(state="PROGRESS", meta={"progress": 20, "message": "레이아웃 분석 중..."})
+    regions = get_structure()(img)
+    task.update_state(state="PROGRESS", meta={"progress": 60, "message": "표 구조 추출 중..."})
+
+    text_blocks, tables_html, tables_data = [], [], []
+    for region in regions:
+        kind = region.get("type", "").lower()
+        if kind == "table":
+            markup = region.get("res", {}).get("html", "")
+            if markup:
+                tables_html.append(markup)
+                tables_data.append(_html_table_to_list(markup))
+            continue
+        if kind not in ("text", "title", "figure_caption"):
+            continue
+        for entry in (region.get("res", []) or []):
+            if isinstance(entry, (list, tuple)) and len(entry) == 2:
+                _, (text, _conf) = entry
+                text_blocks.append(text)
+
+    full_text = "\n".join(text_blocks)
+    task.update_state(state="PROGRESS", meta={"progress": 80, "message": "Excel 생성 중..."})
+
+    xlsx_file = None
+    if tables_data:
+        xlsx_file = f"{file_id}_tables.xlsx"
+        _save_excel(tables_data, os.path.join(OUTPUT_DIR, xlsx_file))
+
+    txt_file = f"{file_id}_ocr.txt"
+    with open(os.path.join(OUTPUT_DIR, txt_file), "w", encoding="utf-8") as out:
+        out.write("# 텍스트\n\n" + full_text)
+
+    # Confidence is not carried over from the structure result; a fixed
+    # placeholder is reported.
+    lines = []
+    for block in text_blocks:
+        lines.append({"text": block, "confidence": 1.0, "bbox": []})
+
+    tables_meta = []
+    for markup, rows in zip(tables_html, tables_data):
+        tables_meta.append({"html": markup, "rows": len(rows),
+                            "cols": max(len(r) for r in rows) if rows else 0})
+
+    return {"mode": "structure", "backend": "paddle",
+            "full_text": full_text, "lines": lines,
+            "line_count": len(lines), "txt_file": txt_file,
+            "tables": tables_meta, "xlsx_file": xlsx_file}
+
+
+# ════════════════════════════════════════════════════════════════
+#  공통 유틸
+# ════════════════════════════════════════════════════════════════
+def _parse_md_tables(text):
+    """Extract markdown tables from ``text``.
+
+    A table is a run of consecutive lines that start and end with "|".
+    Separator rows (only "|", "-", ":" and spaces) are skipped, and runs with
+    fewer than two remaining rows are discarded.
+
+    Returns:
+        A list of tables, each a list of rows, each a list of stripped cells.
+    """
+    found = []
+    pending = []
+    separator_chars = set("| -:")
+    # The trailing "" acts as a sentinel that flushes a table ending on the
+    # last line.
+    for raw in [*text.splitlines(), ""]:
+        row = raw.strip()
+        if not (row.startswith("|") and row.endswith("|")):
+            if len(pending) >= 2:
+                found.append(pending)
+            pending = []
+            continue
+        if set(row) <= separator_chars:
+            continue
+        pending.append([cell.strip() for cell in row.strip("|").split("|")])
+    return found
+
+def _md_table_to_html(table):
+    """Render a parsed table (list of rows of cells) as an HTML ``<table>``.
+
+    The first row is rendered with ``<th>`` cells, the rest with ``<td>``.
+    Cell text is HTML-escaped: it comes from model output and must not be
+    able to inject markup into the page that displays it.
+
+    Returns:
+        The HTML string, or "" for an empty table.
+    """
+    from html import escape  # local import: the module does not import html at top level
+
+    if not table:
+        return ""
+    rendered_rows = []
+    for i, row in enumerate(table):
+        tag = "th" if i == 0 else "td"
+        cells = "".join(f"<{tag}>{escape(str(c))}</{tag}>" for c in row)
+        rendered_rows.append(f"<tr>{cells}</tr>")
+    return f"<table>{''.join(rendered_rows)}</table>"
+
+def _html_table_to_list(html):
+    """Parse an HTML table into a list of rows of stripped cell texts.
+
+    Text inside ``<td>`` / ``<th>`` elements is collected per cell; rows
+    without any cell are dropped.
+    """
+    from html.parser import HTMLParser
+
+    class _TableCollector(HTMLParser):
+        CELL_TAGS = ("td", "th")
+
+        def __init__(self):
+            super().__init__()
+            self.rows = []
+            self._row = []
+            self._cell = []
+            self._in = False  # True while inside a td/th element
+
+        def handle_starttag(self, tag, attrs):
+            if tag == "tr":
+                self._row = []
+            elif tag in self.CELL_TAGS:
+                self._cell = []
+                self._in = True
+
+        def handle_endtag(self, tag):
+            if tag in self.CELL_TAGS:
+                self._row.append("".join(self._cell).strip())
+                self._in = False
+            elif tag == "tr" and self._row:
+                self.rows.append(self._row)
+
+        def handle_data(self, data):
+            if self._in:
+                self._cell.append(data)
+
+    collector = _TableCollector()
+    collector.feed(html)
+    return collector.rows
+
+def _save_excel(tables, path):
+    """Write each table to its own worksheet of a new workbook at ``path``.
+
+    Every cell gets a thin border and centred, wrapped alignment; the first
+    row of each sheet is styled as a header. Column widths follow the longest
+    cell text plus padding, capped at 40. A workbook without tables still
+    gets one empty sheet so that it can be saved.
+    """
+    edge = Side(style="thin", color="2A2A33")
+    frame = Border(left=edge, right=edge, top=edge, bottom=edge)
+
+    book = openpyxl.Workbook()
+    book.remove(book.active)
+    for number, table in enumerate(tables, 1):
+        sheet = book.create_sheet(f"표 {number}")
+        for r_idx, row in enumerate(table, 1):
+            is_header = r_idx == 1
+            for c_idx, val in enumerate(row, 1):
+                cell = sheet.cell(row=r_idx, column=c_idx, value=val)
+                cell.border = frame
+                cell.alignment = Alignment(horizontal="center",
+                                           vertical="center", wrap_text=True)
+                if is_header:
+                    cell.fill = PatternFill("solid", fgColor="1A1A2E")
+                    cell.font = Font(color="00E5A0", bold=True, size=10)
+                else:
+                    cell.font = Font(size=10)
+        for column in sheet.columns:
+            widest = max((len(str(c.value or "")) for c in column), default=8)
+            sheet.column_dimensions[column[0].column_letter].width = min(widest + 4, 40)
+    if not book.sheetnames:
+        book.create_sheet("Sheet1")
+    book.save(path)
--- a/app/requirements.txt
+++ b/app/requirements.txt
@@ -0,0 +1,21 @@
+fastapi==0.115.0
+uvicorn[standard]==0.30.6
+python-multipart==0.0.9
+celery==5.4.0
+redis==5.0.8
+faster-whisper==1.0.3
+aiofiles==23.2.1
+
+# 인증 (bcrypt 제거 — 직접 비교 방식 사용)
+python-jose[cryptography]==3.3.0
+
+# PaddleOCR 3.x
+paddleocr>=3.0.0
+opencv-python-headless>=4.8.0
+
+# Ollama API 호출
+httpx>=0.27.0
+
+# Excel 출력
+openpyxl==3.1.2
+Pillow>=10.0.0
--- a/app/static/index.html
+++ b/app/static/index.html
--- a/app/tasks.py
+++ b/app/tasks.py
@@ -0,0 +1,155 @@
+import os
+import httpx
+from celery import Celery
+from ocr_tasks import ocr_task  # noqa: F401 — worker에 등록
+
+# Runtime configuration, each value overridable through an environment variable.
+REDIS_URL      = os.getenv("REDIS_URL", "redis://redis:6379/0")
+MODEL_SIZE     = os.getenv("WHISPER_MODEL", "medium")
+DEVICE         = os.getenv("WHISPER_DEVICE", "cpu")
+COMPUTE_TYPE   = os.getenv("WHISPER_COMPUTE_TYPE", "int8")
+# An empty WHISPER_LANGUAGE becomes None (presumably language auto-detection — confirm).
+LANGUAGE       = os.getenv("WHISPER_LANGUAGE", "ko") or None
+BEAM_SIZE      = int(os.getenv("WHISPER_BEAM_SIZE", "5"))
+# An empty WHISPER_INITIAL_PROMPT becomes None.
+INITIAL_PROMPT = os.getenv("WHISPER_INITIAL_PROMPT", "") or None
+OUTPUT_DIR     = os.getenv("OUTPUT_DIR", "/data/outputs")
+# NOTE(review): the default is a private LAN address — confirm OLLAMA_URL is set per deployment.
+OLLAMA_URL     = os.getenv("OLLAMA_URL", "http://192.168.0.126:11434")
+OLLAMA_TIMEOUT = int(os.getenv("OLLAMA_TIMEOUT", "180"))  # seconds, passed to httpx as the request timeout
+
+# Celery application for speech-to-text; Redis is both broker and result backend.
+celery_app = Celery("whisper_tasks", broker=REDIS_URL, backend=REDIS_URL)
+celery_app.conf.update(
+    task_serializer="json",
+    result_serializer="json",
+    accept_content=["json"],
+    task_track_started=True,
+    result_expires=3600,
+)
+
+# Whisper model singleton, created on first use by get_model().
+_model = None
+
+def get_model():
+    """Return the shared WhisperModel, loading it on first use."""
+    global _model
+    if _model is not None:
+        return _model
+    # Imported lazily so the module can be imported without loading faster-whisper.
+    from faster_whisper import WhisperModel
+    print(f"[Whisper] 로딩: {MODEL_SIZE} / {DEVICE} / {COMPUTE_TYPE}")
+    _model = WhisperModel(MODEL_SIZE, device=DEVICE, compute_type=COMPUTE_TYPE)
+    print("[Whisper] 로드 완료")
+    return _model
+
+
+# ── Ollama 후처리 ─────────────────────────────────────────────
+def _ollama_postprocess(text: str, model: str) -> str:
+    """Polish a Whisper transcript with an Ollama chat model.
+
+    The model is asked to add punctuation and smooth the wording without
+    changing the content. The input is returned unchanged when no model is
+    given, the text is blank, the request fails, or the reply is empty.
+    """
+    if not model or not text.strip():
+        return text
+    instruction = (
+        "다음은 음성 인식으로 추출된 텍스트입니다. "
+        "내용은 절대 변경하지 말고, 문장 부호를 추가하고 자연스럽게 다듬어줘. "
+        "결과 텍스트만 출력하고 설명은 하지 마.\n\n"
+    )
+    request_body = {
+        "model":    model,
+        "messages": [{"role": "user", "content": instruction + text}],
+        "stream":   False,
+        "options":  {"temperature": 0.1},
+    }
+    try:
+        resp = httpx.post(f"{OLLAMA_URL}/api/chat", json=request_body,
+                          timeout=float(OLLAMA_TIMEOUT))
+        resp.raise_for_status()
+        polished = resp.json().get("message", {}).get("content", "").strip()
+    except Exception as e:
+        # Post-processing is optional: log and fall back to the raw text.
+        print(f"[Ollama 후처리 실패] {e} — 원본 텍스트 사용")
+        return text
+    return polished or text
+
+
+# ════════════════════════════════════════════════════════════════
+#  STT Celery Task
+# ════════════════════════════════════════════════════════════════
+@celery_app.task(bind=True, name="tasks.transcribe_task", queue="stt")
+def transcribe_task(
+    self,
+    file_id:      str,
+    audio_path:   str,
+    use_ollama:   bool = False,
+    ollama_model: str  = "",
+):
+    """Transcribe one uploaded audio file with Whisper and delete the upload.
+
+    Progress is published through ``update_state`` while segments are
+    decoded. The transcript (optionally post-processed by Ollama) and the
+    timestamped segments are written to ``<file_id>.txt`` in OUTPUT_DIR.
+
+    Args:
+        file_id: Identifier used to name the output file.
+        audio_path: Path of the uploaded audio; removed on success and failure.
+        use_ollama: Whether to post-process the transcript with Ollama.
+        ollama_model: Model used for post-processing; ignored when empty.
+
+    Returns:
+        Dict with the final and raw text, segments, detected language,
+        duration, output file name and the Ollama usage flags.
+
+    Raises:
+        Exception: any failure, re-raised with a "변환 실패" prefix and the
+            original exception chained as the cause.
+    """
+    self.update_state(state="PROGRESS", meta={"progress": 5, "message": "모델 준비 중..."})
+    try:
+        model = get_model()
+        self.update_state(state="PROGRESS", meta={"progress": 15, "message": "오디오 분석 중..."})
+
+        segments_gen, info = model.transcribe(
+            audio_path,
+            language=LANGUAGE,
+            beam_size=BEAM_SIZE,
+            initial_prompt=INITIAL_PROMPT,
+            vad_filter=True,
+            vad_parameters=dict(min_silence_duration_ms=500),
+            word_timestamps=False,
+        )
+
+        self.update_state(state="PROGRESS", meta={"progress": 30, "message": "텍스트 변환 중..."})
+
+        segments, parts = [], []
+        duration = info.duration
+
+        for seg in segments_gen:
+            segments.append({"start": round(seg.start,2),
+                             "end":   round(seg.end,2),
+                             "text":  seg.text.strip()})
+            parts.append(seg.text.strip())
+            if duration > 0:
+                # Decoding maps onto the 30-80% band of the progress bar.
+                pct = 30 + int((seg.end / duration) * 50)
+                self.update_state(
+                    state="PROGRESS",
+                    meta={"progress": min(pct, 80),
+                          "message": f"변환 중... {seg.end:.0f}s / {duration:.0f}s"},
+                )
+
+        raw_text  = "\n".join(parts)
+        full_text = raw_text
+
+        # Optional Ollama post-processing
+        if use_ollama and ollama_model:
+            self.update_state(state="PROGRESS",
+                              meta={"progress": 85,
+                                    "message": f"Ollama({ollama_model}) 후처리 중..."})
+            full_text = _ollama_postprocess(raw_text, ollama_model)
+
+        self.update_state(state="PROGRESS", meta={"progress": 95, "message": "파일 저장 중..."})
+        os.makedirs(OUTPUT_DIR, exist_ok=True)
+        output_filename = f"{file_id}.txt"
+
+        with open(os.path.join(OUTPUT_DIR, output_filename), "w", encoding="utf-8") as f:
+            f.write(f"# 변환 결과\n# 언어: {info.language}  |  재생 시간: {duration:.1f}초")
+            if use_ollama and ollama_model:
+                f.write(f"  |  Ollama 후처리: {ollama_model}")
+            f.write("\n\n## 전체 텍스트\n\n" + full_text + "\n\n")
+            f.write("## 타임스탬프별 세그먼트\n\n")
+            for seg in segments:
+                f.write(f"[{_fmt(seg['start'])} → {_fmt(seg['end'])}]  {seg['text']}\n")
+
+        return {
+            "text":         full_text,
+            "raw_text":     raw_text,
+            "segments":     segments,
+            "language":     info.language,
+            "duration":     round(duration, 1),
+            "output_file":  output_filename,
+            "ollama_used":  use_ollama and bool(ollama_model),
+            "ollama_model": ollama_model if (use_ollama and ollama_model) else "",
+        }
+
+    except Exception as e:
+        raise Exception(f"변환 실패: {str(e)}") from e
+    finally:
+        # Delete the upload on failure as well as on success so failed jobs
+        # do not accumulate in the upload directory; only filesystem errors
+        # are ignored.
+        try:
+            os.remove(audio_path)
+        except OSError:
+            pass
+
+
+def _fmt(s):
+    """Format a number of seconds as zero-padded ``MM:SS`` (fraction truncated)."""
+    whole = int(s)
+    return f"{whole // 60:02d}:{whole % 60:02d}"