diff --git a/app/Dockerfile b/app/Dockerfile index fd2eb75..6da8ab2 100644 --- a/app/Dockerfile +++ b/app/Dockerfile @@ -9,8 +9,6 @@ RUN apt-get update && apt-get install -y \ libxext6 \ libxrender1 \ libgl1 \ - libgles2 \ - libegl1 \ wget \ curl \ && rm -rf /var/lib/apt/lists/* @@ -19,8 +17,9 @@ WORKDIR /app COPY requirements.txt . -# PaddlePaddle CPU — PyPI 공식 서버 -RUN pip install --no-cache-dir paddlepaddle==3.0.0 +# PaddlePaddle CPU (AMD64) — paddleocr 3.x 호환 +RUN pip install --no-cache-dir paddlepaddle==3.0.0 \ + -i https://pypi.tuna.tsinghua.edu.cn/simple # 나머지 패키지 RUN pip install --no-cache-dir -r requirements.txt diff --git a/app/main.py b/app/main.py index a409dbf..13c9ba4 100644 --- a/app/main.py +++ b/app/main.py @@ -1,11 +1,10 @@ -import os, uuid, time, glob, json -import httpx -import aiofiles +import os, uuid, time, glob, json, threading +import psutil, httpx, aiofiles from pathlib import Path +from datetime import datetime from fastapi import FastAPI, UploadFile, File, HTTPException, Depends, Form, Request from fastapi.staticfiles import StaticFiles from fastapi.responses import FileResponse -from pydantic import BaseModel from auth import (authenticate, create_access_token, init_users, require_auth, require_admin, require_stt, require_ocr, @@ -23,6 +22,8 @@ OUTPUT_KEEP_SECS = int(os.getenv("OUTPUT_KEEP_HOURS", "48")) * 3600 DATA_DIR = Path(UPLOAD_DIR).parent SETTINGS_FILE = DATA_DIR / "settings.json" +HISTORY_FILE = DATA_DIR / "history.json" +HISTORY_MAX = 300 os.makedirs(UPLOAD_DIR, exist_ok=True) os.makedirs(OUTPUT_DIR, exist_ok=True) @@ -30,13 +31,26 @@ os.makedirs(OUTPUT_DIR, exist_ok=True) AUDIO_EXT = {"mp3","mp4","wav","m4a","ogg","flac","aac","wma","webm","mkv","avi","mov"} IMAGE_EXT = {"jpg","jpeg","png","bmp","tiff","tif","webp","gif"} +_DEFAULT_SETTINGS = { + "stt_ollama_model": "", + "ocr_ollama_model": "granite3.2-vision:latest", + "cpu_threads": 0, + "stt_timeout": 0, # 0 = 무제한 + "ollama_timeout": 600, # 초 +} + +_hist_lock = threading.Lock() + # ── 설정 I/O ───────────────────────────────────────────────── def _load_settings() -> dict: if not SETTINGS_FILE.exists(): - return {"stt_ollama_model": "", "ocr_ollama_model": "granite3.2-vision:latest"} + return dict(_DEFAULT_SETTINGS) with open(SETTINGS_FILE, "r", encoding="utf-8") as f: - return json.load(f) + data = json.load(f) + for k, v in _DEFAULT_SETTINGS.items(): + data.setdefault(k, v) + return data def _save_settings(data: dict): SETTINGS_FILE.parent.mkdir(parents=True, exist_ok=True) @@ -44,6 +58,85 @@ def _save_settings(data: dict): json.dump(data, f, ensure_ascii=False, indent=2) +# ── 이력 I/O ───────────────────────────────────────────────── +def _load_history() -> list: + with _hist_lock: + if not HISTORY_FILE.exists(): return [] + try: + with open(HISTORY_FILE, "r", encoding="utf-8") as f: return json.load(f) + except: return [] + +def append_history(record: dict): + with _hist_lock: + try: + history = [] + if HISTORY_FILE.exists(): + with open(HISTORY_FILE, "r", encoding="utf-8") as f: history = json.load(f) + history.insert(0, record) + history = history[:HISTORY_MAX] + HISTORY_FILE.parent.mkdir(parents=True, exist_ok=True) + with open(HISTORY_FILE, "w", encoding="utf-8") as f: json.dump(history, f, ensure_ascii=False, indent=2) + except: pass + +def _update_history(file_id: str, result: dict): + with _hist_lock: + if not HISTORY_FILE.exists(): return + try: + with open(HISTORY_FILE, "r", encoding="utf-8") as f: history = json.load(f) + for h in history: + if h.get("id") == file_id and h.get("status") == "processing": + h["status"] = "success" + if h["type"] == "stt": + h["output"] = { + "filename": result.get("output_file",""), + "language": result.get("language",""), + "duration_s": result.get("duration", 0), + "segments": len(result.get("segments",[])), + "text_preview": (result.get("text","")[:200]+"…" if len(result.get("text",""))>200 else result.get("text","")), + "ollama_used": result.get("ollama_used", False), + "ollama_model": result.get("ollama_model",""), + } + else: + h["output"] = { + "txt_file": result.get("txt_file",""), + "xlsx_file": result.get("xlsx_file",""), + "line_count": result.get("line_count", 0), + "table_count": len(result.get("tables",[])), + "backend": result.get("backend",""), + "ollama_model": result.get("ollama_model",""), + "text_preview": (result.get("full_text","")[:200]+"…" if len(result.get("full_text",""))>200 else result.get("full_text","")), + } + break + with open(HISTORY_FILE, "w", encoding="utf-8") as f: json.dump(history, f, ensure_ascii=False, indent=2) + except: pass + +def _update_history_fail(file_id: str, error_msg: str): + with _hist_lock: + if not HISTORY_FILE.exists(): return + try: + with open(HISTORY_FILE, "r", encoding="utf-8") as f: history = json.load(f) + for h in history: + if h.get("id") == file_id and h.get("status") == "processing": + h["status"] = "failed"; h["output"] = {"error": error_msg[:300]}; break + with open(HISTORY_FILE, "w", encoding="utf-8") as f: json.dump(history, f, ensure_ascii=False, indent=2) + except: pass + +def delete_history_item(history_id: str) -> bool: + with _hist_lock: + if not HISTORY_FILE.exists(): return False + try: + with open(HISTORY_FILE, "r", encoding="utf-8") as f: history = json.load(f) + new = [h for h in history if h.get("id") != history_id] + if len(new) == len(history): return False + with open(HISTORY_FILE, "w", encoding="utf-8") as f: json.dump(new, f, ensure_ascii=False, indent=2) + return True + except: return False + +def clear_history(): + with _hist_lock: + if HISTORY_FILE.exists(): HISTORY_FILE.write_text("[]", encoding="utf-8") + + # ════════════════════════════════════════════════════════════════ # 시작 이벤트 # ════════════════════════════════════════════════════════════════ @@ -59,16 +152,36 @@ async def on_startup(): @app.post("/api/login") def login(username: str = Form(...), password: str = Form(...)): user = authenticate(username, password) - if not user: - raise HTTPException(401, "아이디 또는 비밀번호가 올바르지 않습니다") + if not user: raise HTTPException(401, "아이디 또는 비밀번호가 올바르지 않습니다") return {"access_token": create_access_token(username), "token_type": "bearer"} @app.get("/api/me") def me(user: dict = Depends(require_auth)): + return {"username": user["username"], "role": user.get("role","user"), + "permissions": user.get("permissions", {"stt":False,"ocr":False})} + + +# ════════════════════════════════════════════════════════════════ +# 시스템 정보 +# ════════════════════════════════════════════════════════════════ +@app.get("/api/system") +def system_info(user: dict = Depends(require_auth)): + mem = psutil.virtual_memory() + swap = psutil.swap_memory() + s = _load_settings() return { - "username": user["username"], - "role": user.get("role", "user"), - "permissions": user.get("permissions", {"stt": False, "ocr": False}), + "ram_total_gb": round(mem.total / 1024**3, 1), + "ram_used_gb": round(mem.used / 1024**3, 1), + "ram_avail_gb": round(mem.available / 1024**3, 1), + "ram_percent": mem.percent, + "swap_total_gb": round(swap.total / 1024**3, 1), + "swap_used_gb": round(swap.used / 1024**3, 1), + "cpu_logical": psutil.cpu_count(logical=True), + "cpu_physical": psutil.cpu_count(logical=False), + "cpu_percent": psutil.cpu_percent(interval=0.3), + "cpu_threads_setting": s.get("cpu_threads", 0), + "stt_timeout": s.get("stt_timeout", 0), + "ollama_timeout":s.get("ollama_timeout", 600), } @@ -77,25 +190,28 @@ def me(user: dict = Depends(require_auth)): # ════════════════════════════════════════════════════════════════ @app.post("/api/transcribe") async def transcribe( - request: Request, - file: UploadFile = File(...), - use_ollama: str = Form("false"), - ollama_model: str = Form(""), + request: Request, file: UploadFile = File(...), + use_ollama: str = Form("false"), ollama_model: str = Form(""), user: dict = Depends(require_stt), ): _check_size(request) ext = _ext(file.filename) - if ext not in AUDIO_EXT: - raise HTTPException(400, f"지원하지 않는 형식: {', '.join(sorted(AUDIO_EXT))}") - file_id = str(uuid.uuid4()) + if ext not in AUDIO_EXT: raise HTTPException(400, f"지원하지 않는 형식: {', '.join(sorted(AUDIO_EXT))}") + file_id = str(uuid.uuid4()) save_path = os.path.join(UPLOAD_DIR, f"{file_id}.{ext}") await _save(file, save_path) - + file_size = os.path.getsize(save_path) _use_ollama = use_ollama.lower() == "true" - # 모델 미지정 시 설정에서 가져옴 - if _use_ollama and not ollama_model.strip(): - ollama_model = _load_settings().get("stt_ollama_model", "") - + s = _load_settings() + if _use_ollama and not ollama_model.strip(): ollama_model = s.get("stt_ollama_model","") + append_history({"id": file_id, "type": "stt", "status": "processing", + "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "username": user["username"], + "input": {"filename": file.filename, "size_bytes": file_size, "format": ext.upper()}, + "settings": {"model": os.getenv("WHISPER_MODEL","medium"), "language": os.getenv("WHISPER_LANGUAGE","ko"), + "compute_type": os.getenv("WHISPER_COMPUTE_TYPE","int8"), "cpu_threads": s.get("cpu_threads",0), + "stt_timeout": s.get("stt_timeout",0), "use_ollama": _use_ollama, + "ollama_model": ollama_model if _use_ollama else ""}, + "output": None}) task = transcribe_task.delay(file_id, save_path, _use_ollama, ollama_model) return {"task_id": task.id, "file_id": file_id, "filename": file.filename} @@ -105,167 +221,160 @@ async def transcribe( # ════════════════════════════════════════════════════════════════ @app.post("/api/ocr") async def ocr( - request: Request, - file: UploadFile = File(...), - mode: str = Form("text"), - backend: str = Form("paddle"), - ollama_model: str = Form(""), - custom_prompt: str = Form(""), + request: Request, file: UploadFile = File(...), + mode: str = Form("text"), backend: str = Form("paddle"), + ollama_model: str = Form(""), custom_prompt: str = Form(""), user: dict = Depends(require_ocr), ): _check_size(request) ext = _ext(file.filename) - if ext not in IMAGE_EXT: - raise HTTPException(400, f"지원하지 않는 형식: {', '.join(sorted(IMAGE_EXT))}") - if mode not in ("text", "structure"): mode = "text" - if backend not in ("paddle", "ollama"): backend = "paddle" - - # 모델 미지정 시 설정에서 가져옴 - if backend == "ollama" and not ollama_model.strip(): - ollama_model = _load_settings().get("ocr_ollama_model", "granite3.2-vision:latest") - - file_id = str(uuid.uuid4()) + if ext not in IMAGE_EXT: raise HTTPException(400, f"지원하지 않는 형식: {', '.join(sorted(IMAGE_EXT))}") + if mode not in ("text","structure"): mode = "text" + if backend not in ("paddle","ollama"): backend = "paddle" + s = _load_settings() + if backend == "ollama" and not ollama_model.strip(): ollama_model = s.get("ocr_ollama_model","granite3.2-vision:latest") + file_id = str(uuid.uuid4()) save_path = os.path.join(UPLOAD_DIR, f"{file_id}.{ext}") await _save(file, save_path) + file_size = os.path.getsize(save_path) + append_history({"id": file_id, "type": "ocr", "status": "processing", + "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "username": user["username"], + "input": {"filename": file.filename, "size_bytes": file_size, "format": ext.upper()}, + "settings": {"backend": backend, "mode": mode, "ocr_lang": os.getenv("OCR_LANG","korean"), + "ollama_model": ollama_model if backend=="ollama" else "", + "ollama_timeout": s.get("ollama_timeout",600), + "custom_prompt": custom_prompt[:200] if custom_prompt else ""}, + "output": None}) task = ocr_task.delay(file_id, save_path, mode, backend, ollama_model, custom_prompt) - return {"task_id": task.id, "file_id": file_id, - "filename": file.filename, "mode": mode, "backend": backend} + return {"task_id": task.id, "file_id": file_id, "filename": file.filename, "mode": mode, "backend": backend} # ════════════════════════════════════════════════════════════════ -# 작업 상태 / 다운로드 +# 상태 # ════════════════════════════════════════════════════════════════ @app.get("/api/status/{task_id}") def get_status(task_id: str, user: dict = Depends(require_auth)): r = celery_app.AsyncResult(task_id) - if r.state == "PENDING": return {"state": "pending", "progress": 0, "message": "대기 중..."} - if r.state == "PROGRESS": m = r.info or {}; return {"state": "progress","progress": m.get("progress",0),"message": m.get("message","처리 중...")} - if r.state == "SUCCESS": return {"state": "success", "progress": 100, **r.result} - if r.state == "FAILURE": return {"state": "failure", "progress": 0, "message": str(r.info)} - return {"state": r.state.lower(), "progress": 0} + if r.state == "PENDING": return {"state":"pending", "progress":0, "message":"대기 중..."} + if r.state == "PROGRESS": m=r.info or {}; return {"state":"progress","progress":m.get("progress",0),"message":m.get("message","처리 중...")} + if r.state == "SUCCESS": _update_history(task_id, r.result or {}); return {"state":"success","progress":100,**(r.result or {})} + if r.state == "FAILURE": _update_history_fail(task_id, str(r.info)); return {"state":"failure","progress":0,"message":str(r.info)} + return {"state":r.state.lower(),"progress":0} + +# ════════════════════════════════════════════════════════════════ +# 이력 +# ════════════════════════════════════════════════════════════════ +@app.get("/api/history") +def get_history(page: int=1, per_page: int=15, type_: str="", user: dict=Depends(require_auth)): + history = _load_history() + if user.get("role") != "admin": history = [h for h in history if h.get("username")==user["username"]] + if type_ in ("stt","ocr"): history = [h for h in history if h.get("type")==type_] + total = len(history); start = (page-1)*per_page + return {"total":total,"page":page,"per_page":per_page,"items":history[start:start+per_page]} + +@app.delete("/api/history/{history_id}") +def delete_history(history_id: str, user: dict=Depends(require_auth)): + if not delete_history_item(history_id): raise HTTPException(404,"이력을 찾을 수 없습니다") + return {"ok":True} + +@app.delete("/api/history") +def clear_all_history(user: dict=Depends(require_admin)): + clear_history(); return {"ok":True} + + +# ════════════════════════════════════════════════════════════════ +# 다운로드 / Ollama / 설정 / 관리자 +# ════════════════════════════════════════════════════════════════ @app.get("/api/download/{filename}") -def download(filename: str, user: dict = Depends(require_auth)): - if ".." in filename or "/" in filename: - raise HTTPException(400, "잘못된 파일명") +def download(filename: str, user: dict=Depends(require_auth)): + if ".." in filename or "/" in filename: raise HTTPException(400,"잘못된 파일명") path = os.path.join(OUTPUT_DIR, filename) - if not os.path.exists(path): - raise HTTPException(404, "파일을 찾을 수 없습니다") - media = ("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" - if filename.endswith(".xlsx") else "text/plain") + if not os.path.exists(path): raise HTTPException(404,"파일을 찾을 수 없습니다") + media = ("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" if filename.endswith(".xlsx") else "text/plain") return FileResponse(path, media_type=media, filename=filename) - -# ════════════════════════════════════════════════════════════════ -# Ollama 모델 목록 -# ════════════════════════════════════════════════════════════════ @app.get("/api/ollama/models") -def ollama_models(user: dict = Depends(require_auth)): +def ollama_models(user: dict=Depends(require_auth)): try: - resp = httpx.get(f"{OLLAMA_URL}/api/tags", timeout=8.0) - resp.raise_for_status() - models = [m["name"] for m in resp.json().get("models", [])] - return {"models": models, "connected": True} - except Exception as e: - return {"models": [], "connected": False, "error": str(e)} + resp = httpx.get(f"{OLLAMA_URL}/api/tags", timeout=8.0); resp.raise_for_status() + return {"models":[m["name"] for m in resp.json().get("models",[])], "connected":True} + except Exception as e: return {"models":[], "connected":False, "error":str(e)} - -# ════════════════════════════════════════════════════════════════ -# 설정 -# ════════════════════════════════════════════════════════════════ @app.get("/api/settings") -def get_settings(user: dict = Depends(require_auth)): - return _load_settings() +def get_settings(user: dict=Depends(require_auth)): return _load_settings() @app.post("/api/settings") def save_settings_endpoint( stt_ollama_model: str = Form(""), ocr_ollama_model: str = Form(""), + cpu_threads: str = Form("0"), + stt_timeout: str = Form("0"), + ollama_timeout: str = Form("600"), user: dict = Depends(require_auth), ): - data = {"stt_ollama_model": stt_ollama_model, - "ocr_ollama_model": ocr_ollama_model} + def _int(v, default): + try: return max(0, int(v)) + except: return default + data = { + "stt_ollama_model": stt_ollama_model, + "ocr_ollama_model": ocr_ollama_model, + "cpu_threads": _int(cpu_threads, 0), + "stt_timeout": _int(stt_timeout, 0), + "ollama_timeout": _int(ollama_timeout, 600), + } _save_settings(data) - return {"ok": True, "settings": data} + return {"ok":True, "settings":data} - -# ════════════════════════════════════════════════════════════════ -# 관리자 — 사용자 관리 -# ════════════════════════════════════════════════════════════════ @app.get("/api/admin/users") -def admin_list_users(user: dict = Depends(require_admin)): - return {"users": list_users()} +def admin_list_users(user: dict=Depends(require_admin)): return {"users":list_users()} @app.post("/api/admin/users") -def admin_create_user( - username: str = Form(...), - password: str = Form(...), - perm_stt: str = Form("false"), - perm_ocr: str = Form("false"), - user: dict = Depends(require_admin), -): - perms = {"stt": perm_stt.lower()=="true", "ocr": perm_ocr.lower()=="true"} - ok, msg = create_user(username, password, perms) - if not ok: - raise HTTPException(400, msg) - return {"ok": True, "message": msg} +def admin_create_user(username:str=Form(...),password:str=Form(...),perm_stt:str=Form("false"),perm_ocr:str=Form("false"),user:dict=Depends(require_admin)): + perms={"stt":perm_stt.lower()=="true","ocr":perm_ocr.lower()=="true"} + ok,msg=create_user(username,password,perms) + if not ok: raise HTTPException(400,msg) + return {"ok":True,"message":msg} @app.put("/api/admin/users/{username}") -def admin_update_user( - username: str, - perm_stt: str = Form("false"), - perm_ocr: str = Form("false"), - password: str = Form(""), - user: dict = Depends(require_admin), -): - perms = {"stt": perm_stt.lower()=="true", "ocr": perm_ocr.lower()=="true"} - ok, msg = update_user(username, perms, password or None) - if not ok: - raise HTTPException(400, msg) - return {"ok": True, "message": msg} +def admin_update_user(username:str,perm_stt:str=Form("false"),perm_ocr:str=Form("false"),password:str=Form(""),user:dict=Depends(require_admin)): + perms={"stt":perm_stt.lower()=="true","ocr":perm_ocr.lower()=="true"} + ok,msg=update_user(username,perms,password or None) + if not ok: raise HTTPException(400,msg) + return {"ok":True,"message":msg} @app.delete("/api/admin/users/{username}") -def admin_delete_user(username: str, user: dict = Depends(require_admin)): - ok, msg = delete_user(username) - if not ok: - raise HTTPException(400, msg) - return {"ok": True, "message": msg} +def admin_delete_user(username:str,user:dict=Depends(require_admin)): + ok,msg=delete_user(username) + if not ok: raise HTTPException(400,msg) + return {"ok":True,"message":msg} - -# ════════════════════════════════════════════════════════════════ -# 정리 -# ════════════════════════════════════════════════════════════════ @app.post("/api/cleanup") -def cleanup(user: dict = Depends(require_auth)): - return {"removed": _cleanup_outputs()} +def cleanup(user:dict=Depends(require_auth)): return {"removed":_cleanup_outputs()} # ════════════════════════════════════════════════════════════════ # 유틸 # ════════════════════════════════════════════════════════════════ -def _check_size(request: Request): +def _check_size(request): cl = request.headers.get("content-length") - if cl and int(cl) > MAX_UPLOAD_BYTES: - raise HTTPException(413, f"파일이 너무 큽니다. 최대 {MAX_UPLOAD_BYTES//1024//1024}MB") + if cl and int(cl) > MAX_UPLOAD_BYTES: raise HTTPException(413, f"파일이 너무 큽니다. 최대 {MAX_UPLOAD_BYTES//1024//1024}MB") -def _cleanup_outputs() -> int: - if OUTPUT_KEEP_SECS == 0: - return 0 - cutoff = time.time() - OUTPUT_KEEP_SECS - removed = 0 - for f in glob.glob(os.path.join(OUTPUT_DIR, "*")): +def _cleanup_outputs(): + if OUTPUT_KEEP_SECS == 0: return 0 + cutoff = time.time() - OUTPUT_KEEP_SECS; removed = 0 + for f in glob.glob(os.path.join(OUTPUT_DIR,"*")): try: - if os.path.getmtime(f) < cutoff: - os.remove(f); removed += 1 + if os.path.getmtime(f) < cutoff: os.remove(f); removed += 1 except: pass return removed -def _ext(fn): return fn.rsplit(".", 1)[-1].lower() if "." in fn else "" +def _ext(fn): return fn.rsplit(".",1)[-1].lower() if "." in fn else "" -async def _save(file: UploadFile, path: str): +async def _save(file, path): written = 0 - async with aiofiles.open(path, "wb") as f: - while chunk := await file.read(1024 * 1024): + async with aiofiles.open(path,"wb") as f: + while chunk := await file.read(1024*1024): written += len(chunk) if written > MAX_UPLOAD_BYTES: await f.close(); os.remove(path) diff --git a/app/ocr_tasks.py b/app/ocr_tasks.py index c5bc231..ff3c846 100644 --- a/app/ocr_tasks.py +++ b/app/ocr_tasks.py @@ -1,8 +1,5 @@ """ -OCR Celery Tasks -- PaddleOCR 3.x 호환 (use_gpu/show_log/cls 파라미터 제거, 결과구조 변경 반영) -- backend="paddle" → PaddleOCR 로컬 실행 -- backend="ollama" → Ollama Vision API 호출 +OCR Celery Tasks — PaddleOCR 3.x + Ollama Vision """ import os import base64 @@ -16,7 +13,7 @@ REDIS_URL = os.getenv("REDIS_URL", "redis://redis:6379/0") OUTPUT_DIR = os.getenv("OUTPUT_DIR", "/data/outputs") OCR_LANG = os.getenv("OCR_LANG", "korean") OLLAMA_URL = os.getenv("OLLAMA_URL", "http://192.168.0.126:11434") -OLLAMA_TIMEOUT = int(os.getenv("OLLAMA_TIMEOUT", "180")) +OLLAMA_TIMEOUT = int(os.getenv("OLLAMA_TIMEOUT", "600")) celery_app = Celery("ocr_tasks", broker=REDIS_URL, backend=REDIS_URL) celery_app.conf.update( @@ -27,7 +24,6 @@ celery_app.conf.update( result_expires=3600, ) -# PaddleOCR 싱글톤 _ocr_engine = None _struct_engine = None @@ -36,7 +32,6 @@ def get_ocr(): if _ocr_engine is None: from paddleocr import PaddleOCR print(f"[PaddleOCR] 로딩 (lang={OCR_LANG})") - # PaddleOCR 3.x: use_gpu/show_log 파라미터 제거됨 _ocr_engine = PaddleOCR(use_angle_cls=True, lang=OCR_LANG) print("[PaddleOCR] 완료") return _ocr_engine @@ -51,9 +46,6 @@ def get_structure(): return _struct_engine -# ════════════════════════════════════════════════════════════════ -# 메인 Task -# ════════════════════════════════════════════════════════════════ @celery_app.task(bind=True, name="tasks.ocr_task", queue="ocr") def ocr_task(self, file_id, image_path, mode="text", backend="paddle", ollama_model="granite3.2-vision", custom_prompt=""): @@ -72,9 +64,6 @@ def ocr_task(self, file_id, image_path, mode="text", raise Exception(f"OCR 실패: {str(e)}") -# ════════════════════════════════════════════════════════════════ -# Ollama 백엔드 -# ════════════════════════════════════════════════════════════════ _OLLAMA_PROMPTS = { "text": "이 이미지에서 모든 텍스트를 정확하게 추출해줘. 원본의 줄 구분과 단락 구조를 유지해줘.", "structure": "이 이미지를 분석해서 표는 마크다운 표 형식으로, 나머지 텍스트는 원본 구조를 유지하며 추출해줘.", @@ -91,8 +80,7 @@ def _run_ollama(task, file_id, image_path, mode, ollama_model, custom_prompt): resp = httpx.post(f"{OLLAMA_URL}/api/chat", json={ "model": ollama_model, "messages": [{"role": "user", "content": prompt, "images": [img_b64]}], - "stream": False, - "options": {"temperature": 0.1}, + "stream": False, "options": {"temperature": 0.1}, }, timeout=float(OLLAMA_TIMEOUT)) resp.raise_for_status() except httpx.ConnectError: @@ -121,16 +109,12 @@ def _run_ollama(task, file_id, image_path, mode, ollama_model, custom_prompt): "mode": mode, "backend": "ollama", "ollama_model": ollama_model, "full_text": full_text, "lines": lines, "line_count": len(lines), "txt_file": txt_file, - "tables": [{"html": h, "rows": len(t), - "cols": max(len(r) for r in t) if t else 0} + "tables": [{"html": h, "rows": len(t), "cols": max(len(r) for r in t) if t else 0} for h, t in zip(tables_html, tables)], "xlsx_file": xlsx_file, } -# ════════════════════════════════════════════════════════════════ -# PaddleOCR 백엔드 -# ════════════════════════════════════════════════════════════════ def _run_paddle(task, file_id, image_path, mode): import cv2 img = cv2.imread(image_path) @@ -140,50 +124,38 @@ def _run_paddle(task, file_id, image_path, mode): return _paddle_structure(task, file_id, img) if mode == "structure" \ else _paddle_text(task, file_id, img) - def _paddle_text(task, file_id, img): task.update_state(state="PROGRESS", meta={"progress": 30, "message": "텍스트 인식 중..."}) - # PaddleOCR 3.x: cls 파라미터 제거, 결과 구조 변경 result = get_ocr().ocr(img) task.update_state(state="PROGRESS", meta={"progress": 80, "message": "결과 정리 중..."}) - lines = [] if result and len(result) > 0: r = result[0] - # PaddleOCR 3.x 결과 구조: dict with rec_texts, rec_scores if isinstance(r, dict): texts = r.get("rec_texts", []) scores = r.get("rec_scores", []) for text, conf in zip(texts, scores): if text.strip(): - lines.append({"text": text, - "confidence": round(float(conf), 3), - "bbox": []}) - # 구버전 호환 (list of [bbox, (text, conf)]) + lines.append({"text": text, "confidence": round(float(conf), 3), "bbox": []}) elif isinstance(r, list): for item in r: if item and len(item) == 2: _, (text, conf) = item if text.strip(): - lines.append({"text": text, - "confidence": round(float(conf), 3), - "bbox": []}) - + lines.append({"text": text, "confidence": round(float(conf), 3), "bbox": []}) full_text = "\n".join(l["text"] for l in lines) txt_file = f"{file_id}_ocr.txt" with open(os.path.join(OUTPUT_DIR, txt_file), "w", encoding="utf-8") as f: f.write(full_text) - return {"mode": "text", "backend": "paddle", + return {"mode": "text", "backend": "paddle", "ollama_model": "", "full_text": full_text, "lines": lines, "line_count": len(lines), "txt_file": txt_file, "tables": [], "xlsx_file": None} - def _paddle_structure(task, file_id, img): task.update_state(state="PROGRESS", meta={"progress": 20, "message": "레이아웃 분석 중..."}) result = get_structure()(img) task.update_state(state="PROGRESS", meta={"progress": 60, "message": "표 구조 추출 중..."}) - text_blocks, tables_html, tables_data = [], [], [] for region in result: rtype = region.get("type", "").lower() @@ -197,32 +169,24 @@ def _paddle_structure(task, file_id, img): if isinstance(line, (list, tuple)) and len(line) == 2: _, (text, _conf) = line text_blocks.append(text) - full_text = "\n".join(text_blocks) task.update_state(state="PROGRESS", meta={"progress": 80, "message": "Excel 생성 중..."}) - xlsx_file = None if tables_data: xlsx_file = f"{file_id}_tables.xlsx" _save_excel(tables_data, os.path.join(OUTPUT_DIR, xlsx_file)) - txt_file = f"{file_id}_ocr.txt" with open(os.path.join(OUTPUT_DIR, txt_file), "w", encoding="utf-8") as f: f.write("# 텍스트\n\n" + full_text) - - lines = [{"text": t, "confidence": 1.0, "bbox": []} for t in text_blocks] - tables_meta = [{"html": h, "rows": len(d), - "cols": max(len(r) for r in d) if d else 0} + lines = [{"text": t, "confidence": 1.0, "bbox": []} for t in text_blocks] + tables_meta = [{"html": h, "rows": len(d), "cols": max(len(r) for r in d) if d else 0} for h, d in zip(tables_html, tables_data)] - return {"mode": "structure", "backend": "paddle", + return {"mode": "structure", "backend": "paddle", "ollama_model": "", "full_text": full_text, "lines": lines, "line_count": len(lines), "txt_file": txt_file, "tables": tables_meta, "xlsx_file": xlsx_file} -# ════════════════════════════════════════════════════════════════ -# 공통 유틸 -# ════════════════════════════════════════════════════════════════ def _parse_md_tables(text): tables, current = [], [] for line in text.splitlines(): @@ -241,8 +205,7 @@ def _md_table_to_html(table): rows = "" for i, row in enumerate(table): tag = "th" if i == 0 else "td" - cells = "".join(f"<{tag}>{c}" for c in row) - rows += f"{cells}" + rows += "" + "".join(f"<{tag}>{c}" for c in row) + "" return f"{rows}
" def _html_table_to_list(html): @@ -252,11 +215,10 @@ def _html_table_to_list(html): super().__init__() self.rows, self._row, self._cell, self._in = [], [], [], False def handle_starttag(self, tag, attrs): - if tag == "tr": self._row = [] + if tag == "tr": self._row = [] elif tag in ("td","th"): self._cell = []; self._in = True def handle_endtag(self, tag): - if tag in ("td","th"): - self._row.append("".join(self._cell).strip()); self._in = False + if tag in ("td","th"): self._row.append("".join(self._cell).strip()); self._in = False elif tag == "tr": if self._row: self.rows.append(self._row) def handle_data(self, data): @@ -264,18 +226,16 @@ def _html_table_to_list(html): p = P(); p.feed(html); return p.rows def _save_excel(tables, path): - wb = openpyxl.Workbook() - wb.remove(wb.active) + wb = openpyxl.Workbook(); wb.remove(wb.active) for i, table in enumerate(tables, 1): - ws = wb.create_sheet(f"표 {i}") + ws = wb.create_sheet(f"표 {i}") thin = Side(style="thin", color="2A2A33") bdr = Border(left=thin, right=thin, top=thin, bottom=thin) for r_idx, row in enumerate(table, 1): for c_idx, val in enumerate(row, 1): cell = ws.cell(row=r_idx, column=c_idx, value=val) - cell.border = bdr - cell.alignment = Alignment(horizontal="center", - vertical="center", wrap_text=True) + cell.border = bdr + cell.alignment = Alignment(horizontal="center", vertical="center", wrap_text=True) if r_idx == 1: cell.fill = PatternFill("solid", fgColor="1A1A2E") cell.font = Font(color="00E5A0", bold=True, size=10) diff --git a/app/requirements.txt b/app/requirements.txt index 65bc35e..7f5133a 100644 --- a/app/requirements.txt +++ b/app/requirements.txt @@ -6,7 +6,7 @@ redis==5.0.8 faster-whisper==1.0.3 aiofiles==23.2.1 -# 인증 (bcrypt 제거 — 직접 비교 방식 사용) +# 인증 python-jose[cryptography]==3.3.0 # PaddleOCR 3.x @@ -19,3 +19,6 @@ httpx>=0.27.0 # Excel 출력 openpyxl==3.1.2 Pillow>=10.0.0 + +# 시스템 모니터링 +psutil>=5.9.0 diff --git a/app/static/index.html b/app/static/index.html index 773ca3a..8514a0f 100644 --- a/app/static/index.html +++ b/app/static/index.html @@ -2,8 +2,8 @@ - -VoiceScript — STT & OCR + +VoiceScript @@ -244,7 +323,7 @@ textarea.cprompt:focus{border-color:#7c6cd4}
-
+
@@ -255,22 +334,29 @@ textarea.cprompt:focus{border-color:#7c6cd4}

VoiceScript

+
+ RAM +
+ + +
- + - +
- - + +
+
+
+

📋 변환 이력

+
+ + + +
+ + +
+
불러오는 중...
+ +
+
+ +
-
-

설정

+
+

⚙️ 설정

- +
+ +
+

📊 시스템 리소스

+
+
RAM 사용량
로딩 중
+
CPU 사용률
로딩 중
+
Swap
+
CPU 스레드
worker 재시작 후 반영
+
+
+ + +
+

🖥️ CPU 스레드

+ +
+ + 0 (자동) +
+
+ + +
+

⏱️ 변환 타임아웃

+
+
+ + +
0 = 무제한 · 대용량 파일은 크게 설정
+
+
+ + +
11b 이상 모델은 300+ 권장
+
+
+
+ +

🎙 STT Ollama 후처리 기본 모델

-
-
- Whisper 변환 후 Ollama로 교정할 때 사용할 기본 모델 - STT 페이지에서 모델 미선택 시 이 모델이 사용됩니다 -
-
- + +
+

🔍 OCR Ollama 기본 모델

-
-
- OCR에서 Ollama Vision 엔진 선택 시 사용할 기본 모델 - OCR 페이지에서 모델 미선택 시 이 모델이 사용됩니다 -
-
- + +
-
- - +
+ +
- +
-

👤 사용자 관리

- - +

👤 사용자 관리

-
-

사용자 목록

- -
- - - - - - - - - - - -
사용자명역할STTOCR관리
+

사용자 목록

+
사용자명역할STTOCR관리
- -

신규 사용자 추가

-
- - -
-
- - -
-
-
- -
- - -
-
-
- +
+
+
+
@@ -530,601 +594,300 @@ textarea.cprompt:focus{border-color:#7c6cd4} diff --git a/app/tasks.py b/app/tasks.py index cb9952f..d24d8fb 100644 --- a/app/tasks.py +++ b/app/tasks.py @@ -1,7 +1,7 @@ import os import httpx from celery import Celery -from ocr_tasks import ocr_task # noqa: F401 — worker에 등록 +from ocr_tasks import ocr_task # noqa: F401 REDIS_URL = os.getenv("REDIS_URL", "redis://redis:6379/0") MODEL_SIZE = os.getenv("WHISPER_MODEL", "medium") @@ -12,7 +12,10 @@ BEAM_SIZE = int(os.getenv("WHISPER_BEAM_SIZE", "5")) INITIAL_PROMPT = os.getenv("WHISPER_INITIAL_PROMPT", "") or None OUTPUT_DIR = os.getenv("OUTPUT_DIR", "/data/outputs") OLLAMA_URL = os.getenv("OLLAMA_URL", "http://192.168.0.126:11434") -OLLAMA_TIMEOUT = int(os.getenv("OLLAMA_TIMEOUT", "180")) +OLLAMA_TIMEOUT = int(os.getenv("OLLAMA_TIMEOUT", "600")) + +_cpu_threads_env = int(os.getenv("CPU_THREADS", "0")) +CPU_THREADS = _cpu_threads_env if _cpu_threads_env > 0 else None # None = auto celery_app = Celery("whisper_tasks", broker=REDIS_URL, backend=REDIS_URL) celery_app.conf.update( @@ -29,15 +32,16 @@ def get_model(): global _model if _model is None: from faster_whisper import WhisperModel - print(f"[Whisper] 로딩: {MODEL_SIZE} / {DEVICE} / {COMPUTE_TYPE}") - _model = WhisperModel(MODEL_SIZE, device=DEVICE, compute_type=COMPUTE_TYPE) + kwargs = dict(device=DEVICE, compute_type=COMPUTE_TYPE) + if CPU_THREADS is not None: + kwargs["cpu_threads"] = CPU_THREADS + print(f"[Whisper] 로딩: {MODEL_SIZE} / {DEVICE} / {COMPUTE_TYPE} / threads={CPU_THREADS or 'auto'}") + _model = WhisperModel(MODEL_SIZE, **kwargs) print("[Whisper] 로드 완료") return _model -# ── Ollama 후처리 ───────────────────────────────────────────── def _ollama_postprocess(text: str, model: str) -> str: - """Whisper 결과를 Ollama로 후처리 (문장부호·맞춤법·자연스러운 문장)""" if not model or not text.strip(): return text prompt = ( @@ -49,33 +53,22 @@ def _ollama_postprocess(text: str, model: str) -> str: try: resp = httpx.post( f"{OLLAMA_URL}/api/chat", - json={ - "model": model, - "messages": [{"role": "user", "content": prompt}], - "stream": False, - "options": {"temperature": 0.1}, - }, + json={"model": model, + "messages": [{"role": "user", "content": prompt}], + "stream": False, "options": {"temperature": 0.1}}, timeout=float(OLLAMA_TIMEOUT), ) resp.raise_for_status() result = resp.json().get("message", {}).get("content", "").strip() return result if result else text except Exception as e: - print(f"[Ollama 후처리 실패] {e} — 원본 텍스트 사용") + print(f"[Ollama 후처리 실패] {e}") return text -# ════════════════════════════════════════════════════════════════ -# STT Celery Task -# ════════════════════════════════════════════════════════════════ @celery_app.task(bind=True, name="tasks.transcribe_task", queue="stt") -def transcribe_task( - self, - file_id: str, - audio_path: str, - use_ollama: bool = False, - ollama_model: str = "", -): +def transcribe_task(self, file_id: str, audio_path: str, + use_ollama: bool = False, ollama_model: str = ""): self.update_state(state="PROGRESS", meta={"progress": 5, "message": "모델 준비 중..."}) try: model = get_model() @@ -97,8 +90,8 @@ def transcribe_task( duration = info.duration for seg in segments_gen: - segments.append({"start": round(seg.start,2), - "end": round(seg.end,2), + segments.append({"start": round(seg.start, 2), + "end": round(seg.end, 2), "text": seg.text.strip()}) parts.append(seg.text.strip()) if duration > 0: @@ -112,7 +105,6 @@ def transcribe_task( raw_text = "\n".join(parts) full_text = raw_text - # Ollama 후처리 if use_ollama and ollama_model: self.update_state(state="PROGRESS", meta={"progress": 85, diff --git a/docker-compose.yml b/docker-compose.yml index 99ffd7d..e00dd0e 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -3,7 +3,6 @@ services: image: redis:7-alpine container_name: whisper_redis restart: unless-stopped - # RDB 스냅샷 저장 실패 시에도 쓰기 허용 (Celery 브로커 용도) command: redis-server --stop-writes-on-bgsave-error no environment: - TZ=Asia/Seoul @@ -19,13 +18,13 @@ services: container_name: whisper_app restart: unless-stopped ports: - - "8800:8000" # 호스트 Nginx가 리버스 프록시 + - "8800:8000" environment: - TZ=Asia/Seoul # ── 인증 (반드시 변경) ────────────────────────────── - - AUTH_USERNAME=byun - - AUTH_PASSWORD=admin + - AUTH_USERNAME=admin + - AUTH_PASSWORD=changeme1234 - JWT_SECRET=your-very-secret-key-change-this - JWT_EXPIRE_HOURS=12 @@ -33,24 +32,31 @@ services: - REDIS_URL=redis://redis:6379/0 - UPLOAD_DIR=/data/uploads - OUTPUT_DIR=/data/outputs - - WHISPER_MODEL=medium # tiny/base/small/medium/large-v3 + - WHISPER_MODEL=medium - WHISPER_DEVICE=cpu - WHISPER_COMPUTE_TYPE=int8 - WHISPER_LANGUAGE=ko - WHISPER_BEAM_SIZE=5 - - WHISPER_INITIAL_PROMPT= # 예: "고객 상담 녹취록입니다." + - WHISPER_INITIAL_PROMPT= + + # ── 타임아웃 ───────────────────────────────────────── + # STT: Celery 태스크 소프트 타임아웃 (초) — 0=무제한 + - STT_TIMEOUT=0 + # Ollama: Vision/후처리 API 응답 대기 (초) + - OLLAMA_TIMEOUT=600 # ── 파일 관리 ──────────────────────────────────────── - MAX_UPLOAD_MB=500 - OUTPUT_KEEP_HOURS=48 # ── PaddleOCR ──────────────────────────────────────── - - OCR_LANG=korean # korean/en/japan/chinese_cht/ch + - OCR_LANG=korean - # ── Ollama OCR ─────────────────────────────────────── - # 호스트 실제 LAN IP 사용 (host.docker.internal은 Linux에서 불안정) + # ── Ollama ─────────────────────────────────────────── - OLLAMA_URL=http://192.168.0.126:11434 - - OLLAMA_TIMEOUT=600 # 11b 이상 모델은 300 이상 권장 + + # ── CPU 스레드 ─────────────────────────────────────── + - CPU_THREADS=0 volumes: - stt_data:/data @@ -67,9 +73,6 @@ services: dockerfile: Dockerfile container_name: whisper_worker restart: unless-stopped - # --pool=solo : CTranslate2(faster-whisper)가 prefork 방식과 충돌(SIGSEGV) 발생 - # solo 모드로 포크 없이 실행하여 해결 - # --max-tasks-per-child=50 : Whisper/Paddle 모델 메모리 누수 방지 command: > celery -A tasks worker --loglevel=info @@ -87,11 +90,13 @@ services: - WHISPER_LANGUAGE=ko - WHISPER_BEAM_SIZE=5 - WHISPER_INITIAL_PROMPT= + - STT_TIMEOUT=0 + - OLLAMA_TIMEOUT=600 - MAX_UPLOAD_MB=500 - OUTPUT_KEEP_HOURS=48 - OCR_LANG=korean - OLLAMA_URL=http://192.168.0.126:11434 - - OLLAMA_TIMEOUT=600 + - CPU_THREADS=0 - JWT_SECRET=your-very-secret-key-change-this volumes: - stt_data:/data diff --git a/docker-compose.yml.bak1 b/docker-compose.yml.bak1 new file mode 100644 index 0000000..9baf9b0 --- /dev/null +++ b/docker-compose.yml.bak1 @@ -0,0 +1,114 @@ +services: + redis: + image: redis:7-alpine + container_name: whisper_redis + restart: unless-stopped + command: redis-server --stop-writes-on-bgsave-error no + environment: + - TZ=Asia/Seoul + volumes: + - redis_data:/data + networks: + - whisper_net + + app: + build: + context: ./app + dockerfile: Dockerfile + container_name: whisper_app + restart: unless-stopped + ports: + - "8800:8000" + environment: + - TZ=Asia/Seoul + + # ── 인증 (반드시 변경) ────────────────────────────── + - AUTH_USERNAME=admin + - AUTH_PASSWORD=changeme1234 + - JWT_SECRET=your-very-secret-key-change-this + - JWT_EXPIRE_HOURS=12 + + # ── Whisper STT ───────────────────────────────────── + - REDIS_URL=redis://redis:6379/0 + - UPLOAD_DIR=/data/uploads + - OUTPUT_DIR=/data/outputs + - WHISPER_MODEL=medium + - WHISPER_DEVICE=cpu + - WHISPER_COMPUTE_TYPE=int8 + - WHISPER_LANGUAGE=ko + - WHISPER_BEAM_SIZE=5 + - WHISPER_INITIAL_PROMPT= + + # ── CPU 스레드 설정 ────────────────────────────────── + # 5825u: 8코어 16스레드 → 8~12 권장 + # 0 = 자동(시스템 전체 코어 사용) + - CPU_THREADS=0 + + # ── 파일 관리 ──────────────────────────────────────── + - MAX_UPLOAD_MB=500 + - OUTPUT_KEEP_HOURS=48 + + # ── PaddleOCR ──────────────────────────────────────── + - OCR_LANG=korean + + # ── Ollama ─────────────────────────────────────────── + - OLLAMA_URL=http://192.168.0.126:11434 + - OLLAMA_TIMEOUT=600 + + volumes: + - stt_data:/data + - whisper_models:/root/.cache/huggingface + - paddle_models:/root/.paddlex + depends_on: + - redis + networks: + - whisper_net + + worker: + build: + context: ./app + dockerfile: Dockerfile + container_name: whisper_worker + restart: unless-stopped + command: > + celery -A tasks worker + --loglevel=info + --pool=solo + --max-tasks-per-child=50 + -Q stt,ocr + environment: + - TZ=Asia/Seoul + - REDIS_URL=redis://redis:6379/0 + - UPLOAD_DIR=/data/uploads + - OUTPUT_DIR=/data/outputs + - WHISPER_MODEL=medium + - WHISPER_DEVICE=cpu + - WHISPER_COMPUTE_TYPE=int8 + - WHISPER_LANGUAGE=ko + - WHISPER_BEAM_SIZE=5 + - WHISPER_INITIAL_PROMPT= + - CPU_THREADS=0 + - MAX_UPLOAD_MB=500 + - OUTPUT_KEEP_HOURS=48 + - OCR_LANG=korean + - OLLAMA_URL=http://192.168.0.126:11434 + - OLLAMA_TIMEOUT=600 + - JWT_SECRET=your-very-secret-key-change-this + volumes: + - stt_data:/data + - whisper_models:/root/.cache/huggingface + - paddle_models:/root/.paddlex + depends_on: + - redis + networks: + - whisper_net + +volumes: + redis_data: + stt_data: + whisper_models: + paddle_models: + +networks: + whisper_net: + driver: bridge diff --git a/whisper-stt.tar.gz b/whisper-stt.tar.gz new file mode 100755 index 0000000..c19654a Binary files /dev/null and b/whisper-stt.tar.gz differ