diff --git a/app/Dockerfile b/app/Dockerfile index a2bbb0c..6da8ab2 100644 --- a/app/Dockerfile +++ b/app/Dockerfile @@ -9,8 +9,6 @@ RUN apt-get update && apt-get install -y \ libxext6 \ libxrender1 \ libgl1 \ - libgles2 \ - libegl1 \ wget \ curl \ && rm -rf /var/lib/apt/lists/* @@ -19,8 +17,11 @@ WORKDIR /app COPY requirements.txt . -RUN pip install --no-cache-dir paddlepaddle==3.0.0 +# PaddlePaddle CPU (AMD64) — paddleocr 3.x 호환 +RUN pip install --no-cache-dir paddlepaddle==3.0.0 \ + -i https://pypi.tuna.tsinghua.edu.cn/simple +# 나머지 패키지 RUN pip install --no-cache-dir -r requirements.txt COPY . . diff --git a/app/main.py b/app/main.py index 0d510a0..8095b92 100644 --- a/app/main.py +++ b/app/main.py @@ -32,11 +32,16 @@ AUDIO_EXT = {"mp3","mp4","wav","m4a","ogg","flac","aac","wma","webm","mkv","avi" IMAGE_EXT = {"jpg","jpeg","png","bmp","tiff","tif","webp","gif"} _DEFAULT_SETTINGS = { - "stt_ollama_model": "", - "ocr_ollama_model": "granite3.2-vision:latest", - "cpu_threads": 0, - "stt_timeout": 0, - "ollama_timeout": 600, + "stt_ollama_model": "", + "ocr_ollama_model": "granite3.2-vision:latest", + "cpu_threads": 0, + "stt_timeout": 0, + "ollama_timeout": 600, + # OpenRouter + "openrouter_url": "https://openrouter.ai/api/v1", + "openrouter_api_key": "", + "openrouter_stt_model": "", + "openrouter_ocr_model": "", } _hist_lock = threading.Lock() @@ -50,7 +55,8 @@ def _load_settings() -> dict: def _save_settings(data: dict): SETTINGS_FILE.parent.mkdir(parents=True, exist_ok=True) - with open(SETTINGS_FILE, "w", encoding="utf-8") as f: json.dump(data, f, ensure_ascii=False, indent=2) + with open(SETTINGS_FILE, "w", encoding="utf-8") as f: + json.dump(data, f, ensure_ascii=False, indent=2) # ── 이력 I/O ───────────────────────────────────────────────── @@ -77,19 +83,15 @@ def append_history(record: dict): except: pass def _update_history_by_task(task_id: str, result: dict, success: bool, error_msg: str = ""): - """task_id로 이력을 찾아 결과 업데이트 — 핵심 버그 수정""" with _hist_lock: 
if not HISTORY_FILE.exists(): return try: with open(HISTORY_FILE, "r", encoding="utf-8") as f: history = json.load(f) for h in history: - # task_id 필드로 매칭 if h.get("task_id") != task_id: continue if h.get("status") != "processing": break if not success: - h["status"] = "failed" - h["output"] = {"error": error_msg[:300]} - break + h["status"] = "failed"; h["output"] = {"error": error_msg[:300]}; break h["status"] = "success" if h["type"] == "stt": h["output"] = { @@ -97,12 +99,14 @@ def _update_history_by_task(task_id: str, result: dict, success: bool, error_msg "language": result.get("language", ""), "duration_s": result.get("duration", 0), "segments": len(result.get("segments", [])), - "text_preview": result.get("text", "")[:200] + ("…" if len(result.get("text",""))>200 else ""), + "text_preview": result.get("text","")[:200] + ("…" if len(result.get("text",""))>200 else ""), "ollama_used": result.get("ollama_used", False), "ollama_model": result.get("ollama_model", ""), + "openrouter_used": result.get("openrouter_used", False), + "openrouter_model": result.get("openrouter_model", ""), } else: - full_text = result.get("full_text", "") + ft = result.get("full_text", "") h["output"] = { "txt_file": result.get("txt_file", ""), "xlsx_file": result.get("xlsx_file", ""), @@ -110,7 +114,8 @@ def _update_history_by_task(task_id: str, result: dict, success: bool, error_msg "table_count": len(result.get("tables", [])), "backend": result.get("backend", ""), "ollama_model": result.get("ollama_model", ""), - "text_preview": full_text[:200] + ("…" if len(full_text)>200 else ""), + "openrouter_model": result.get("openrouter_model", ""), + "text_preview": ft[:200] + ("…" if len(ft)>200 else ""), } break _write_history(history) @@ -132,12 +137,11 @@ def clear_history(): # ════════════════════════════════════════════════════════════════ -# 시작 이벤트 +# 시작 # ════════════════════════════════════════════════════════════════ @app.on_event("startup") async def on_startup(): - init_users() - 
_cleanup_outputs() + init_users(); _cleanup_outputs() # ════════════════════════════════════════════════════════════════ @@ -183,7 +187,10 @@ def system_info(user: dict = Depends(require_auth)): @app.post("/api/transcribe") async def transcribe( request: Request, file: UploadFile = File(...), - use_ollama: str = Form("false"), ollama_model: str = Form(""), + use_ollama: str = Form("false"), + ollama_model: str = Form(""), + use_openrouter: str = Form("false"), + openrouter_model: str = Form(""), user: dict = Depends(require_stt), ): _check_size(request) @@ -193,29 +200,39 @@ async def transcribe( save_path = os.path.join(UPLOAD_DIR, f"{file_id}.{ext}") await _save(file, save_path) file_size = os.path.getsize(save_path) - _use_ollama = use_ollama.lower() == "true" + s = _load_settings() - if _use_ollama and not ollama_model.strip(): ollama_model = s.get("stt_ollama_model", "") + _use_ollama = use_ollama.lower() == "true" + _use_openrouter = use_openrouter.lower() == "true" - task = transcribe_task.delay(file_id, save_path, _use_ollama, ollama_model) + if _use_ollama and not ollama_model.strip(): + ollama_model = s.get("stt_ollama_model", "") + if _use_openrouter and not openrouter_model.strip(): + openrouter_model = s.get("openrouter_stt_model", "") + + task = transcribe_task.delay( + file_id, save_path, + _use_ollama, ollama_model, + _use_openrouter, openrouter_model, + s.get("openrouter_url", ""), s.get("openrouter_api_key", ""), + ) - # ★ task_id를 이력에 함께 저장 append_history({ - "id": file_id, - "task_id": task.id, # ← 업데이트 매칭 키 - "type": "stt", - "status": "processing", + "id": file_id, "task_id": task.id, "type": "stt", + "status": "processing", "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), - "username": user["username"], - "input": {"filename": file.filename, "size_bytes": file_size, "format": ext.upper()}, - "settings": { - "model": os.getenv("WHISPER_MODEL", "medium"), - "language": os.getenv("WHISPER_LANGUAGE", "ko"), - "compute_type": 
os.getenv("WHISPER_COMPUTE_TYPE", "int8"), - "cpu_threads": s.get("cpu_threads", 0), - "stt_timeout": s.get("stt_timeout", 0), - "use_ollama": _use_ollama, - "ollama_model": ollama_model if _use_ollama else "", + "username": user["username"], + "input": {"filename": file.filename, "size_bytes": file_size, "format": ext.upper()}, + "settings": { + "model": os.getenv("WHISPER_MODEL","medium"), + "language": os.getenv("WHISPER_LANGUAGE","ko"), + "compute_type": os.getenv("WHISPER_COMPUTE_TYPE","int8"), + "cpu_threads": s.get("cpu_threads",0), + "stt_timeout": s.get("stt_timeout",0), + "use_ollama": _use_ollama, + "ollama_model": ollama_model if _use_ollama else "", + "use_openrouter": _use_openrouter, + "openrouter_model": openrouter_model if _use_openrouter else "", }, "output": None, }) @@ -228,211 +245,297 @@ async def transcribe( @app.post("/api/ocr") async def ocr( request: Request, file: UploadFile = File(...), - mode: str = Form("text"), backend: str = Form("paddle"), - ollama_model: str = Form(""), custom_prompt: str = Form(""), + mode: str = Form("text"), + backend: str = Form("paddle"), # paddle | ollama | openrouter + ollama_model: str = Form(""), + openrouter_model: str = Form(""), + custom_prompt: str = Form(""), user: dict = Depends(require_ocr), ): _check_size(request) ext = _ext(file.filename) if ext not in IMAGE_EXT: raise HTTPException(400, f"지원하지 않는 형식: {', '.join(sorted(IMAGE_EXT))}") if mode not in ("text","structure"): mode = "text" - if backend not in ("paddle","ollama"): backend = "paddle" + if backend not in ("paddle","ollama","openrouter"): backend = "paddle" + s = _load_settings() - if backend == "ollama" and not ollama_model.strip(): ollama_model = s.get("ocr_ollama_model","granite3.2-vision:latest") + if backend == "ollama" and not ollama_model.strip(): + ollama_model = s.get("ocr_ollama_model","granite3.2-vision:latest") + if backend == "openrouter" and not openrouter_model.strip(): + openrouter_model = s.get("openrouter_ocr_model","") + 
file_id = str(uuid.uuid4()) save_path = os.path.join(UPLOAD_DIR, f"{file_id}.{ext}") await _save(file, save_path) file_size = os.path.getsize(save_path) - task = ocr_task.delay(file_id, save_path, mode, backend, ollama_model, custom_prompt) + task = ocr_task.delay( + file_id, save_path, mode, backend, + ollama_model, openrouter_model, + s.get("openrouter_url",""), s.get("openrouter_api_key",""), + custom_prompt, + ) - # ★ task_id를 이력에 함께 저장 append_history({ - "id": file_id, - "task_id": task.id, # ← 업데이트 매칭 키 - "type": "ocr", - "status": "processing", + "id": file_id, "task_id": task.id, "type": "ocr", + "status": "processing", "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), - "username": user["username"], - "input": {"filename": file.filename, "size_bytes": file_size, "format": ext.upper()}, - "settings": { - "backend": backend, - "mode": mode, - "ocr_lang": os.getenv("OCR_LANG", "korean"), - "ollama_model": ollama_model if backend == "ollama" else "", - "ollama_timeout":s.get("ollama_timeout", 600), - "custom_prompt": custom_prompt[:200] if custom_prompt else "", + "username": user["username"], + "input": {"filename": file.filename, "size_bytes": file_size, "format": ext.upper()}, + "settings": { + "backend": backend, + "mode": mode, + "ocr_lang": os.getenv("OCR_LANG","korean"), + "ollama_model": ollama_model if backend=="ollama" else "", + "openrouter_model": openrouter_model if backend=="openrouter" else "", + "ollama_timeout": s.get("ollama_timeout",600), + "custom_prompt": custom_prompt[:200] if custom_prompt else "", }, "output": None, }) - return {"task_id": task.id, "file_id": file_id, "filename": file.filename, "mode": mode, "backend": backend} + return {"task_id": task.id, "file_id": file_id, + "filename": file.filename, "mode": mode, "backend": backend} # ════════════════════════════════════════════════════════════════ -# 상태 — task_id 기준으로 이력 업데이트 +# 상태 # ════════════════════════════════════════════════════════════════ 
@app.get("/api/status/{task_id}") def get_status(task_id: str, user: dict = Depends(require_auth)): r = celery_app.AsyncResult(task_id) - if r.state == "PENDING": - return {"state": "pending", "progress": 0, "message": "대기 중..."} - if r.state == "PROGRESS": - m = r.info or {} - return {"state": "progress", "progress": m.get("progress",0), "message": m.get("message","처리 중...")} - if r.state == "SUCCESS": - result = r.result or {} - # ★ task_id로 이력 업데이트 (file_id 아님) - _update_history_by_task(task_id, result, success=True) - return {"state": "success", "progress": 100, **result} - if r.state == "FAILURE": - _update_history_by_task(task_id, {}, success=False, error_msg=str(r.info)) - return {"state": "failure", "progress": 0, "message": str(r.info)} - return {"state": r.state.lower(), "progress": 0} + if r.state == "PENDING": return {"state":"pending", "progress":0, "message":"대기 중..."} + if r.state == "PROGRESS": m=r.info or {}; return {"state":"progress","progress":m.get("progress",0),"message":m.get("message","처리 중...")} + if r.state == "SUCCESS": _update_history_by_task(task_id, r.result or {}, True); return {"state":"success","progress":100,**(r.result or {})} + if r.state == "FAILURE": _update_history_by_task(task_id, {}, False, str(r.info)); return {"state":"failure","progress":0,"message":str(r.info)} + return {"state":r.state.lower(),"progress":0} # ════════════════════════════════════════════════════════════════ # 이력 # ════════════════════════════════════════════════════════════════ @app.get("/api/history") -def get_history(page: int=1, per_page: int=15, type_: str="", user: dict=Depends(require_auth)): +def get_history(page:int=1,per_page:int=15,type_:str="",user:dict=Depends(require_auth)): history = _load_history() if user.get("role") != "admin": history = [h for h in history if h.get("username")==user["username"]] if type_ in ("stt","ocr"): history = [h for h in history if h.get("type")==type_] total = len(history); start = (page-1)*per_page - return 
{"total": total, "page": page, "per_page": per_page, "items": history[start:start+per_page]} + return {"total":total,"page":page,"per_page":per_page,"items":history[start:start+per_page]} @app.delete("/api/history/{history_id}") -def delete_history(history_id: str, user: dict=Depends(require_auth)): - if not delete_history_item(history_id): raise HTTPException(404, "이력을 찾을 수 없습니다") - return {"ok": True} +def delete_history(history_id:str,user:dict=Depends(require_auth)): + if not delete_history_item(history_id): raise HTTPException(404,"이력을 찾을 수 없습니다") + return {"ok":True} @app.delete("/api/history") -def clear_all_history(user: dict=Depends(require_admin)): - clear_history(); return {"ok": True} +def clear_all_history(user:dict=Depends(require_admin)): + clear_history(); return {"ok":True} # ════════════════════════════════════════════════════════════════ -# 다운로드 / Ollama / 설정 / 관리자 +# 다운로드 # ════════════════════════════════════════════════════════════════ @app.get("/api/download/{filename}") -def download(filename: str, user: dict=Depends(require_auth)): - if ".." in filename or "/" in filename: raise HTTPException(400, "잘못된 파일명") +def download(filename:str,user:dict=Depends(require_auth)): + if ".." 
in filename or "/" in filename: raise HTTPException(400,"잘못된 파일명") path = os.path.join(OUTPUT_DIR, filename) - if not os.path.exists(path): raise HTTPException(404, "파일을 찾을 수 없습니다") + if not os.path.exists(path): raise HTTPException(404,"파일을 찾을 수 없습니다") media = ("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" if filename.endswith(".xlsx") else "text/plain") return FileResponse(path, media_type=media, filename=filename) + +# ════════════════════════════════════════════════════════════════ +# Ollama 모델 목록 +# ════════════════════════════════════════════════════════════════ @app.get("/api/ollama/models") -def ollama_models(user: dict=Depends(require_auth)): +def ollama_models(user:dict=Depends(require_auth)): try: resp = httpx.get(f"{OLLAMA_URL}/api/tags", timeout=8.0); resp.raise_for_status() - return {"models": [m["name"] for m in resp.json().get("models",[])], "connected": True} + return {"models":[m["name"] for m in resp.json().get("models",[])], "connected":True} except Exception as e: - return {"models": [], "connected": False, "error": str(e)} + return {"models":[], "connected":False, "error":str(e)} + +# ════════════════════════════════════════════════════════════════ +# OpenRouter 모델 목록 & 연결 테스트 +# ════════════════════════════════════════════════════════════════ +@app.get("/api/openrouter/models") +def openrouter_models(user: dict = Depends(require_auth)): + s = _load_settings() + api_key = s.get("openrouter_api_key", "") + base_url = s.get("openrouter_url", "https://openrouter.ai/api/v1").rstrip("/") + if not api_key: + return {"models": [], "connected": False, "error": "API 키가 설정되지 않았습니다"} + try: + resp = httpx.get( + f"{base_url}/models", + headers={"Authorization": f"Bearer {api_key}", + "HTTP-Referer": "https://voicescript.local"}, + timeout=12.0, + ) + resp.raise_for_status() + data = resp.json() + # Vision 모델 필터링 (multimodal 지원 모델) + all_models = data.get("data", []) + vision = [m["id"] for m in all_models + if any(k in 
str(m.get("architecture", {}).get("modality","")).lower() + for k in ["image","vision","multimodal"]) + or any(k in m["id"].lower() + for k in ["vision","claude-3","gemini","gpt-4o","llava","pixtral","qwen-vl","intern","deepseek-vl"])] + text = [m["id"] for m in all_models if m["id"] not in vision] + return { + "models": [m["id"] for m in all_models], + "vision_models": vision, + "text_models": text, + "connected": True, + "total": len(all_models), + } + except httpx.HTTPStatusError as e: + return {"models":[], "connected":False, "error":f"HTTP {e.response.status_code}: API 키를 확인하세요"} + except Exception as e: + return {"models":[], "connected":False, "error":str(e)} + +@app.post("/api/openrouter/test") +def openrouter_test( + api_key: str = Form(...), + base_url: str = Form("https://openrouter.ai/api/v1"), + user: dict = Depends(require_auth), +): + """API 키 연결 테스트""" + try: + resp = httpx.get( + f"{base_url.rstrip('/')}/models", + headers={"Authorization": f"Bearer {api_key}", + "HTTP-Referer": "https://voicescript.local"}, + timeout=10.0, + ) + resp.raise_for_status() + count = len(resp.json().get("data", [])) + return {"ok": True, "message": f"연결 성공 — {count}개 모델 사용 가능"} + except httpx.HTTPStatusError as e: + return {"ok": False, "message": f"인증 실패 (HTTP {e.response.status_code}) — API 키를 확인하세요"} + except Exception as e: + return {"ok": False, "message": f"연결 실패: {str(e)}"} + + +# ════════════════════════════════════════════════════════════════ +# 설정 +# ════════════════════════════════════════════════════════════════ @app.get("/api/settings") -def get_settings(user: dict=Depends(require_auth)): return _load_settings() +def get_settings(user: dict = Depends(require_auth)): + s = _load_settings() + # API 키는 마스킹해서 반환 + result = dict(s) + if result.get("openrouter_api_key"): + key = result["openrouter_api_key"] + result["openrouter_api_key_masked"] = key[:8] + "..." 
+ key[-4:] if len(key) > 12 else "****" + else: + result["openrouter_api_key_masked"] = "" + result["openrouter_api_key"] = "" # 평문은 반환 안 함 + return result @app.post("/api/settings") def save_settings_endpoint( - stt_ollama_model: str = Form(""), - ocr_ollama_model: str = Form(""), - cpu_threads: str = Form("0"), - stt_timeout: str = Form("0"), - ollama_timeout: str = Form("600"), + stt_ollama_model: str = Form(""), + ocr_ollama_model: str = Form(""), + cpu_threads: str = Form("0"), + stt_timeout: str = Form("0"), + ollama_timeout: str = Form("600"), + openrouter_url: str = Form("https://openrouter.ai/api/v1"), + openrouter_api_key: str = Form(""), + openrouter_stt_model: str = Form(""), + openrouter_ocr_model: str = Form(""), user: dict = Depends(require_auth), ): def _int(v, d): try: return max(0, int(v)) except: return d + + current = _load_settings() + # API 키가 비어있으면 기존 값 유지 + final_key = openrouter_api_key.strip() if openrouter_api_key.strip() else current.get("openrouter_api_key","") + data = { - "stt_ollama_model": stt_ollama_model, - "ocr_ollama_model": ocr_ollama_model, - "cpu_threads": _int(cpu_threads, 0), - "stt_timeout": _int(stt_timeout, 0), - "ollama_timeout": _int(ollama_timeout, 600), + "stt_ollama_model": stt_ollama_model, + "ocr_ollama_model": ocr_ollama_model, + "cpu_threads": _int(cpu_threads, 0), + "stt_timeout": _int(stt_timeout, 0), + "ollama_timeout": _int(ollama_timeout, 600), + "openrouter_url": openrouter_url.strip() or "https://openrouter.ai/api/v1", + "openrouter_api_key": final_key, + "openrouter_stt_model": openrouter_stt_model, + "openrouter_ocr_model": openrouter_ocr_model, } _save_settings(data) - return {"ok": True, "settings": data} + return {"ok": True, "settings": {k: v for k, v in data.items() if k != "openrouter_api_key"}} + +# ════════════════════════════════════════════════════════════════ +# 관리자 +# ════════════════════════════════════════════════════════════════ @app.get("/api/admin/users") -def admin_list_users(user: 
dict=Depends(require_admin)): return {"users": list_users()} +def admin_list_users(user:dict=Depends(require_admin)): return {"users":list_users()} @app.post("/api/admin/users") def admin_create_user( - username: str = Form(...), - password: str = Form(...), - perm_stt: str = Form("false"), - perm_ocr: str = Form("false"), - allowed_stt_models: str = Form(""), # 콤마 구분 모델명 - allowed_ocr_models: str = Form(""), - user: dict = Depends(require_admin), + username:str=Form(...),password:str=Form(...), + perm_stt:str=Form("false"),perm_ocr:str=Form("false"), + allowed_stt_models:str=Form(""),allowed_ocr_models:str=Form(""), + user:dict=Depends(require_admin), ): - def _parse_models(s): return [m.strip() for m in s.split(",") if m.strip()] - perms = { - "stt": perm_stt.lower() == "true", - "ocr": perm_ocr.lower() == "true", - "allowed_stt_models": _parse_models(allowed_stt_models), - "allowed_ocr_models": _parse_models(allowed_ocr_models), - } - ok, msg = create_user(username, password, perms) - if not ok: raise HTTPException(400, msg) - return {"ok": True, "message": msg} + def _p(s): return [m.strip() for m in s.split(",") if m.strip()] + perms={"stt":perm_stt.lower()=="true","ocr":perm_ocr.lower()=="true", + "allowed_stt_models":_p(allowed_stt_models),"allowed_ocr_models":_p(allowed_ocr_models)} + ok,msg=create_user(username,password,perms) + if not ok: raise HTTPException(400,msg) + return {"ok":True,"message":msg} @app.put("/api/admin/users/{username}") def admin_update_user( - username: str, - perm_stt: str = Form("false"), - perm_ocr: str = Form("false"), - password: str = Form(""), - allowed_stt_models: str = Form(""), - allowed_ocr_models: str = Form(""), - user: dict = Depends(require_admin), + username:str,perm_stt:str=Form("false"),perm_ocr:str=Form("false"), + password:str=Form(""),allowed_stt_models:str=Form(""),allowed_ocr_models:str=Form(""), + user:dict=Depends(require_admin), ): - def _parse_models(s): return [m.strip() for m in s.split(",") if m.strip()] 
- perms = { - "stt": perm_stt.lower() == "true", - "ocr": perm_ocr.lower() == "true", - "allowed_stt_models": _parse_models(allowed_stt_models), - "allowed_ocr_models": _parse_models(allowed_ocr_models), - } - ok, msg = update_user(username, perms, password or None) - if not ok: raise HTTPException(400, msg) - return {"ok": True, "message": msg} + def _p(s): return [m.strip() for m in s.split(",") if m.strip()] + perms={"stt":perm_stt.lower()=="true","ocr":perm_ocr.lower()=="true", + "allowed_stt_models":_p(allowed_stt_models),"allowed_ocr_models":_p(allowed_ocr_models)} + ok,msg=update_user(username,perms,password or None) + if not ok: raise HTTPException(400,msg) + return {"ok":True,"message":msg} @app.delete("/api/admin/users/{username}") -def admin_delete_user(username: str, user: dict=Depends(require_admin)): - ok, msg = delete_user(username) - if not ok: raise HTTPException(400, msg) - return {"ok": True, "message": msg} +def admin_delete_user(username:str,user:dict=Depends(require_admin)): + ok,msg=delete_user(username) + if not ok: raise HTTPException(400,msg) + return {"ok":True,"message":msg} @app.post("/api/cleanup") -def cleanup(user: dict=Depends(require_auth)): return {"removed": _cleanup_outputs()} +def cleanup(user:dict=Depends(require_auth)): return {"removed":_cleanup_outputs()} # ════════════════════════════════════════════════════════════════ # 유틸 # ════════════════════════════════════════════════════════════════ -def _check_size(request: Request): +def _check_size(request): cl = request.headers.get("content-length") - if cl and int(cl) > MAX_UPLOAD_BYTES: raise HTTPException(413, f"파일이 너무 큽니다. 최대 {MAX_UPLOAD_BYTES//1024//1024}MB") + if cl and int(cl) > MAX_UPLOAD_BYTES: + raise HTTPException(413, f"파일이 너무 큽니다. 
최대 {MAX_UPLOAD_BYTES//1024//1024}MB") -def _cleanup_outputs() -> int: +def _cleanup_outputs(): if OUTPUT_KEEP_SECS == 0: return 0 cutoff = time.time() - OUTPUT_KEEP_SECS; removed = 0 - for f in glob.glob(os.path.join(OUTPUT_DIR, "*")): + for f in glob.glob(os.path.join(OUTPUT_DIR,"*")): try: if os.path.getmtime(f) < cutoff: os.remove(f); removed += 1 except: pass return removed -def _ext(fn): return fn.rsplit(".", 1)[-1].lower() if "." in fn else "" +def _ext(fn): return fn.rsplit(".",1)[-1].lower() if "." in fn else "" async def _save(file, path): written = 0 - async with aiofiles.open(path, "wb") as f: - while chunk := await file.read(1024 * 1024): + async with aiofiles.open(path,"wb") as f: + while chunk := await file.read(1024*1024): written += len(chunk) if written > MAX_UPLOAD_BYTES: await f.close(); os.remove(path) diff --git a/app/ocr_tasks.py b/app/ocr_tasks.py index ff3c846..bd433e0 100644 --- a/app/ocr_tasks.py +++ b/app/ocr_tasks.py @@ -1,9 +1,8 @@ """ -OCR Celery Tasks — PaddleOCR 3.x + Ollama Vision +OCR Celery Tasks +backend: paddle | ollama | openrouter """ -import os -import base64 - +import os, base64 import httpx from celery import Celery import openpyxl @@ -17,11 +16,8 @@ OLLAMA_TIMEOUT = int(os.getenv("OLLAMA_TIMEOUT", "600")) celery_app = Celery("ocr_tasks", broker=REDIS_URL, backend=REDIS_URL) celery_app.conf.update( - task_serializer="json", - result_serializer="json", - accept_content=["json"], - task_track_started=True, - result_expires=3600, + task_serializer="json", result_serializer="json", + accept_content=["json"], task_track_started=True, result_expires=3600, ) _ocr_engine = None @@ -46,12 +42,28 @@ def get_structure(): return _struct_engine +# ════════════════════════════════════════════════════════════════ +# 메인 Task +# ════════════════════════════════════════════════════════════════ @celery_app.task(bind=True, name="tasks.ocr_task", queue="ocr") -def ocr_task(self, file_id, image_path, mode="text", - backend="paddle", 
ollama_model="granite3.2-vision", custom_prompt=""): - self.update_state(state="PROGRESS", meta={"progress": 8, "message": "엔진 준비 중..."}) +def ocr_task( + self, + file_id: str, + image_path: str, + mode: str = "text", + backend: str = "paddle", + ollama_model: str = "granite3.2-vision", + openrouter_model: str = "", + openrouter_url: str = "", + openrouter_key: str = "", + custom_prompt: str = "", +): + self.update_state(state="PROGRESS", meta={"progress":8,"message":"엔진 준비 중..."}) try: - if backend == "ollama": + if backend == "openrouter": + result = _run_openrouter(self, file_id, image_path, mode, + openrouter_model, openrouter_url, openrouter_key, custom_prompt) + elif backend == "ollama": result = _run_ollama(self, file_id, image_path, mode, ollama_model, custom_prompt) else: result = _run_paddle(self, file_id, image_path, mode) @@ -64,34 +76,124 @@ def ocr_task(self, file_id, image_path, mode="text", raise Exception(f"OCR 실패: {str(e)}") -_OLLAMA_PROMPTS = { +# ════════════════════════════════════════════════════════════════ +# OpenRouter Vision 백엔드 (OpenAI 호환) +# ════════════════════════════════════════════════════════════════ +_PROMPTS = { "text": "이 이미지에서 모든 텍스트를 정확하게 추출해줘. 
원본의 줄 구분과 단락 구조를 유지해줘.", "structure": "이 이미지를 분석해서 표는 마크다운 표 형식으로, 나머지 텍스트는 원본 구조를 유지하며 추출해줘.", } +def _run_openrouter(task, file_id, image_path, mode, + model, base_url, api_key, custom_prompt): + if not api_key: + raise Exception("OpenRouter API 키가 설정되지 않았습니다") + if not model: + raise Exception("OpenRouter 모델이 선택되지 않았습니다") + + task.update_state(state="PROGRESS", + meta={"progress":15,"message":f"OpenRouter ({model}) 연결 중..."}) + + with open(image_path, "rb") as f: + raw = f.read() + + # 이미지 MIME 타입 감지 + ext = image_path.rsplit(".", 1)[-1].lower() + mime = {"jpg":"image/jpeg","jpeg":"image/jpeg","png":"image/png", + "bmp":"image/bmp","gif":"image/gif","webp":"image/webp"}.get(ext, "image/jpeg") + b64 = base64.b64encode(raw).decode() + data_url = f"data:{mime};base64,{b64}" + + prompt = custom_prompt.strip() or _PROMPTS.get(mode, _PROMPTS["text"]) + + task.update_state(state="PROGRESS", meta={"progress":30,"message":"모델 추론 중..."}) + + try: + resp = httpx.post( + f"{base_url.rstrip('/')}/chat/completions", + headers={ + "Authorization": f"Bearer {api_key}", + "HTTP-Referer": "https://voicescript.local", + "X-Title": "VoiceScript", + "Content-Type": "application/json", + }, + json={ + "model": model, + "messages": [{ + "role": "user", + "content": [ + {"type": "image_url", "image_url": {"url": data_url}}, + {"type": "text", "text": prompt}, + ], + }], + "temperature": 0.1, + }, + timeout=float(OLLAMA_TIMEOUT), + ) + resp.raise_for_status() + except httpx.HTTPStatusError as e: + body = "" + try: body = e.response.json().get("error",{}).get("message","") + except: pass + if e.response.status_code == 400: + raise Exception(f"이 모델은 이미지를 지원하지 않습니다 — Vision 모델을 선택하세요\n({model})") + raise Exception(f"OpenRouter 오류 ({e.response.status_code}): {body or str(e)}") + except httpx.TimeoutException: + raise Exception(f"OpenRouter 응답 시간 초과. 
OLLAMA_TIMEOUT 값을 늘려주세요.") + + task.update_state(state="PROGRESS", meta={"progress":85,"message":"결과 저장 중..."}) + + full_text = resp.json()["choices"][0]["message"]["content"].strip() + if not full_text: + raise Exception("OpenRouter 빈 응답") + + tables = _parse_md_tables(full_text) if mode == "structure" else [] + os.makedirs(OUTPUT_DIR, exist_ok=True) + txt_file = f"{file_id}_ocr.txt" + with open(os.path.join(OUTPUT_DIR, txt_file), "w", encoding="utf-8") as f: + f.write(f"# OCR 결과 (OpenRouter / {model})\n\n{full_text}") + xlsx_file = None + if tables: + xlsx_file = f"{file_id}_tables.xlsx" + _save_excel(tables, os.path.join(OUTPUT_DIR, xlsx_file)) + tables_html = [_md_table_to_html(t) for t in tables] + lines = [{"text":l,"confidence":1.0,"bbox":[]} for l in full_text.splitlines() if l.strip()] + return { + "mode": mode, "backend": "openrouter", "openrouter_model": model, + "ollama_model": "", + "full_text": full_text, "lines": lines, "line_count": len(lines), + "txt_file": txt_file, + "tables": [{"html":h,"rows":len(t),"cols":max(len(r) for r in t) if t else 0} + for h, t in zip(tables_html, tables)], + "xlsx_file": xlsx_file, + } + + +# ════════════════════════════════════════════════════════════════ +# Ollama Vision 백엔드 +# ════════════════════════════════════════════════════════════════ def _run_ollama(task, file_id, image_path, mode, ollama_model, custom_prompt): task.update_state(state="PROGRESS", - meta={"progress": 15, "message": f"Ollama ({ollama_model}) 연결 중..."}) + meta={"progress":15,"message":f"Ollama ({ollama_model}) 연결 중..."}) with open(image_path, "rb") as f: img_b64 = base64.b64encode(f.read()).decode() - prompt = custom_prompt.strip() or _OLLAMA_PROMPTS.get(mode, _OLLAMA_PROMPTS["text"]) - task.update_state(state="PROGRESS", meta={"progress": 30, "message": "모델 추론 중..."}) + prompt = custom_prompt.strip() or _PROMPTS.get(mode, _PROMPTS["text"]) + task.update_state(state="PROGRESS", meta={"progress":30,"message":"모델 추론 중..."}) try: resp = 
httpx.post(f"{OLLAMA_URL}/api/chat", json={ "model": ollama_model, - "messages": [{"role": "user", "content": prompt, "images": [img_b64]}], - "stream": False, "options": {"temperature": 0.1}, + "messages": [{"role":"user","content":prompt,"images":[img_b64]}], + "stream": False, "options": {"temperature":0.1}, }, timeout=float(OLLAMA_TIMEOUT)) resp.raise_for_status() except httpx.ConnectError: raise Exception(f"Ollama 서버 연결 실패 ({OLLAMA_URL})") except httpx.TimeoutException: - raise Exception(f"Ollama 응답 시간 초과 ({OLLAMA_TIMEOUT}초). OLLAMA_TIMEOUT 값을 늘려주세요.") + raise Exception(f"Ollama 응답 시간 초과 ({OLLAMA_TIMEOUT}초)") - task.update_state(state="PROGRESS", meta={"progress": 85, "message": "결과 저장 중..."}) - full_text = resp.json().get("message", {}).get("content", "").strip() - if not full_text: - raise Exception("Ollama 빈 응답. 모델이 설치되어 있는지 확인하세요.") + task.update_state(state="PROGRESS", meta={"progress":85,"message":"결과 저장 중..."}) + full_text = resp.json().get("message",{}).get("content","").strip() + if not full_text: raise Exception("Ollama 빈 응답. 
모델이 Vision을 지원하는지 확인하세요.") tables = _parse_md_tables(full_text) if mode == "structure" else [] os.makedirs(OUTPUT_DIR, exist_ok=True) @@ -103,74 +205,66 @@ def _run_ollama(task, file_id, image_path, mode, ollama_model, custom_prompt): xlsx_file = f"{file_id}_tables.xlsx" _save_excel(tables, os.path.join(OUTPUT_DIR, xlsx_file)) tables_html = [_md_table_to_html(t) for t in tables] - lines = [{"text": l, "confidence": 1.0, "bbox": []} - for l in full_text.splitlines() if l.strip()] + lines = [{"text":l,"confidence":1.0,"bbox":[]} for l in full_text.splitlines() if l.strip()] return { "mode": mode, "backend": "ollama", "ollama_model": ollama_model, + "openrouter_model": "", "full_text": full_text, "lines": lines, "line_count": len(lines), "txt_file": txt_file, - "tables": [{"html": h, "rows": len(t), "cols": max(len(r) for r in t) if t else 0} + "tables": [{"html":h,"rows":len(t),"cols":max(len(r) for r in t) if t else 0} for h, t in zip(tables_html, tables)], "xlsx_file": xlsx_file, } +# ════════════════════════════════════════════════════════════════ +# PaddleOCR 백엔드 +# ════════════════════════════════════════════════════════════════ def _run_paddle(task, file_id, image_path, mode): import cv2 img = cv2.imread(image_path) - if img is None: - raise ValueError("이미지를 읽을 수 없습니다") + if img is None: raise ValueError("이미지를 읽을 수 없습니다") os.makedirs(OUTPUT_DIR, exist_ok=True) - return _paddle_structure(task, file_id, img) if mode == "structure" \ - else _paddle_text(task, file_id, img) + return _paddle_structure(task, file_id, img) if mode == "structure" else _paddle_text(task, file_id, img) def _paddle_text(task, file_id, img): - task.update_state(state="PROGRESS", meta={"progress": 30, "message": "텍스트 인식 중..."}) + task.update_state(state="PROGRESS", meta={"progress":30,"message":"텍스트 인식 중..."}) result = get_ocr().ocr(img) - task.update_state(state="PROGRESS", meta={"progress": 80, "message": "결과 정리 중..."}) + task.update_state(state="PROGRESS", 
meta={"progress":80,"message":"결과 정리 중..."}) lines = [] if result and len(result) > 0: r = result[0] if isinstance(r, dict): - texts = r.get("rec_texts", []) - scores = r.get("rec_scores", []) - for text, conf in zip(texts, scores): - if text.strip(): - lines.append({"text": text, "confidence": round(float(conf), 3), "bbox": []}) + for text, conf in zip(r.get("rec_texts",[]), r.get("rec_scores",[])): + if text.strip(): lines.append({"text":text,"confidence":round(float(conf),3),"bbox":[]}) elif isinstance(r, list): for item in r: - if item and len(item) == 2: + if item and len(item)==2: _, (text, conf) = item - if text.strip(): - lines.append({"text": text, "confidence": round(float(conf), 3), "bbox": []}) + if text.strip(): lines.append({"text":text,"confidence":round(float(conf),3),"bbox":[]}) full_text = "\n".join(l["text"] for l in lines) - txt_file = f"{file_id}_ocr.txt" - with open(os.path.join(OUTPUT_DIR, txt_file), "w", encoding="utf-8") as f: - f.write(full_text) - return {"mode": "text", "backend": "paddle", "ollama_model": "", - "full_text": full_text, "lines": lines, - "line_count": len(lines), "txt_file": txt_file, - "tables": [], "xlsx_file": None} + txt_file = f"{file_id}_ocr.txt" + with open(os.path.join(OUTPUT_DIR, txt_file), "w", encoding="utf-8") as f: f.write(full_text) + return {"mode":"text","backend":"paddle","ollama_model":"","openrouter_model":"", + "full_text":full_text,"lines":lines,"line_count":len(lines), + "txt_file":txt_file,"tables":[],"xlsx_file":None} def _paddle_structure(task, file_id, img): - task.update_state(state="PROGRESS", meta={"progress": 20, "message": "레이아웃 분석 중..."}) + task.update_state(state="PROGRESS", meta={"progress":20,"message":"레이아웃 분석 중..."}) result = get_structure()(img) - task.update_state(state="PROGRESS", meta={"progress": 60, "message": "표 구조 추출 중..."}) + task.update_state(state="PROGRESS", meta={"progress":60,"message":"표 구조 추출 중..."}) text_blocks, tables_html, tables_data = [], [], [] for region in 
result: - rtype = region.get("type", "").lower() + rtype = region.get("type","").lower() if rtype == "table": - html = region.get("res", {}).get("html", "") - if html: - tables_html.append(html) - tables_data.append(_html_table_to_list(html)) - elif rtype in ("text", "title", "figure_caption"): - for line in (region.get("res", []) or []): - if isinstance(line, (list, tuple)) and len(line) == 2: - _, (text, _conf) = line - text_blocks.append(text) + html = region.get("res",{}).get("html","") + if html: tables_html.append(html); tables_data.append(_html_table_to_list(html)) + elif rtype in ("text","title","figure_caption"): + for line in (region.get("res",[]) or []): + if isinstance(line,(list,tuple)) and len(line)==2: + _, (text, _conf) = line; text_blocks.append(text) full_text = "\n".join(text_blocks) - task.update_state(state="PROGRESS", meta={"progress": 80, "message": "Excel 생성 중..."}) + task.update_state(state="PROGRESS", meta={"progress":80,"message":"Excel 생성 중..."}) xlsx_file = None if tables_data: xlsx_file = f"{file_id}_tables.xlsx" @@ -178,15 +272,17 @@ def _paddle_structure(task, file_id, img): txt_file = f"{file_id}_ocr.txt" with open(os.path.join(OUTPUT_DIR, txt_file), "w", encoding="utf-8") as f: f.write("# 텍스트\n\n" + full_text) - lines = [{"text": t, "confidence": 1.0, "bbox": []} for t in text_blocks] - tables_meta = [{"html": h, "rows": len(d), "cols": max(len(r) for r in d) if d else 0} + lines = [{"text":t,"confidence":1.0,"bbox":[]} for t in text_blocks] + tables_meta = [{"html":h,"rows":len(d),"cols":max(len(r) for r in d) if d else 0} for h, d in zip(tables_html, tables_data)] - return {"mode": "structure", "backend": "paddle", "ollama_model": "", - "full_text": full_text, "lines": lines, - "line_count": len(lines), "txt_file": txt_file, - "tables": tables_meta, "xlsx_file": xlsx_file} + return {"mode":"structure","backend":"paddle","ollama_model":"","openrouter_model":"", + "full_text":full_text,"lines":lines,"line_count":len(lines), + 
"txt_file":txt_file,"tables":tables_meta,"xlsx_file":xlsx_file} +# ════════════════════════════════════════════════════════════════ +# 공통 유틸 +# ════════════════════════════════════════════════════════════════ def _parse_md_tables(text): tables, current = [], [] for line in text.splitlines(): @@ -204,8 +300,8 @@ def _md_table_to_html(table): if not table: return "" rows = "" for i, row in enumerate(table): - tag = "th" if i == 0 else "td" - rows += "