diff --git a/app/Dockerfile b/app/Dockerfile index a2bbb0c..6da8ab2 100644 --- a/app/Dockerfile +++ b/app/Dockerfile @@ -9,8 +9,6 @@ RUN apt-get update && apt-get install -y \ libxext6 \ libxrender1 \ libgl1 \ - libgles2 \ - libegl1 \ wget \ curl \ && rm -rf /var/lib/apt/lists/* @@ -19,8 +17,11 @@ WORKDIR /app COPY requirements.txt . -RUN pip install --no-cache-dir paddlepaddle==3.0.0 +# PaddlePaddle CPU (AMD64) — paddleocr 3.x 호환 +RUN pip install --no-cache-dir paddlepaddle==3.0.0 \ + -i https://pypi.tuna.tsinghua.edu.cn/simple +# 나머지 패키지 RUN pip install --no-cache-dir -r requirements.txt COPY . . diff --git a/app/main.py b/app/main.py index 0d510a0..8095b92 100644 --- a/app/main.py +++ b/app/main.py @@ -32,11 +32,16 @@ AUDIO_EXT = {"mp3","mp4","wav","m4a","ogg","flac","aac","wma","webm","mkv","avi" IMAGE_EXT = {"jpg","jpeg","png","bmp","tiff","tif","webp","gif"} _DEFAULT_SETTINGS = { - "stt_ollama_model": "", - "ocr_ollama_model": "granite3.2-vision:latest", - "cpu_threads": 0, - "stt_timeout": 0, - "ollama_timeout": 600, + "stt_ollama_model": "", + "ocr_ollama_model": "granite3.2-vision:latest", + "cpu_threads": 0, + "stt_timeout": 0, + "ollama_timeout": 600, + # OpenRouter + "openrouter_url": "https://openrouter.ai/api/v1", + "openrouter_api_key": "", + "openrouter_stt_model": "", + "openrouter_ocr_model": "", } _hist_lock = threading.Lock() @@ -50,7 +55,8 @@ def _load_settings() -> dict: def _save_settings(data: dict): SETTINGS_FILE.parent.mkdir(parents=True, exist_ok=True) - with open(SETTINGS_FILE, "w", encoding="utf-8") as f: json.dump(data, f, ensure_ascii=False, indent=2) + with open(SETTINGS_FILE, "w", encoding="utf-8") as f: + json.dump(data, f, ensure_ascii=False, indent=2) # ── 이력 I/O ───────────────────────────────────────────────── @@ -77,19 +83,15 @@ def append_history(record: dict): except: pass def _update_history_by_task(task_id: str, result: dict, success: bool, error_msg: str = ""): - """task_id로 이력을 찾아 결과 업데이트 — 핵심 버그 수정""" with _hist_lock: 
if not HISTORY_FILE.exists(): return try: with open(HISTORY_FILE, "r", encoding="utf-8") as f: history = json.load(f) for h in history: - # task_id 필드로 매칭 if h.get("task_id") != task_id: continue if h.get("status") != "processing": break if not success: - h["status"] = "failed" - h["output"] = {"error": error_msg[:300]} - break + h["status"] = "failed"; h["output"] = {"error": error_msg[:300]}; break h["status"] = "success" if h["type"] == "stt": h["output"] = { @@ -97,12 +99,14 @@ def _update_history_by_task(task_id: str, result: dict, success: bool, error_msg "language": result.get("language", ""), "duration_s": result.get("duration", 0), "segments": len(result.get("segments", [])), - "text_preview": result.get("text", "")[:200] + ("…" if len(result.get("text",""))>200 else ""), + "text_preview": result.get("text","")[:200] + ("…" if len(result.get("text",""))>200 else ""), "ollama_used": result.get("ollama_used", False), "ollama_model": result.get("ollama_model", ""), + "openrouter_used": result.get("openrouter_used", False), + "openrouter_model": result.get("openrouter_model", ""), } else: - full_text = result.get("full_text", "") + ft = result.get("full_text", "") h["output"] = { "txt_file": result.get("txt_file", ""), "xlsx_file": result.get("xlsx_file", ""), @@ -110,7 +114,8 @@ def _update_history_by_task(task_id: str, result: dict, success: bool, error_msg "table_count": len(result.get("tables", [])), "backend": result.get("backend", ""), "ollama_model": result.get("ollama_model", ""), - "text_preview": full_text[:200] + ("…" if len(full_text)>200 else ""), + "openrouter_model": result.get("openrouter_model", ""), + "text_preview": ft[:200] + ("…" if len(ft)>200 else ""), } break _write_history(history) @@ -132,12 +137,11 @@ def clear_history(): # ════════════════════════════════════════════════════════════════ -# 시작 이벤트 +# 시작 # ════════════════════════════════════════════════════════════════ @app.on_event("startup") async def on_startup(): - init_users() - 
_cleanup_outputs() + init_users(); _cleanup_outputs() # ════════════════════════════════════════════════════════════════ @@ -183,7 +187,10 @@ def system_info(user: dict = Depends(require_auth)): @app.post("/api/transcribe") async def transcribe( request: Request, file: UploadFile = File(...), - use_ollama: str = Form("false"), ollama_model: str = Form(""), + use_ollama: str = Form("false"), + ollama_model: str = Form(""), + use_openrouter: str = Form("false"), + openrouter_model: str = Form(""), user: dict = Depends(require_stt), ): _check_size(request) @@ -193,29 +200,39 @@ async def transcribe( save_path = os.path.join(UPLOAD_DIR, f"{file_id}.{ext}") await _save(file, save_path) file_size = os.path.getsize(save_path) - _use_ollama = use_ollama.lower() == "true" + s = _load_settings() - if _use_ollama and not ollama_model.strip(): ollama_model = s.get("stt_ollama_model", "") + _use_ollama = use_ollama.lower() == "true" + _use_openrouter = use_openrouter.lower() == "true" - task = transcribe_task.delay(file_id, save_path, _use_ollama, ollama_model) + if _use_ollama and not ollama_model.strip(): + ollama_model = s.get("stt_ollama_model", "") + if _use_openrouter and not openrouter_model.strip(): + openrouter_model = s.get("openrouter_stt_model", "") + + task = transcribe_task.delay( + file_id, save_path, + _use_ollama, ollama_model, + _use_openrouter, openrouter_model, + s.get("openrouter_url", ""), s.get("openrouter_api_key", ""), + ) - # ★ task_id를 이력에 함께 저장 append_history({ - "id": file_id, - "task_id": task.id, # ← 업데이트 매칭 키 - "type": "stt", - "status": "processing", + "id": file_id, "task_id": task.id, "type": "stt", + "status": "processing", "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), - "username": user["username"], - "input": {"filename": file.filename, "size_bytes": file_size, "format": ext.upper()}, - "settings": { - "model": os.getenv("WHISPER_MODEL", "medium"), - "language": os.getenv("WHISPER_LANGUAGE", "ko"), - "compute_type": 
os.getenv("WHISPER_COMPUTE_TYPE", "int8"), - "cpu_threads": s.get("cpu_threads", 0), - "stt_timeout": s.get("stt_timeout", 0), - "use_ollama": _use_ollama, - "ollama_model": ollama_model if _use_ollama else "", + "username": user["username"], + "input": {"filename": file.filename, "size_bytes": file_size, "format": ext.upper()}, + "settings": { + "model": os.getenv("WHISPER_MODEL","medium"), + "language": os.getenv("WHISPER_LANGUAGE","ko"), + "compute_type": os.getenv("WHISPER_COMPUTE_TYPE","int8"), + "cpu_threads": s.get("cpu_threads",0), + "stt_timeout": s.get("stt_timeout",0), + "use_ollama": _use_ollama, + "ollama_model": ollama_model if _use_ollama else "", + "use_openrouter": _use_openrouter, + "openrouter_model": openrouter_model if _use_openrouter else "", }, "output": None, }) @@ -228,211 +245,297 @@ async def transcribe( @app.post("/api/ocr") async def ocr( request: Request, file: UploadFile = File(...), - mode: str = Form("text"), backend: str = Form("paddle"), - ollama_model: str = Form(""), custom_prompt: str = Form(""), + mode: str = Form("text"), + backend: str = Form("paddle"), # paddle | ollama | openrouter + ollama_model: str = Form(""), + openrouter_model: str = Form(""), + custom_prompt: str = Form(""), user: dict = Depends(require_ocr), ): _check_size(request) ext = _ext(file.filename) if ext not in IMAGE_EXT: raise HTTPException(400, f"지원하지 않는 형식: {', '.join(sorted(IMAGE_EXT))}") if mode not in ("text","structure"): mode = "text" - if backend not in ("paddle","ollama"): backend = "paddle" + if backend not in ("paddle","ollama","openrouter"): backend = "paddle" + s = _load_settings() - if backend == "ollama" and not ollama_model.strip(): ollama_model = s.get("ocr_ollama_model","granite3.2-vision:latest") + if backend == "ollama" and not ollama_model.strip(): + ollama_model = s.get("ocr_ollama_model","granite3.2-vision:latest") + if backend == "openrouter" and not openrouter_model.strip(): + openrouter_model = s.get("openrouter_ocr_model","") + 
file_id = str(uuid.uuid4()) save_path = os.path.join(UPLOAD_DIR, f"{file_id}.{ext}") await _save(file, save_path) file_size = os.path.getsize(save_path) - task = ocr_task.delay(file_id, save_path, mode, backend, ollama_model, custom_prompt) + task = ocr_task.delay( + file_id, save_path, mode, backend, + ollama_model, openrouter_model, + s.get("openrouter_url",""), s.get("openrouter_api_key",""), + custom_prompt, + ) - # ★ task_id를 이력에 함께 저장 append_history({ - "id": file_id, - "task_id": task.id, # ← 업데이트 매칭 키 - "type": "ocr", - "status": "processing", + "id": file_id, "task_id": task.id, "type": "ocr", + "status": "processing", "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), - "username": user["username"], - "input": {"filename": file.filename, "size_bytes": file_size, "format": ext.upper()}, - "settings": { - "backend": backend, - "mode": mode, - "ocr_lang": os.getenv("OCR_LANG", "korean"), - "ollama_model": ollama_model if backend == "ollama" else "", - "ollama_timeout":s.get("ollama_timeout", 600), - "custom_prompt": custom_prompt[:200] if custom_prompt else "", + "username": user["username"], + "input": {"filename": file.filename, "size_bytes": file_size, "format": ext.upper()}, + "settings": { + "backend": backend, + "mode": mode, + "ocr_lang": os.getenv("OCR_LANG","korean"), + "ollama_model": ollama_model if backend=="ollama" else "", + "openrouter_model": openrouter_model if backend=="openrouter" else "", + "ollama_timeout": s.get("ollama_timeout",600), + "custom_prompt": custom_prompt[:200] if custom_prompt else "", }, "output": None, }) - return {"task_id": task.id, "file_id": file_id, "filename": file.filename, "mode": mode, "backend": backend} + return {"task_id": task.id, "file_id": file_id, + "filename": file.filename, "mode": mode, "backend": backend} # ════════════════════════════════════════════════════════════════ -# 상태 — task_id 기준으로 이력 업데이트 +# 상태 # ════════════════════════════════════════════════════════════════ 
@app.get("/api/status/{task_id}") def get_status(task_id: str, user: dict = Depends(require_auth)): r = celery_app.AsyncResult(task_id) - if r.state == "PENDING": - return {"state": "pending", "progress": 0, "message": "대기 중..."} - if r.state == "PROGRESS": - m = r.info or {} - return {"state": "progress", "progress": m.get("progress",0), "message": m.get("message","처리 중...")} - if r.state == "SUCCESS": - result = r.result or {} - # ★ task_id로 이력 업데이트 (file_id 아님) - _update_history_by_task(task_id, result, success=True) - return {"state": "success", "progress": 100, **result} - if r.state == "FAILURE": - _update_history_by_task(task_id, {}, success=False, error_msg=str(r.info)) - return {"state": "failure", "progress": 0, "message": str(r.info)} - return {"state": r.state.lower(), "progress": 0} + if r.state == "PENDING": return {"state":"pending", "progress":0, "message":"대기 중..."} + if r.state == "PROGRESS": m=r.info or {}; return {"state":"progress","progress":m.get("progress",0),"message":m.get("message","처리 중...")} + if r.state == "SUCCESS": _update_history_by_task(task_id, r.result or {}, True); return {"state":"success","progress":100,**(r.result or {})} + if r.state == "FAILURE": _update_history_by_task(task_id, {}, False, str(r.info)); return {"state":"failure","progress":0,"message":str(r.info)} + return {"state":r.state.lower(),"progress":0} # ════════════════════════════════════════════════════════════════ # 이력 # ════════════════════════════════════════════════════════════════ @app.get("/api/history") -def get_history(page: int=1, per_page: int=15, type_: str="", user: dict=Depends(require_auth)): +def get_history(page:int=1,per_page:int=15,type_:str="",user:dict=Depends(require_auth)): history = _load_history() if user.get("role") != "admin": history = [h for h in history if h.get("username")==user["username"]] if type_ in ("stt","ocr"): history = [h for h in history if h.get("type")==type_] total = len(history); start = (page-1)*per_page - return 
{"total": total, "page": page, "per_page": per_page, "items": history[start:start+per_page]} + return {"total":total,"page":page,"per_page":per_page,"items":history[start:start+per_page]} @app.delete("/api/history/{history_id}") -def delete_history(history_id: str, user: dict=Depends(require_auth)): - if not delete_history_item(history_id): raise HTTPException(404, "이력을 찾을 수 없습니다") - return {"ok": True} +def delete_history(history_id:str,user:dict=Depends(require_auth)): + if not delete_history_item(history_id): raise HTTPException(404,"이력을 찾을 수 없습니다") + return {"ok":True} @app.delete("/api/history") -def clear_all_history(user: dict=Depends(require_admin)): - clear_history(); return {"ok": True} +def clear_all_history(user:dict=Depends(require_admin)): + clear_history(); return {"ok":True} # ════════════════════════════════════════════════════════════════ -# 다운로드 / Ollama / 설정 / 관리자 +# 다운로드 # ════════════════════════════════════════════════════════════════ @app.get("/api/download/{filename}") -def download(filename: str, user: dict=Depends(require_auth)): - if ".." in filename or "/" in filename: raise HTTPException(400, "잘못된 파일명") +def download(filename:str,user:dict=Depends(require_auth)): + if ".." 
in filename or "/" in filename: raise HTTPException(400,"잘못된 파일명") path = os.path.join(OUTPUT_DIR, filename) - if not os.path.exists(path): raise HTTPException(404, "파일을 찾을 수 없습니다") + if not os.path.exists(path): raise HTTPException(404,"파일을 찾을 수 없습니다") media = ("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" if filename.endswith(".xlsx") else "text/plain") return FileResponse(path, media_type=media, filename=filename) + +# ════════════════════════════════════════════════════════════════ +# Ollama 모델 목록 +# ════════════════════════════════════════════════════════════════ @app.get("/api/ollama/models") -def ollama_models(user: dict=Depends(require_auth)): +def ollama_models(user:dict=Depends(require_auth)): try: resp = httpx.get(f"{OLLAMA_URL}/api/tags", timeout=8.0); resp.raise_for_status() - return {"models": [m["name"] for m in resp.json().get("models",[])], "connected": True} + return {"models":[m["name"] for m in resp.json().get("models",[])], "connected":True} except Exception as e: - return {"models": [], "connected": False, "error": str(e)} + return {"models":[], "connected":False, "error":str(e)} + +# ════════════════════════════════════════════════════════════════ +# OpenRouter 모델 목록 & 연결 테스트 +# ════════════════════════════════════════════════════════════════ +@app.get("/api/openrouter/models") +def openrouter_models(user: dict = Depends(require_auth)): + s = _load_settings() + api_key = s.get("openrouter_api_key", "") + base_url = s.get("openrouter_url", "https://openrouter.ai/api/v1").rstrip("/") + if not api_key: + return {"models": [], "connected": False, "error": "API 키가 설정되지 않았습니다"} + try: + resp = httpx.get( + f"{base_url}/models", + headers={"Authorization": f"Bearer {api_key}", + "HTTP-Referer": "https://voicescript.local"}, + timeout=12.0, + ) + resp.raise_for_status() + data = resp.json() + # Vision 모델 필터링 (multimodal 지원 모델) + all_models = data.get("data", []) + vision = [m["id"] for m in all_models + if any(k in 
str(m.get("architecture", {}).get("modality","")).lower() + for k in ["image","vision","multimodal"]) + or any(k in m["id"].lower() + for k in ["vision","claude-3","gemini","gpt-4o","llava","pixtral","qwen-vl","intern","deepseek-vl"])] + text = [m["id"] for m in all_models if m["id"] not in vision] + return { + "models": [m["id"] for m in all_models], + "vision_models": vision, + "text_models": text, + "connected": True, + "total": len(all_models), + } + except httpx.HTTPStatusError as e: + return {"models":[], "connected":False, "error":f"HTTP {e.response.status_code}: API 키를 확인하세요"} + except Exception as e: + return {"models":[], "connected":False, "error":str(e)} + +@app.post("/api/openrouter/test") +def openrouter_test( + api_key: str = Form(...), + base_url: str = Form("https://openrouter.ai/api/v1"), + user: dict = Depends(require_auth), +): + """API 키 연결 테스트""" + try: + resp = httpx.get( + f"{base_url.rstrip('/')}/models", + headers={"Authorization": f"Bearer {api_key}", + "HTTP-Referer": "https://voicescript.local"}, + timeout=10.0, + ) + resp.raise_for_status() + count = len(resp.json().get("data", [])) + return {"ok": True, "message": f"연결 성공 — {count}개 모델 사용 가능"} + except httpx.HTTPStatusError as e: + return {"ok": False, "message": f"인증 실패 (HTTP {e.response.status_code}) — API 키를 확인하세요"} + except Exception as e: + return {"ok": False, "message": f"연결 실패: {str(e)}"} + + +# ════════════════════════════════════════════════════════════════ +# 설정 +# ════════════════════════════════════════════════════════════════ @app.get("/api/settings") -def get_settings(user: dict=Depends(require_auth)): return _load_settings() +def get_settings(user: dict = Depends(require_auth)): + s = _load_settings() + # API 키는 마스킹해서 반환 + result = dict(s) + if result.get("openrouter_api_key"): + key = result["openrouter_api_key"] + result["openrouter_api_key_masked"] = key[:8] + "..." 
+ key[-4:] if len(key) > 12 else "****" + else: + result["openrouter_api_key_masked"] = "" + result["openrouter_api_key"] = "" # 평문은 반환 안 함 + return result @app.post("/api/settings") def save_settings_endpoint( - stt_ollama_model: str = Form(""), - ocr_ollama_model: str = Form(""), - cpu_threads: str = Form("0"), - stt_timeout: str = Form("0"), - ollama_timeout: str = Form("600"), + stt_ollama_model: str = Form(""), + ocr_ollama_model: str = Form(""), + cpu_threads: str = Form("0"), + stt_timeout: str = Form("0"), + ollama_timeout: str = Form("600"), + openrouter_url: str = Form("https://openrouter.ai/api/v1"), + openrouter_api_key: str = Form(""), + openrouter_stt_model: str = Form(""), + openrouter_ocr_model: str = Form(""), user: dict = Depends(require_auth), ): def _int(v, d): try: return max(0, int(v)) except: return d + + current = _load_settings() + # API 키가 비어있으면 기존 값 유지 + final_key = openrouter_api_key.strip() if openrouter_api_key.strip() else current.get("openrouter_api_key","") + data = { - "stt_ollama_model": stt_ollama_model, - "ocr_ollama_model": ocr_ollama_model, - "cpu_threads": _int(cpu_threads, 0), - "stt_timeout": _int(stt_timeout, 0), - "ollama_timeout": _int(ollama_timeout, 600), + "stt_ollama_model": stt_ollama_model, + "ocr_ollama_model": ocr_ollama_model, + "cpu_threads": _int(cpu_threads, 0), + "stt_timeout": _int(stt_timeout, 0), + "ollama_timeout": _int(ollama_timeout, 600), + "openrouter_url": openrouter_url.strip() or "https://openrouter.ai/api/v1", + "openrouter_api_key": final_key, + "openrouter_stt_model": openrouter_stt_model, + "openrouter_ocr_model": openrouter_ocr_model, } _save_settings(data) - return {"ok": True, "settings": data} + return {"ok": True, "settings": {k: v for k, v in data.items() if k != "openrouter_api_key"}} + +# ════════════════════════════════════════════════════════════════ +# 관리자 +# ════════════════════════════════════════════════════════════════ @app.get("/api/admin/users") -def admin_list_users(user: 
dict=Depends(require_admin)): return {"users": list_users()} +def admin_list_users(user:dict=Depends(require_admin)): return {"users":list_users()} @app.post("/api/admin/users") def admin_create_user( - username: str = Form(...), - password: str = Form(...), - perm_stt: str = Form("false"), - perm_ocr: str = Form("false"), - allowed_stt_models: str = Form(""), # 콤마 구분 모델명 - allowed_ocr_models: str = Form(""), - user: dict = Depends(require_admin), + username:str=Form(...),password:str=Form(...), + perm_stt:str=Form("false"),perm_ocr:str=Form("false"), + allowed_stt_models:str=Form(""),allowed_ocr_models:str=Form(""), + user:dict=Depends(require_admin), ): - def _parse_models(s): return [m.strip() for m in s.split(",") if m.strip()] - perms = { - "stt": perm_stt.lower() == "true", - "ocr": perm_ocr.lower() == "true", - "allowed_stt_models": _parse_models(allowed_stt_models), - "allowed_ocr_models": _parse_models(allowed_ocr_models), - } - ok, msg = create_user(username, password, perms) - if not ok: raise HTTPException(400, msg) - return {"ok": True, "message": msg} + def _p(s): return [m.strip() for m in s.split(",") if m.strip()] + perms={"stt":perm_stt.lower()=="true","ocr":perm_ocr.lower()=="true", + "allowed_stt_models":_p(allowed_stt_models),"allowed_ocr_models":_p(allowed_ocr_models)} + ok,msg=create_user(username,password,perms) + if not ok: raise HTTPException(400,msg) + return {"ok":True,"message":msg} @app.put("/api/admin/users/{username}") def admin_update_user( - username: str, - perm_stt: str = Form("false"), - perm_ocr: str = Form("false"), - password: str = Form(""), - allowed_stt_models: str = Form(""), - allowed_ocr_models: str = Form(""), - user: dict = Depends(require_admin), + username:str,perm_stt:str=Form("false"),perm_ocr:str=Form("false"), + password:str=Form(""),allowed_stt_models:str=Form(""),allowed_ocr_models:str=Form(""), + user:dict=Depends(require_admin), ): - def _parse_models(s): return [m.strip() for m in s.split(",") if m.strip()] 
- perms = { - "stt": perm_stt.lower() == "true", - "ocr": perm_ocr.lower() == "true", - "allowed_stt_models": _parse_models(allowed_stt_models), - "allowed_ocr_models": _parse_models(allowed_ocr_models), - } - ok, msg = update_user(username, perms, password or None) - if not ok: raise HTTPException(400, msg) - return {"ok": True, "message": msg} + def _p(s): return [m.strip() for m in s.split(",") if m.strip()] + perms={"stt":perm_stt.lower()=="true","ocr":perm_ocr.lower()=="true", + "allowed_stt_models":_p(allowed_stt_models),"allowed_ocr_models":_p(allowed_ocr_models)} + ok,msg=update_user(username,perms,password or None) + if not ok: raise HTTPException(400,msg) + return {"ok":True,"message":msg} @app.delete("/api/admin/users/{username}") -def admin_delete_user(username: str, user: dict=Depends(require_admin)): - ok, msg = delete_user(username) - if not ok: raise HTTPException(400, msg) - return {"ok": True, "message": msg} +def admin_delete_user(username:str,user:dict=Depends(require_admin)): + ok,msg=delete_user(username) + if not ok: raise HTTPException(400,msg) + return {"ok":True,"message":msg} @app.post("/api/cleanup") -def cleanup(user: dict=Depends(require_auth)): return {"removed": _cleanup_outputs()} +def cleanup(user:dict=Depends(require_auth)): return {"removed":_cleanup_outputs()} # ════════════════════════════════════════════════════════════════ # 유틸 # ════════════════════════════════════════════════════════════════ -def _check_size(request: Request): +def _check_size(request): cl = request.headers.get("content-length") - if cl and int(cl) > MAX_UPLOAD_BYTES: raise HTTPException(413, f"파일이 너무 큽니다. 최대 {MAX_UPLOAD_BYTES//1024//1024}MB") + if cl and int(cl) > MAX_UPLOAD_BYTES: + raise HTTPException(413, f"파일이 너무 큽니다. 
최대 {MAX_UPLOAD_BYTES//1024//1024}MB") -def _cleanup_outputs() -> int: +def _cleanup_outputs(): if OUTPUT_KEEP_SECS == 0: return 0 cutoff = time.time() - OUTPUT_KEEP_SECS; removed = 0 - for f in glob.glob(os.path.join(OUTPUT_DIR, "*")): + for f in glob.glob(os.path.join(OUTPUT_DIR,"*")): try: if os.path.getmtime(f) < cutoff: os.remove(f); removed += 1 except: pass return removed -def _ext(fn): return fn.rsplit(".", 1)[-1].lower() if "." in fn else "" +def _ext(fn): return fn.rsplit(".",1)[-1].lower() if "." in fn else "" async def _save(file, path): written = 0 - async with aiofiles.open(path, "wb") as f: - while chunk := await file.read(1024 * 1024): + async with aiofiles.open(path,"wb") as f: + while chunk := await file.read(1024*1024): written += len(chunk) if written > MAX_UPLOAD_BYTES: await f.close(); os.remove(path) diff --git a/app/ocr_tasks.py b/app/ocr_tasks.py index ff3c846..bd433e0 100644 --- a/app/ocr_tasks.py +++ b/app/ocr_tasks.py @@ -1,9 +1,8 @@ """ -OCR Celery Tasks — PaddleOCR 3.x + Ollama Vision +OCR Celery Tasks +backend: paddle | ollama | openrouter """ -import os -import base64 - +import os, base64 import httpx from celery import Celery import openpyxl @@ -17,11 +16,8 @@ OLLAMA_TIMEOUT = int(os.getenv("OLLAMA_TIMEOUT", "600")) celery_app = Celery("ocr_tasks", broker=REDIS_URL, backend=REDIS_URL) celery_app.conf.update( - task_serializer="json", - result_serializer="json", - accept_content=["json"], - task_track_started=True, - result_expires=3600, + task_serializer="json", result_serializer="json", + accept_content=["json"], task_track_started=True, result_expires=3600, ) _ocr_engine = None @@ -46,12 +42,28 @@ def get_structure(): return _struct_engine +# ════════════════════════════════════════════════════════════════ +# 메인 Task +# ════════════════════════════════════════════════════════════════ @celery_app.task(bind=True, name="tasks.ocr_task", queue="ocr") -def ocr_task(self, file_id, image_path, mode="text", - backend="paddle", 
ollama_model="granite3.2-vision", custom_prompt=""): - self.update_state(state="PROGRESS", meta={"progress": 8, "message": "엔진 준비 중..."}) +def ocr_task( + self, + file_id: str, + image_path: str, + mode: str = "text", + backend: str = "paddle", + ollama_model: str = "granite3.2-vision", + openrouter_model: str = "", + openrouter_url: str = "", + openrouter_key: str = "", + custom_prompt: str = "", +): + self.update_state(state="PROGRESS", meta={"progress":8,"message":"엔진 준비 중..."}) try: - if backend == "ollama": + if backend == "openrouter": + result = _run_openrouter(self, file_id, image_path, mode, + openrouter_model, openrouter_url, openrouter_key, custom_prompt) + elif backend == "ollama": result = _run_ollama(self, file_id, image_path, mode, ollama_model, custom_prompt) else: result = _run_paddle(self, file_id, image_path, mode) @@ -64,34 +76,124 @@ def ocr_task(self, file_id, image_path, mode="text", raise Exception(f"OCR 실패: {str(e)}") -_OLLAMA_PROMPTS = { +# ════════════════════════════════════════════════════════════════ +# OpenRouter Vision 백엔드 (OpenAI 호환) +# ════════════════════════════════════════════════════════════════ +_PROMPTS = { "text": "이 이미지에서 모든 텍스트를 정확하게 추출해줘. 
원본의 줄 구분과 단락 구조를 유지해줘.", "structure": "이 이미지를 분석해서 표는 마크다운 표 형식으로, 나머지 텍스트는 원본 구조를 유지하며 추출해줘.", } +def _run_openrouter(task, file_id, image_path, mode, + model, base_url, api_key, custom_prompt): + if not api_key: + raise Exception("OpenRouter API 키가 설정되지 않았습니다") + if not model: + raise Exception("OpenRouter 모델이 선택되지 않았습니다") + + task.update_state(state="PROGRESS", + meta={"progress":15,"message":f"OpenRouter ({model}) 연결 중..."}) + + with open(image_path, "rb") as f: + raw = f.read() + + # 이미지 MIME 타입 감지 + ext = image_path.rsplit(".", 1)[-1].lower() + mime = {"jpg":"image/jpeg","jpeg":"image/jpeg","png":"image/png", + "bmp":"image/bmp","gif":"image/gif","webp":"image/webp"}.get(ext, "image/jpeg") + b64 = base64.b64encode(raw).decode() + data_url = f"data:{mime};base64,{b64}" + + prompt = custom_prompt.strip() or _PROMPTS.get(mode, _PROMPTS["text"]) + + task.update_state(state="PROGRESS", meta={"progress":30,"message":"모델 추론 중..."}) + + try: + resp = httpx.post( + f"{base_url.rstrip('/')}/chat/completions", + headers={ + "Authorization": f"Bearer {api_key}", + "HTTP-Referer": "https://voicescript.local", + "X-Title": "VoiceScript", + "Content-Type": "application/json", + }, + json={ + "model": model, + "messages": [{ + "role": "user", + "content": [ + {"type": "image_url", "image_url": {"url": data_url}}, + {"type": "text", "text": prompt}, + ], + }], + "temperature": 0.1, + }, + timeout=float(OLLAMA_TIMEOUT), + ) + resp.raise_for_status() + except httpx.HTTPStatusError as e: + body = "" + try: body = e.response.json().get("error",{}).get("message","") + except: pass + if e.response.status_code == 400: + raise Exception(f"이 모델은 이미지를 지원하지 않습니다 — Vision 모델을 선택하세요\n({model})") + raise Exception(f"OpenRouter 오류 ({e.response.status_code}): {body or str(e)}") + except httpx.TimeoutException: + raise Exception(f"OpenRouter 응답 시간 초과. 
OLLAMA_TIMEOUT 값을 늘려주세요.") + + task.update_state(state="PROGRESS", meta={"progress":85,"message":"결과 저장 중..."}) + + full_text = resp.json()["choices"][0]["message"]["content"].strip() + if not full_text: + raise Exception("OpenRouter 빈 응답") + + tables = _parse_md_tables(full_text) if mode == "structure" else [] + os.makedirs(OUTPUT_DIR, exist_ok=True) + txt_file = f"{file_id}_ocr.txt" + with open(os.path.join(OUTPUT_DIR, txt_file), "w", encoding="utf-8") as f: + f.write(f"# OCR 결과 (OpenRouter / {model})\n\n{full_text}") + xlsx_file = None + if tables: + xlsx_file = f"{file_id}_tables.xlsx" + _save_excel(tables, os.path.join(OUTPUT_DIR, xlsx_file)) + tables_html = [_md_table_to_html(t) for t in tables] + lines = [{"text":l,"confidence":1.0,"bbox":[]} for l in full_text.splitlines() if l.strip()] + return { + "mode": mode, "backend": "openrouter", "openrouter_model": model, + "ollama_model": "", + "full_text": full_text, "lines": lines, "line_count": len(lines), + "txt_file": txt_file, + "tables": [{"html":h,"rows":len(t),"cols":max(len(r) for r in t) if t else 0} + for h, t in zip(tables_html, tables)], + "xlsx_file": xlsx_file, + } + + +# ════════════════════════════════════════════════════════════════ +# Ollama Vision 백엔드 +# ════════════════════════════════════════════════════════════════ def _run_ollama(task, file_id, image_path, mode, ollama_model, custom_prompt): task.update_state(state="PROGRESS", - meta={"progress": 15, "message": f"Ollama ({ollama_model}) 연결 중..."}) + meta={"progress":15,"message":f"Ollama ({ollama_model}) 연결 중..."}) with open(image_path, "rb") as f: img_b64 = base64.b64encode(f.read()).decode() - prompt = custom_prompt.strip() or _OLLAMA_PROMPTS.get(mode, _OLLAMA_PROMPTS["text"]) - task.update_state(state="PROGRESS", meta={"progress": 30, "message": "모델 추론 중..."}) + prompt = custom_prompt.strip() or _PROMPTS.get(mode, _PROMPTS["text"]) + task.update_state(state="PROGRESS", meta={"progress":30,"message":"모델 추론 중..."}) try: resp = 
httpx.post(f"{OLLAMA_URL}/api/chat", json={ "model": ollama_model, - "messages": [{"role": "user", "content": prompt, "images": [img_b64]}], - "stream": False, "options": {"temperature": 0.1}, + "messages": [{"role":"user","content":prompt,"images":[img_b64]}], + "stream": False, "options": {"temperature":0.1}, }, timeout=float(OLLAMA_TIMEOUT)) resp.raise_for_status() except httpx.ConnectError: raise Exception(f"Ollama 서버 연결 실패 ({OLLAMA_URL})") except httpx.TimeoutException: - raise Exception(f"Ollama 응답 시간 초과 ({OLLAMA_TIMEOUT}초). OLLAMA_TIMEOUT 값을 늘려주세요.") + raise Exception(f"Ollama 응답 시간 초과 ({OLLAMA_TIMEOUT}초)") - task.update_state(state="PROGRESS", meta={"progress": 85, "message": "결과 저장 중..."}) - full_text = resp.json().get("message", {}).get("content", "").strip() - if not full_text: - raise Exception("Ollama 빈 응답. 모델이 설치되어 있는지 확인하세요.") + task.update_state(state="PROGRESS", meta={"progress":85,"message":"결과 저장 중..."}) + full_text = resp.json().get("message",{}).get("content","").strip() + if not full_text: raise Exception("Ollama 빈 응답. 
모델이 Vision을 지원하는지 확인하세요.") tables = _parse_md_tables(full_text) if mode == "structure" else [] os.makedirs(OUTPUT_DIR, exist_ok=True) @@ -103,74 +205,66 @@ def _run_ollama(task, file_id, image_path, mode, ollama_model, custom_prompt): xlsx_file = f"{file_id}_tables.xlsx" _save_excel(tables, os.path.join(OUTPUT_DIR, xlsx_file)) tables_html = [_md_table_to_html(t) for t in tables] - lines = [{"text": l, "confidence": 1.0, "bbox": []} - for l in full_text.splitlines() if l.strip()] + lines = [{"text":l,"confidence":1.0,"bbox":[]} for l in full_text.splitlines() if l.strip()] return { "mode": mode, "backend": "ollama", "ollama_model": ollama_model, + "openrouter_model": "", "full_text": full_text, "lines": lines, "line_count": len(lines), "txt_file": txt_file, - "tables": [{"html": h, "rows": len(t), "cols": max(len(r) for r in t) if t else 0} + "tables": [{"html":h,"rows":len(t),"cols":max(len(r) for r in t) if t else 0} for h, t in zip(tables_html, tables)], "xlsx_file": xlsx_file, } +# ════════════════════════════════════════════════════════════════ +# PaddleOCR 백엔드 +# ════════════════════════════════════════════════════════════════ def _run_paddle(task, file_id, image_path, mode): import cv2 img = cv2.imread(image_path) - if img is None: - raise ValueError("이미지를 읽을 수 없습니다") + if img is None: raise ValueError("이미지를 읽을 수 없습니다") os.makedirs(OUTPUT_DIR, exist_ok=True) - return _paddle_structure(task, file_id, img) if mode == "structure" \ - else _paddle_text(task, file_id, img) + return _paddle_structure(task, file_id, img) if mode == "structure" else _paddle_text(task, file_id, img) def _paddle_text(task, file_id, img): - task.update_state(state="PROGRESS", meta={"progress": 30, "message": "텍스트 인식 중..."}) + task.update_state(state="PROGRESS", meta={"progress":30,"message":"텍스트 인식 중..."}) result = get_ocr().ocr(img) - task.update_state(state="PROGRESS", meta={"progress": 80, "message": "결과 정리 중..."}) + task.update_state(state="PROGRESS", 
meta={"progress":80,"message":"결과 정리 중..."}) lines = [] if result and len(result) > 0: r = result[0] if isinstance(r, dict): - texts = r.get("rec_texts", []) - scores = r.get("rec_scores", []) - for text, conf in zip(texts, scores): - if text.strip(): - lines.append({"text": text, "confidence": round(float(conf), 3), "bbox": []}) + for text, conf in zip(r.get("rec_texts",[]), r.get("rec_scores",[])): + if text.strip(): lines.append({"text":text,"confidence":round(float(conf),3),"bbox":[]}) elif isinstance(r, list): for item in r: - if item and len(item) == 2: + if item and len(item)==2: _, (text, conf) = item - if text.strip(): - lines.append({"text": text, "confidence": round(float(conf), 3), "bbox": []}) + if text.strip(): lines.append({"text":text,"confidence":round(float(conf),3),"bbox":[]}) full_text = "\n".join(l["text"] for l in lines) - txt_file = f"{file_id}_ocr.txt" - with open(os.path.join(OUTPUT_DIR, txt_file), "w", encoding="utf-8") as f: - f.write(full_text) - return {"mode": "text", "backend": "paddle", "ollama_model": "", - "full_text": full_text, "lines": lines, - "line_count": len(lines), "txt_file": txt_file, - "tables": [], "xlsx_file": None} + txt_file = f"{file_id}_ocr.txt" + with open(os.path.join(OUTPUT_DIR, txt_file), "w", encoding="utf-8") as f: f.write(full_text) + return {"mode":"text","backend":"paddle","ollama_model":"","openrouter_model":"", + "full_text":full_text,"lines":lines,"line_count":len(lines), + "txt_file":txt_file,"tables":[],"xlsx_file":None} def _paddle_structure(task, file_id, img): - task.update_state(state="PROGRESS", meta={"progress": 20, "message": "레이아웃 분석 중..."}) + task.update_state(state="PROGRESS", meta={"progress":20,"message":"레이아웃 분석 중..."}) result = get_structure()(img) - task.update_state(state="PROGRESS", meta={"progress": 60, "message": "표 구조 추출 중..."}) + task.update_state(state="PROGRESS", meta={"progress":60,"message":"표 구조 추출 중..."}) text_blocks, tables_html, tables_data = [], [], [] for region in 
result: - rtype = region.get("type", "").lower() + rtype = region.get("type","").lower() if rtype == "table": - html = region.get("res", {}).get("html", "") - if html: - tables_html.append(html) - tables_data.append(_html_table_to_list(html)) - elif rtype in ("text", "title", "figure_caption"): - for line in (region.get("res", []) or []): - if isinstance(line, (list, tuple)) and len(line) == 2: - _, (text, _conf) = line - text_blocks.append(text) + html = region.get("res",{}).get("html","") + if html: tables_html.append(html); tables_data.append(_html_table_to_list(html)) + elif rtype in ("text","title","figure_caption"): + for line in (region.get("res",[]) or []): + if isinstance(line,(list,tuple)) and len(line)==2: + _, (text, _conf) = line; text_blocks.append(text) full_text = "\n".join(text_blocks) - task.update_state(state="PROGRESS", meta={"progress": 80, "message": "Excel 생성 중..."}) + task.update_state(state="PROGRESS", meta={"progress":80,"message":"Excel 생성 중..."}) xlsx_file = None if tables_data: xlsx_file = f"{file_id}_tables.xlsx" @@ -178,15 +272,17 @@ def _paddle_structure(task, file_id, img): txt_file = f"{file_id}_ocr.txt" with open(os.path.join(OUTPUT_DIR, txt_file), "w", encoding="utf-8") as f: f.write("# 텍스트\n\n" + full_text) - lines = [{"text": t, "confidence": 1.0, "bbox": []} for t in text_blocks] - tables_meta = [{"html": h, "rows": len(d), "cols": max(len(r) for r in d) if d else 0} + lines = [{"text":t,"confidence":1.0,"bbox":[]} for t in text_blocks] + tables_meta = [{"html":h,"rows":len(d),"cols":max(len(r) for r in d) if d else 0} for h, d in zip(tables_html, tables_data)] - return {"mode": "structure", "backend": "paddle", "ollama_model": "", - "full_text": full_text, "lines": lines, - "line_count": len(lines), "txt_file": txt_file, - "tables": tables_meta, "xlsx_file": xlsx_file} + return {"mode":"structure","backend":"paddle","ollama_model":"","openrouter_model":"", + "full_text":full_text,"lines":lines,"line_count":len(lines), + 
"txt_file":txt_file,"tables":tables_meta,"xlsx_file":xlsx_file} +# ════════════════════════════════════════════════════════════════ +# 공통 유틸 +# ════════════════════════════════════════════════════════════════ def _parse_md_tables(text): tables, current = [], [] for line in text.splitlines(): @@ -204,8 +300,8 @@ def _md_table_to_html(table): if not table: return "" rows = "" for i, row in enumerate(table): - tag = "th" if i == 0 else "td" - rows += "" + "".join(f"<{tag}>{c}" for c in row) + "" + tag = "th" if i==0 else "td" + rows += ""+"".join(f"<{tag}>{c}" for c in row)+"" return f"{rows}
" def _html_table_to_list(html): @@ -213,36 +309,31 @@ def _html_table_to_list(html): class P(HTMLParser): def __init__(self): super().__init__() - self.rows, self._row, self._cell, self._in = [], [], [], False - def handle_starttag(self, tag, attrs): - if tag == "tr": self._row = [] - elif tag in ("td","th"): self._cell = []; self._in = True - def handle_endtag(self, tag): - if tag in ("td","th"): self._row.append("".join(self._cell).strip()); self._in = False - elif tag == "tr": + self.rows,self._row,self._cell,self._in=[],[],[],False + def handle_starttag(self,tag,attrs): + if tag=="tr": self._row=[] + elif tag in("td","th"): self._cell=[];self._in=True + def handle_endtag(self,tag): + if tag in("td","th"): self._row.append("".join(self._cell).strip());self._in=False + elif tag=="tr": if self._row: self.rows.append(self._row) - def handle_data(self, data): + def handle_data(self,data): if self._in: self._cell.append(data) - p = P(); p.feed(html); return p.rows + p=P();p.feed(html);return p.rows def _save_excel(tables, path): - wb = openpyxl.Workbook(); wb.remove(wb.active) - for i, table in enumerate(tables, 1): - ws = wb.create_sheet(f"표 {i}") - thin = Side(style="thin", color="2A2A33") - bdr = Border(left=thin, right=thin, top=thin, bottom=thin) - for r_idx, row in enumerate(table, 1): - for c_idx, val in enumerate(row, 1): - cell = ws.cell(row=r_idx, column=c_idx, value=val) - cell.border = bdr - cell.alignment = Alignment(horizontal="center", vertical="center", wrap_text=True) - if r_idx == 1: - cell.fill = PatternFill("solid", fgColor="1A1A2E") - cell.font = Font(color="00E5A0", bold=True, size=10) - else: - cell.font = Font(size=10) + wb=openpyxl.Workbook();wb.remove(wb.active) + for i,table in enumerate(tables,1): + ws=wb.create_sheet(f"표 {i}") + thin=Side(style="thin",color="2A2A33");bdr=Border(left=thin,right=thin,top=thin,bottom=thin) + for r_idx,row in enumerate(table,1): + for c_idx,val in enumerate(row,1): + 
cell=ws.cell(row=r_idx,column=c_idx,value=val) + cell.border=bdr;cell.alignment=Alignment(horizontal="center",vertical="center",wrap_text=True) + if r_idx==1: cell.fill=PatternFill("solid",fgColor="1A1A2E");cell.font=Font(color="00E5A0",bold=True,size=10) + else: cell.font=Font(size=10) for col in ws.columns: - w = max((len(str(c.value or "")) for c in col), default=8) - ws.column_dimensions[col[0].column_letter].width = min(w + 4, 40) + w=max((len(str(c.value or "")) for c in col),default=8) + ws.column_dimensions[col[0].column_letter].width=min(w+4,40) if not wb.sheetnames: wb.create_sheet("Sheet1") wb.save(path) diff --git a/app/static/index.html b/app/static/index.html index 9da9725..e6d41cb 100644 --- a/app/static/index.html +++ b/app/static/index.html @@ -251,6 +251,17 @@ textarea.cprompt{width:100%;background:var(--surf);border:1px solid var(--border .ollama-status{font-family:var(--mono);font-size:.63rem;padding:4px 9px;border-radius:2px} .ollama-status.ok{background:rgba(0,229,160,.1);color:var(--accent);border:1px solid rgba(0,229,160,.2)} .ollama-status.fail{background:rgba(255,107,53,.1);color:var(--warn);border:1px solid rgba(255,107,53,.2)} +.openrouter-status.ok{background:rgba(77,166,255,.1);color:var(--blue);border:1px solid rgba(77,166,255,.2)} +.openrouter-status.fail{background:rgba(255,107,53,.1);color:var(--warn);border:1px solid rgba(255,107,53,.2)} +.or-section{margin-top:10px;padding:12px;background:var(--surf2);border:1px solid #1c2840;border-radius:4px} +.key-input-wrap{display:flex;gap:6px;margin-top:6px} +.key-input-wrap input{flex:1;background:var(--surf);border:1px solid var(--border2);color:var(--text);padding:9px 10px;border-radius:3px;font-family:var(--mono);font-size:.78rem;outline:none;-webkit-appearance:none} +.key-input-wrap input:focus{border-color:var(--blue)} +.btn-test{padding:9px 14px;background:none;border:1px solid 
#3a7cc4;color:var(--blue);border-radius:3px;font-family:var(--mono);font-size:.68rem;cursor:pointer;white-space:nowrap;transition:all .15s} +.btn-test:hover{background:rgba(77,166,255,.08)} +.or-model-tabs{display:flex;gap:5px;margin-top:8px;flex-wrap:wrap} +.or-model-tab{font-family:var(--mono);font-size:.6rem;padding:4px 10px;border:1px solid var(--border2);background:none;color:var(--muted);border-radius:2px;cursor:pointer;transition:all .12s;text-transform:uppercase} +.or-model-tab.active{border-color:var(--blue);color:var(--blue);background:rgba(77,166,255,.07)} /* ── ADMIN ── */ #page-admin{display:none;flex-direction:column} @@ -372,11 +383,17 @@ textarea.cprompt{width:100%;background:var(--surf);border:1px solid var(--border
+
후처리 모델
+
+
OpenRouter 후처리 모델
+ +
⚙️ 설정 → OpenRouter에서 API 키 및 기본 모델을 설정하세요
+
처리 중...0%
@@ -428,6 +445,7 @@ textarea.cprompt{width:100%;background:var(--surf);border:1px solid var(--border
+
Vision 모델
@@ -435,6 +453,13 @@ textarea.cprompt{width:100%;background:var(--surf);border:1px solid var(--border ▶ 커스텀 프롬프트
+
+
OpenRouter Vision 모델
+ + ▶ 커스텀 프롬프트 + +
⚠️ Vision 기능을 지원하는 모델만 이미지 처리 가능 (Claude-3, GPT-4o, Gemini 등)
+
인식 모드
@@ -560,6 +585,40 @@ textarea.cprompt{width:100%;background:var(--surf);border:1px solid var(--border
+ +
+

🌐 OpenRouter 외부 AI 연동

+ +
+ + +
+ + + + + + +
+
@@ -643,6 +702,7 @@ textarea.cprompt{width:100%;background:var(--surf);border:1px solid var(--border // ══ STATE ══ let token=null,currentUser=null,ollamaModels=[],appSettings={}; let sttFile=null,sttOutputFile=null,sttEngine='whisper'; +let orModels=[],orVisionModels=[],orTextModels=[]; let ocrFile=null,ocrOutputTxt=null,ocrOutputXlsx=null,ocrEngine='paddle',ocrMode='text'; let editTarget=null,sysTimer=null; let histPage=1,histType='',histTotal=0; @@ -721,6 +781,8 @@ function populateModelSelects(){ fill(document.getElementById('ocr-ollama-model'),appSettings.ocr_ollama_model,'설정 기본 모델 사용'); fill(document.getElementById('setting-stt-model'),appSettings.stt_ollama_model,'(없음)'); fill(document.getElementById('setting-ocr-model'),appSettings.ocr_ollama_model,'(없음)'); + // OpenRouter 드롭다운 + populateOrSelects(); } // ══ 설정 ══ @@ -729,7 +791,11 @@ async function loadSettings(){ const th=appSettings.cpu_threads||0;cpuSlider.value=th;cpuDisplay.textContent=th===0?'0 (자동)':th+' 스레드'; document.getElementById('stt-timeout').value=appSettings.stt_timeout||0; document.getElementById('ollama-timeout').value=appSettings.ollama_timeout||600; - populateModelSelects()}catch{} + if(appSettings.openrouter_url)document.getElementById('or-url').value=appSettings.openrouter_url; + if(appSettings.openrouter_api_key_masked)document.getElementById('or-api-key').placeholder='저장된 키: '+appSettings.openrouter_api_key_masked; + populateModelSelects(); + // 기존 OR 모델 로드 + if(appSettings.openrouter_api_key_masked)loadOrModels();}catch{} } document.getElementById('btn-save-settings').addEventListener('click',async()=>{ const fd=new FormData(); @@ -738,6 +804,10 @@ document.getElementById('btn-save-settings').addEventListener('click',async()=>{ fd.append('cpu_threads',cpuSlider.value); fd.append('stt_timeout',document.getElementById('stt-timeout').value||'0'); fd.append('ollama_timeout',document.getElementById('ollama-timeout').value||'600'); + 
fd.append('openrouter_url',document.getElementById('or-url').value||'https://openrouter.ai/api/v1'); + const orKey=document.getElementById('or-api-key').value.trim();if(orKey)fd.append('openrouter_api_key',orKey); + fd.append('openrouter_stt_model',document.getElementById('setting-or-stt-model').value); + fd.append('openrouter_ocr_model',document.getElementById('setting-or-ocr-model').value); try{const r=await api('POST','/api/settings',fd);if(r.ok){appSettings=(await r.json()).settings;const msg=document.getElementById('settings-msg');msg.style.display='block';setTimeout(()=>msg.style.display='none',3500)}}catch{} }); document.getElementById('btn-refresh-models').addEventListener('click',loadOllamaModels); @@ -762,10 +832,11 @@ sttDrop.addEventListener('dragover',e=>{e.preventDefault();sttDrop.classList.add sttDrop.addEventListener('dragleave',()=>sttDrop.classList.remove('dragover')); sttDrop.addEventListener('drop',e=>{e.preventDefault();sttDrop.classList.remove('dragover');setSttFile(e.dataTransfer.files[0])}); function setSttFile(f){if(!f)return;sttFile=f;showFileInfo('stt',f);document.getElementById('stt-btn').disabled=false;document.getElementById('stt-err').style.display='none'} -document.querySelectorAll('#page-stt .engine-btn').forEach(btn=>{btn.addEventListener('click',()=>{document.querySelectorAll('#page-stt .engine-btn').forEach(b=>b.classList.remove('active'));btn.classList.add('active');sttEngine=btn.dataset.engine;document.getElementById('stt-ollama-opts').classList.toggle('visible',sttEngine==='whisper+ollama');document.getElementById('stt-btn').className='btn-start '+(sttEngine==='whisper+ollama'?'purple':'green')})}); +document.querySelectorAll('#page-stt .engine-btn').forEach(btn=>{btn.addEventListener('click',()=>{document.querySelectorAll('#page-stt 
.engine-btn').forEach(b=>b.classList.remove('active'));btn.classList.add('active');sttEngine=btn.dataset.engine;document.getElementById('stt-ollama-opts').classList.toggle('visible',sttEngine==='whisper+ollama');document.getElementById('stt-or-opts').classList.toggle('visible',sttEngine==='whisper+openrouter');const isOr=sttEngine==='whisper+openrouter',isOllama=sttEngine==='whisper+ollama';document.getElementById('stt-btn').className='btn-start '+(isOr||isOllama?'purple':'green')})}); document.getElementById('stt-btn').addEventListener('click',async()=>{ if(!sttFile)return;document.getElementById('stt-err').style.display='none';setSttLoading(true); const fd=new FormData();fd.append('file',sttFile);fd.append('use_ollama',sttEngine==='whisper+ollama'?'true':'false');fd.append('ollama_model',document.getElementById('stt-ollama-model').value||''); + fd.append('use_openrouter',sttEngine==='whisper+openrouter'?'true':'false');fd.append('openrouter_model',document.getElementById('stt-or-model').value||''); try{const r=await api('POST','/api/transcribe',fd);const d=await r.json();if(!r.ok)throw new Error(d.detail||'업로드 실패');pollTask(d.task_id,dt=>setProg('stt',dt.progress||0,dt.message||'처리 중...'),showSttResult,e=>{showErr('stt-err',e);setSttLoading(false)})} catch(e){showErr('stt-err',e.message);setSttLoading(false)} }); @@ -793,12 +864,13 @@ ocrDrop.addEventListener('dragover',e=>{e.preventDefault();ocrDrop.classList.add ocrDrop.addEventListener('dragleave',()=>ocrDrop.classList.remove('dragover')); ocrDrop.addEventListener('drop',e=>{e.preventDefault();ocrDrop.classList.remove('dragover');setOcrFile(e.dataTransfer.files[0])}); function setOcrFile(f){if(!f)return;ocrFile=f;showFileInfo('ocr',f);document.getElementById('ocr-btn').disabled=false;document.getElementById('ocr-err').style.display='none';const p=document.getElementById('ocr-preview'),w=document.getElementById('ocr-preview-wrap');p.src=URL.createObjectURL(f);w.style.display='block'} 
-document.querySelectorAll('#page-ocr .engine-btn').forEach(btn=>{btn.addEventListener('click',()=>{document.querySelectorAll('#page-ocr .engine-btn').forEach(b=>b.classList.remove('active'));btn.classList.add('active');ocrEngine=btn.dataset.engine;document.getElementById('ocr-ollama-opts').classList.toggle('visible',ocrEngine==='ollama');document.getElementById('ocr-btn').className='btn-start '+(ocrEngine==='ollama'?'purple':'green')})}); +document.querySelectorAll('#page-ocr .engine-btn').forEach(btn=>{btn.addEventListener('click',()=>{document.querySelectorAll('#page-ocr .engine-btn').forEach(b=>b.classList.remove('active'));btn.classList.add('active');ocrEngine=btn.dataset.engine;document.getElementById('ocr-ollama-opts').classList.toggle('visible',ocrEngine==='ollama');document.getElementById('ocr-or-opts').classList.toggle('visible',ocrEngine==='openrouter');const isOr=ocrEngine==='openrouter',isOllama=ocrEngine==='ollama';document.getElementById('ocr-btn').className='btn-start '+(isOr||isOllama?'purple':'green')})}); document.getElementById('cprompt-toggle').addEventListener('click',()=>{const ta=document.getElementById('custom-prompt');const open=ta.style.display!=='block';ta.style.display=open?'block':'none';document.getElementById('cprompt-toggle').textContent=(open?'▼':'▶')+' 커스텀 프롬프트'}); +document.getElementById('cprompt-toggle-or').addEventListener('click',()=>{const ta=document.getElementById('custom-prompt-or');const open=ta.style.display!=='block';ta.style.display=open?'block':'none';document.getElementById('cprompt-toggle-or').textContent=(open?'▼':'▶')+' 커스텀 프롬프트'}); document.querySelectorAll('.mode-btn').forEach(btn=>{btn.addEventListener('click',()=>{document.querySelectorAll('.mode-btn').forEach(b=>b.classList.remove('active'));btn.classList.add('active');ocrMode=btn.dataset.mode;document.getElementById('mode-desc').textContent=ocrMode==='structure'?'표 구조를 감지하고 Excel로 저장합니다':'일반 텍스트와 글자를 인식합니다'})}); 
document.getElementById('ocr-btn').addEventListener('click',async()=>{ if(!ocrFile)return;document.getElementById('ocr-err').style.display='none';setOcrLoading(true); - const fd=new FormData();fd.append('file',ocrFile);fd.append('mode',ocrMode);fd.append('backend',ocrEngine);fd.append('ollama_model',document.getElementById('ocr-ollama-model').value||'');fd.append('custom_prompt',document.getElementById('custom-prompt').value||''); + const fd=new FormData();fd.append('file',ocrFile);fd.append('mode',ocrMode);fd.append('ollama_model',document.getElementById('ocr-ollama-model').value||'');fd.append('custom_prompt',document.getElementById('custom-prompt').value||''); try{const r=await api('POST','/api/ocr',fd);const d=await r.json();if(!r.ok)throw new Error(d.detail||'업로드 실패');pollTask(d.task_id,dt=>setProg('ocr',dt.progress||0,dt.message||'처리 중...'),showOcrResult,e=>{showErr('ocr-err',e);setOcrLoading(false)})} catch(e){showErr('ocr-err',e.message);setOcrLoading(false)} }); @@ -1067,6 +1139,72 @@ function fmtTime(s){const m=Math.floor(s/60),ss=Math.floor(s%60);return String(m function esc(s){return String(s||'').replace(/&/g,'&').replace(//g,'>')} async function copyText(text,btn){try{await navigator.clipboard.writeText(text);const o=btn.textContent;btn.textContent='복사됨 ✓';setTimeout(()=>btn.textContent=o,1500)}catch{}} +// ══ OPENROUTER ══ +async function loadOrModels(){ + try{ + const r=await api('GET','/api/openrouter/models');const d=await r.json(); + const wrap=document.getElementById('or-models-wrap'); + if(d.connected){ + orModels=d.models||[];orVisionModels=d.vision_models||[];orTextModels=d.text_models||[]; + wrap.style.display='block'; + document.getElementById('or-connected-badge').textContent=`✓ 연결됨 — Vision ${orVisionModels.length}개 / 전체 ${orModels.length}개`; + populateOrSelects('vision'); + } else { + wrap.style.display='none'; + } + }catch{} +} + +let orFilter='vision'; +document.querySelectorAll('.or-model-tab').forEach(btn=>{ + 
btn.addEventListener('click',()=>{ + document.querySelectorAll('.or-model-tab').forEach(b=>b.classList.remove('active')); + btn.classList.add('active');orFilter=btn.dataset.filter;populateOrSelects(orFilter); + }); +}); + +function populateOrSelects(filter){ + filter=filter||orFilter; + const list = filter==='vision'?orVisionModels:filter==='text'?orTextModels:orModels; + const fillOr=(sel,def)=>{ + const cur=sel.value||def||''; + sel.innerHTML=''; + list.forEach(m=>{const o=document.createElement('option');o.value=m;o.textContent=m;if(m===cur)o.selected=true;sel.appendChild(o)}); + }; + const sttSel=document.getElementById('setting-or-stt-model'); + const ocrSel=document.getElementById('setting-or-ocr-model'); + const sttPage=document.getElementById('stt-or-model'); + const ocrPage=document.getElementById('ocr-or-model'); + if(sttSel)fillOr(sttSel,appSettings.openrouter_stt_model); + if(ocrSel){ + // OCR은 Vision만 + const vlist=filter==='text'?[]:orVisionModels; + const cur=ocrSel.value||appSettings.openrouter_ocr_model||''; + ocrSel.innerHTML=''; + vlist.forEach(m=>{const o=document.createElement('option');o.value=m;o.textContent=m;if(m===cur)o.selected=true;ocrSel.appendChild(o)}); + } + if(sttPage)fillOr(sttPage,appSettings.openrouter_stt_model); + if(ocrPage){ + const cur=ocrPage.value||appSettings.openrouter_ocr_model||''; + ocrPage.innerHTML=''; + orVisionModels.forEach(m=>{const o=document.createElement('option');o.value=m;o.textContent=m;if(m===cur)o.selected=true;ocrPage.appendChild(o)}); + } +} + +document.getElementById('btn-or-test').addEventListener('click',async()=>{ + const key=document.getElementById('or-api-key').value.trim(); + const url=document.getElementById('or-url').value.trim()||'https://openrouter.ai/api/v1'; + const result=document.getElementById('or-test-result'); + if(!key){result.style.display='block';result.style.color='var(--warn)';result.textContent='API 키를 입력하세요';return} + 
result.style.display='block';result.style.color='var(--muted)';result.textContent='연결 중...'; + try{ + const fd=new FormData();fd.append('api_key',key);fd.append('base_url',url); + const r=await api('POST','/api/openrouter/test',fd);const d=await r.json(); + result.style.color=d.ok?'var(--accent)':'var(--warn)';result.textContent=d.message; + if(d.ok)loadOrModels(); + }catch{result.style.color='var(--warn)';result.textContent='요청 실패'} +}); + checkAuth(); diff --git a/app/tasks.py b/app/tasks.py index d24d8fb..fff1fcd 100644 --- a/app/tasks.py +++ b/app/tasks.py @@ -15,15 +15,12 @@ OLLAMA_URL = os.getenv("OLLAMA_URL", "http://192.168.0.126:11434") OLLAMA_TIMEOUT = int(os.getenv("OLLAMA_TIMEOUT", "600")) _cpu_threads_env = int(os.getenv("CPU_THREADS", "0")) -CPU_THREADS = _cpu_threads_env if _cpu_threads_env > 0 else None # None = auto +CPU_THREADS = _cpu_threads_env if _cpu_threads_env > 0 else None celery_app = Celery("whisper_tasks", broker=REDIS_URL, backend=REDIS_URL) celery_app.conf.update( - task_serializer="json", - result_serializer="json", - accept_content=["json"], - task_track_started=True, - result_expires=3600, + task_serializer="json", result_serializer="json", + accept_content=["json"], task_track_started=True, result_expires=3600, ) _model = None @@ -33,92 +30,128 @@ def get_model(): if _model is None: from faster_whisper import WhisperModel kwargs = dict(device=DEVICE, compute_type=COMPUTE_TYPE) - if CPU_THREADS is not None: - kwargs["cpu_threads"] = CPU_THREADS + if CPU_THREADS is not None: kwargs["cpu_threads"] = CPU_THREADS print(f"[Whisper] 로딩: {MODEL_SIZE} / {DEVICE} / {COMPUTE_TYPE} / threads={CPU_THREADS or 'auto'}") _model = WhisperModel(MODEL_SIZE, **kwargs) print("[Whisper] 로드 완료") return _model +# ── 후처리: Ollama ──────────────────────────────────────────── def _ollama_postprocess(text: str, model: str) -> str: - if not model or not text.strip(): - return text + if not model or not text.strip(): return text prompt = ( "다음은 음성 인식으로 추출된 
텍스트입니다. " "내용은 절대 변경하지 말고, 문장 부호를 추가하고 자연스럽게 다듬어줘. " - "결과 텍스트만 출력하고 설명은 하지 마.\n\n" - f"{text}" + "결과 텍스트만 출력하고 설명은 하지 마.\n\n" + text + ) + try: + resp = httpx.post(f"{OLLAMA_URL}/api/chat", json={ + "model": model, + "messages": [{"role":"user","content":prompt}], + "stream": False, "options": {"temperature": 0.1}, + }, timeout=float(OLLAMA_TIMEOUT)) + resp.raise_for_status() + result = resp.json().get("message",{}).get("content","").strip() + return result if result else text + except Exception as e: + print(f"[Ollama 후처리 실패] {e}"); return text + + +# ── 후처리: OpenRouter (OpenAI 호환) ───────────────────────── +def _openrouter_postprocess(text: str, model: str, base_url: str, api_key: str) -> str: + if not model or not api_key or not text.strip(): return text + prompt = ( + "다음은 음성 인식으로 추출된 텍스트입니다. " + "내용은 절대 변경하지 말고, 문장 부호를 추가하고 자연스럽게 다듬어줘. " + "결과 텍스트만 출력하고 설명은 하지 마.\n\n" + text ) try: resp = httpx.post( - f"{OLLAMA_URL}/api/chat", - json={"model": model, - "messages": [{"role": "user", "content": prompt}], - "stream": False, "options": {"temperature": 0.1}}, + f"{base_url.rstrip('/')}/chat/completions", + headers={ + "Authorization": f"Bearer {api_key}", + "HTTP-Referer": "https://voicescript.local", + "X-Title": "VoiceScript", + "Content-Type": "application/json", + }, + json={ + "model": model, + "messages": [{"role":"user","content":prompt}], + "temperature": 0.1, + }, timeout=float(OLLAMA_TIMEOUT), ) resp.raise_for_status() - result = resp.json().get("message", {}).get("content", "").strip() + result = resp.json()["choices"][0]["message"]["content"].strip() return result if result else text except Exception as e: - print(f"[Ollama 후처리 실패] {e}") - return text + print(f"[OpenRouter 후처리 실패] {e}"); return text +# ════════════════════════════════════════════════════════════════ +# STT Task +# ════════════════════════════════════════════════════════════════ @celery_app.task(bind=True, name="tasks.transcribe_task", queue="stt") -def transcribe_task(self, file_id: 
str, audio_path: str, - use_ollama: bool = False, ollama_model: str = ""): - self.update_state(state="PROGRESS", meta={"progress": 5, "message": "모델 준비 중..."}) +def transcribe_task( + self, + file_id: str, + audio_path: str, + use_ollama: bool = False, + ollama_model: str = "", + use_openrouter: bool = False, + openrouter_model: str = "", + openrouter_url: str = "", + openrouter_key: str = "", +): + self.update_state(state="PROGRESS", meta={"progress":5,"message":"모델 준비 중..."}) try: model = get_model() - self.update_state(state="PROGRESS", meta={"progress": 15, "message": "오디오 분석 중..."}) + self.update_state(state="PROGRESS", meta={"progress":15,"message":"오디오 분석 중..."}) segments_gen, info = model.transcribe( - audio_path, - language=LANGUAGE, - beam_size=BEAM_SIZE, - initial_prompt=INITIAL_PROMPT, - vad_filter=True, - vad_parameters=dict(min_silence_duration_ms=500), - word_timestamps=False, + audio_path, language=LANGUAGE, beam_size=BEAM_SIZE, + initial_prompt=INITIAL_PROMPT, vad_filter=True, + vad_parameters=dict(min_silence_duration_ms=500), word_timestamps=False, ) - self.update_state(state="PROGRESS", meta={"progress": 30, "message": "텍스트 변환 중..."}) - + self.update_state(state="PROGRESS", meta={"progress":30,"message":"텍스트 변환 중..."}) segments, parts = [], [] duration = info.duration for seg in segments_gen: - segments.append({"start": round(seg.start, 2), - "end": round(seg.end, 2), - "text": seg.text.strip()}) + segments.append({"start":round(seg.start,2),"end":round(seg.end,2),"text":seg.text.strip()}) parts.append(seg.text.strip()) if duration > 0: - pct = 30 + int((seg.end / duration) * 50) - self.update_state( - state="PROGRESS", - meta={"progress": min(pct, 80), - "message": f"변환 중... {seg.end:.0f}s / {duration:.0f}s"}, - ) + pct = 30 + int((seg.end/duration)*50) + self.update_state(state="PROGRESS", + meta={"progress":min(pct,80),"message":f"변환 중... 
{seg.end:.0f}s / {duration:.0f}s"}) raw_text = "\n".join(parts) full_text = raw_text + # Ollama 후처리 if use_ollama and ollama_model: self.update_state(state="PROGRESS", - meta={"progress": 85, - "message": f"Ollama({ollama_model}) 후처리 중..."}) + meta={"progress":85,"message":f"Ollama({ollama_model}) 후처리 중..."}) full_text = _ollama_postprocess(raw_text, ollama_model) - self.update_state(state="PROGRESS", meta={"progress": 95, "message": "파일 저장 중..."}) + # OpenRouter 후처리 + elif use_openrouter and openrouter_model and openrouter_key: + self.update_state(state="PROGRESS", + meta={"progress":85,"message":f"OpenRouter({openrouter_model}) 후처리 중..."}) + full_text = _openrouter_postprocess(raw_text, openrouter_model, openrouter_url, openrouter_key) + + self.update_state(state="PROGRESS", meta={"progress":95,"message":"파일 저장 중..."}) os.makedirs(OUTPUT_DIR, exist_ok=True) output_filename = f"{file_id}.txt" with open(os.path.join(OUTPUT_DIR, output_filename), "w", encoding="utf-8") as f: f.write(f"# 변환 결과\n# 언어: {info.language} | 재생 시간: {duration:.1f}초") if use_ollama and ollama_model: - f.write(f" | Ollama 후처리: {ollama_model}") + f.write(f" | Ollama: {ollama_model}") + elif use_openrouter and openrouter_model: + f.write(f" | OpenRouter: {openrouter_model}") f.write("\n\n## 전체 텍스트\n\n" + full_text + "\n\n") f.write("## 타임스탬프별 세그먼트\n\n") for seg in segments: @@ -128,14 +161,16 @@ def transcribe_task(self, file_id: str, audio_path: str, except: pass return { - "text": full_text, - "raw_text": raw_text, - "segments": segments, - "language": info.language, - "duration": round(duration, 1), - "output_file": output_filename, - "ollama_used": use_ollama and bool(ollama_model), - "ollama_model": ollama_model if (use_ollama and ollama_model) else "", + "text": full_text, + "raw_text": raw_text, + "segments": segments, + "language": info.language, + "duration": round(duration, 1), + "output_file": output_filename, + "ollama_used": use_ollama and bool(ollama_model), + "ollama_model": 
ollama_model if (use_ollama and ollama_model) else "", + "openrouter_used": use_openrouter and bool(openrouter_model) and bool(openrouter_key), + "openrouter_model": openrouter_model if (use_openrouter and openrouter_model) else "", } except Exception as e: