From c3cb7a6e8fac5d1f80c1b56dc790b3fb57e508a4 Mon Sep 17 00:00:00 2001 From: root Date: Mon, 4 May 2026 08:12:59 +0900 Subject: [PATCH] =?UTF-8?q?feat:=20VoiceScript=20STT+OCR=20=EC=9E=90?= =?UTF-8?q?=EB=A7=89=EA=B8=B0=EB=8A=A5=20=EC=88=98=EC=A0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/auth.py | 50 +- app/main.py | 317 ++++++---- app/static/index.html | 1302 ++++++++++++++++++++++++++--------------- app/tasks.py | 588 ++++++++++--------- 4 files changed, 1371 insertions(+), 886 deletions(-) diff --git a/app/auth.py b/app/auth.py index 03f4169..61c22d4 100644 --- a/app/auth.py +++ b/app/auth.py @@ -1,22 +1,11 @@ """ 인증 모듈 — 다중 사용자 JSON 파일 기반 -사용자 구조: -{ - "password": "...", - "role": "admin" | "user", - "permissions": { - "stt": true | false, - "ocr": true | false, - "allowed_stt_models": ["medium", "large-v3", ...], # 빈 배열 = 모두 허용 - "allowed_ocr_models": ["granite3.2-vision", ...] # 빈 배열 = 모두 허용 - } -} +권한: stt | ocr | subtitle """ import os, json, threading from pathlib import Path from datetime import datetime, timedelta - -from fastapi import Depends, HTTPException, status +from fastapi import Depends, HTTPException from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials from jose import JWTError, jwt @@ -32,20 +21,15 @@ USERS_FILE = DATA_DIR / "users.json" _lock = threading.Lock() bearer = HTTPBearer(auto_error=False) - -# ── 파일 I/O ────────────────────────────────────────────────── def _load() -> dict: if not USERS_FILE.exists(): return {} - with open(USERS_FILE, "r", encoding="utf-8") as f: - return json.load(f) + with open(USERS_FILE, "r", encoding="utf-8") as f: return json.load(f) def _save(users: dict): USERS_FILE.parent.mkdir(parents=True, exist_ok=True) with open(USERS_FILE, "w", encoding="utf-8") as f: json.dump(users, f, ensure_ascii=False, indent=2) - -# ── 초기화 ──────────────────────────────────────────────────── def init_users(): with _lock: users = _load() @@ -53,15 +37,12 @@ def init_users(): "password": ADMIN_PASSWORD, "role": "admin", "permissions": { - "stt": True, "ocr": True, - "allowed_stt_models": [], # 빈 배열 = 제한 없음 - "allowed_ocr_models": [], + "stt": True, "ocr": True, "subtitle": True, + "allowed_stt_models": [], "allowed_ocr_models": [], }, } _save(users) - -# ── CRUD ────────────────────────────────────────────────────── def authenticate(username: str, password: str): with _lock: users = _load() u = users.get(username) @@ -80,9 +61,9 @@ def create_user(username: str, password: str, permissions: dict) -> tuple: with _lock: users = _load() if username in users: return False, "이미 존재하는 사용자입니다" - # 기본값 보완 permissions.setdefault("allowed_stt_models", []) permissions.setdefault("allowed_ocr_models", []) + permissions.setdefault("subtitle", False) users[username] = {"password": password, "role": "user", "permissions": permissions} _save(users) return True, "사용자가 생성되었습니다" @@ -94,6 +75,7 @@ def update_user(username: str, permissions: dict, password: str = None) -> tuple if username not in users: return False, "사용자를 찾을 수 없습니다" permissions.setdefault("allowed_stt_models", []) permissions.setdefault("allowed_ocr_models", []) + permissions.setdefault("subtitle", False) users[username]["permissions"] = permissions if password: users[username]["password"] = password _save(users) @@ -107,14 +89,10 @@ def delete_user(username: str) -> tuple: del users[username]; _save(users) return True, "삭제되었습니다" - -# ── JWT ─────────────────────────────────────────────────────── def create_access_token(username: str) -> str: exp = datetime.utcnow() + timedelta(hours=EXPIRE_HOURS) return jwt.encode({"sub": username, "exp": exp}, SECRET_KEY, algorithm=ALGORITHM) - -# ── FastAPI 의존성 ───────────────────────────────────────────── def require_auth(credentials: HTTPAuthorizationCredentials = Depends(bearer)) -> dict: if credentials is None: raise HTTPException(401, "인증이 필요합니다", headers={"WWW-Authenticate": "Bearer"}) @@ -126,16 +104,24 @@ def require_auth(credentials: HTTPAuthorizationCredentials = Depends(bearer)) -> if not u: raise JWTError() return {"username": username, **u} except JWTError: - raise HTTPException(401, "토큰이 유효하지 않거나 만료되었습니다", headers={"WWW-Authenticate": "Bearer"}) + raise HTTPException(401, "토큰이 유효하지 않거나 만료되었습니다", + headers={"WWW-Authenticate": "Bearer"}) def require_admin(user: dict = Depends(require_auth)) -> dict: if user.get("role") != "admin": raise HTTPException(403, "관리자 권한이 필요합니다") return user def require_stt(user: dict = Depends(require_auth)) -> dict: - if not user.get("permissions", {}).get("stt", False): raise HTTPException(403, "STT 사용 권한이 없습니다") + if not user.get("permissions", {}).get("stt", False): + raise HTTPException(403, "STT 사용 권한이 없습니다") return user def require_ocr(user: dict = Depends(require_auth)) -> dict: - if not user.get("permissions", {}).get("ocr", False): raise HTTPException(403, "OCR 사용 권한이 없습니다") + if not user.get("permissions", {}).get("ocr", False): + raise HTTPException(403, "OCR 사용 권한이 없습니다") + return user + +def require_subtitle(user: dict = Depends(require_auth)) -> dict: + if not user.get("permissions", {}).get("subtitle", False): + raise HTTPException(403, "자막 사용 권한이 없습니다") return user diff --git a/app/main.py b/app/main.py index 9fca6a8..e9fbe12 100644 --- a/app/main.py +++ b/app/main.py @@ -8,7 +8,7 @@ from fastapi.responses import FileResponse from typing import List from auth import (authenticate, create_access_token, init_users, - require_auth, require_admin, require_stt, require_ocr, + require_auth, require_admin, require_stt, require_ocr, require_subtitle, list_users, create_user, update_user, delete_user) from tasks import celery_app, transcribe_task, subtitle_pipeline_task from ocr_tasks import ocr_task @@ -24,26 +24,26 @@ OUTPUT_KEEP_SECS = int(os.getenv("OUTPUT_KEEP_HOURS", "48")) * 3600 DATA_DIR = Path(UPLOAD_DIR).parent SETTINGS_FILE = DATA_DIR / "settings.json" HISTORY_FILE = DATA_DIR / "history.json" -HISTORY_MAX = 300 +HISTORY_MAX = 500 os.makedirs(UPLOAD_DIR, exist_ok=True) os.makedirs(OUTPUT_DIR, exist_ok=True) AUDIO_EXT = {"mp3","mp4","wav","m4a","ogg","flac","aac","wma","webm", - "mkv","avi","mov","ts","mts","m2ts","wmv","flv","h264","h265","hevc","264","265"} + "mkv","avi","mov","ts","mts","m2ts","wmv","flv","h264","h265","hevc","264","265","m4v"} IMAGE_EXT = {"jpg","jpeg","png","bmp","tiff","tif","webp","gif"} -VIDEO_EXT = {"mp4","mkv","avi","mov","webm","ts","mts","m2ts","wmv","flv","h264","h265","hevc","264","265","m4v","3gp","rm","rmvb"} _DEFAULT_SETTINGS = { "stt_ollama_model":"","ocr_ollama_model":"granite3.2-vision:latest", - "cpu_threads":0,"stt_timeout":0,"ollama_timeout":600, + "cpu_threads":0,"stt_timeout":0,"ollama_timeout":600,"subtitle_timeout":600, "openrouter_url":"https://openrouter.ai/api/v1", "openrouter_api_key":"","openrouter_stt_model":"","openrouter_ocr_model":"", + "groq_api_key":"","openai_api_key":"","default_stt_engine":"local", } _hist_lock = threading.Lock() -# ── 설정 I/O ───────────────────────────────────────────────── +# ── 설정 I/O ────────────────────────────────────────────────── def _load_settings() -> dict: if not SETTINGS_FILE.exists(): return dict(_DEFAULT_SETTINGS) with open(SETTINGS_FILE,"r",encoding="utf-8") as f: data=json.load(f) @@ -54,8 +54,15 @@ def _save_settings(data:dict): SETTINGS_FILE.parent.mkdir(parents=True,exist_ok=True) with open(SETTINGS_FILE,"w",encoding="utf-8") as f: json.dump(data,f,ensure_ascii=False,indent=2) +def _mask(key:str)->str: + if not key: return "" + return key[:6]+"..."+(key[-4:] if len(key)>10 else "") -# ── 이력 I/O ───────────────────────────────────────────────── +def _keep(new_val:str, field:str, current:dict)->str: + return new_val.strip() if new_val.strip() else current.get(field,"") + + +# ── 이력 I/O ────────────────────────────────────────────────── def _load_history()->list: with _hist_lock: if not HISTORY_FILE.exists(): return [] @@ -83,10 +90,10 @@ def _update_history_by_task(task_id:str, result:dict, success:bool, error_msg:st with open(HISTORY_FILE,"r",encoding="utf-8") as f: history=json.load(f) for h in history: if h.get("task_id")!=task_id: continue - if h.get("status")!="processing": break + if h.get("status") not in ("processing","cancelled"): break h["status"]="failed" if not success else "success" if not success: - h["output"]={"error":error_msg[:300]} + h["output"]={"error":error_msg[:500]} elif h["type"]=="stt": text=result.get("text","") h["output"]={ @@ -99,14 +106,17 @@ def _update_history_by_task(task_id:str, result:dict, success:bool, error_msg:st "ollama_model":result.get("ollama_model",""), "openrouter_used":result.get("openrouter_used",False), "openrouter_model":result.get("openrouter_model",""), + "stt_engine":result.get("stt_engine","local"), } elif h["type"]=="subtitle": h["output"]={ "detected_language":result.get("detected_language",""), "duration_s":result.get("duration",0), "segment_count":result.get("segment_count",0), + "stt_engine":result.get("stt_engine","local"), "translated":result.get("translated",False), "translate_to":result.get("translate_to",""), + "refine_model":result.get("refine_model",""), "srt_orig":result.get("srt_orig",""), "vtt_orig":result.get("vtt_orig",""), "srt_trans":result.get("srt_trans",""), @@ -163,7 +173,7 @@ def login(username:str=Form(...),password:str=Form(...)): @app.get("/api/me") def me(user:dict=Depends(require_auth)): return {"username":user["username"],"role":user.get("role","user"), - "permissions":user.get("permissions",{"stt":False,"ocr":False})} + "permissions":user.get("permissions",{"stt":False,"ocr":False,"subtitle":False})} # ════════════════════════════════════════════════════════════════ @@ -180,17 +190,72 @@ def system_info(user:dict=Depends(require_auth)): "cpu_percent":psutil.cpu_percent(interval=0.3), "cpu_threads_setting":s.get("cpu_threads",0), "stt_timeout":s.get("stt_timeout",0),"ollama_timeout":s.get("ollama_timeout",600), + "subtitle_timeout":s.get("subtitle_timeout",600), + } + +@app.get("/api/stt-engines") +def stt_engines(user:dict=Depends(require_auth)): + s=_load_settings() + return { + "local":{"available":True}, + "groq":{"available":True,"key_set":bool(s.get("groq_api_key",""))}, + "openai":{"available":True,"key_set":bool(s.get("openai_api_key",""))}, + "default":s.get("default_stt_engine","local"), } # ════════════════════════════════════════════════════════════════ -# STT 단일 / 배치 +# 작업 상태 / 취소 # ════════════════════════════════════════════════════════════════ -async def _dispatch_stt(request,files,use_ollama,ollama_model,use_openrouter,openrouter_model,user): +@app.get("/api/status/{task_id}") +def get_status(task_id:str, user:dict=Depends(require_auth)): + r=celery_app.AsyncResult(task_id) + if r.state=="PENDING": return {"state":"pending","progress":0,"message":"대기 중..."} + if r.state=="PROGRESS": + m=r.info or {} + return {"state":"progress","progress":m.get("progress",0), + "step":m.get("step",0),"step_msg":m.get("step_msg",""), + "message":m.get("message","처리 중...")} + if r.state=="SUCCESS": + _update_history_by_task(task_id,r.result or {},True) + return {"state":"success","progress":100,**(r.result or {})} + if r.state=="FAILURE": + _update_history_by_task(task_id,{},False,str(r.info)) + return {"state":"failure","progress":0,"message":str(r.info)} + if r.state=="REVOKED": + return {"state":"cancelled","progress":0,"message":"작업이 취소되었습니다"} + return {"state":r.state.lower(),"progress":0} + +@app.post("/api/cancel/{task_id}") +def cancel_task(task_id:str, user:dict=Depends(require_auth)): + """작업 취소 (Celery revoke)""" + try: + celery_app.control.revoke(task_id, terminate=True, signal="SIGTERM") + # 이력에 취소 표시 + with _hist_lock: + if HISTORY_FILE.exists(): + with open(HISTORY_FILE,"r",encoding="utf-8") as f: history=json.load(f) + for h in history: + if h.get("task_id")==task_id and h.get("status")=="processing": + h["status"]="cancelled" + h["output"]={"error":"사용자가 취소했습니다"} + break + _write_history(history) + return {"ok":True,"message":"취소 요청 전송됨"} + except Exception as e: + return {"ok":False,"message":str(e)} + + +# ════════════════════════════════════════════════════════════════ +# STT +# ════════════════════════════════════════════════════════════════ +async def _dispatch_stt(request,files,use_ollama,ollama_model,use_openrouter,openrouter_model, + stt_engine,stt_language,user): s=_load_settings() _uo=use_ollama.lower()=="true"; _uor=use_openrouter.lower()=="true" if _uo and not ollama_model.strip(): ollama_model=s.get("stt_ollama_model","") if _uor and not openrouter_model.strip():openrouter_model=s.get("openrouter_stt_model","") + if not stt_engine: stt_engine=s.get("default_stt_engine","local") results=[] for file in files: _check_size(request) @@ -200,13 +265,18 @@ async def _dispatch_stt(request,files,use_ollama,ollama_model,use_openrouter,ope file_id=str(uuid.uuid4()) save_path=os.path.join(UPLOAD_DIR,f"{file_id}.{ext}") await _save_upload(file,save_path); file_size=os.path.getsize(save_path) - task=transcribe_task.delay(file_id,save_path,_uo,ollama_model,_uor,openrouter_model, - s.get("openrouter_url",""),s.get("openrouter_api_key","")) + task=transcribe_task.delay( + file_id,save_path,_uo,ollama_model,_uor,openrouter_model, + s.get("openrouter_url",""),s.get("openrouter_api_key",""), + stt_engine,s.get("groq_api_key",""),s.get("openai_api_key",""),stt_language or "", + ) append_history({"id":file_id,"task_id":task.id,"type":"stt","status":"processing", "timestamp":datetime.now().strftime("%Y-%m-%d %H:%M:%S"),"username":user["username"], "input":{"filename":file.filename,"size_bytes":file_size,"format":ext.upper()}, - "settings":{"model":os.getenv("WHISPER_MODEL","medium"),"language":os.getenv("WHISPER_LANGUAGE","ko"), - "compute_type":os.getenv("WHISPER_COMPUTE_TYPE","int8"),"cpu_threads":s.get("cpu_threads",0), + "settings":{"model":os.getenv("WHISPER_MODEL","medium"), + "language":stt_language or os.getenv("WHISPER_LANGUAGE","ko"), + "compute_type":os.getenv("WHISPER_COMPUTE_TYPE","int8"), + "cpu_threads":s.get("cpu_threads",0),"stt_engine":stt_engine, "use_ollama":_uo,"ollama_model":ollama_model if _uo else "", "use_openrouter":_uor,"openrouter_model":openrouter_model if _uor else ""}, "output":None}) @@ -217,87 +287,79 @@ async def _dispatch_stt(request,files,use_ollama,ollama_model,use_openrouter,ope async def transcribe(request:Request,file:UploadFile=File(...), use_ollama:str=Form("false"),ollama_model:str=Form(""), use_openrouter:str=Form("false"),openrouter_model:str=Form(""), + stt_engine:str=Form(""),stt_language:str=Form(""), user:dict=Depends(require_stt)): - items=await _dispatch_stt(request,[file],use_ollama,ollama_model,use_openrouter,openrouter_model,user) + items=await _dispatch_stt(request,[file],use_ollama,ollama_model,use_openrouter,openrouter_model,stt_engine,stt_language,user) return items[0] @app.post("/api/transcribe/batch") async def transcribe_batch(request:Request,files:List[UploadFile]=File(...), use_ollama:str=Form("false"),ollama_model:str=Form(""), use_openrouter:str=Form("false"),openrouter_model:str=Form(""), + stt_engine:str=Form(""),stt_language:str=Form(""), user:dict=Depends(require_stt)): if not files: raise HTTPException(400,"파일이 없습니다") if len(files)>20: raise HTTPException(400,"최대 20개까지") - items=await _dispatch_stt(request,files,use_ollama,ollama_model,use_openrouter,openrouter_model,user) + items=await _dispatch_stt(request,files,use_ollama,ollama_model,use_openrouter,openrouter_model,stt_engine,stt_language,user) return {"items":items,"total":len(items)} # ════════════════════════════════════════════════════════════════ -# 자막 파이프라인 (영상 → SRT/VTT) +# 자막 # ════════════════════════════════════════════════════════════════ @app.post("/api/subtitle") async def create_subtitle( - request: Request, - file: UploadFile = File(...), - src_language: str = Form(""), # 원어 (빈칸=자동) - subtitle_fmt: str = Form("srt"), # srt | vtt | both - translate_to: str = Form(""), # 번역 대상 언어 (빈칸=번역 안 함) - trans_model: str = Form(""), # 번역 모델 - trans_via: str = Form("ollama"), # ollama | openrouter - user: dict = Depends(require_stt), + request:Request, file:UploadFile=File(...), + src_language:str=Form(""),subtitle_fmt:str=Form("srt"), + stt_engine:str=Form("local"), + refine_model:str=Form(""),refine_via:str=Form("ollama"), + translate_to:str=Form(""),trans_model:str=Form(""),trans_via:str=Form("ollama"), + user:dict=Depends(require_subtitle), ): _check_size(request) - ext = _ext(file.filename) - # 영상 + 오디오 모두 허용 (오디오만 있어도 자막 생성 가능) - if ext not in AUDIO_EXT: - raise HTTPException(400, f"지원하지 않는 형식입니다. 영상/오디오 파일을 업로드하세요.") - if subtitle_fmt not in ("srt","vtt","both"): subtitle_fmt = "srt" - - s = _load_settings() - # 번역 모델 미지정 시 설정에서 가져옴 + ext=_ext(file.filename) + if ext not in AUDIO_EXT: raise HTTPException(400,"지원하지 않는 형식입니다") + if subtitle_fmt not in ("srt","vtt","both"): subtitle_fmt="srt" + s=_load_settings() + if not stt_engine: stt_engine=s.get("default_stt_engine","local") + if not refine_model.strip(): + refine_model=(s.get("openrouter_stt_model","") if refine_via=="openrouter" + else s.get("stt_ollama_model","")) if not trans_model.strip(): - trans_model = (s.get("openrouter_stt_model","") if trans_via=="openrouter" - else s.get("stt_ollama_model","")) - - file_id = str(uuid.uuid4()) - save_path = os.path.join(UPLOAD_DIR, f"{file_id}.{ext}") - await _save_upload(file, save_path) - file_size = os.path.getsize(save_path) - - task = subtitle_pipeline_task.delay( - file_id, save_path, - src_language, subtitle_fmt, - translate_to, trans_model, trans_via, - s.get("openrouter_url",""), s.get("openrouter_api_key",""), + trans_model=(s.get("openrouter_stt_model","") if trans_via=="openrouter" + else s.get("stt_ollama_model","")) + file_id=str(uuid.uuid4()) + save_path=os.path.join(UPLOAD_DIR,f"{file_id}.{ext}") + await _save_upload(file,save_path) + file_size=os.path.getsize(save_path) + subtitle_timeout=int(s.get("subtitle_timeout",600)) + task=subtitle_pipeline_task.delay( + file_id,save_path,src_language,subtitle_fmt, + stt_engine,s.get("groq_api_key",""),s.get("openai_api_key",""), + refine_model,refine_via,translate_to,trans_model,trans_via, + s.get("openrouter_url",""),s.get("openrouter_api_key",""), + subtitle_timeout, ) - - append_history({ - "id": file_id, "task_id": task.id, "type": "subtitle", - "status": "processing", - "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), - "username": user["username"], - "input": {"filename": file.filename, "size_bytes": file_size, "format": ext.upper()}, - "settings": { - "src_language": src_language or "auto", - "subtitle_fmt": subtitle_fmt, - "translate_to": translate_to, - "trans_model": trans_model, - "trans_via": trans_via, - }, - "output": None, - }) - return {"task_id": task.id, "file_id": file_id, "filename": file.filename} + append_history({"id":file_id,"task_id":task.id,"type":"subtitle","status":"processing", + "timestamp":datetime.now().strftime("%Y-%m-%d %H:%M:%S"),"username":user["username"], + "input":{"filename":file.filename,"size_bytes":file_size,"format":ext.upper()}, + "settings":{"src_language":src_language or "auto","subtitle_fmt":subtitle_fmt, + "stt_engine":stt_engine,"refine_model":refine_model,"refine_via":refine_via, + "translate_to":translate_to,"trans_model":trans_model,"trans_via":trans_via, + "subtitle_timeout":subtitle_timeout}, + "output":None}) + return {"task_id":task.id,"file_id":file_id,"filename":file.filename} # ════════════════════════════════════════════════════════════════ -# OCR 단일 / 배치 +# OCR # ════════════════════════════════════════════════════════════════ async def _dispatch_ocr(request,files,mode,backend,ollama_model,openrouter_model,custom_prompt,user): if mode not in ("text","structure"): mode="text" if backend not in ("paddle","ollama","openrouter"): backend="paddle" s=_load_settings() - if backend=="ollama" and not ollama_model.strip(): ollama_model=s.get("ocr_ollama_model","granite3.2-vision:latest") - if backend=="openrouter" and not openrouter_model.strip():openrouter_model=s.get("openrouter_ocr_model","") + if backend=="ollama" and not ollama_model.strip(): ollama_model=s.get("ocr_ollama_model","granite3.2-vision:latest") + if backend=="openrouter" and not openrouter_model.strip():openrouter_model=s.get("openrouter_ocr_model","") results=[] for file in files: _check_size(request) @@ -315,7 +377,7 @@ async def _dispatch_ocr(request,files,mode,backend,ollama_model,openrouter_model "settings":{"backend":backend,"mode":mode,"ocr_lang":os.getenv("OCR_LANG","korean"), "ollama_model":ollama_model if backend=="ollama" else "", "openrouter_model":openrouter_model if backend=="openrouter" else "", - "ollama_timeout":s.get("ollama_timeout",600),"custom_prompt":custom_prompt[:200] if custom_prompt else ""}, + "custom_prompt":custom_prompt[:200] if custom_prompt else ""}, "output":None}) results.append({"task_id":task.id,"file_id":file_id,"filename":file.filename}) return results @@ -340,17 +402,8 @@ async def ocr_batch(request:Request,files:List[UploadFile]=File(...), # ════════════════════════════════════════════════════════════════ -# 상태 / 이력 / 다운로드 / Ollama / OpenRouter / 설정 / 관리자 +# 이력 # ════════════════════════════════════════════════════════════════ -@app.get("/api/status/{task_id}") -def get_status(task_id:str,user:dict=Depends(require_auth)): - r=celery_app.AsyncResult(task_id) - if r.state=="PENDING": return {"state":"pending","progress":0,"message":"대기 중..."} - if r.state=="PROGRESS": m=r.info or {};return {"state":"progress","progress":m.get("progress",0),"step":m.get("step",0),"step_msg":m.get("step_msg",""),"message":m.get("message","처리 중...")} - if r.state=="SUCCESS": _update_history_by_task(task_id,r.result or {},True);return {"state":"success","progress":100,**(r.result or {})} - if r.state=="FAILURE": _update_history_by_task(task_id,{},False,str(r.info));return {"state":"failure","progress":0,"message":str(r.info)} - return {"state":r.state.lower(),"progress":0} - @app.get("/api/history") def get_history(page:int=1,per_page:int=15,type_:str="",user:dict=Depends(require_auth)): history=_load_history() @@ -366,8 +419,12 @@ def delete_history(history_id:str,user:dict=Depends(require_auth)): @app.delete("/api/history") def clear_all_history(user:dict=Depends(require_admin)): - clear_history();return {"ok":True} + clear_history(); return {"ok":True} + +# ════════════════════════════════════════════════════════════════ +# 다운로드 / Ollama / OpenRouter / 설정 / 관리자 +# ════════════════════════════════════════════════════════════════ @app.get("/api/download/{filename}") def download(filename:str,user:dict=Depends(require_auth)): if ".." in filename or "/" in filename: raise HTTPException(400,"잘못된 파일명") @@ -382,79 +439,102 @@ def download(filename:str,user:dict=Depends(require_auth)): @app.get("/api/ollama/models") def ollama_models(user:dict=Depends(require_auth)): try: - resp=httpx.get(f"{OLLAMA_URL}/api/tags",timeout=8.0);resp.raise_for_status() + resp=httpx.get(f"{OLLAMA_URL}/api/tags",timeout=8.0); resp.raise_for_status() return {"models":[m["name"] for m in resp.json().get("models",[])], "connected":True} except Exception as e: return {"models":[],"connected":False,"error":str(e)} @app.get("/api/openrouter/models") def openrouter_models(user:dict=Depends(require_auth)): - s=_load_settings();api_key=s.get("openrouter_api_key","");base_url=s.get("openrouter_url","https://openrouter.ai/api/v1").rstrip("/") - if not api_key: return {"models":[],"connected":False,"error":"API 키가 설정되지 않았습니다"} + s=_load_settings(); api_key=s.get("openrouter_api_key","") + base_url=s.get("openrouter_url","https://openrouter.ai/api/v1").rstrip("/") + if not api_key: return {"models":[],"vision_models":[],"text_models":[],"connected":False,"error":"API 키가 설정되지 않았습니다"} try: - resp=httpx.get(f"{base_url}/models",headers={"Authorization":f"Bearer {api_key}","HTTP-Referer":"https://voicescript.local"},timeout=12.0) + resp=httpx.get(f"{base_url}/models", + headers={"Authorization":f"Bearer {api_key}","HTTP-Referer":"https://voicescript.local"},timeout=12.0) resp.raise_for_status() all_models=resp.json().get("data",[]) - vision=[m["id"] for m in all_models if any(k in m["id"].lower() for k in ["vision","claude-3","gemini","gpt-4o","llava","pixtral","qwen-vl","deepseek-vl"])] - return {"models":[m["id"] for m in all_models],"vision_models":vision,"connected":True,"total":len(all_models)} - except httpx.HTTPStatusError as e: return {"models":[],"connected":False,"error":f"HTTP {e.response.status_code}"} - except Exception as e: return {"models":[],"connected":False,"error":str(e)} + vision=[m["id"] for m in all_models if any(k in m["id"].lower() + for k in ["vision","claude-3","gemini","gpt-4o","llava","pixtral","qwen-vl","deepseek-vl"])] + text=[m["id"] for m in all_models if m["id"] not in vision] + return {"models":[m["id"] for m in all_models],"vision_models":vision,"text_models":text, + "connected":True,"total":len(all_models)} + except httpx.HTTPStatusError as e: return {"models":[],"vision_models":[],"text_models":[],"connected":False,"error":f"HTTP {e.response.status_code}"} + except Exception as e: return {"models":[],"vision_models":[],"text_models":[],"connected":False,"error":str(e)} @app.post("/api/openrouter/test") def openrouter_test(api_key:str=Form(...),base_url:str=Form("https://openrouter.ai/api/v1"),user:dict=Depends(require_auth)): try: - resp=httpx.get(f"{base_url.rstrip('/')}/models",headers={"Authorization":f"Bearer {api_key}","HTTP-Referer":"https://voicescript.local"},timeout=10.0) - resp.raise_for_status();count=len(resp.json().get("data",[])) + resp=httpx.get(f"{base_url.rstrip('/')}/models", + headers={"Authorization":f"Bearer {api_key}","HTTP-Referer":"https://voicescript.local"},timeout=10.0) + resp.raise_for_status(); count=len(resp.json().get("data",[])) return {"ok":True,"message":f"연결 성공 — {count}개 모델 사용 가능"} except httpx.HTTPStatusError as e: return {"ok":False,"message":f"인증 실패 (HTTP {e.response.status_code})"} except Exception as e: return {"ok":False,"message":f"연결 실패: {str(e)}"} @app.get("/api/settings") def get_settings(user:dict=Depends(require_auth)): - s=_load_settings();result=dict(s) - if result.get("openrouter_api_key"): - key=result["openrouter_api_key"] - result["openrouter_api_key_masked"]=key[:8]+"..."+key[-4:] if len(key)>12 else "****" - else: result["openrouter_api_key_masked"]="" - result["openrouter_api_key"]="";return result + s=_load_settings(); result=dict(s) + for field in ("openrouter_api_key","groq_api_key","openai_api_key"): + result[field+"_masked"]=_mask(result.get(field,"")) + result[field]="" + return result @app.post("/api/settings") def save_settings_endpoint( stt_ollama_model:str=Form(""),ocr_ollama_model:str=Form(""), - cpu_threads:str=Form("0"),stt_timeout:str=Form("0"),ollama_timeout:str=Form("600"), - openrouter_url:str=Form("https://openrouter.ai/api/v1"),openrouter_api_key:str=Form(""), - openrouter_stt_model:str=Form(""),openrouter_ocr_model:str=Form(""), + cpu_threads:str=Form("0"),stt_timeout:str=Form("0"), + ollama_timeout:str=Form("600"),subtitle_timeout:str=Form("600"), + openrouter_url:str=Form("https://openrouter.ai/api/v1"), + openrouter_api_key:str=Form(""),openrouter_stt_model:str=Form(""),openrouter_ocr_model:str=Form(""), + groq_api_key:str=Form(""),openai_api_key:str=Form(""), + default_stt_engine:str=Form("local"), user:dict=Depends(require_auth), ): def _int(v,d): try: return max(0,int(v)) except: return d current=_load_settings() - final_key=openrouter_api_key.strip() if openrouter_api_key.strip() else current.get("openrouter_api_key","") - data={"stt_ollama_model":stt_ollama_model,"ocr_ollama_model":ocr_ollama_model, - "cpu_threads":_int(cpu_threads,0),"stt_timeout":_int(stt_timeout,0),"ollama_timeout":_int(ollama_timeout,600), - "openrouter_url":openrouter_url.strip() or "https://openrouter.ai/api/v1", - "openrouter_api_key":final_key,"openrouter_stt_model":openrouter_stt_model,"openrouter_ocr_model":openrouter_ocr_model} - _save_settings(data);return {"ok":True,"settings":{k:v for k,v in data.items() if k!="openrouter_api_key"}} + data={ + "stt_ollama_model":stt_ollama_model,"ocr_ollama_model":ocr_ollama_model, + "cpu_threads":_int(cpu_threads,0),"stt_timeout":_int(stt_timeout,0), + "ollama_timeout":_int(ollama_timeout,600),"subtitle_timeout":_int(subtitle_timeout,600), + "openrouter_url":openrouter_url.strip() or "https://openrouter.ai/api/v1", + "openrouter_api_key":_keep(openrouter_api_key,"openrouter_api_key",current), + "openrouter_stt_model":openrouter_stt_model,"openrouter_ocr_model":openrouter_ocr_model, + "groq_api_key":_keep(groq_api_key,"groq_api_key",current), + "openai_api_key":_keep(openai_api_key,"openai_api_key",current), + "default_stt_engine":default_stt_engine or "local", + } + _save_settings(data) + result={k:v for k,v in data.items() if not k.endswith("_api_key")} + for f in ("openrouter_api_key","groq_api_key","openai_api_key"): + result[f+"_masked"]=_mask(data.get(f,"")) + return {"ok":True,"settings":result} @app.get("/api/admin/users") def admin_list_users(user:dict=Depends(require_admin)): return {"users":list_users()} @app.post("/api/admin/users") def admin_create_user(username:str=Form(...),password:str=Form(...), - perm_stt:str=Form("false"),perm_ocr:str=Form("false"), - allowed_stt_models:str=Form(""),allowed_ocr_models:str=Form(""),user:dict=Depends(require_admin)): + perm_stt:str=Form("false"),perm_ocr:str=Form("false"),perm_subtitle:str=Form("false"), + allowed_stt_models:str=Form(""),allowed_ocr_models:str=Form(""), + user:dict=Depends(require_admin)): def _p(s): return [m.strip() for m in s.split(",") if m.strip()] perms={"stt":perm_stt.lower()=="true","ocr":perm_ocr.lower()=="true", + "subtitle":perm_subtitle.lower()=="true", "allowed_stt_models":_p(allowed_stt_models),"allowed_ocr_models":_p(allowed_ocr_models)} ok,msg=create_user(username,password,perms) if not ok: raise HTTPException(400,msg) return {"ok":True,"message":msg} @app.put("/api/admin/users/{username}") -def admin_update_user(username:str,perm_stt:str=Form("false"),perm_ocr:str=Form("false"), - password:str=Form(""),allowed_stt_models:str=Form(""),allowed_ocr_models:str=Form(""),user:dict=Depends(require_admin)): +def admin_update_user(username:str, + perm_stt:str=Form("false"),perm_ocr:str=Form("false"),perm_subtitle:str=Form("false"), + password:str=Form(""),allowed_stt_models:str=Form(""),allowed_ocr_models:str=Form(""), + user:dict=Depends(require_admin)): def _p(s): return [m.strip() for m in s.split(",") if m.strip()] perms={"stt":perm_stt.lower()=="true","ocr":perm_ocr.lower()=="true", + "subtitle":perm_subtitle.lower()=="true", "allowed_stt_models":_p(allowed_stt_models),"allowed_ocr_models":_p(allowed_ocr_models)} ok,msg=update_user(username,perms,password or None) if not ok: raise HTTPException(400,msg) @@ -469,6 +549,17 @@ def admin_delete_user(username:str,user:dict=Depends(require_admin)): @app.post("/api/cleanup") def cleanup(user:dict=Depends(require_auth)): return {"removed":_cleanup_outputs()} +@app.get("/") +async def index(): + import pathlib + path=pathlib.Path("static/index.html") + resp=FileResponse(path,media_type="text/html") + resp.headers["Cache-Control"]="no-cache, no-store, must-revalidate" + resp.headers["Pragma"]="no-cache"; resp.headers["Expires"]="0" + return resp + +app.mount("/",StaticFiles(directory="static",html=True),name="static") + # ════════════════════════════════════════════════════════════════ # 유틸 @@ -479,10 +570,10 @@ def _check_size(request:Request): def _cleanup_outputs(): if OUTPUT_KEEP_SECS==0: return 0 - cutoff=time.time()-OUTPUT_KEEP_SECS;removed=0 + cutoff=time.time()-OUTPUT_KEEP_SECS; removed=0 for f in glob.glob(os.path.join(OUTPUT_DIR,"*")): try: - if os.path.getmtime(f)MAX_UPLOAD_BYTES: - await f.close();os.remove(path) + await f.close(); os.remove(path) raise HTTPException(413,f"파일이 너무 큽니다. 최대 {MAX_UPLOAD_BYTES//1024//1024}MB") await f.write(chunk) - -app.mount("/", StaticFiles(directory="static", html=True), name="static") diff --git a/app/static/index.html b/app/static/index.html index 778999c..e45e9f1 100644 --- a/app/static/index.html +++ b/app/static/index.html @@ -60,7 +60,6 @@ header h1 span{color:var(--accent)} .nav-tab.admin-tab.active{color:var(--orange);border-bottom-color:var(--orange)} .nav-tab.settings-tab.active{color:var(--blue);border-bottom-color:var(--blue)} .nav-tab.history-tab.active{color:var(--purple);border-bottom-color:var(--purple)} -.nav-tab.subtitle-tab.active{color:var(--blue);border-bottom-color:var(--blue)} /* ── PAGE ── */ .page{display:none;flex:1;flex-direction:column} @@ -252,68 +251,61 @@ textarea.cprompt{width:100%;background:var(--surf);border:1px solid var(--border .ollama-status{font-family:var(--mono);font-size:.63rem;padding:4px 9px;border-radius:2px} .ollama-status.ok{background:rgba(0,229,160,.1);color:var(--accent);border:1px solid rgba(0,229,160,.2)} .ollama-status.fail{background:rgba(255,107,53,.1);color:var(--warn);border:1px solid rgba(255,107,53,.2)} -/* ── 자막 탭 ── */ -#page-subtitle{display:none;flex-direction:column} -#page-subtitle.active{display:flex} -.subtitle-wrap{max-width:860px;margin:0 auto;padding:28px 16px;width:100%} -.step-indicator{display:flex;align-items:center;gap:0;margin-bottom:28px} -.step-dot{width:32px;height:32px;border-radius:50%;border:2px solid var(--border2);background:var(--surf);display:flex;align-items:center;justify-content:center;font-family:var(--mono);font-size:.72rem;font-weight:600;color:var(--muted);transition:all .3s;flex-shrink:0} -.step-dot.active{border-color:var(--blue);background:rgba(77,166,255,.1);color:var(--blue)} -.step-dot.done{border-color:var(--accent);background:rgba(0,229,160,.1);color:var(--accent)} -.step-line{flex:1;height:2px;background:var(--border);transition:background .3s} -.step-line.done{background:var(--accent)} -.step-labels{display:flex;justify-content:space-between;margin-top:6px;margin-bottom:20px} -.step-label{font-family:var(--mono);font-size:.6rem;color:var(--muted);text-align:center;flex:1;letter-spacing:.06em;text-transform:uppercase} -.step-label.active{color:var(--blue)}.step-label.done{color:var(--accent)} -.sub-card{background:var(--surf);border:1px solid var(--border2);border-radius:6px;padding:20px;margin-bottom:14px} -.sub-card h3{font-family:var(--mono);font-size:.68rem;letter-spacing:.1em;color:var(--muted);text-transform:uppercase;margin-bottom:14px;padding-bottom:10px;border-bottom:1px solid var(--border)} -.lang-grid{display:grid;grid-template-columns:1fr 1fr;gap:10px} -.sub-select{width:100%;background:var(--surf2);border:1px solid var(--border2);color:var(--text);padding:9px 10px;border-radius:3px;font-family:var(--mono);font-size:.78rem;outline:none;cursor:pointer;appearance:none;-webkit-appearance:none;background-image:url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='10' height='6'%3E%3Cpath d='M0 0l5 6 5-6z' fill='%2352526a'/%3E%3C/svg%3E");background-repeat:no-repeat;background-position:right 10px center} -.sub-select:focus{border-color:var(--blue)} -.fmt-row{display:grid;grid-template-columns:1fr 1fr 1fr;gap:8px;margin-top:6px} -.fmt-btn{padding:9px;background:var(--surf);border:1px solid var(--border2);color:var(--muted);border-radius:3px;font-family:var(--mono);font-size:.7rem;cursor:pointer;transition:all .15s;text-align:center;text-transform:uppercase} -.fmt-btn.active{background:rgba(77,166,255,.08);border-color:#3a7cc4;color:var(--blue)} -.engine-row{display:grid;grid-template-columns:1fr 1fr;gap:8px;margin-top:6px} -.sub-prog-box{background:var(--surf2);border:1px solid var(--border2);border-radius:6px;padding:18px;margin-bottom:14px;display:none} -.sub-prog-steps{display:flex;flex-direction:column;gap:10px} -.sub-step-row{display:flex;align-items:center;gap:12px} -.sub-step-icon{width:26px;height:26px;border-radius:50%;border:2px solid var(--border2);display:flex;align-items:center;justify-content:center;font-size:.75rem;flex-shrink:0;transition:all .3s} -.sub-step-icon.waiting{border-color:var(--border2);color:var(--muted)} -.sub-step-icon.running{border-color:var(--blue);background:rgba(77,166,255,.1);color:var(--blue)} -.sub-step-icon.done{border-color:var(--accent);background:rgba(0,229,160,.1);color:var(--accent)} -.sub-step-icon.failed{border-color:var(--warn);background:rgba(255,107,53,.1);color:var(--warn)} -.sub-step-text{flex:1} -.sub-step-name{font-family:var(--mono);font-size:.72rem;color:var(--text)} -.sub-step-msg{font-family:var(--mono);font-size:.62rem;color:var(--muted);margin-top:2px} -.sub-prog-bar-wrap{height:3px;background:var(--border);border-radius:2px;overflow:hidden;margin-top:14px} -.sub-prog-bar{height:100%;background:var(--blue);border-radius:2px;transition:width .5s ease;width:0%} -.sub-result-card{background:var(--surf);border:1px solid rgba(0,229,160,.2);border-radius:6px;padding:20px;display:none} -.sub-result-title{font-family:var(--mono);font-size:.72rem;letter-spacing:.1em;color:var(--accent);text-transform:uppercase;margin-bottom:14px} -.sub-info-grid{display:grid;grid-template-columns:1fr 1fr;gap:8px;margin-bottom:14px} -.sub-info-item{background:var(--surf2);border:1px solid var(--border);border-radius:3px;padding:10px 12px} -.sub-info-label{font-family:var(--mono);font-size:.58rem;color:var(--muted);letter-spacing:.08em;text-transform:uppercase;margin-bottom:3px} -.sub-info-val{font-family:var(--mono);font-size:.8rem;color:var(--text);font-weight:600} -.sub-dl-grid{display:grid;grid-template-columns:1fr 1fr;gap:8px} -.sub-dl-btn{padding:11px;background:none;border:1px solid var(--border2);color:var(--text);border-radius:4px;font-family:var(--mono);font-size:.7rem;cursor:pointer;transition:all .15s;text-align:center;display:flex;flex-direction:column;align-items:center;gap:4px} -.sub-dl-btn:hover{border-color:var(--accent);color:var(--accent);background:rgba(0,229,160,.05)} -.sub-dl-btn .dl-icon{font-size:1.2rem;opacity:.6} -.sub-dl-btn .dl-label{font-weight:600}.sub-dl-btn .dl-lang{font-size:.58rem;color:var(--muted)} -.sub-dl-btn.trans{border-color:#3a7cc4;color:var(--blue)} -.sub-dl-btn.trans:hover{background:rgba(77,166,255,.07)} -/* 배치 큐 공통 */ -.batch-queue{margin-top:14px;display:flex;flex-direction:column;gap:6px;max-height:260px;overflow-y:auto} -.batch-item{display:grid;grid-template-columns:1fr auto auto;align-items:center;gap:8px;padding:9px 12px;background:var(--surf);border:1px solid var(--border2);border-radius:4px;transition:border-color .2s} -.batch-item.running{border-color:var(--accent2)}.batch-item.done{border-color:rgba(0,229,160,.3)}.batch-item.failed{border-color:rgba(255,107,53,.3)}.batch-item.waiting{opacity:.6} +/* ── 활성 작업 배너 ── */ +#active-tasks-banner{background:rgba(77,166,255,.08);border-bottom:1px solid rgba(77,166,255,.2);padding:8px 20px;display:none;font-family:var(--mono);font-size:.68rem;color:var(--blue);flex-wrap:wrap;gap:6px;align-items:center} +/* ── 배치 큐 ── */ +.batch-queue{margin-top:12px;display:flex;flex-direction:column;gap:5px;max-height:260px;overflow-y:auto} +.batch-item{display:grid;grid-template-columns:1fr auto auto;align-items:center;gap:8px;padding:8px 12px;background:var(--surf);border:1px solid var(--border2);border-radius:4px} +.batch-item.running{border-color:var(--accent2)}.batch-item.done{border-color:rgba(0,229,160,.3)}.batch-item.failed{border-color:rgba(255,107,53,.3)}.batch-item.cancelled{border-color:rgba(255,107,53,.2);opacity:.6} .bi-name{font-family:var(--mono);font-size:.72rem;overflow:hidden;text-overflow:ellipsis;white-space:nowrap} .bi-status{font-family:var(--mono);font-size:.6rem;padding:3px 7px;border-radius:2px;white-space:nowrap} -.bi-status.waiting{background:rgba(255,255,255,.04);color:var(--muted);border:1px solid var(--border)}.bi-status.running{background:rgba(0,229,160,.07);color:var(--accent);border:1px solid rgba(0,229,160,.2)}.bi-status.done{background:rgba(0,229,160,.07);color:var(--accent2);border:1px solid rgba(0,229,160,.2)}.bi-status.failed{background:rgba(255,107,53,.07);color:var(--warn);border:1px solid rgba(255,107,53,.2)} -.bi-dl{font-family:var(--mono);font-size:.6rem;padding:3px 8px;border:1px solid var(--border2);background:none;color:var(--text);border-radius:2px;cursor:pointer;white-space:nowrap}.bi-dl:hover{border-color:var(--accent);color:var(--accent)} +.bi-status.waiting{background:rgba(255,255,255,.04);color:var(--muted);border:1px solid var(--border)}.bi-status.running{background:rgba(0,229,160,.07);color:var(--accent);border:1px solid rgba(0,229,160,.2)}.bi-status.done{background:rgba(0,229,160,.07);color:var(--accent2);border:1px solid rgba(0,229,160,.2)}.bi-status.failed,.bi-status.cancelled{background:rgba(255,107,53,.07);color:var(--warn);border:1px solid rgba(255,107,53,.2)} +.bi-dl{font-family:var(--mono);font-size:.6rem;padding:3px 8px;border:1px solid var(--border2);background:none;color:var(--text);border-radius:2px;cursor:pointer}.bi-dl:hover{border-color:var(--accent);color:var(--accent)} .bi-prog{height:2px;background:var(--accent);border-radius:1px;transition:width .4s;margin-top:3px} -.batch-summary{font-family:var(--mono);font-size:.68rem;color:var(--muted);margin-top:8px;display:flex;gap:12px;flex-wrap:wrap}.batch-summary span{color:var(--text)} -.batch-add-btn{margin-top:8px;padding:7px 14px;background:none;border:1px dashed var(--border2);color:var(--muted);border-radius:3px;font-family:var(--mono);font-size:.68rem;cursor:pointer;width:100%;transition:all .15s}.batch-add-btn:hover{border-color:var(--accent);color:var(--accent)} -.batch-clear-btn{padding:7px 14px;background:none;border:1px solid var(--border2);color:var(--muted);border-radius:3px;font-family:var(--mono);font-size:.68rem;cursor:pointer;transition:all .15s}.batch-clear-btn:hover{border-color:var(--warn);color:var(--warn)} -.batch-actions{display:flex;gap:8px;margin-top:10px} -@media(min-width:768px){.subtitle-wrap{padding:32px}.lang-grid{grid-template-columns:1fr 1fr}.sub-info-grid{grid-template-columns:1fr 1fr 1fr 1fr}.sub-dl-grid{grid-template-columns:repeat(4,1fr)}} +.batch-summary{font-family:var(--mono);font-size:.66rem;color:var(--muted);margin-top:6px}.batch-summary span{color:var(--text)} +.batch-actions{display:flex;gap:8px;margin-top:8px} +.batch-add-btn{flex:1;padding:7px;background:none;border:1px dashed var(--border2);color:var(--muted);border-radius:3px;font-family:var(--mono);font-size:.68rem;cursor:pointer;transition:all .15s}.batch-add-btn:hover{border-color:var(--accent);color:var(--accent)} +.batch-clear-btn{padding:7px 12px;background:none;border:1px solid var(--border2);color:var(--muted);border-radius:3px;font-family:var(--mono);font-size:.68rem;cursor:pointer}.batch-clear-btn:hover{border-color:var(--warn);color:var(--warn)} +/* ── 자막 페이지 ── */ +#page-subtitle{display:none;flex-direction:column}#page-subtitle.active{display:flex} +.subtitle-wrap{max-width:860px;margin:0 auto;padding:28px 16px;width:100%} +.sub-card{background:var(--surf);border:1px solid var(--border2);border-radius:6px;padding:20px;margin-bottom:14px} +.sub-card h3{font-family:var(--mono);font-size:.68rem;letter-spacing:.1em;color:var(--muted);text-transform:uppercase;margin-bottom:14px;padding-bottom:10px;border-bottom:1px solid var(--border)} +.sub-select{width:100%;background:var(--surf2);border:1px solid var(--border2);color:var(--text);padding:9px 10px;border-radius:3px;font-family:var(--mono);font-size:.78rem;outline:none;cursor:pointer;appearance:none;-webkit-appearance:none;background-image:url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='10' height='6'%3E%3Cpath d='M0 0l5 6 5-6z' fill='%2352526a'/%3E%3C/svg%3E");background-repeat:no-repeat;background-position:right 10px center;margin-top:4px} +.sub-select:focus{border-color:var(--blue)} +.lang-grid{display:grid;grid-template-columns:1fr 1fr;gap:12px} +.fmt-row{display:flex;gap:8px;margin-top:6px} +.fmt-btn{flex:1;padding:9px;background:var(--surf);border:1px solid var(--border2);color:var(--muted);border-radius:3px;font-family:var(--mono);font-size:.68rem;cursor:pointer;text-align:center;transition:all .15s;text-transform:uppercase} +.fmt-btn.active{background:rgba(77,166,255,.08);border-color:#3a7cc4;color:var(--blue)} +.step-indicator{display:flex;align-items:center;gap:0;margin-bottom:6px} +.step-dot{width:30px;height:30px;border-radius:50%;border:2px solid var(--border2);background:var(--surf);display:flex;align-items:center;justify-content:center;font-family:var(--mono);font-size:.7rem;font-weight:600;color:var(--muted);transition:all .3s;flex-shrink:0} +.step-dot.active{border-color:var(--blue);background:rgba(77,166,255,.1);color:var(--blue)}.step-dot.done{border-color:var(--accent);background:rgba(0,229,160,.1);color:var(--accent)} +.step-line{flex:1;height:2px;background:var(--border);transition:background .3s}.step-line.done{background:var(--accent)} +.step-labels{display:flex;justify-content:space-between;margin-top:5px;margin-bottom:18px} +.step-label{font-family:var(--mono);font-size:.58rem;color:var(--muted);text-align:center;flex:1;letter-spacing:.05em;text-transform:uppercase} +.step-label.active{color:var(--blue)}.step-label.done{color:var(--accent)} +.sub-prog-box{background:var(--surf2);border:1px solid var(--border2);border-radius:6px;padding:18px;margin-bottom:14px;display:none} +.sub-step-row{display:flex;align-items:flex-start;gap:12px;margin-bottom:10px} +.sub-step-icon{width:26px;height:26px;border-radius:50%;border:2px solid var(--border2);display:flex;align-items:center;justify-content:center;font-size:.72rem;flex-shrink:0;transition:all .3s} +.sub-step-icon.waiting{border-color:var(--border2);color:var(--muted)}.sub-step-icon.running{border-color:var(--blue);background:rgba(77,166,255,.1);color:var(--blue)}.sub-step-icon.done{border-color:var(--accent);background:rgba(0,229,160,.1);color:var(--accent)}.sub-step-icon.failed{border-color:var(--warn);background:rgba(255,107,53,.1);color:var(--warn)} +.sub-step-name{font-family:var(--mono);font-size:.72rem;color:var(--text);font-weight:600} +.sub-step-msg{font-family:var(--mono);font-size:.62rem;color:var(--muted);margin-top:3px;word-break:break-all} +.sub-prog-bar-wrap{height:3px;background:var(--border);border-radius:2px;overflow:hidden;margin-top:12px} +.sub-prog-bar{height:100%;background:var(--blue);border-radius:2px;transition:width .5s ease;width:0%} +.sub-result-card{background:var(--surf);border:1px solid rgba(0,229,160,.2);border-radius:6px;padding:20px;display:none} +.sub-info-grid{display:grid;grid-template-columns:repeat(2,1fr);gap:8px;margin-bottom:14px} +.sub-info-item{background:var(--surf2);border:1px solid var(--border);border-radius:3px;padding:10px 12px} +.sub-info-label{font-family:var(--mono);font-size:.58rem;color:var(--muted);text-transform:uppercase;letter-spacing:.08em;margin-bottom:3px} +.sub-info-val{font-family:var(--mono);font-size:.82rem;color:var(--text);font-weight:600} +.sub-dl-grid{display:grid;grid-template-columns:1fr 1fr;gap:8px} +.sub-dl-btn{padding:11px;background:none;border:1px solid var(--border2);color:var(--text);border-radius:4px;font-family:var(--mono);font-size:.68rem;cursor:pointer;transition:all .15s;text-align:center;display:flex;flex-direction:column;align-items:center;gap:4px} +.sub-dl-btn:hover{border-color:var(--accent);color:var(--accent)} +.sub-dl-btn.trans{border-color:#3a7cc4;color:var(--blue)}.sub-dl-btn.trans:hover{background:rgba(77,166,255,.07)} +.btn-cancel{padding:8px 16px;background:rgba(255,107,53,.08);border:1px solid rgba(255,107,53,.3);color:var(--warn);border-radius:3px;font-family:var(--mono);font-size:.7rem;cursor:pointer;transition:all .15s} +.btn-cancel:hover{background:rgba(255,107,53,.15)} +/* 이력 자막 뱃지 */ +.hist-type-badge.subtitle{background:rgba(77,166,255,.1);color:var(--blue);border:1px solid rgba(77,166,255,.2)} +@media(min-width:768px){.sub-info-grid{grid-template-columns:repeat(4,1fr)}.sub-dl-grid{grid-template-columns:repeat(4,1fr)}} /* ── ADMIN ── */ #page-admin{display:none;flex-direction:column} @@ -415,7 +407,7 @@ textarea.cprompt{width:100%;background:var(--surf);border:1px solid var(--border - + @@ -426,9 +418,9 @@ textarea.cprompt{width:100%;background:var(--surf);border:1px solid var(--border
파일 업로드
- + 🎵 -
탭하거나 드래그하여 선택
음성·영상 파일 복수 선택 가능
+
탭하거나 드래그하여 선택
음성·영상 복수 선택 가능
mp3 · wav · m4a · ogg · flac · mp4 · webm
@@ -436,19 +428,24 @@ textarea.cprompt{width:100%;background:var(--surf);border:1px solid var(--border
- - + +
STT 엔진
+
후처리 모델
+
+
OpenRouter 교정 모델
+ +
처리 중...0%
@@ -491,7 +488,7 @@ textarea.cprompt{width:100%;background:var(--surf);border:1px solid var(--border
🖼 -
탭하거나 드래그하여 선택
복수 이미지 동시 선택 가능
+
탭하거나 드래그하여 선택
복수 선택 가능
jpg · png · bmp · tiff · webp · gif
@@ -500,14 +497,15 @@ textarea.cprompt{width:100%;background:var(--surf);border:1px solid var(--border
- - + +
OCR 엔진
+
Vision 모델
@@ -570,6 +568,7 @@ textarea.cprompt{width:100%;background:var(--surf);border:1px solid var(--border +
@@ -623,6 +622,11 @@ textarea.cprompt{width:100%;background:var(--surf);border:1px solid var(--border
11b 이상 모델은 300+ 권장
+
+ + +
번역·교정 시 응답 대기 시간
+
@@ -640,8 +644,48 @@ textarea.cprompt{width:100%;background:var(--surf);border:1px solid var(--border + +
+

🌐 OpenRouter 외부 AI 연동

+ +
+ + +
+ + + + +
+ + +
+

🔑 STT 엔진 API 키

+ + + + + + +
+
- +
@@ -652,32 +696,26 @@ textarea.cprompt{width:100%;background:var(--surf);border:1px solid var(--border

🎬 자막 생성

- -
-
1
-
-
2
-
+
1
+
2
3
오디오 추출 - 음성 인식 + 음성 인식·교정 번역 (선택)
-

📁 영상 / 오디오 파일

-
- +
+ 🎬 -
탭하거나 드래그하여 선택
mp4 · mkv · avi · mov · h.264/h.265 등
+
탭하거나 드래그하여 선택
mp4 · mkv · h.264/h.265 · mp3 · wav 등
@@ -688,27 +726,16 @@ textarea.cprompt{width:100%;background:var(--surf);border:1px solid var(--border
음성 언어 (원어)
@@ -720,89 +747,86 @@ textarea.cprompt{width:100%;background:var(--surf);border:1px solid var(--border
+
+
STT 엔진
+
+ + + +
+
-

🌐 번역 설정 (선택사항)

-
- 빈칸으로 두면 원어 자막만 생성합니다 +

🧠 Step 2 — LLM 교정 (선택)

+ + +
+ +
+

🌐 Step 3 — 번역 (선택)

+
비워두면 원어 자막만 생성됩니다
번역 대상 언어
- +
+ + +
-
-
-
-
-
-
Step 1 — 오디오 추출
-
ffmpeg으로 오디오 트랙 추출
-
-
-
-
-
-
Step 2 — 음성 인식
-
Whisper로 자막 생성
-
-
-
-
-
-
Step 3 — 번역 (선택)
-
LLM 번역 (미선택 시 건너뜀)
-
-
-
+
Step 1 — 오디오 추출
ffmpeg 오디오 추출 대기
+
Step 2 — 음성 인식 · LLM 교정
Whisper 인식 대기
+
Step 3 — 번역
번역 대기
-
-
✓ 자막 생성 완료
+
✓ 자막 생성 완료
감지 언어
재생 시간
@@ -835,6 +859,7 @@ textarea.cprompt{width:100%;background:var(--surf);border:1px solid var(--border
+
-
⚙️ 설정
- ${isStt?` -
Whisper${esc(set.model||'—')}
-
언어${esc(set.language||'—')}
-
연산${esc(set.compute_type||'—')}
-
CPU${set.cpu_threads===0?'자동':set.cpu_threads+'스레드'}
-
STT 타임아웃${set.stt_timeout===0?'무제한':set.stt_timeout+'초'}
-
Ollama 교정${set.use_ollama?(set.ollama_model||'기본모델'):'없음'}
- `:` -
엔진${esc(set.backend||'—')}
-
모드${esc(set.mode||'—')}
-
언어${esc(set.ocr_lang||'—')}
- ${set.backend==='ollama'?`
모델${esc(set.ollama_model||'—')}
타임아웃${set.ollama_timeout||600}초
`:''} - ${set.custom_prompt?`
프롬프트${esc(set.custom_prompt)}
`:''} - `} -
-
📤 결과
- ${h.status==='failed'?`
오류${esc(out.error||'알 수 없음')}
`:''} - ${h.status==='processing'?`
상태처리 중...
`:''} - ${h.status==='success'&&isStt?` -
언어${esc(out.language||'—')}
-
재생시간${fmtDur(out.duration_s)}
-
세그먼트${out.segments||0}개
- ${out.ollama_used?`
Ollama${esc(out.ollama_model||'')}
`:''} - `:''} - ${h.status==='success'&&!isStt?` -
줄 수${out.line_count||0}줄
-
${out.table_count||0}개
- ${out.ollama_model?`
모델${esc(out.ollama_model)}
`:''} - `:''} -
- ${h.status==='success'&&out.text_preview?`
📄 미리보기
${esc(out.text_preview)}
`:''} - ${h.status==='success'?`
- ${isStt&&out.filename?``:''} - ${!isStt&&out.txt_file?``:''} - ${!isStt&&out.xlsx_file?``:''} -
`:''} +
⚙️ 설정
${settingsHtml}
+
📤 결과
${resultHtml}
+ ${previewHtml}${dlHtml}
`; list.appendChild(card); }); @@ -1473,6 +1346,199 @@ function renderPagination(){ const info=document.createElement('span');info.style.cssText='font-family:var(--mono);font-size:.63rem;color:var(--muted);padding:0 8px';info.textContent=`${histTotal}건`;pg.appendChild(info); } +// ══ 자막 ══ +const subDrop=document.getElementById('sub-drop'),subInput=document.getElementById('sub-input'); +let subFile=null, subTaskId=null, subFmt='srt', subTransVia='ollama', subRefineVia='ollama', subSttEng='local'; + +subInput.addEventListener('change',()=>setSubFile(subInput.files[0])); +subDrop.addEventListener('dragover',e=>{e.preventDefault();subDrop.classList.add('dragover')}); +subDrop.addEventListener('dragleave',()=>subDrop.classList.remove('dragover')); +subDrop.addEventListener('drop',e=>{e.preventDefault();subDrop.classList.remove('dragover');setSubFile(e.dataTransfer.files[0])}); + +function setSubFile(f){ + if(!f)return; subFile=f; + document.getElementById('sub-info').style.display='block'; + document.getElementById('sub-fname').textContent=f.name; + document.getElementById('sub-fsize').textContent=fmtBytes(f.size); + document.getElementById('sub-btn').disabled=false; + document.getElementById('sub-err').style.display='none'; +} + +// 포맷 버튼 +document.querySelectorAll('#page-subtitle .fmt-btn[data-fmt]').forEach(btn=>{ + btn.addEventListener('click',()=>{document.querySelectorAll('#page-subtitle .fmt-btn[data-fmt]').forEach(b=>b.classList.remove('active'));btn.classList.add('active');subFmt=btn.dataset.fmt;}); +}); + +// STT 엔진 버튼 +document.querySelectorAll('button[data-stt-eng]').forEach(btn=>{ + btn.addEventListener('click',()=>{document.querySelectorAll('button[data-stt-eng]').forEach(b=>b.classList.remove('active'));btn.classList.add('active');subSttEng=btn.dataset.sttEng;}); +}); + +// 교정 엔진 버튼 +document.querySelectorAll('button[data-refine-via]').forEach(btn=>{ + btn.addEventListener('click',()=>{document.querySelectorAll('button[data-refine-via]').forEach(b=>b.classList.remove('active'));btn.classList.add('active');subRefineVia=btn.dataset.refineVia;fillSubModels();}); +}); + +// 번역 엔진 버튼 +document.querySelectorAll('button[data-trans-via]').forEach(btn=>{ + btn.addEventListener('click',()=>{document.querySelectorAll('button[data-trans-via]').forEach(b=>b.classList.remove('active'));btn.classList.add('active');subTransVia=btn.dataset.transVia;fillSubModels();}); +}); + +document.getElementById('sub-refine-enable')?.addEventListener('change',function(){ + document.getElementById('sub-refine-opts').style.display=this.checked?'block':'none'; + if(this.checked)fillSubModels(); +}); + +document.getElementById('sub-trans-lang')?.addEventListener('change',function(){ + document.getElementById('sub-trans-engine-wrap').style.display=this.value?'block':'none'; + if(this.value)fillSubModels(); +}); + +function fillSubModels(){ + const refSel=document.getElementById('sub-refine-model'); + const transSel=document.getElementById('sub-trans-model'); + const refList=subRefineVia==='openrouter'?orModels:ollamaModels; + const transList=subTransVia==='openrouter'?orModels:ollamaModels; + const _fill=(sel,list,def)=>{if(!sel)return;const cur=sel.value||def||'';sel.innerHTML='';list.forEach(m=>{const o=document.createElement('option');o.value=m;o.textContent=m;if(m===cur)o.selected=true;sel.appendChild(o);});}; + _fill(refSel,refList,appSettings.stt_ollama_model); + _fill(transSel,transList,appSettings.stt_ollama_model); +} + +function setSubStep(step,status){ + const icon=document.getElementById('sub-sicon-'+step);if(!icon)return; + icon.className='sub-step-icon '+status; + icon.textContent={waiting:'⏳',running:'⚡',done:'✓',failed:'✗'}[status]||'⏳'; + const dot=document.getElementById('sdot-'+step);if(dot)dot.className='step-dot '+(status==='done'?'done':status==='running'?'active':''); + if(step>1){const ln=document.getElementById('sline-'+(step-1));if(ln)ln.className='step-line '+(status!=='waiting'?'done':'');} + const lbl=document.getElementById('slabel-'+step);if(lbl)lbl.className='step-label '+(status==='done'?'done':status==='running'?'active':''); + if(status!=='waiting'){const ln=document.getElementById('sline-'+step);if(ln)ln.className='step-line '+(status==='done'?'done':'');} +} + +document.getElementById('sub-btn').addEventListener('click',async()=>{ + if(!subFile)return; + const transLang=document.getElementById('sub-trans-lang').value; + const useRefine=document.getElementById('sub-refine-enable').checked; + const fd=new FormData(); + fd.append('file',subFile); + fd.append('src_language',document.getElementById('sub-src-lang').value||''); + fd.append('subtitle_fmt',subFmt); + fd.append('stt_engine',subSttEng); + fd.append('refine_model',useRefine?(document.getElementById('sub-refine-model')?.value||''):''); + fd.append('refine_via',subRefineVia); + fd.append('translate_to',transLang); + fd.append('trans_model',transLang?(document.getElementById('sub-trans-model')?.value||''):''); + fd.append('trans_via',subTransVia); + + document.getElementById('sub-btn').disabled=true; + document.getElementById('sub-cancel-btn').style.display='block'; + document.getElementById('sub-err').style.display='none'; + document.getElementById('sub-prog-box').style.display='block'; + document.getElementById('sub-result-card').style.display='none'; + document.getElementById('sub-prog-bar').style.width='0%'; + [1,2,3].forEach(s=>setSubStep(s,'waiting')); setSubStep(1,'running'); + + try{ + const r=await api('POST','/api/subtitle',fd); const d=await r.json(); + if(!r.ok)throw new Error(d.detail||'업로드 실패'); + subTaskId=d.task_id; + addActiveTask(d.task_id,{type:'자막',filename:subFile.name,startedAt:Date.now()}); + renderActiveTasksBanner(); + pollSubtitle(d.task_id,!!transLang); + }catch(e){ + showErr('sub-err',e.message); + document.getElementById('sub-btn').disabled=false; + document.getElementById('sub-cancel-btn').style.display='none'; + document.getElementById('sub-prog-box').style.display='none'; + } +}); + +document.getElementById('sub-cancel-btn')?.addEventListener('click',async()=>{ + if(!subTaskId||!confirm('자막 생성을 취소하시겠습니까?'))return; + try{ + await api('POST','/api/cancel/'+subTaskId); + removeActiveTask(subTaskId); renderActiveTasksBanner(); + document.getElementById('sub-cancel-btn').style.display='none'; + document.getElementById('sub-btn').disabled=false; + showErr('sub-err','작업이 취소되었습니다'); + document.getElementById('sub-prog-box').style.display='none'; + [1,2,3].forEach(s=>setSubStep(s,'waiting')); + subTaskId=null; + }catch{} +}); + +const LANG_NAMES_MAP={ko:'한국어',en:'English',ja:'日本語',zh:'中文(简体)','zh-tw':'中文(繁體)',fr:'Français',de:'Deutsch',es:'Español',it:'Italiano',pt:'Português',ru:'Русский',ar:'العربية',vi:'Tiếng Việt',th:'ไทย',id:'Bahasa Indonesia',nl:'Nederlands',pl:'Polski',tr:'Türkçe',sv:'Svenska',hi:'हिन्दी'}; +function langName(code){return LANG_NAMES_MAP[code]||code||'알 수 없음'} + +function pollSubtitle(taskId,hasTranslation){ + let prevStep=0; + const t=setInterval(async()=>{ + try{ + const r=await api('GET','/api/status/'+taskId);if(r.status===401){clearInterval(t);showLogin();return} + const d=await r.json(); + if(['progress','success'].includes(d.state)){ + const step=d.step||1; const prog=d.progress||0; + document.getElementById('sub-prog-bar').style.width=prog+'%'; + if(step!==prevStep){ + if(prevStep>0&&prevStepsetSubStep(s,'done')); + document.getElementById('sub-prog-bar').style.width='100%'; + document.getElementById('sub-cancel-btn').style.display='none'; + setTimeout(()=>showSubResult(d),400); + } else if(['failure','cancelled'].includes(d.state)){ + clearInterval(t); removeActiveTask(taskId); renderActiveTasksBanner(); + if(prevStep>0)setSubStep(prevStep,'failed'); + document.getElementById('sub-cancel-btn').style.display='none'; + document.getElementById('sub-btn').disabled=false; + showErr('sub-err',d.message||(d.state==='cancelled'?'취소됨':'자막 생성 실패')); + } + }catch{} + },2000); +} + +function showSubResult(d){ + document.getElementById('sub-prog-box').style.display='none'; + const rc=document.getElementById('sub-result-card');rc.style.display='block'; + document.getElementById('sub-res-lang').textContent=langName(d.detected_language); + document.getElementById('sub-res-dur').textContent=fmtDur(d.duration); + document.getElementById('sub-res-segs').textContent=(d.segment_count||0)+'개'; + document.getElementById('sub-res-trans').textContent=d.translated?langName(d.translate_to):'없음'; + const grid=document.getElementById('sub-dl-grid');grid.innerHTML=''; + const addBtn=(label,lang,file,cls='')=>{ + if(!file)return; + const ext=file.split('.').pop().toUpperCase(); + const btn=document.createElement('button');btn.className='sub-dl-btn '+cls; + btn.innerHTML=`📄${ext} ${label}${langName(lang)}`; + btn.onclick=()=>dlFile(file);grid.appendChild(btn); + }; + addBtn('원어',d.detected_language,d.srt_orig); + addBtn('원어',d.detected_language,d.vtt_orig); + addBtn('번역',d.translate_to,d.srt_trans,'trans'); + addBtn('번역',d.translate_to,d.vtt_trans,'trans'); + document.getElementById('sub-btn').disabled=false; +} + +document.getElementById('sub-new')?.addEventListener('click',()=>{ + subFile=null;subInput.value='';subTaskId=null; + document.getElementById('sub-info').style.display='none'; + document.getElementById('sub-prog-box').style.display='none'; + document.getElementById('sub-result-card').style.display='none'; + document.getElementById('sub-err').style.display='none'; + document.getElementById('sub-cancel-btn').style.display='none'; + document.getElementById('sub-btn').disabled=true; + document.getElementById('sub-prog-bar').style.width='0%'; + document.getElementById('sub-refine-enable').checked=false; + document.getElementById('sub-refine-opts').style.display='none'; + [1,2,3].forEach(s=>setSubStep(s,'waiting')); +}); + // ══ ADMIN ══ // 모델 체크박스 렌더링 헬퍼 @@ -1550,6 +1616,7 @@ document.getElementById('btn-add-user').addEventListener('click', async () => { fd.append('username', u); fd.append('password', p); fd.append('perm_stt', document.getElementById('new-perm-stt').checked ? 'true' : 'false'); fd.append('perm_ocr', document.getElementById('new-perm-ocr').checked ? 'true' : 'false'); + fd.append('perm_subtitle', document.getElementById('new-perm-subtitle')?.checked ? 'true' : 'false'); fd.append('allowed_stt_models', getCheckedModels(document.getElementById('new-stt-model-checks')).join(',')); fd.append('allowed_ocr_models', getCheckedModels(document.getElementById('new-ocr-model-checks')).join(',')); try { @@ -1604,6 +1671,7 @@ document.getElementById('btn-modal-save').addEventListener('click', async () => const fd = new FormData(); fd.append('perm_stt', document.getElementById('edit-perm-stt').checked ? 'true' : 'false'); fd.append('perm_ocr', document.getElementById('edit-perm-ocr').checked ? 'true' : 'false'); + fd.append('perm_subtitle', document.getElementById('edit-perm-subtitle')?.checked ? 'true' : 'false'); fd.append('allowed_stt_models', getCheckedModels(document.getElementById('edit-stt-model-checks')).join(',')); fd.append('allowed_ocr_models', getCheckedModels(document.getElementById('edit-ocr-model-checks')).join(',')); const pw = document.getElementById('edit-password').value; @@ -1639,34 +1707,336 @@ function fmtTime(s){const m=Math.floor(s/60),ss=Math.floor(s%60);return String(m function esc(s){return String(s||'').replace(/&/g,'&').replace(//g,'>')} async function copyText(text,btn){try{await navigator.clipboard.writeText(text);const o=btn.textContent;btn.textContent='복사됨 ✓';setTimeout(()=>btn.textContent=o,1500)}catch{}} -// ══ OPENROUTER ══ -let orModels=[],orVisionModels=[]; -async function loadOrModels(){ - try{const r=await api('GET','/api/openrouter/models');const d=await r.json(); - if(d.connected){orModels=d.models||[];orVisionModels=d.vision_models||[];populateOrSelects();} + +// ══ 상태 ══ + +// ══ 활성 작업 추적 (페이지 닫혀도 유지) ══ +const ACTIVE_TASKS_KEY='vs_active_tasks'; // {taskId: {type, filename, startedAt}} +function saveActiveTasks(tasks){ localStorage.setItem(ACTIVE_TASKS_KEY,JSON.stringify(tasks)); } +function loadActiveTasks(){ try{ return JSON.parse(localStorage.getItem(ACTIVE_TASKS_KEY)||'{}'); } catch{ return {}; } } +function addActiveTask(taskId, meta){ const t=loadActiveTasks(); t[taskId]=meta; saveActiveTasks(t); } +function removeActiveTask(taskId){ const t=loadActiveTasks(); delete t[taskId]; saveActiveTasks(t); } + +// 활성 작업 배너 (헤더 아래) +function renderActiveTasksBanner(){ + let banner=document.getElementById('active-tasks-banner'); + if(!banner){ + banner=document.createElement('div'); + banner.id='active-tasks-banner'; + banner.style.cssText='background:rgba(77,166,255,.08);border-bottom:1px solid rgba(77,166,255,.2);padding:8px 20px;display:none;font-family:var(--mono);font-size:.68rem;color:var(--blue)'; + document.querySelector('.nav-tabs').insertAdjacentElement('beforebegin',banner); + } + const tasks=loadActiveTasks(); + const ids=Object.keys(tasks); + if(!ids.length){ banner.style.display='none'; return; } + banner.style.display='block'; + banner.innerHTML=`⚡ 진행 중인 작업 ${ids.length}개: `+ids.map(id=>{ + const m=tasks[id]; + return ` + ${m.type||'?'}: ${esc(m.filename||'')} + + `; + }).join(''); +} + +async function cancelTaskFromBanner(taskId){ + if(!confirm('이 작업을 취소하시겠습니까?')) return; + try{ + await api('POST','/api/cancel/'+taskId); + removeActiveTask(taskId); + renderActiveTasksBanner(); }catch{} } -function populateOrSelects(){ - const fill=(sel,def,list)=>{if(!sel)return;const cur=sel.value||def||'';sel.innerHTML='';list.forEach(m=>{const o=document.createElement('option');o.value=m;o.textContent=m;if(m===cur)o.selected=true;sel.appendChild(o)})}; - fill(document.getElementById('setting-or-stt-model'),appSettings.openrouter_stt_model,orModels); - fill(document.getElementById('setting-or-ocr-model'),appSettings.openrouter_ocr_model,orVisionModels); - fill(document.getElementById('stt-or-model'),appSettings.openrouter_stt_model,orModels); - fill(document.getElementById('ocr-or-model'),appSettings.openrouter_ocr_model,orVisionModels); - fillSubTransModels(); + +// 시작 시 활성 작업 재폴링 +async function resumeActiveTasks(){ + const tasks=loadActiveTasks(); + const ids=Object.keys(tasks); + if(!ids.length) return; + renderActiveTasksBanner(); + for(const id of ids){ + pollResumed(id); + } } -document.getElementById('btn-or-test')?.addEventListener('click',async()=>{ - const key=document.getElementById('or-api-key').value.trim(),url=document.getElementById('or-url').value.trim()||'https://openrouter.ai/api/v1'; - const result=document.getElementById('or-test-result'); - if(!key){result.style.display='block';result.style.color='var(--warn)';result.textContent='API 키를 입력하세요';return} - result.style.display='block';result.style.color='var(--muted)';result.textContent='연결 중...'; - try{const fd=new FormData();fd.append('api_key',key);fd.append('base_url',url); - const r=await api('POST','/api/openrouter/test',fd);const d=await r.json(); - result.style.color=d.ok?'var(--accent)':'var(--warn)';result.textContent=d.message;if(d.ok)loadOrModels();} - catch{result.style.color='var(--warn)';result.textContent='요청 실패'} + +function pollResumed(taskId){ + const t=setInterval(async()=>{ + try{ + const r=await api('GET','/api/status/'+taskId); + if(r.status===401){ clearInterval(t); return; } + const d=await r.json(); + if(['success','failure','cancelled'].includes(d.state)){ + clearInterval(t); + removeActiveTask(taskId); + renderActiveTasksBanner(); + // 자막 탭이 열려있으면 결과 표시 + if(d.state==='success' && d.srt_orig){ + showSubResult(d); + } + } + }catch{} + },3000); +} + +// ══ API ══ + +// ══ AUTH ══ +async function checkAuth(){ + token=localStorage.getItem('vs_token'); + if(!token){showLogin();return} + try{ + const r=await api('GET','/api/me'); + if(r.ok){ + currentUser=await r.json(); + applyUserUI(); + await Promise.all([loadOllamaModels(),loadSettings()]); + hideLogin(); + startSysMonitor(); + await resumeActiveTasks(); + } else showLogin(); + }catch{showLogin()} +} + +function applyUserUI(){ + document.getElementById('user-name').textContent=currentUser.username; + const b=document.getElementById('user-badge'); + b.textContent=currentUser.role==='admin'?'ADMIN':'USER'; + b.className='user-badge '+currentUser.role; + document.getElementById('admin-tab').style.display=currentUser.role==='admin'?'flex':'none'; + document.getElementById('btn-hist-clear').style.display=currentUser.role==='admin'?'block':'none'; + // 권한에 따라 탭 표시 + const perms=currentUser.permissions||{}; + document.querySelectorAll('.nav-tab').forEach(t=>{ + const p=t.dataset.page; + if(p==='stt') { t.style.opacity=perms.stt?'1':'0.35'; t.style.pointerEvents=perms.stt?'':'none'; } + if(p==='ocr') { t.style.opacity=perms.ocr?'1':'0.35'; t.style.pointerEvents=perms.ocr?'':'none'; } + if(p==='subtitle'){ t.style.opacity=perms.subtitle?'1':'0.35'; t.style.pointerEvents=perms.subtitle?'':'none'; } + }); +} + +const showLogin=()=>{ document.getElementById('login-overlay').style.display='flex'; stopSysMonitor(); }; +const hideLogin=()=>{ document.getElementById('login-overlay').style.display='none'; }; +document.getElementById('btn-login').addEventListener('click',doLogin); +document.getElementById('inp-pass').addEventListener('keydown',e=>{ if(e.key==='Enter')doLogin(); }); + +async function doLogin(){ + const u=document.getElementById('inp-user').value.trim(), p=document.getElementById('inp-pass').value; + const err=document.getElementById('login-err'); err.style.display='none'; + if(!u||!p){ err.style.display='block'; err.textContent='아이디와 비밀번호를 입력하세요'; return; } + const fd=new FormData(); fd.append('username',u); fd.append('password',p); + try{ + const r=await fetch('/api/login',{method:'POST',body:fd}); + const d=await r.json(); + if(!r.ok){ err.style.display='block'; err.textContent=d.detail||'로그인 실패'; return; } + token=d.access_token; localStorage.setItem('vs_token',token); + await checkAuth(); + }catch{ err.style.display='block'; err.textContent='서버 연결 실패'; } +} + +document.getElementById('btn-logout').addEventListener('click',()=>{ + token=null; currentUser=null; localStorage.removeItem('vs_token'); + showLogin(); document.getElementById('inp-pass').value=''; + document.getElementById('ram-widget').style.display='none'; +}); + +// ══ 시스템 모니터 ══ +function startSysMonitor(){ fetchSysInfo(); sysTimer=setInterval(fetchSysInfo,6000); } +function stopSysMonitor(){ if(sysTimer){ clearInterval(sysTimer); sysTimer=null; } } + +async function fetchSysInfo(){ + try{ + const r=await api('GET','/api/system'); if(!r.ok)return; const d=await r.json(); + const p=d.ram_percent||0; + const bar=document.getElementById('ram-bar'); + bar.style.width=p+'%'; bar.style.background=p>85?'var(--warn)':p>65?'#f0b42a':'var(--accent)'; + document.getElementById('ram-text').textContent=`${d.ram_avail_gb}G여유`; + document.getElementById('cpu-text').textContent=`CPU ${d.cpu_percent}%`; + document.getElementById('ram-widget').style.display='flex'; + updateSC('ram',d.ram_percent,`${d.ram_used_gb}GB / ${d.ram_total_gb}GB`,`여유 ${d.ram_avail_gb}GB`,'var(--accent)'); + updateSC('cpu',d.cpu_percent,`${d.cpu_percent}%`,`물리 ${d.cpu_physical}코어 / 논리 ${d.cpu_logical}스레드`,'var(--blue)'); + const sp=d.swap_total_gb>0?Math.round(d.swap_used_gb/d.swap_total_gb*100):0; + updateSC('swap',sp,`${d.swap_used_gb}GB / ${d.swap_total_gb}GB`,`사용률 ${sp}%`,'var(--orange)'); + const th=d.cpu_threads_setting; + document.getElementById('sys-threads-val').textContent=th===0?`자동 (${d.cpu_logical}스레드)`:`${th} 스레드`; + const sl=document.getElementById('cpu-slider'); if(sl&&sl.max{ + const v=parseInt(cpuSlider.value); + cpuDisplay.textContent=v===0?'0 (자동)':v+' 스레드'; +}); + +// ══ Ollama 모델 ══ +async function loadOllamaModels(){ + try{ + const r=await api('GET','/api/ollama/models'); const d=await r.json(); + ollamaModels=d.models||[]; + const badge=document.getElementById('ollama-status-badge'); + if(badge){ badge.className='ollama-status '+(d.connected?'ok':'fail'); badge.textContent=d.connected?`✓ Ollama(${ollamaModels.length})`:'✗ Ollama 연결실패'; } + populateModelSelects(); + loadSttEngineStatus(); + }catch{} +} + +// ══ OpenRouter 모델 ══ +async function loadOrModels(){ + try{ + const r=await api('GET','/api/openrouter/models'); const d=await r.json(); + if(d.connected){ + orModels=d.models||[]; orVisionModels=d.vision_models||[]; orTextModels=d.text_models||[]; + const wrap=document.getElementById('or-models-wrap'); + if(wrap){ wrap.style.display='block'; + const badge=document.getElementById('or-connected-badge'); + if(badge) badge.textContent=`✓ 연결됨 — Vision ${orVisionModels.length}개 / 전체 ${orModels.length}개`; } + } + populateOrSelects(); + }catch{} +} + +let orFilter='vision'; +document.querySelectorAll('.or-model-tab')?.forEach(btn=>{ + btn.addEventListener('click',()=>{ + document.querySelectorAll('.or-model-tab').forEach(b=>b.classList.remove('active')); + btn.classList.add('active'); orFilter=btn.dataset.filter; populateOrSelects(orFilter); + }); +}); + +function populateOrSelects(filter){ + filter=filter||orFilter; + const allList=filter==='vision'?orVisionModels:filter==='text'?orTextModels:orModels; + const _fill=(sel,def,list)=>{ + if(!sel)return; const cur=sel.value||def||''; + sel.innerHTML=''; + list.forEach(m=>{ const o=document.createElement('option'); o.value=m; o.textContent=m; if(m===cur)o.selected=true; sel.appendChild(o); }); + }; + // 설정 탭 + _fill(document.getElementById('setting-or-stt-model'),appSettings.openrouter_stt_model,orModels); + const ocrSel=document.getElementById('setting-or-ocr-model'); + if(ocrSel){ const cur=ocrSel.value||appSettings.openrouter_ocr_model||''; ocrSel.innerHTML=''; orVisionModels.forEach(m=>{const o=document.createElement('option');o.value=m;o.textContent=m;if(m===cur)o.selected=true;ocrSel.appendChild(o);}); } + // STT 탭 OR 모델 + _fill(document.getElementById('stt-or-model'),appSettings.openrouter_stt_model,orModels); + // OCR 탭 OR 모델 + const ocrPageSel=document.getElementById('ocr-or-model'); + if(ocrPageSel){ const cur=ocrPageSel.value||appSettings.openrouter_ocr_model||''; ocrPageSel.innerHTML=''; orVisionModels.forEach(m=>{const o=document.createElement('option');o.value=m;o.textContent=m;if(m===cur)o.selected=true;ocrPageSel.appendChild(o);}); } + // 자막 탭 OR 모델 (번역/교정) + _fill(document.getElementById('sub-trans-model'),appSettings.openrouter_stt_model,orModels); + _fill(document.getElementById('sub-refine-model'),appSettings.openrouter_stt_model,orModels); +} + +function populateModelSelects(){ + const fill=(sel,def,ph)=>{ if(!sel)return; const cur=sel.value||def||''; sel.innerHTML=``; ollamaModels.forEach(m=>{const o=document.createElement('option');o.value=m;o.textContent=m;if(m===cur)o.selected=true;sel.appendChild(o);}); }; + fill(document.getElementById('stt-ollama-model'),appSettings.stt_ollama_model,'설정 기본 모델 사용'); + fill(document.getElementById('ocr-ollama-model'),appSettings.ocr_ollama_model,'설정 기본 모델 사용'); + fill(document.getElementById('setting-stt-model'),appSettings.stt_ollama_model,'(없음)'); + fill(document.getElementById('setting-ocr-model'),appSettings.ocr_ollama_model,'(없음)'); + fill(document.getElementById('sub-trans-model-ollama'),appSettings.stt_ollama_model,'설정 기본 모델 사용'); + fill(document.getElementById('sub-refine-model-ollama'),appSettings.stt_ollama_model,'설정 기본 모델 사용'); + populateOrSelects(); +} + +async function loadSttEngineStatus(){ + try{ + const r=await api('GET','/api/stt-engines'); if(!r.ok)return; + const d=await r.json(); + sttEngineStatus={local:true,groq:d.groq?.key_set||false,openai:d.openai?.key_set||false}; + }catch{} +} + +// ══ 설정 로드 ══ +async function loadSettings(){ + try{ + const r=await api('GET','/api/settings'); appSettings=await r.json(); + const th=appSettings.cpu_threads||0; + if(cpuSlider){ cpuSlider.value=th; if(cpuDisplay)cpuDisplay.textContent=th===0?'0 (자동)':th+' 스레드'; } + const _set=(id,v)=>{ const el=document.getElementById(id); if(el)el.value=v; }; + _set('stt-timeout', appSettings.stt_timeout||0); + _set('ollama-timeout',appSettings.ollama_timeout||600); + _set('subtitle-timeout',appSettings.subtitle_timeout||600); + if(appSettings.openrouter_url){ _set('or-url',appSettings.openrouter_url); } + if(appSettings.openrouter_api_key_masked){ const el=document.getElementById('or-api-key'); if(el)el.placeholder='저장됨: '+appSettings.openrouter_api_key_masked; } + if(appSettings.groq_api_key_masked){ const el=document.getElementById('groq-api-key'); if(el)el.placeholder='저장됨: '+appSettings.groq_api_key_masked; } + if(appSettings.openai_api_key_masked){ const el=document.getElementById('openai-api-key'); if(el)el.placeholder='저장됨: '+appSettings.openai_api_key_masked; } + const defEng=document.getElementById('default-stt-engine'); if(defEng)defEng.value=appSettings.default_stt_engine||'local'; + populateModelSelects(); + if(appSettings.openrouter_api_key_masked) loadOrModels(); + loadSttEngineStatus(); + }catch{} +} + +// ══ 설정 저장 ══ +document.getElementById('btn-save-settings')?.addEventListener('click',async()=>{ + const fd=new FormData(); + const _get=(id,def='')=>document.getElementById(id)?.value||def; + fd.append('stt_ollama_model', _get('setting-stt-model')); + fd.append('ocr_ollama_model', _get('setting-ocr-model')); + fd.append('cpu_threads', cpuSlider?.value||'0'); + fd.append('stt_timeout', _get('stt-timeout','0')); + fd.append('ollama_timeout', _get('ollama-timeout','600')); + fd.append('subtitle_timeout', _get('subtitle-timeout','600')); + fd.append('openrouter_url', _get('or-url','https://openrouter.ai/api/v1')); + fd.append('openrouter_stt_model',_get('setting-or-stt-model')); + fd.append('openrouter_ocr_model',_get('setting-or-ocr-model')); + fd.append('default_stt_engine', _get('default-stt-engine','local')); + const orKey=document.getElementById('or-api-key')?.value?.trim(); if(orKey) fd.append('openrouter_api_key', orKey); + const groqKey=document.getElementById('groq-api-key')?.value?.trim(); if(groqKey) fd.append('groq_api_key', groqKey); + const oaKey=document.getElementById('openai-api-key')?.value?.trim(); if(oaKey) fd.append('openai_api_key', oaKey); + try{ + const r=await api('POST','/api/settings',fd); const d=await r.json(); + if(r.ok){ + appSettings=d.settings||appSettings; + ['or-api-key','groq-api-key','openai-api-key'].forEach(id=>{ const el=document.getElementById(id); if(el)el.value=''; }); + await loadSttEngineStatus(); + if(orKey||appSettings.openrouter_api_key_masked) loadOrModels(); + const msg=document.getElementById('settings-msg'); if(msg){ msg.style.display='block'; setTimeout(()=>msg.style.display='none',3500); } + } + }catch{} +}); + +document.getElementById('btn-refresh-models')?.addEventListener('click',()=>{ loadOllamaModels(); loadOrModels(); }); +document.getElementById('btn-or-test')?.addEventListener('click',async()=>{ + const key=document.getElementById('or-api-key')?.value?.trim(); + const url=document.getElementById('or-url')?.value?.trim()||'https://openrouter.ai/api/v1'; + const result=document.getElementById('or-test-result'); + if(!key){ if(result){result.style.display='block';result.style.color='var(--warn)';result.textContent='API 키를 입력하세요';} return; } + if(result){result.style.display='block';result.style.color='var(--muted)';result.textContent='연결 중...';} + try{ + const fd=new FormData(); fd.append('api_key',key); fd.append('base_url',url); + const r=await api('POST','/api/openrouter/test',fd); const d=await r.json(); + if(result){result.style.color=d.ok?'var(--accent)':'var(--warn)';result.textContent=d.message;} + if(d.ok) loadOrModels(); + }catch{ if(result){result.style.color='var(--warn)';result.textContent='요청 실패';} } +}); + +// ══ NAV ══ +document.querySelectorAll('.nav-tab').forEach(btn=>{ + btn.addEventListener('click',()=>{ + document.querySelectorAll('.nav-tab').forEach(b=>b.classList.remove('active')); + document.querySelectorAll('.page').forEach(p=>p.classList.remove('active')); + btn.classList.add('active'); + const p=document.getElementById('page-'+btn.dataset.page); if(p)p.classList.add('active'); + if(btn.dataset.page==='admin') loadUsers(); + if(btn.dataset.page==='settings'){ loadSettings(); fetchSysInfo(); } + if(btn.dataset.page==='history'){ histPage=1; loadHistory(); } + if(btn.dataset.page==='subtitle') fillSubModels(); + }); }); -document.getElementById('btn-refresh-models')?.addEventListener('click',()=>{loadOllamaModels();loadOrModels()}); checkAuth(); + diff --git a/app/tasks.py b/app/tasks.py index 38114af..44eb07c 100644 --- a/app/tasks.py +++ b/app/tasks.py @@ -1,10 +1,5 @@ """ STT + Subtitle Pipeline Celery Tasks - -subtitle_pipeline_task: - Step 1: ffmpeg → 16kHz WAV 추출 - Step 2: Whisper → 원어 SRT / VTT 생성 - Step 3: LLM → 번역 SRT / VTT 생성 (선택) """ import os, json, subprocess, tempfile import httpx @@ -21,6 +16,8 @@ INITIAL_PROMPT = os.getenv("WHISPER_INITIAL_PROMPT", "") or None OUTPUT_DIR = os.getenv("OUTPUT_DIR", "/data/outputs") OLLAMA_URL = os.getenv("OLLAMA_URL", "http://192.168.0.126:11434") OLLAMA_TIMEOUT = int(os.getenv("OLLAMA_TIMEOUT", "600")) +GROQ_BASE = "https://api.groq.com/openai/v1" +OPENAI_BASE = "https://api.openai.com/v1" _cpu_threads_env = int(os.getenv("CPU_THREADS", "0")) CPU_THREADS = _cpu_threads_env if _cpu_threads_env > 0 else None @@ -28,7 +25,7 @@ CPU_THREADS = _cpu_threads_env if _cpu_threads_env > 0 else None celery_app = Celery("whisper_tasks", broker=REDIS_URL, backend=REDIS_URL) celery_app.conf.update( task_serializer="json", result_serializer="json", - accept_content=["json"], task_track_started=True, result_expires=3600, + accept_content=["json"], task_track_started=True, result_expires=86400, ) _whisper_model = None @@ -39,14 +36,14 @@ def get_model(): from faster_whisper import WhisperModel kwargs = dict(device=DEVICE, compute_type=COMPUTE_TYPE) if CPU_THREADS is not None: kwargs["cpu_threads"] = CPU_THREADS - print(f"[Whisper] 로딩: {MODEL_SIZE}/{DEVICE}/{COMPUTE_TYPE}/threads={CPU_THREADS or 'auto'}") + print(f"[Whisper] 로딩: {MODEL_SIZE}/{DEVICE}/{COMPUTE_TYPE}") _whisper_model = WhisperModel(MODEL_SIZE, **kwargs) - print("[Whisper] 로드 완료") + print("[Whisper] 완료") return _whisper_model # ══════════════════════════════════════════════════════════════ -# 언어 코드 → 표시명 +# 공통 유틸 # ══════════════════════════════════════════════════════════════ LANG_NAMES = { "ko":"한국어","en":"English","ja":"日本語","zh":"中文(简体)", @@ -54,361 +51,404 @@ LANG_NAMES = { "it":"Italiano","pt":"Português","ru":"Русский","ar":"العربية", "vi":"Tiếng Việt","th":"ไทย","id":"Bahasa Indonesia", "nl":"Nederlands","pl":"Polski","tr":"Türkçe","sv":"Svenska", - "uk":"Українська","hi":"हिन्दी","bn":"বাংলা", + "uk":"Українська","hi":"हिन्दी", } def _lang_name(code): return LANG_NAMES.get(code, code) - -# ══════════════════════════════════════════════════════════════ -# 자막 포맷 생성 -# ══════════════════════════════════════════════════════════════ -def _srt_time(s: float) -> str: - ms = int(round(s * 1000)) - h, r = divmod(ms, 3600000); m, r = divmod(r, 60000); sec, ms = divmod(r, 1000) +def _srt_time(s): + ms=int(round(s*1000)); h,r=divmod(ms,3600000); m,r=divmod(r,60000); sec,ms=divmod(r,1000) return f"{h:02d}:{m:02d}:{sec:02d},{ms:03d}" -def _vtt_time(s: float) -> str: - return _srt_time(s).replace(",", ".") +def _vtt_time(s): return _srt_time(s).replace(",",".") -def make_srt(segments: list) -> str: - out = [] - for i, seg in enumerate(segments, 1): - out += [str(i), f"{_srt_time(seg['start'])} --> {_srt_time(seg['end'])}", seg["text"].strip(), ""] +def make_srt(segments): + out=[] + for i,seg in enumerate(segments,1): + out+=[str(i),f"{_srt_time(seg['start'])} --> {_srt_time(seg['end'])}",seg["text"].strip(),""] return "\n".join(out) -def make_vtt(segments: list) -> str: - out = ["WEBVTT", ""] - for i, seg in enumerate(segments, 1): - out += [str(i), f"{_vtt_time(seg['start'])} --> {_vtt_time(seg['end'])}", seg["text"].strip(), ""] +def make_vtt(segments): + out=["WEBVTT",""] + for i,seg in enumerate(segments,1): + out+=[str(i),f"{_vtt_time(seg['start'])} --> {_vtt_time(seg['end'])}",seg["text"].strip(),""] return "\n".join(out) +def _llm_call(prompt, model, use_openrouter, openrouter_url, openrouter_key, timeout): + """LLM 호출 — 명확한 에러 메시지 포함""" + if use_openrouter: + if not openrouter_key: + raise Exception("OpenRouter API 키가 설정되지 않았습니다. 설정 → OpenRouter에서 입력하세요.") + try: + resp = httpx.post( + f"{openrouter_url.rstrip('/')}/chat/completions", + headers={"Authorization":f"Bearer {openrouter_key}", + "HTTP-Referer":"https://voicescript.local","Content-Type":"application/json"}, + json={"model":model,"messages":[{"role":"user","content":prompt}],"temperature":0.2}, + timeout=float(timeout), + ) + resp.raise_for_status() + return resp.json()["choices"][0]["message"]["content"].strip() + except httpx.TimeoutException: + raise Exception(f"OpenRouter 응답 시간 초과 ({timeout}초). 설정에서 타임아웃을 늘리거나 모델을 변경하세요.") + except httpx.HTTPStatusError as e: + raise Exception(f"OpenRouter 오류 ({e.response.status_code}): API 키 또는 모델을 확인하세요.") + else: + try: + resp = httpx.post(f"{OLLAMA_URL}/api/chat", + json={"model":model,"messages":[{"role":"user","content":prompt}], + "stream":False,"options":{"temperature":0.2}}, + timeout=float(timeout)) + resp.raise_for_status() + result = resp.json().get("message",{}).get("content","").strip() + if not result: + raise Exception(f"Ollama({model}) 빈 응답. 모델이 실행 중인지 확인: ollama list") + return result + except httpx.ConnectError: + raise Exception(f"Ollama 서버 연결 실패 ({OLLAMA_URL}). 서버가 실행 중인지 확인하세요.") + except httpx.TimeoutException: + raise Exception( + f"Ollama({model}) 응답 시간 초과 ({timeout}초).\n" + f"원인: 모델 로딩 중이거나, 시스템 리소스 부족, 또는 모델이 응답하지 않음.\n" + f"해결: 설정에서 Ollama 타임아웃을 늘리거나, 더 작은 모델을 사용하세요." + ) -# ══════════════════════════════════════════════════════════════ -# LLM 번역 (세그먼트 배치) -# ══════════════════════════════════════════════════════════════ -def _translate_batch(texts: list, target_lang: str, - use_openrouter: bool, model: str, - openrouter_url: str, openrouter_key: str) -> list: - """texts 리스트 → 번역된 texts 리스트""" +def _translate_batch(texts, target_lang, use_or, model, or_url, or_key, timeout): if not texts or not model: return texts - lang_name = _lang_name(target_lang) prompt = ( - f"아래 자막 문장 배열을 {lang_name}로 번역해줘.\n" + f"아래 자막 문장 배열을 {_lang_name(target_lang)}로 번역해줘.\n" f"반드시 JSON 문자열 배열로만 답해. 설명·마크다운 없이 배열만 출력.\n" f"입력과 동일한 개수와 순서를 유지해.\n\n" f"{json.dumps(texts, ensure_ascii=False)}" ) try: - if use_openrouter and openrouter_key: - resp = httpx.post( - f"{openrouter_url.rstrip('/')}/chat/completions", - headers={"Authorization": f"Bearer {openrouter_key}", - "HTTP-Referer": "https://voicescript.local", - "Content-Type": "application/json"}, - json={"model": model, - "messages": [{"role":"user","content":prompt}], - "temperature": 0.2}, - timeout=float(OLLAMA_TIMEOUT), - ) - resp.raise_for_status() - raw = resp.json()["choices"][0]["message"]["content"].strip() - else: - resp = httpx.post(f"{OLLAMA_URL}/api/chat", - json={"model": model, - "messages": [{"role":"user","content":prompt}], - "stream": False, "options": {"temperature": 0.2}}, - timeout=float(OLLAMA_TIMEOUT)) - resp.raise_for_status() - raw = resp.json().get("message",{}).get("content","").strip() - - # 코드블록 제거 후 JSON 파싱 - if "```" in raw: - raw = raw.split("```")[1].lstrip("json\n").rstrip() + raw = _llm_call(prompt, model, use_or, or_url, or_key, timeout) + if "```" in raw: raw=raw.split("```")[1].lstrip("json\n").rstrip() result = json.loads(raw) - if isinstance(result, list) and len(result) == len(texts): + if isinstance(result,list) and len(result)==len(texts): return [str(r) for r in result] return texts except Exception as e: print(f"[번역 실패] {e}") - return texts # 실패 시 원본 유지 + return texts +def _refine_batch(texts, model, use_or, or_url, or_key, timeout): + if not texts or not model: return texts + prompt = ( + "아래는 음성 인식 자막 문장 배열입니다.\n" + "내용은 절대 변경하지 말고, 문장 부호만 자연스럽게 교정해줘.\n" + "반드시 JSON 문자열 배열로만 답해. 설명·마크다운 없이 배열만.\n" + "입력과 동일한 개수와 순서를 유지해.\n\n" + f"{json.dumps(texts, ensure_ascii=False)}" + ) + try: + raw = _llm_call(prompt, model, use_or, or_url, or_key, timeout) + if "```" in raw: raw=raw.split("```")[1].lstrip("json\n").rstrip() + result = json.loads(raw) + if isinstance(result,list) and len(result)==len(texts): + return [str(r) for r in result] + return texts + except Exception as e: + print(f"[교정 실패] {e}") + return texts -# ══════════════════════════════════════════════════════════════ -# STT + Ollama/OpenRouter 후처리 (기존 음성변환용) -# ══════════════════════════════════════════════════════════════ -def _ollama_postprocess(text: str, model: str) -> str: +def _ollama_postprocess(text, model): if not model or not text.strip(): return text - prompt = ("다음은 음성 인식으로 추출된 텍스트입니다. " - "내용은 절대 변경하지 말고, 문장 부호를 추가하고 자연스럽게 다듬어줘. " - "결과 텍스트만 출력하고 설명은 하지 마.\n\n" + text) + prompt=("다음은 음성 인식으로 추출된 텍스트입니다. 내용은 절대 변경하지 말고 문장 부호만 추가해줘. " + "결과 텍스트만 출력하고 설명은 하지 마.\n\n"+text) try: - resp = httpx.post(f"{OLLAMA_URL}/api/chat", - json={"model":model,"messages":[{"role":"user","content":prompt}], - "stream":False,"options":{"temperature":0.1}}, - timeout=float(OLLAMA_TIMEOUT)) - resp.raise_for_status() - return resp.json().get("message",{}).get("content","").strip() or text - except: return text + raw=_llm_call(prompt,model,False,"","",OLLAMA_TIMEOUT) + return raw if raw else text + except Exception as e: + print(f"[Ollama 후처리 실패] {e}"); return text -def _openrouter_postprocess(text: str, model: str, base_url: str, api_key: str) -> str: +def _openrouter_postprocess(text, model, base_url, api_key): if not model or not api_key or not text.strip(): return text - prompt = ("다음은 음성 인식으로 추출된 텍스트입니다. " - "내용은 절대 변경하지 말고, 문장 부호를 추가하고 자연스럽게 다듬어줘. " - "결과 텍스트만 출력하고 설명은 하지 마.\n\n" + text) + prompt=("다음은 음성 인식으로 추출된 텍스트입니다. 내용은 절대 변경하지 말고 문장 부호만 추가해줘. " + "결과 텍스트만 출력하고 설명은 하지 마.\n\n"+text) try: - resp = httpx.post(f"{base_url.rstrip('/')}/chat/completions", - headers={"Authorization":f"Bearer {api_key}","HTTP-Referer":"https://voicescript.local","Content-Type":"application/json"}, - json={"model":model,"messages":[{"role":"user","content":prompt}],"temperature":0.1}, - timeout=float(OLLAMA_TIMEOUT)) + raw=_llm_call(prompt,model,True,base_url,api_key,OLLAMA_TIMEOUT) + return raw if raw else text + except Exception as e: + print(f"[OpenRouter 후처리 실패] {e}"); return text + +def _api_transcribe(audio_path, api_key, base_url, language, model="whisper-large-v3"): + """Groq / OpenAI Whisper API 호출""" + with open(audio_path,"rb") as f: + data = f.read() + params = {"model":model} + if language: params["language"] = language + try: + resp = httpx.post( + f"{base_url}/audio/transcriptions", + headers={"Authorization":f"Bearer {api_key}"}, + files={"file":("audio.mp3", data, "audio/mpeg")}, + data=params, + timeout=600.0, + ) resp.raise_for_status() - return resp.json()["choices"][0]["message"]["content"].strip() or text - except: return text + d = resp.json() + text = d.get("text","") + # segments 구조 없으면 전체 텍스트로 단일 세그먼트 + segs = d.get("segments",[]) + if not segs and text: + segs = [{"start":0,"end":0,"text":text}] + return {"text":text, "segments":segs, + "language":d.get("language", language or ""), "duration":0} + except httpx.TimeoutException: + raise Exception(f"API 응답 시간 초과. 파일이 너무 크거나 서버 문제일 수 있습니다.") + except httpx.HTTPStatusError as e: + raise Exception(f"API 오류 ({e.response.status_code}): API 키를 확인하세요.") # ══════════════════════════════════════════════════════════════ -# 기존 STT 태스크 (음성변환 탭용) +# STT Task (음성변환 탭) # ══════════════════════════════════════════════════════════════ @celery_app.task(bind=True, name="tasks.transcribe_task", queue="stt") def transcribe_task( self, - file_id: str, audio_path: str, - use_ollama: bool = False, ollama_model: str = "", - use_openrouter: bool = False, openrouter_model: str = "", - openrouter_url: str = "", openrouter_key: str = "", + file_id:str, audio_path:str, + use_ollama:bool=False, ollama_model:str="", + use_openrouter:bool=False, openrouter_model:str="", + openrouter_url:str="", openrouter_key:str="", + stt_engine:str="local", + groq_api_key:str="", openai_api_key:str="", + stt_language:str="", ): self.update_state(state="PROGRESS", meta={"progress":5,"message":"모델 준비 중..."}) + tmp_mp3=None try: - model = get_model() - self.update_state(state="PROGRESS", meta={"progress":15,"message":"오디오 분석 중..."}) - segments_gen, info = model.transcribe( - audio_path, language=LANGUAGE, beam_size=BEAM_SIZE, - initial_prompt=INITIAL_PROMPT, vad_filter=True, - vad_parameters=dict(min_silence_duration_ms=500), word_timestamps=False, - ) - self.update_state(state="PROGRESS", meta={"progress":30,"message":"텍스트 변환 중..."}) - segments, parts = [], [] - duration = info.duration - for seg in segments_gen: - segments.append({"start":round(seg.start,3),"end":round(seg.end,3),"text":seg.text.strip()}) - parts.append(seg.text.strip()) - if duration > 0: - pct = 30 + int((seg.end/duration)*50) - self.update_state(state="PROGRESS", - meta={"progress":min(pct,80),"message":f"변환 중... {seg.end:.0f}s / {duration:.0f}s"}) + segments=[]; duration=0.0; detected_lang="" - raw_text = "\n".join(parts) - full_text = raw_text + if stt_engine in ("groq","openai"): + api_key = groq_api_key if stt_engine=="groq" else openai_api_key + base_url= GROQ_BASE if stt_engine=="groq" else OPENAI_BASE + if not api_key: + raise Exception(f"{stt_engine.upper()} API 키가 설정되지 않았습니다. 설정 → STT 엔진 API 키에서 입력하세요.") + self.update_state(state="PROGRESS",meta={"progress":20,"message":f"{stt_engine.upper()} API 변환 중..."}) + import tempfile + suffix=".mp3" + with tempfile.NamedTemporaryFile(suffix=suffix,delete=False) as tf: tmp_mp3=tf.name + cmd=["ffmpeg","-y","-i",audio_path,"-ar","16000","-ac","1","-b:a","128k",tmp_mp3] + r=subprocess.run(cmd,capture_output=True,timeout=300) + if r.returncode!=0: raise Exception(f"ffmpeg 변환 실패: {r.stderr.decode(errors='replace')[-200:]}") + result=_api_transcribe(tmp_mp3,api_key,base_url,stt_language) + segments=[{"start":round(s.get("start",0),3),"end":round(s.get("end",0),3),"text":s.get("text","").strip()} + for s in result.get("segments",[])] + detected_lang=result.get("language","") + duration=result.get("duration",0) or (segments[-1]["end"] if segments else 0) + else: + model=get_model() + self.update_state(state="PROGRESS",meta={"progress":15,"message":"오디오 분석 중..."}) + lang=stt_language.strip() or LANGUAGE + segments_gen,info=model.transcribe(audio_path,language=lang,beam_size=BEAM_SIZE, + initial_prompt=INITIAL_PROMPT,vad_filter=True, + vad_parameters=dict(min_silence_duration_ms=500),word_timestamps=False) + self.update_state(state="PROGRESS",meta={"progress":30,"message":"텍스트 변환 중..."}) + duration=info.duration; detected_lang=info.language + for seg in segments_gen: + segments.append({"start":round(seg.start,3),"end":round(seg.end,3),"text":seg.text.strip()}) + if duration>0: + pct=30+int((seg.end/duration)*50) + self.update_state(state="PROGRESS",meta={"progress":min(pct,80),"message":f"변환 중... {seg.end:.0f}s/{duration:.0f}s"}) + + raw_text="\n".join(s["text"] for s in segments) + full_text=raw_text if use_ollama and ollama_model: self.update_state(state="PROGRESS",meta={"progress":85,"message":f"Ollama({ollama_model}) 교정 중..."}) - full_text = _ollama_postprocess(raw_text, ollama_model) + full_text=_ollama_postprocess(raw_text,ollama_model) elif use_openrouter and openrouter_model and openrouter_key: self.update_state(state="PROGRESS",meta={"progress":85,"message":f"OpenRouter({openrouter_model}) 교정 중..."}) - full_text = _openrouter_postprocess(raw_text, openrouter_model, openrouter_url, openrouter_key) + full_text=_openrouter_postprocess(raw_text,openrouter_model,openrouter_url,openrouter_key) self.update_state(state="PROGRESS",meta={"progress":95,"message":"파일 저장 중..."}) - os.makedirs(OUTPUT_DIR, exist_ok=True) - output_filename = f"{file_id}.txt" - with open(os.path.join(OUTPUT_DIR, output_filename),"w",encoding="utf-8") as f: - f.write(f"# 변환 결과\n# 언어: {info.language} | 재생 시간: {duration:.1f}초\n\n## 전체 텍스트\n\n{full_text}\n\n## 타임스탬프별 세그먼트\n\n") + os.makedirs(OUTPUT_DIR,exist_ok=True) + output_filename=f"{file_id}.txt" + with open(os.path.join(OUTPUT_DIR,output_filename),"w",encoding="utf-8") as f: + f.write(f"# 변환 결과\n# 언어: {detected_lang} | 재생시간: {duration:.1f}초\n\n{full_text}\n\n## 타임스탬프\n\n") for seg in segments: m,s=divmod(int(seg['start']),60) - f.write(f"[{m:02d}:{s:02d}] {seg['text']}\n") - try: os.remove(audio_path) - except: pass + f.write(f"[{m:02d}:{s:02d}] {seg['text']}\n") + + for p in [audio_path, tmp_mp3]: + try: + if p: os.remove(p) + except: pass + return { "text":full_text,"raw_text":raw_text,"segments":segments, - "language":info.language,"duration":round(duration,1), + "language":detected_lang,"duration":round(duration,1), "output_file":output_filename, "ollama_used":use_ollama and bool(ollama_model), "ollama_model":ollama_model if (use_ollama and ollama_model) else "", "openrouter_used":use_openrouter and bool(openrouter_model) and bool(openrouter_key), "openrouter_model":openrouter_model if (use_openrouter and openrouter_model) else "", + "stt_engine":stt_engine, } except Exception as e: + for p in [audio_path, tmp_mp3]: + try: + if p: os.remove(p) + except: pass raise Exception(f"변환 실패: {str(e)}") # ══════════════════════════════════════════════════════════════ -# 자막 파이프라인 태스크 +# 자막 파이프라인 Task # Step 1: ffmpeg → WAV -# Step 2: Whisper → 원어 SRT/VTT -# Step 3: LLM → 번역 SRT/VTT (선택) +# Step 2: Whisper / API → 원어 자막 +# Step 2b: LLM 교정 (선택) +# Step 3: LLM 번역 (선택) # ══════════════════════════════════════════════════════════════ @celery_app.task(bind=True, name="tasks.subtitle_pipeline_task", queue="stt") def subtitle_pipeline_task( self, - file_id: str, - video_path: str, - src_language: str = "", # 원어 코드 (빈칸=자동) - subtitle_fmt: str = "srt", # srt | vtt | both - translate_to: str = "", # 번역 대상 (빈칸=번역 안 함) - trans_model: str = "", # 번역 모델 - trans_via: str = "ollama",# ollama | openrouter - openrouter_url: str = "", - openrouter_key: str = "", + file_id:str, video_path:str, + src_language:str="", + subtitle_fmt:str="srt", + # STT 엔진 + stt_engine:str="local", + groq_api_key:str="", openai_api_key:str="", + # 교정 + refine_model:str="", refine_via:str="ollama", + # 번역 + translate_to:str="", trans_model:str="", trans_via:str="ollama", + # 공통 API 설정 + openrouter_url:str="", openrouter_key:str="", + # 타임아웃 (설정에서 받아옴) + subtitle_timeout:int=0, # 0=OLLAMA_TIMEOUT 기본값 ): - os.makedirs(OUTPUT_DIR, exist_ok=True) - wav_path = os.path.join(os.path.dirname(video_path), f"{file_id}_audio.wav") - result_files = {} + os.makedirs(OUTPUT_DIR,exist_ok=True) + wav_path=os.path.join(os.path.dirname(video_path),f"{file_id}_audio.wav") + tmp_mp3=None + result_files={} + timeout=subtitle_timeout if subtitle_timeout>0 else OLLAMA_TIMEOUT + + def _prog(pct, step, step_msg, msg): + self.update_state(state="PROGRESS",meta={"progress":pct,"step":step,"step_msg":step_msg,"message":msg}) try: - # ── Step 1: ffmpeg 오디오 추출 ──────────────────────── - self.update_state(state="PROGRESS", meta={ - "progress": 5, - "step": 1, - "step_msg": "오디오 추출 중...", - "message": "Step 1/3 — ffmpeg 오디오 추출 중..." - }) - - cmd = [ - "ffmpeg", "-y", - "-i", video_path, - "-vn", # 비디오 스트림 제거 - "-ar", "16000", # 16kHz — Whisper 최적 - "-ac", "1", # 모노 - "-c:a", "pcm_s16le",# WAV 무손실 - wav_path - ] - proc = subprocess.run(cmd, capture_output=True, timeout=600) - if proc.returncode != 0: - err = proc.stderr.decode(errors="replace")[-500:] - raise Exception(f"ffmpeg 오디오 추출 실패: {err}") - if not os.path.exists(wav_path) or os.path.getsize(wav_path) < 1000: + # ── Step 1: ffmpeg ──────────────────────────────────── + _prog(5,1,"오디오 추출 중...","Step 1/3 — ffmpeg 오디오 추출 중...") + cmd=["ffmpeg","-y","-i",video_path,"-vn","-ar","16000","-ac","1","-c:a","pcm_s16le",wav_path] + proc=subprocess.run(cmd,capture_output=True,timeout=600) + if proc.returncode!=0: + raise Exception(f"ffmpeg 오디오 추출 실패: {proc.stderr.decode(errors='replace')[-300:]}") + if not os.path.exists(wav_path) or os.path.getsize(wav_path)<1000: raise Exception("ffmpeg가 오디오를 추출하지 못했습니다. 영상에 오디오 트랙이 있는지 확인하세요.") - try: os.remove(video_path) except: pass - # ── Step 2: Whisper STT → 원어 자막 ─────────────────── - self.update_state(state="PROGRESS", meta={ - "progress": 15, - "step": 2, - "step_msg": "음성 인식 중...", - "message": "Step 2/3 — Whisper 음성 인식 시작..." - }) + # ── Step 2: STT ────────────────────────────────────── + _prog(15,2,"음성 인식 중...","Step 2/3 — 음성 인식 시작...") + segments=[]; duration=0.0; detected_lang="" - whisper = get_model() - lang = src_language.strip() or None - segments_gen, info = whisper.transcribe( - wav_path, - language=lang, - beam_size=BEAM_SIZE, - initial_prompt=INITIAL_PROMPT, - vad_filter=True, - vad_parameters=dict(min_silence_duration_ms=500), - word_timestamps=False, - ) + if stt_engine in ("groq","openai"): + api_key=groq_api_key if stt_engine=="groq" else openai_api_key + base_url=GROQ_BASE if stt_engine=="groq" else OPENAI_BASE + if not api_key: + raise Exception(f"{stt_engine.upper()} API 키가 없습니다. 설정에서 입력하세요.") + import tempfile + with tempfile.NamedTemporaryFile(suffix=".mp3",delete=False) as tf: tmp_mp3=tf.name + r=subprocess.run(["ffmpeg","-y","-i",wav_path,"-ar","16000","-ac","1","-b:a","128k",tmp_mp3], + capture_output=True,timeout=300) + if r.returncode!=0: raise Exception("MP3 변환 실패") + _prog(25,2,"API 음성 인식 중...",f"Step 2/3 — {stt_engine.upper()} API 인식 중...") + result=_api_transcribe(tmp_mp3,api_key,base_url,src_language) + segments=[{"start":round(s.get("start",0),3),"end":round(s.get("end",0),3),"text":s.get("text","").strip()} + for s in result.get("segments",[])] + detected_lang=result.get("language","") + duration=result.get("duration",0) or (segments[-1]["end"] if segments else 0) + try: os.remove(tmp_mp3); tmp_mp3=None + except: pass + else: + whisper=get_model() + lang=src_language.strip() or None + segments_gen,info=whisper.transcribe(wav_path,language=lang,beam_size=BEAM_SIZE, + initial_prompt=INITIAL_PROMPT,vad_filter=True, + vad_parameters=dict(min_silence_duration_ms=500),word_timestamps=False) + duration=info.duration; detected_lang=info.language + for seg in segments_gen: + segments.append({"start":round(seg.start,3),"end":round(seg.end,3),"text":seg.text.strip()}) + if duration>0: + pct=15+int((seg.end/duration)*50) + _prog(min(pct,65),2,f"{seg.end:.0f}s/{duration:.0f}s 인식",f"Step 2/3 — {seg.end:.0f}s / {duration:.0f}s") - segments = [] - duration = info.duration - detected_lang = info.language - - for seg in segments_gen: - segments.append({ - "start": round(seg.start, 3), - "end": round(seg.end, 3), - "text": seg.text.strip(), - }) - if duration > 0: - pct = 15 + int((seg.end / duration) * 55) - self.update_state(state="PROGRESS", meta={ - "progress": min(pct, 70), - "step": 2, - "step_msg": f"{seg.end:.0f}s / {duration:.0f}s 인식 완료", - "message": f"Step 2/3 — {seg.end:.0f}s / {duration:.0f}s", - }) - - try: os.remove(wav_path) + try: os.remove(wav_path); wav_path=None except: pass if not segments: raise Exception("음성이 감지되지 않았습니다. 영상에 음성이 있는지 확인하세요.") + # ── Step 2b: LLM 교정 ──────────────────────────────── + if refine_model.strip(): + use_or_refine=(refine_via=="openrouter" and bool(openrouter_key)) + total=len(segments); CHUNK=25; refined=[] + for ci,start in enumerate(range(0,total,CHUNK)): + chunk=segments[start:start+CHUNK] + pct=67+int((ci*CHUNK/total)*6) + _prog(min(pct,73),2,f"교정 {min(start+CHUNK,total)}/{total}", + f"Step 2/3 — LLM 교정 중... ({min(start+CHUNK,total)}/{total})") + batch=[s["text"] for s in chunk] + refined.extend(_refine_batch(batch,refine_model,use_or_refine,openrouter_url,openrouter_key,timeout)) + segments=[{**seg,"text":refined[i] if i