feat: 자막 생성 탭 (ffmpeg+Whisper+LLM 3단계 파이프라인)

This commit is contained in:
root
2026-05-02 07:28:34 +09:00
parent 4fc3da1a2d
commit b3805c2b0b
3 changed files with 884 additions and 608 deletions

View File

@@ -10,7 +10,7 @@ from typing import List
from auth import (authenticate, create_access_token, init_users, from auth import (authenticate, create_access_token, init_users,
require_auth, require_admin, require_stt, require_ocr, require_auth, require_admin, require_stt, require_ocr,
list_users, create_user, update_user, delete_user) list_users, create_user, update_user, delete_user)
from tasks import celery_app, transcribe_task from tasks import celery_app, transcribe_task, subtitle_pipeline_task
from ocr_tasks import ocr_task from ocr_tasks import ocr_task
app = FastAPI(title="VoiceScript API") app = FastAPI(title="VoiceScript API")
@@ -30,18 +30,9 @@ os.makedirs(UPLOAD_DIR, exist_ok=True)
os.makedirs(OUTPUT_DIR, exist_ok=True) os.makedirs(OUTPUT_DIR, exist_ok=True)
AUDIO_EXT = {"mp3","mp4","wav","m4a","ogg","flac","aac","wma","webm", AUDIO_EXT = {"mp3","mp4","wav","m4a","ogg","flac","aac","wma","webm",
"mkv","avi","mov","ts","mts","m2ts","wmv","flv","rmvb", "mkv","avi","mov","ts","mts","m2ts","wmv","flv","h264","h265","hevc","264","265"}
"h264","h265","hevc","264","265"}
IMAGE_EXT = {"jpg","jpeg","png","bmp","tiff","tif","webp","gif"} IMAGE_EXT = {"jpg","jpeg","png","bmp","tiff","tif","webp","gif"}
VIDEO_EXT = {"mp4","mkv","avi","mov","webm","ts","mts","m2ts","wmv","flv","h264","h265","hevc","264","265","m4v","3gp","rm","rmvb"}
SUPPORTED_LANGS = {
"ko":"한국어","en":"English","ja":"日本語","zh":"中文(简体)",
"zh-tw":"中文(繁體)","fr":"Français","de":"Deutsch","es":"Español",
"it":"Italiano","pt":"Português","ru":"Русский","ar":"العربية",
"vi":"Tiếng Việt","th":"ไทย","id":"Bahasa Indonesia",
"nl":"Nederlands","pl":"Polski","tr":"Türkçe","sv":"Svenska",
"uk":"Українська","hi":"हिन्दी","bn":"বাংলা",
}
_DEFAULT_SETTINGS = { _DEFAULT_SETTINGS = {
"stt_ollama_model":"","ocr_ollama_model":"granite3.2-vision:latest", "stt_ollama_model":"","ocr_ollama_model":"granite3.2-vision:latest",
@@ -85,7 +76,7 @@ def append_history(record:dict):
history.insert(0,record); _write_history(history[:HISTORY_MAX]) history.insert(0,record); _write_history(history[:HISTORY_MAX])
except: pass except: pass
def _update_history_by_task(task_id:str,result:dict,success:bool,error_msg:str=""): def _update_history_by_task(task_id:str, result:dict, success:bool, error_msg:str=""):
with _hist_lock: with _hist_lock:
if not HISTORY_FILE.exists(): return if not HISTORY_FILE.exists(): return
try: try:
@@ -93,9 +84,10 @@ def _update_history_by_task(task_id:str,result:dict,success:bool,error_msg:str="
for h in history: for h in history:
if h.get("task_id")!=task_id: continue if h.get("task_id")!=task_id: continue
if h.get("status")!="processing": break if h.get("status")!="processing": break
if not success: h["status"]="failed";h["output"]={"error":error_msg[:300]};break h["status"]="failed" if not success else "success"
h["status"]="success" if not success:
if h["type"]=="stt": h["output"]={"error":error_msg[:300]}
elif h["type"]=="stt":
text=result.get("text","") text=result.get("text","")
h["output"]={ h["output"]={
"filename":result.get("output_file",""), "filename":result.get("output_file",""),
@@ -107,11 +99,18 @@ def _update_history_by_task(task_id:str,result:dict,success:bool,error_msg:str="
"ollama_model":result.get("ollama_model",""), "ollama_model":result.get("ollama_model",""),
"openrouter_used":result.get("openrouter_used",False), "openrouter_used":result.get("openrouter_used",False),
"openrouter_model":result.get("openrouter_model",""), "openrouter_model":result.get("openrouter_model",""),
"subtitle_mode":result.get("subtitle_mode",False), }
elif h["type"]=="subtitle":
h["output"]={
"detected_language":result.get("detected_language",""),
"duration_s":result.get("duration",0),
"segment_count":result.get("segment_count",0),
"translated":result.get("translated",False), "translated":result.get("translated",False),
"translate_to":result.get("translate_to",""), "translate_to":result.get("translate_to",""),
"srt_file":result.get("srt_file",""), "srt_orig":result.get("srt_orig",""),
"vtt_file":result.get("vtt_file",""), "vtt_orig":result.get("vtt_orig",""),
"srt_trans":result.get("srt_trans",""),
"vtt_trans":result.get("vtt_trans",""),
} }
else: else:
ft=result.get("full_text","") ft=result.get("full_text","")
@@ -129,12 +128,12 @@ def _update_history_by_task(task_id:str,result:dict,success:bool,error_msg:str="
_write_history(history) _write_history(history)
except: pass except: pass
def delete_history_item(history_id:str)->bool: def delete_history_item(hid:str)->bool:
with _hist_lock: with _hist_lock:
if not HISTORY_FILE.exists(): return False if not HISTORY_FILE.exists(): return False
try: try:
with open(HISTORY_FILE,"r",encoding="utf-8") as f: history=json.load(f) with open(HISTORY_FILE,"r",encoding="utf-8") as f: history=json.load(f)
new=[h for h in history if h.get("id")!=history_id] new=[h for h in history if h.get("id")!=hid]
if len(new)==len(history): return False if len(new)==len(history): return False
_write_history(new); return True _write_history(new); return True
except: return False except: return False
@@ -166,10 +165,6 @@ def me(user:dict=Depends(require_auth)):
return {"username":user["username"],"role":user.get("role","user"), return {"username":user["username"],"role":user.get("role","user"),
"permissions":user.get("permissions",{"stt":False,"ocr":False})} "permissions":user.get("permissions",{"stt":False,"ocr":False})}
@app.get("/api/languages")
def get_languages(user:dict=Depends(require_auth)):
return {"languages":SUPPORTED_LANGS}
# ════════════════════════════════════════════════════════════════ # ════════════════════════════════════════════════════════════════
# 시스템 정보 # 시스템 정보
@@ -189,115 +184,120 @@ def system_info(user:dict=Depends(require_auth)):
# ════════════════════════════════════════════════════════════════ # ════════════════════════════════════════════════════════════════
# STT 공통 디스패치 # STT 단일 / 배치
# ════════════════════════════════════════════════════════════════ # ════════════════════════════════════════════════════════════════
async def _dispatch_stt( async def _dispatch_stt(request,files,use_ollama,ollama_model,use_openrouter,openrouter_model,user):
request, files, s=_load_settings()
use_ollama, ollama_model, _uo=use_ollama.lower()=="true"; _uor=use_openrouter.lower()=="true"
use_openrouter, openrouter_model, if _uo and not ollama_model.strip(): ollama_model=s.get("stt_ollama_model","")
subtitle_mode, subtitle_format, if _uor and not openrouter_model.strip():openrouter_model=s.get("openrouter_stt_model","")
force_language,
translate_to, translate_model, translate_via,
user,
):
s = _load_settings()
_use_ollama = use_ollama.lower() == "true"
_use_openrouter = use_openrouter.lower() == "true"
_sub_mode = subtitle_mode.lower() == "true"
if _use_ollama and not ollama_model.strip(): ollama_model = s.get("stt_ollama_model","")
if _use_openrouter and not openrouter_model.strip():openrouter_model= s.get("openrouter_stt_model","")
if not translate_model.strip():
translate_model = ollama_model if translate_via=="ollama" else openrouter_model
results=[] results=[]
for file in files: for file in files:
_check_size(request) _check_size(request)
ext=_ext(file.filename) ext=_ext(file.filename)
if ext not in AUDIO_EXT: if ext not in AUDIO_EXT:
results.append({"error":f"{file.filename}: 지원하지 않는 형식","filename":file.filename}) results.append({"error":f"{file.filename}: 지원하지 않는 형식","filename":file.filename}); continue
continue
file_id=str(uuid.uuid4()) file_id=str(uuid.uuid4())
save_path=os.path.join(UPLOAD_DIR,f"{file_id}.{ext}") save_path=os.path.join(UPLOAD_DIR,f"{file_id}.{ext}")
await _save_upload(file,save_path) await _save_upload(file,save_path); file_size=os.path.getsize(save_path)
file_size=os.path.getsize(save_path) task=transcribe_task.delay(file_id,save_path,_uo,ollama_model,_uor,openrouter_model,
task=transcribe_task.delay( s.get("openrouter_url",""),s.get("openrouter_api_key",""))
file_id, save_path, append_history({"id":file_id,"task_id":task.id,"type":"stt","status":"processing",
_use_ollama, ollama_model, "timestamp":datetime.now().strftime("%Y-%m-%d %H:%M:%S"),"username":user["username"],
_use_openrouter, openrouter_model,
s.get("openrouter_url",""), s.get("openrouter_api_key",""),
_sub_mode, subtitle_format or "srt",
translate_to or "",
translate_model or "",
translate_via or "ollama",
force_language or "",
)
append_history({
"id":file_id,"task_id":task.id,"type":"stt",
"status":"processing",
"timestamp":datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
"username":user["username"],
"input":{"filename":file.filename,"size_bytes":file_size,"format":ext.upper()}, "input":{"filename":file.filename,"size_bytes":file_size,"format":ext.upper()},
"settings":{ "settings":{"model":os.getenv("WHISPER_MODEL","medium"),"language":os.getenv("WHISPER_LANGUAGE","ko"),
"model":os.getenv("WHISPER_MODEL","medium"), "compute_type":os.getenv("WHISPER_COMPUTE_TYPE","int8"),"cpu_threads":s.get("cpu_threads",0),
"language":force_language or os.getenv("WHISPER_LANGUAGE","auto"), "use_ollama":_uo,"ollama_model":ollama_model if _uo else "",
"compute_type":os.getenv("WHISPER_COMPUTE_TYPE","int8"), "use_openrouter":_uor,"openrouter_model":openrouter_model if _uor else ""},
"cpu_threads":s.get("cpu_threads",0), "output":None})
"subtitle_mode":_sub_mode,
"subtitle_format":subtitle_format,
"translate_to":translate_to,
"translate_model":translate_model,
"use_ollama":_use_ollama,"ollama_model":ollama_model if _use_ollama else "",
"use_openrouter":_use_openrouter,"openrouter_model":openrouter_model if _use_openrouter else "",
},
"output":None,
})
results.append({"task_id":task.id,"file_id":file_id,"filename":file.filename}) results.append({"task_id":task.id,"file_id":file_id,"filename":file.filename})
return results return results
# ════════════════════════════════════════════════════════════════
# STT — 단일 / 배치
# ════════════════════════════════════════════════════════════════
@app.post("/api/transcribe") @app.post("/api/transcribe")
async def transcribe( async def transcribe(request:Request,file:UploadFile=File(...),
request:Request, file:UploadFile=File(...), use_ollama:str=Form("false"),ollama_model:str=Form(""),
use_ollama:str=Form("false"), ollama_model:str=Form(""), use_openrouter:str=Form("false"),openrouter_model:str=Form(""),
use_openrouter:str=Form("false"), openrouter_model:str=Form(""), user:dict=Depends(require_stt)):
subtitle_mode:str=Form("false"), subtitle_format:str=Form("srt"), items=await _dispatch_stt(request,[file],use_ollama,ollama_model,use_openrouter,openrouter_model,user)
force_language:str=Form(""),
translate_to:str=Form(""), translate_model:str=Form(""), translate_via:str=Form("ollama"),
user:dict=Depends(require_stt),
):
items=await _dispatch_stt(request,[file],use_ollama,ollama_model,use_openrouter,openrouter_model,
subtitle_mode,subtitle_format,force_language,translate_to,translate_model,translate_via,user)
return items[0] return items[0]
@app.post("/api/transcribe/batch") @app.post("/api/transcribe/batch")
async def transcribe_batch( async def transcribe_batch(request:Request,files:List[UploadFile]=File(...),
request:Request, files:List[UploadFile]=File(...), use_ollama:str=Form("false"),ollama_model:str=Form(""),
use_ollama:str=Form("false"), ollama_model:str=Form(""), use_openrouter:str=Form("false"),openrouter_model:str=Form(""),
use_openrouter:str=Form("false"), openrouter_model:str=Form(""), user:dict=Depends(require_stt)):
subtitle_mode:str=Form("false"), subtitle_format:str=Form("srt"),
force_language:str=Form(""),
translate_to:str=Form(""), translate_model:str=Form(""), translate_via:str=Form("ollama"),
user:dict=Depends(require_stt),
):
if not files: raise HTTPException(400,"파일이 없습니다") if not files: raise HTTPException(400,"파일이 없습니다")
if len(files)>20: raise HTTPException(400,"한 번에 최대 20개까지 업로드할 수 있습니다") if len(files)>20: raise HTTPException(400,"최대 20개까지")
items=await _dispatch_stt(request,files,use_ollama,ollama_model,use_openrouter,openrouter_model, items=await _dispatch_stt(request,files,use_ollama,ollama_model,use_openrouter,openrouter_model,user)
subtitle_mode,subtitle_format,force_language,translate_to,translate_model,translate_via,user)
return {"items":items,"total":len(items)} return {"items":items,"total":len(items)}
# ════════════════════════════════════════════════════════════════ # ════════════════════════════════════════════════════════════════
# OCR 공통 디스패치 # 자막 파이프라인 (영상 → SRT/VTT)
# ════════════════════════════════════════════════════════════════
@app.post("/api/subtitle")
async def create_subtitle(
    request: Request,
    file: UploadFile = File(...),
    src_language: str = Form(""),      # source language (empty = auto)
    subtitle_fmt: str = Form("srt"),   # srt | vtt | both
    translate_to: str = Form(""),      # target language (empty = no translation)
    trans_model: str = Form(""),       # translation model
    trans_via: str = Form("ollama"),   # ollama | openrouter
    user: dict = Depends(require_stt),
):
    """Accept one media upload and queue a subtitle-generation task for it.

    The upload is stored under ``UPLOAD_DIR`` with a fresh UUID as its name,
    ``subtitle_pipeline_task`` is dispatched for it, and a ``"processing"``
    history record of type ``"subtitle"`` is appended.

    Args:
        request: Incoming request; handed to ``_check_size``
            (presumably an upload-size guard -- confirm against that helper).
        file: The uploaded media file.
        src_language: Source language code; an empty string is recorded in
            history as ``"auto"``.
        subtitle_fmt: ``"srt"``, ``"vtt"`` or ``"both"``; any other value
            falls back to ``"srt"``.
        translate_to: Target language code; passed to the task unchanged.
        trans_model: Translation model name. When blank, the default is read
            from settings (``openrouter_stt_model`` for OpenRouter,
            ``stt_ollama_model`` otherwise).
        trans_via: ``"ollama"`` or ``"openrouter"``; any other value falls
            back to ``"ollama"``.
        user: Authenticated user resolved by ``require_stt``.

    Returns:
        A dict with ``task_id``, ``file_id`` and the original ``filename``.

    Raises:
        HTTPException: 400 when the file extension is not in ``AUDIO_EXT``.
    """
    _check_size(request)
    ext = _ext(file.filename)
    # AUDIO_EXT is used on purpose: both video and audio containers are
    # accepted, since an audio-only file is enough to generate subtitles.
    if ext not in AUDIO_EXT:
        raise HTTPException(400, "지원하지 않는 형식입니다. 영상/오디오 파일을 업로드하세요.")

    # Normalize enumerated form fields instead of forwarding unknown values.
    if subtitle_fmt not in ("srt", "vtt", "both"):
        subtitle_fmt = "srt"
    # The default-model lookup below already treats every non-"openrouter"
    # value as Ollama; normalizing here keeps the value sent to the task and
    # stored in history consistent with the model that was actually chosen.
    if trans_via not in ("ollama", "openrouter"):
        trans_via = "ollama"

    s = _load_settings()
    # Fall back to the configured model when the client did not name one.
    if not trans_model.strip():
        if trans_via == "openrouter":
            trans_model = s.get("openrouter_stt_model", "")
        else:
            trans_model = s.get("stt_ollama_model", "")

    file_id = str(uuid.uuid4())
    save_path = os.path.join(UPLOAD_DIR, f"{file_id}.{ext}")
    await _save_upload(file, save_path)
    file_size = os.path.getsize(save_path)

    task = subtitle_pipeline_task.delay(
        file_id, save_path,
        src_language, subtitle_fmt,
        translate_to, trans_model, trans_via,
        s.get("openrouter_url", ""), s.get("openrouter_api_key", ""),
    )

    # Recorded as "processing"; "output" stays None at this point.
    append_history({
        "id": file_id, "task_id": task.id, "type": "subtitle",
        "status": "processing",
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "username": user["username"],
        "input": {"filename": file.filename, "size_bytes": file_size, "format": ext.upper()},
        "settings": {
            "src_language": src_language or "auto",
            "subtitle_fmt": subtitle_fmt,
            "translate_to": translate_to,
            "trans_model": trans_model,
            "trans_via": trans_via,
        },
        "output": None,
    })
    return {"task_id": task.id, "file_id": file_id, "filename": file.filename}
# ════════════════════════════════════════════════════════════════
# OCR 단일 / 배치
# ════════════════════════════════════════════════════════════════ # ════════════════════════════════════════════════════════════════
async def _dispatch_ocr(request,files,mode,backend,ollama_model,openrouter_model,custom_prompt,user): async def _dispatch_ocr(request,files,mode,backend,ollama_model,openrouter_model,custom_prompt,user):
if mode not in ("text","structure"): mode="text" if mode not in ("text","structure"): mode="text"
if backend not in ("paddle","ollama","openrouter"): backend="paddle" if backend not in ("paddle","ollama","openrouter"): backend="paddle"
s=_load_settings() s=_load_settings()
if backend=="ollama" and not ollama_model.strip(): ollama_model=s.get("ocr_ollama_model","granite3.2-vision:latest") if backend=="ollama" and not ollama_model.strip(): ollama_model=s.get("ocr_ollama_model","granite3.2-vision:latest")
if backend=="openrouter" and not openrouter_model.strip(): openrouter_model=s.get("openrouter_ocr_model","") if backend=="openrouter" and not openrouter_model.strip():openrouter_model=s.get("openrouter_ocr_model","")
results=[] results=[]
for file in files: for file in files:
_check_size(request) _check_size(request)
@@ -334,7 +334,7 @@ async def ocr_batch(request:Request,files:List[UploadFile]=File(...),
ollama_model:str=Form(""),openrouter_model:str=Form(""),custom_prompt:str=Form(""), ollama_model:str=Form(""),openrouter_model:str=Form(""),custom_prompt:str=Form(""),
user:dict=Depends(require_ocr)): user:dict=Depends(require_ocr)):
if not files: raise HTTPException(400,"파일이 없습니다") if not files: raise HTTPException(400,"파일이 없습니다")
if len(files)>20: raise HTTPException(400,"한 번에 최대 20개까지") if len(files)>20: raise HTTPException(400,"최대 20개까지")
items=await _dispatch_ocr(request,files,mode,backend,ollama_model,openrouter_model,custom_prompt,user) items=await _dispatch_ocr(request,files,mode,backend,ollama_model,openrouter_model,custom_prompt,user)
return {"items":items,"total":len(items)} return {"items":items,"total":len(items)}
@@ -346,7 +346,7 @@ async def ocr_batch(request:Request,files:List[UploadFile]=File(...),
def get_status(task_id:str,user:dict=Depends(require_auth)): def get_status(task_id:str,user:dict=Depends(require_auth)):
r=celery_app.AsyncResult(task_id) r=celery_app.AsyncResult(task_id)
if r.state=="PENDING": return {"state":"pending","progress":0,"message":"대기 중..."} if r.state=="PENDING": return {"state":"pending","progress":0,"message":"대기 중..."}
if r.state=="PROGRESS": m=r.info or {};return {"state":"progress","progress":m.get("progress",0),"message":m.get("message","처리 중...")} if r.state=="PROGRESS": m=r.info or {};return {"state":"progress","progress":m.get("progress",0),"step":m.get("step",0),"step_msg":m.get("step_msg",""),"message":m.get("message","처리 중...")}
if r.state=="SUCCESS": _update_history_by_task(task_id,r.result or {},True);return {"state":"success","progress":100,**(r.result or {})} if r.state=="SUCCESS": _update_history_by_task(task_id,r.result or {},True);return {"state":"success","progress":100,**(r.result or {})}
if r.state=="FAILURE": _update_history_by_task(task_id,{},False,str(r.info));return {"state":"failure","progress":0,"message":str(r.info)} if r.state=="FAILURE": _update_history_by_task(task_id,{},False,str(r.info));return {"state":"failure","progress":0,"message":str(r.info)}
return {"state":r.state.lower(),"progress":0} return {"state":r.state.lower(),"progress":0}
@@ -355,7 +355,7 @@ def get_status(task_id:str,user:dict=Depends(require_auth)):
def get_history(page:int=1,per_page:int=15,type_:str="",user:dict=Depends(require_auth)): def get_history(page:int=1,per_page:int=15,type_:str="",user:dict=Depends(require_auth)):
history=_load_history() history=_load_history()
if user.get("role")!="admin": history=[h for h in history if h.get("username")==user["username"]] if user.get("role")!="admin": history=[h for h in history if h.get("username")==user["username"]]
if type_ in ("stt","ocr"): history=[h for h in history if h.get("type")==type_] if type_ in ("stt","ocr","subtitle"): history=[h for h in history if h.get("type")==type_]
total=len(history);start=(page-1)*per_page total=len(history);start=(page-1)*per_page
return {"total":total,"page":page,"per_page":per_page,"items":history[start:start+per_page]} return {"total":total,"page":page,"per_page":per_page,"items":history[start:start+per_page]}
@@ -373,11 +373,10 @@ def download(filename:str,user:dict=Depends(require_auth)):
if ".." in filename or "/" in filename: raise HTTPException(400,"잘못된 파일명") if ".." in filename or "/" in filename: raise HTTPException(400,"잘못된 파일명")
path=os.path.join(OUTPUT_DIR,filename) path=os.path.join(OUTPUT_DIR,filename)
if not os.path.exists(path): raise HTTPException(404,"파일을 찾을 수 없습니다") if not os.path.exists(path): raise HTTPException(404,"파일을 찾을 수 없습니다")
if filename.endswith(".xlsx"): if filename.endswith(".xlsx"): media="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
media="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
elif filename.endswith(".srt"): media="text/plain"
elif filename.endswith(".vtt"): media="text/vtt" elif filename.endswith(".vtt"): media="text/vtt"
else: media="text/plain" elif filename.endswith(".srt"): media="text/plain; charset=utf-8"
else: media="text/plain; charset=utf-8"
return FileResponse(path,media_type=media,filename=filename) return FileResponse(path,media_type=media,filename=filename)
@app.get("/api/ollama/models") @app.get("/api/ollama/models")
@@ -392,12 +391,10 @@ def openrouter_models(user:dict=Depends(require_auth)):
s=_load_settings();api_key=s.get("openrouter_api_key","");base_url=s.get("openrouter_url","https://openrouter.ai/api/v1").rstrip("/") s=_load_settings();api_key=s.get("openrouter_api_key","");base_url=s.get("openrouter_url","https://openrouter.ai/api/v1").rstrip("/")
if not api_key: return {"models":[],"connected":False,"error":"API 키가 설정되지 않았습니다"} if not api_key: return {"models":[],"connected":False,"error":"API 키가 설정되지 않았습니다"}
try: try:
resp=httpx.get(f"{base_url}/models", resp=httpx.get(f"{base_url}/models",headers={"Authorization":f"Bearer {api_key}","HTTP-Referer":"https://voicescript.local"},timeout=12.0)
headers={"Authorization":f"Bearer {api_key}","HTTP-Referer":"https://voicescript.local"},timeout=12.0)
resp.raise_for_status() resp.raise_for_status()
all_models=resp.json().get("data",[]) all_models=resp.json().get("data",[])
vision=[m["id"] for m in all_models if any(k in m["id"].lower() vision=[m["id"] for m in all_models if any(k in m["id"].lower() for k in ["vision","claude-3","gemini","gpt-4o","llava","pixtral","qwen-vl","deepseek-vl"])]
for k in ["vision","claude-3","gemini","gpt-4o","llava","pixtral","qwen-vl","deepseek-vl"])]
return {"models":[m["id"] for m in all_models],"vision_models":vision,"connected":True,"total":len(all_models)} return {"models":[m["id"] for m in all_models],"vision_models":vision,"connected":True,"total":len(all_models)}
except httpx.HTTPStatusError as e: return {"models":[],"connected":False,"error":f"HTTP {e.response.status_code}"} except httpx.HTTPStatusError as e: return {"models":[],"connected":False,"error":f"HTTP {e.response.status_code}"}
except Exception as e: return {"models":[],"connected":False,"error":str(e)} except Exception as e: return {"models":[],"connected":False,"error":str(e)}
@@ -405,10 +402,9 @@ def openrouter_models(user:dict=Depends(require_auth)):
@app.post("/api/openrouter/test") @app.post("/api/openrouter/test")
def openrouter_test(api_key:str=Form(...),base_url:str=Form("https://openrouter.ai/api/v1"),user:dict=Depends(require_auth)): def openrouter_test(api_key:str=Form(...),base_url:str=Form("https://openrouter.ai/api/v1"),user:dict=Depends(require_auth)):
try: try:
resp=httpx.get(f"{base_url.rstrip('/')}/models", resp=httpx.get(f"{base_url.rstrip('/')}/models",headers={"Authorization":f"Bearer {api_key}","HTTP-Referer":"https://voicescript.local"},timeout=10.0)
headers={"Authorization":f"Bearer {api_key}","HTTP-Referer":"https://voicescript.local"},timeout=10.0) resp.raise_for_status();count=len(resp.json().get("data",[]))
resp.raise_for_status() return {"ok":True,"message":f"연결 성공 — {count}개 모델 사용 가능"}
count=len(resp.json().get("data",[]));return {"ok":True,"message":f"연결 성공 — {count}개 모델 사용 가능"}
except httpx.HTTPStatusError as e: return {"ok":False,"message":f"인증 실패 (HTTP {e.response.status_code})"} except httpx.HTTPStatusError as e: return {"ok":False,"message":f"인증 실패 (HTTP {e.response.status_code})"}
except Exception as e: return {"ok":False,"message":f"연결 실패: {str(e)}"} except Exception as e: return {"ok":False,"message":f"연결 실패: {str(e)}"}
@@ -466,7 +462,7 @@ def admin_update_user(username:str,perm_stt:str=Form("false"),perm_ocr:str=Form(
@app.delete("/api/admin/users/{username}") @app.delete("/api/admin/users/{username}")
def admin_delete_user(username:str,user:dict=Depends(require_admin)): def admin_delete_user(username:str,user:dict=Depends(require_admin)):
ok,msg=delete_user(username); ok,msg=delete_user(username)
if not ok: raise HTTPException(400,msg) if not ok: raise HTTPException(400,msg)
return {"ok":True,"message":msg} return {"ok":True,"message":msg}

View File

@@ -60,6 +60,7 @@ header h1 span{color:var(--accent)}
.nav-tab.admin-tab.active{color:var(--orange);border-bottom-color:var(--orange)} .nav-tab.admin-tab.active{color:var(--orange);border-bottom-color:var(--orange)}
.nav-tab.settings-tab.active{color:var(--blue);border-bottom-color:var(--blue)} .nav-tab.settings-tab.active{color:var(--blue);border-bottom-color:var(--blue)}
.nav-tab.history-tab.active{color:var(--purple);border-bottom-color:var(--purple)} .nav-tab.history-tab.active{color:var(--purple);border-bottom-color:var(--purple)}
.nav-tab.subtitle-tab.active{color:var(--blue);border-bottom-color:var(--blue)}
/* ── PAGE ── */ /* ── PAGE ── */
.page{display:none;flex:1;flex-direction:column} .page{display:none;flex:1;flex-direction:column}
@@ -251,18 +252,56 @@ textarea.cprompt{width:100%;background:var(--surf);border:1px solid var(--border
.ollama-status{font-family:var(--mono);font-size:.63rem;padding:4px 9px;border-radius:2px} .ollama-status{font-family:var(--mono);font-size:.63rem;padding:4px 9px;border-radius:2px}
.ollama-status.ok{background:rgba(0,229,160,.1);color:var(--accent);border:1px solid rgba(0,229,160,.2)} .ollama-status.ok{background:rgba(0,229,160,.1);color:var(--accent);border:1px solid rgba(0,229,160,.2)}
.ollama-status.fail{background:rgba(255,107,53,.1);color:var(--warn);border:1px solid rgba(255,107,53,.2)} .ollama-status.fail{background:rgba(255,107,53,.1);color:var(--warn);border:1px solid rgba(255,107,53,.2)}
/* ── 자막 모드 ── */ /* ── 자막 ── */
.sub-section{margin-top:12px;padding:12px;background:var(--surf2);border:1px solid #1c2840;border-radius:4px} #page-subtitle{display:none;flex-direction:column}
.sub-section-title{font-family:var(--mono);font-size:.6rem;letter-spacing:.1em;color:var(--blue);text-transform:uppercase;margin-bottom:10px;display:flex;align-items:center;gap:6px} #page-subtitle.active{display:flex}
.lang-select{width:100%;background:var(--surf);border:1px solid var(--border2);color:var(--text);padding:9px 10px;border-radius:3px;font-family:var(--mono);font-size:.78rem;outline:none;cursor:pointer;appearance:none;-webkit-appearance:none;background-image:url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='10' height='6'%3E%3Cpath d='M0 0l5 6 5-6z' fill='%2352526a'/%3E%3C/svg%3E");background-repeat:no-repeat;background-position:right 10px center;margin-top:4px} .subtitle-wrap{max-width:860px;margin:0 auto;padding:28px 16px;width:100%}
.lang-select:focus{border-color:var(--blue)} .step-indicator{display:flex;align-items:center;gap:0;margin-bottom:28px}
.fmt-btns{display:grid;grid-template-columns:1fr 1fr 1fr;gap:6px;margin-top:6px} .step-dot{width:32px;height:32px;border-radius:50%;border:2px solid var(--border2);background:var(--surf);display:flex;align-items:center;justify-content:center;font-family:var(--mono);font-size:.72rem;font-weight:600;color:var(--muted);transition:all .3s;flex-shrink:0}
.fmt-btn{padding:7px;background:var(--surf);border:1px solid var(--border2);color:var(--muted);border-radius:3px;font-family:var(--mono);font-size:.68rem;cursor:pointer;transition:all .15s;text-align:center;text-transform:uppercase} .step-dot.active{border-color:var(--blue);background:rgba(77,166,255,.1);color:var(--blue)}
.step-dot.done{border-color:var(--accent);background:rgba(0,229,160,.1);color:var(--accent)}
.step-line{flex:1;height:2px;background:var(--border);transition:background .3s}
.step-line.done{background:var(--accent)}
.step-labels{display:flex;justify-content:space-between;margin-top:6px;margin-bottom:20px}
.step-label{font-family:var(--mono);font-size:.6rem;color:var(--muted);text-align:center;flex:1;letter-spacing:.06em;text-transform:uppercase}
.step-label.active{color:var(--blue)}.step-label.done{color:var(--accent)}
.sub-card{background:var(--surf);border:1px solid var(--border2);border-radius:6px;padding:20px;margin-bottom:14px}
.sub-card h3{font-family:var(--mono);font-size:.68rem;letter-spacing:.1em;color:var(--muted);text-transform:uppercase;margin-bottom:14px;padding-bottom:10px;border-bottom:1px solid var(--border)}
.lang-grid{display:grid;grid-template-columns:1fr 1fr;gap:10px}
.sub-select{width:100%;background:var(--surf2);border:1px solid var(--border2);color:var(--text);padding:9px 10px;border-radius:3px;font-family:var(--mono);font-size:.78rem;outline:none;cursor:pointer;appearance:none;-webkit-appearance:none;background-image:url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='10' height='6'%3E%3Cpath d='M0 0l5 6 5-6z' fill='%2352526a'/%3E%3C/svg%3E");background-repeat:no-repeat;background-position:right 10px center}
.sub-select:focus{border-color:var(--blue)}
.fmt-row{display:grid;grid-template-columns:1fr 1fr 1fr;gap:8px;margin-top:6px}
.fmt-btn{padding:9px;background:var(--surf);border:1px solid var(--border2);color:var(--muted);border-radius:3px;font-family:var(--mono);font-size:.7rem;cursor:pointer;transition:all .15s;text-align:center;text-transform:uppercase}
.fmt-btn.active{background:rgba(77,166,255,.08);border-color:#3a7cc4;color:var(--blue)} .fmt-btn.active{background:rgba(77,166,255,.08);border-color:#3a7cc4;color:var(--blue)}
.sub-dl-btn{flex:1;padding:8px;background:rgba(77,166,255,.07);border:1px solid #3a7cc4;color:var(--blue);border-radius:3px;font-family:var(--mono);font-size:.66rem;cursor:pointer;transition:all .15s;text-transform:uppercase} .engine-row{display:grid;grid-template-columns:1fr 1fr;gap:8px;margin-top:6px}
.sub-dl-btn:hover{background:rgba(77,166,255,.15)} .sub-prog-box{background:var(--surf2);border:1px solid var(--border2);border-radius:6px;padding:18px;margin-bottom:14px;display:none}
/* ── 배치 큐 ── */ .sub-prog-steps{display:flex;flex-direction:column;gap:10px}
.batch-queue{margin-top:14px;display:flex;flex-direction:column;gap:6px;max-height:280px;overflow-y:auto} .sub-step-row{display:flex;align-items:center;gap:12px}
.sub-step-icon{width:26px;height:26px;border-radius:50%;border:2px solid var(--border2);display:flex;align-items:center;justify-content:center;font-size:.75rem;flex-shrink:0;transition:all .3s}
.sub-step-icon.waiting{border-color:var(--border2);color:var(--muted)}
.sub-step-icon.running{border-color:var(--blue);background:rgba(77,166,255,.1);color:var(--blue)}
.sub-step-icon.done{border-color:var(--accent);background:rgba(0,229,160,.1);color:var(--accent)}
.sub-step-icon.failed{border-color:var(--warn);background:rgba(255,107,53,.1);color:var(--warn)}
.sub-step-text{flex:1}
.sub-step-name{font-family:var(--mono);font-size:.72rem;color:var(--text)}
.sub-step-msg{font-family:var(--mono);font-size:.62rem;color:var(--muted);margin-top:2px}
.sub-prog-bar-wrap{height:3px;background:var(--border);border-radius:2px;overflow:hidden;margin-top:14px}
.sub-prog-bar{height:100%;background:var(--blue);border-radius:2px;transition:width .5s ease;width:0%}
.sub-result-card{background:var(--surf);border:1px solid rgba(0,229,160,.2);border-radius:6px;padding:20px;display:none}
.sub-result-title{font-family:var(--mono);font-size:.72rem;letter-spacing:.1em;color:var(--accent);text-transform:uppercase;margin-bottom:14px}
.sub-info-grid{display:grid;grid-template-columns:1fr 1fr;gap:8px;margin-bottom:14px}
.sub-info-item{background:var(--surf2);border:1px solid var(--border);border-radius:3px;padding:10px 12px}
.sub-info-label{font-family:var(--mono);font-size:.58rem;color:var(--muted);letter-spacing:.08em;text-transform:uppercase;margin-bottom:3px}
.sub-info-val{font-family:var(--mono);font-size:.8rem;color:var(--text);font-weight:600}
.sub-dl-grid{display:grid;grid-template-columns:1fr 1fr;gap:8px}
.sub-dl-btn{padding:11px;background:none;border:1px solid var(--border2);color:var(--text);border-radius:4px;font-family:var(--mono);font-size:.7rem;cursor:pointer;transition:all .15s;text-align:center;display:flex;flex-direction:column;align-items:center;gap:4px}
.sub-dl-btn:hover{border-color:var(--accent);color:var(--accent);background:rgba(0,229,160,.05)}
.sub-dl-btn .dl-icon{font-size:1.2rem;opacity:.6}
.sub-dl-btn .dl-label{font-weight:600}.sub-dl-btn .dl-lang{font-size:.58rem;color:var(--muted)}
.sub-dl-btn.trans{border-color:#3a7cc4;color:var(--blue)}
.sub-dl-btn.trans:hover{background:rgba(77,166,255,.07)}
/* 배치 큐 공통 */
.batch-queue{margin-top:14px;display:flex;flex-direction:column;gap:6px;max-height:260px;overflow-y:auto}
.batch-item{display:grid;grid-template-columns:1fr auto auto;align-items:center;gap:8px;padding:9px 12px;background:var(--surf);border:1px solid var(--border2);border-radius:4px;transition:border-color .2s} .batch-item{display:grid;grid-template-columns:1fr auto auto;align-items:center;gap:8px;padding:9px 12px;background:var(--surf);border:1px solid var(--border2);border-radius:4px;transition:border-color .2s}
.batch-item.running{border-color:var(--accent2)}.batch-item.done{border-color:rgba(0,229,160,.3)}.batch-item.failed{border-color:rgba(255,107,53,.3)}.batch-item.waiting{opacity:.6} .batch-item.running{border-color:var(--accent2)}.batch-item.done{border-color:rgba(0,229,160,.3)}.batch-item.failed{border-color:rgba(255,107,53,.3)}.batch-item.waiting{opacity:.6}
.bi-name{font-family:var(--mono);font-size:.72rem;overflow:hidden;text-overflow:ellipsis;white-space:nowrap} .bi-name{font-family:var(--mono);font-size:.72rem;overflow:hidden;text-overflow:ellipsis;white-space:nowrap}
@@ -274,6 +313,7 @@ textarea.cprompt{width:100%;background:var(--surf);border:1px solid var(--border
.batch-add-btn{margin-top:8px;padding:7px 14px;background:none;border:1px dashed var(--border2);color:var(--muted);border-radius:3px;font-family:var(--mono);font-size:.68rem;cursor:pointer;width:100%;transition:all .15s}.batch-add-btn:hover{border-color:var(--accent);color:var(--accent)} .batch-add-btn{margin-top:8px;padding:7px 14px;background:none;border:1px dashed var(--border2);color:var(--muted);border-radius:3px;font-family:var(--mono);font-size:.68rem;cursor:pointer;width:100%;transition:all .15s}.batch-add-btn:hover{border-color:var(--accent);color:var(--accent)}
.batch-clear-btn{padding:7px 14px;background:none;border:1px solid var(--border2);color:var(--muted);border-radius:3px;font-family:var(--mono);font-size:.68rem;cursor:pointer;transition:all .15s}.batch-clear-btn:hover{border-color:var(--warn);color:var(--warn)} .batch-clear-btn{padding:7px 14px;background:none;border:1px solid var(--border2);color:var(--muted);border-radius:3px;font-family:var(--mono);font-size:.68rem;cursor:pointer;transition:all .15s}.batch-clear-btn:hover{border-color:var(--warn);color:var(--warn)}
.batch-actions{display:flex;gap:8px;margin-top:10px} .batch-actions{display:flex;gap:8px;margin-top:10px}
@media(min-width:768px){.subtitle-wrap{padding:32px}.lang-grid{grid-template-columns:1fr 1fr}.sub-info-grid{grid-template-columns:1fr 1fr 1fr 1fr}.sub-dl-grid{grid-template-columns:repeat(4,1fr)}}
/* ── ADMIN ── */ /* ── ADMIN ── */
#page-admin{display:none;flex-direction:column} #page-admin{display:none;flex-direction:column}
@@ -375,6 +415,7 @@ textarea.cprompt{width:100%;background:var(--surf);border:1px solid var(--border
<button class="nav-tab active" data-page="stt">🎙 STT</button> <button class="nav-tab active" data-page="stt">🎙 STT</button>
<button class="nav-tab" data-page="ocr">🔍 OCR</button> <button class="nav-tab" data-page="ocr">🔍 OCR</button>
<button class="nav-tab history-tab" data-page="history">📋 이력</button> <button class="nav-tab history-tab" data-page="history">📋 이력</button>
<button class="nav-tab subtitle-tab" data-page="subtitle">🎬 자막</button>
<button class="nav-tab settings-tab" data-page="settings">⚙️ 설정</button> <button class="nav-tab settings-tab" data-page="settings">⚙️ 설정</button>
<button class="nav-tab admin-tab" data-page="admin" id="admin-tab" style="display:none">👤 관리자</button> <button class="nav-tab admin-tab" data-page="admin" id="admin-tab" style="display:none">👤 관리자</button>
</div> </div>
@@ -385,13 +426,12 @@ textarea.cprompt{width:100%;background:var(--surf);border:1px solid var(--border
<section class="panel"> <section class="panel">
<div class="panel-title">파일 업로드</div> <div class="panel-title">파일 업로드</div>
<div class="dropzone" id="stt-drop"> <div class="dropzone" id="stt-drop">
<input type="file" id="stt-input" accept=".mp3,.mp4,.wav,.m4a,.ogg,.flac,.aac,.wma,.webm,.mkv,.avi,.mov,.ts,.mts,.h264,.h265" multiple> <input type="file" id="stt-input" accept=".mp3,.mp4,.wav,.m4a,.ogg,.flac,.aac,.wma,.webm,.mkv,.avi,.mov" multiple>
<span class="drop-icon">🎵</span> <span class="drop-icon">🎵</span>
<div class="drop-label"><strong>탭하거나 드래그하여 선택</strong><br>영상(mp4/mkv/h265 등) · 음성 복수 선택 가능</div> <div class="drop-label"><strong>탭하거나 드래그하여 선택</strong><br>음성·영상 파일 복수 선택 가능</div>
<div class="drop-formats">mp3 · wav · m4a · ogg · flac · mp4 · webm</div> <div class="drop-formats">mp3 · wav · m4a · ogg · flac · mp4 · webm</div>
</div> </div>
<div class="file-info" id="stt-info"><div class="fname" id="stt-fname"></div><div class="fsize" id="stt-fsize"></div></div> <div class="file-info" id="stt-info"><div class="fname" id="stt-fname"></div><div class="fsize" id="stt-fsize"></div></div>
<!-- 배치 큐 -->
<div id="stt-queue" style="display:none"> <div id="stt-queue" style="display:none">
<div class="batch-queue" id="stt-queue-list"></div> <div class="batch-queue" id="stt-queue-list"></div>
<div class="batch-summary" id="stt-queue-summary"></div> <div class="batch-summary" id="stt-queue-summary"></div>
@@ -409,42 +449,6 @@ textarea.cprompt{width:100%;background:var(--surf);border:1px solid var(--border
<div class="sec-label" style="margin-top:0">후처리 모델</div> <div class="sec-label" style="margin-top:0">후처리 모델</div>
<select class="model-select" id="stt-ollama-model"><option value="">설정 기본 모델 사용</option></select> <select class="model-select" id="stt-ollama-model"><option value="">설정 기본 모델 사용</option></select>
</div> </div>
<!-- Subtitle-mode options inside the STT panel: a checkbox plus an initially hidden
     option group (#sub-opts) for spoken language, subtitle format and optional translation.
     Both language <select>s ship with only their empty default option. -->
<div class="sub-section" id="sub-section">
<div class="sub-section-title">🎬 자막 모드 (영상/음성 → 자막 파일)</div>
<label style="display:flex;align-items:center;gap:8px;cursor:pointer;font-family:var(--mono);font-size:.75rem;color:var(--text)">
<input type="checkbox" id="subtitle-mode" style="accent-color:var(--blue);width:15px;height:15px">
자막 파일 생성 (SRT / VTT)
</label>
<div id="sub-opts" style="display:none;margin-top:10px">
<div class="sec-label">음성 언어 (원어)</div>
<select class="lang-select" id="force-language">
<option value="">자동 감지</option>
</select>
<div class="sec-label">자막 포맷</div>
<div class="fmt-btns">
<button class="fmt-btn active" data-fmt="srt">SRT</button>
<button class="fmt-btn" data-fmt="vtt">VTT</button>
<button class="fmt-btn" data-fmt="both">둘 다</button>
</div>
<div class="sec-label">번역 (선택 — 빈칸이면 원어 자막)</div>
<select class="lang-select" id="translate-to">
<option value="">번역 안 함 (원어 자막)</option>
</select>
<!-- Translation engine/model controls; hidden by default (display:none). -->
<div id="trans-model-wrap" style="display:none;margin-top:8px">
<div class="sec-label">번역 엔진</div>
<div style="display:grid;grid-template-columns:1fr 1fr;gap:6px;margin-top:6px">
<button class="fmt-btn active" data-via="ollama" id="trans-via-ollama">🦙 Ollama</button>
<button class="fmt-btn" data-via="openrouter" id="trans-via-or">🌐 OpenRouter</button>
</div>
<div class="sec-label">번역 모델</div>
<select class="lang-select" id="translate-model">
<option value="">STT 엔진과 같은 모델 사용</option>
</select>
</div>
</div>
</div>
<button class="btn-start green" id="stt-btn" disabled>변환 시작</button> <button class="btn-start green" id="stt-btn" disabled>변환 시작</button>
<div class="prog-box" id="stt-prog"> <div class="prog-box" id="stt-prog">
<div class="prog-header"><span class="prog-msg" id="stt-pmsg">처리 중...</span><span class="prog-pct" id="stt-ppct">0%</span></div> <div class="prog-header"><span class="prog-msg" id="stt-pmsg">처리 중...</span><span class="prog-pct" id="stt-ppct">0%</span></div>
@@ -472,10 +476,7 @@ textarea.cprompt{width:100%;background:var(--surf);border:1px solid var(--border
<div class="tab-content" id="stt-segs"><div class="segments-list" id="stt-seglist"></div></div> <div class="tab-content" id="stt-segs"><div class="segments-list" id="stt-seglist"></div></div>
<div class="result-actions" id="stt-actions"> <div class="result-actions" id="stt-actions">
<button class="btn-act" id="stt-copy">복사</button> <button class="btn-act" id="stt-copy">복사</button>
<button class="btn-act primary" id="stt-dl">TXT</button> <button class="btn-act primary" id="stt-dl">TXT 저장</button>
<button class="sub-dl-btn" id="stt-dl-srt" style="display:none">SRT 저장</button>
<button class="sub-dl-btn" id="stt-dl-vtt" style="display:none">VTT 저장</button>
<button class="sub-dl-btn" id="stt-dl-srt-orig" style="display:none">원어 SRT</button>
<button class="btn-act" id="stt-new">새 파일</button> <button class="btn-act" id="stt-new">새 파일</button>
</div> </div>
</section> </section>
@@ -495,7 +496,6 @@ textarea.cprompt{width:100%;background:var(--surf);border:1px solid var(--border
</div> </div>
<div class="file-info" id="ocr-info"><div class="fname" id="ocr-fname"></div><div class="fsize" id="ocr-fsize"></div></div> <div class="file-info" id="ocr-info"><div class="fname" id="ocr-fname"></div><div class="fsize" id="ocr-fsize"></div></div>
<div id="ocr-preview-wrap"><img id="ocr-preview"></div> <div id="ocr-preview-wrap"><img id="ocr-preview"></div>
<!-- 배치 큐 -->
<div id="ocr-queue" style="display:none"> <div id="ocr-queue" style="display:none">
<div class="batch-queue" id="ocr-queue-list"></div> <div class="batch-queue" id="ocr-queue-list"></div>
<div class="batch-summary" id="ocr-queue-summary"></div> <div class="batch-summary" id="ocr-queue-summary"></div>
@@ -647,6 +647,174 @@ textarea.cprompt{width:100%;background:var(--surf);border:1px solid var(--border
</div> </div>
</div> </div>
<!-- ══ SUBTITLE PAGE ══ -->
<div class="page" id="page-subtitle">
<div class="subtitle-wrap">
<h2 style="font-family:var(--mono);font-size:.9rem;font-weight:600;letter-spacing:.06em;margin-bottom:20px">🎬 자막 생성</h2>
<!-- Step indicator: three numbered dots joined by two connector lines,
     followed by a caption row with one label per step. -->
<div class="step-indicator">
<div class="step-dot" id="sdot-1">1</div>
<div class="step-line" id="sline-1"></div>
<div class="step-dot" id="sdot-2">2</div>
<div class="step-line" id="sline-2"></div>
<div class="step-dot" id="sdot-3">3</div>
</div>
<div class="step-labels">
<span class="step-label" id="slabel-1">오디오 추출</span>
<span class="step-label" id="slabel-2">음성 인식</span>
<span class="step-label" id="slabel-3">번역 (선택)</span>
</div>
<!-- Input card: drop zone wrapping a single-file picker (no "multiple" attribute),
     plus a name/size read-out that starts hidden (display:none). -->
<div class="sub-card" id="sub-config-card">
<h3>📁 영상 / 오디오 파일</h3>
<div class="dropzone" id="sub-drop" style="padding:24px 16px">
<input type="file" id="sub-input" accept=".mp4,.mkv,.avi,.mov,.webm,.ts,.mts,.m2ts,.wmv,.flv,.h264,.h265,.mp3,.wav,.m4a,.ogg,.flac">
<span class="drop-icon" style="font-size:1.6rem">🎬</span>
<div class="drop-label"><strong>탭하거나 드래그하여 선택</strong><br>mp4 · mkv · avi · mov · h.264/h.265 등</div>
</div>
<div class="file-info" id="sub-info" style="display:none;margin-top:10px">
<div class="fname" id="sub-fname"></div>
<div class="fsize" id="sub-fsize"></div>
</div>
</div>
<!-- Subtitle settings: spoken (source) language and output format.
     The empty language value is the "auto-detect (recommended)" choice; SRT is pre-selected. -->
<div class="sub-card">
<h3>⚙️ 자막 설정</h3>
<div class="lang-grid">
<div>
<div class="sec-label">음성 언어 (원어)</div>
<select class="sub-select" id="sub-src-lang">
<option value="">🔍 자동 감지 (권장)</option>
<option value="ko">🇰🇷 한국어</option>
<option value="en">🇺🇸 English</option>
<option value="ja">🇯🇵 日本語</option>
<option value="zh">🇨🇳 中文(简体)</option>
<option value="zh-tw">🇹🇼 中文(繁體)</option>
<option value="fr">🇫🇷 Français</option>
<option value="de">🇩🇪 Deutsch</option>
<option value="es">🇪🇸 Español</option>
<option value="it">🇮🇹 Italiano</option>
<option value="pt">🇵🇹 Português</option>
<option value="ru">🇷🇺 Русский</option>
<option value="ar">🇸🇦 العربية</option>
<option value="vi">🇻🇳 Tiếng Việt</option>
<option value="th">🇹🇭 ไทย</option>
<option value="id">🇮🇩 Bahasa Indonesia</option>
<option value="nl">🇳🇱 Nederlands</option>
<option value="pl">🇵🇱 Polski</option>
<option value="tr">🇹🇷 Türkçe</option>
<option value="sv">🇸🇪 Svenska</option>
<option value="uk">🇺🇦 Українська</option>
<option value="hi">🇮🇳 हिन्दी</option>
</select>
</div>
<div>
<div class="sec-label">자막 포맷</div>
<!-- Format choice: srt, vtt or both. -->
<div class="fmt-row">
<button class="fmt-btn active" data-fmt="srt">SRT</button>
<button class="fmt-btn" data-fmt="vtt">VTT</button>
<button class="fmt-btn" data-fmt="both">둘 다</button>
</div>
</div>
</div>
</div>
<!-- Translation settings (optional). Per the helper text, leaving the target empty
     produces source-language subtitles only. -->
<div class="sub-card">
<h3>🌐 번역 설정 (선택사항)</h3>
<div style="font-family:var(--mono);font-size:.65rem;color:var(--muted);margin-bottom:10px">
빈칸으로 두면 원어 자막만 생성합니다
</div>
<div class="lang-grid">
<div>
<div class="sec-label">번역 대상 언어</div>
<!-- NOTE(review): the source-language list (#sub-src-lang) offers "uk" (Українська)
     but this target list does not — confirm whether the omission is intentional. -->
<select class="sub-select" id="sub-trans-lang">
<option value="">번역 안 함</option>
<option value="ko">🇰🇷 한국어</option>
<option value="en">🇺🇸 English</option>
<option value="ja">🇯🇵 日本語</option>
<option value="zh">🇨🇳 中文(简体)</option>
<option value="zh-tw">🇹🇼 中文(繁體)</option>
<option value="fr">🇫🇷 Français</option>
<option value="de">🇩🇪 Deutsch</option>
<option value="es">🇪🇸 Español</option>
<option value="it">🇮🇹 Italiano</option>
<option value="pt">🇵🇹 Português</option>
<option value="ru">🇷🇺 Русский</option>
<option value="ar">🇸🇦 العربية</option>
<option value="vi">🇻🇳 Tiếng Việt</option>
<option value="th">🇹🇭 ไทย</option>
<option value="id">🇮🇩 Bahasa Indonesia</option>
<option value="nl">🇳🇱 Nederlands</option>
<option value="pl">🇵🇱 Polski</option>
<option value="tr">🇹🇷 Türkçe</option>
<option value="sv">🇸🇪 Svenska</option>
<option value="hi">🇮🇳 हिन्दी</option>
</select>
</div>
<!-- Engine and model pickers; hidden by default (display:none). Ollama is pre-selected. -->
<div id="sub-trans-engine-wrap" style="display:none">
<div class="sec-label">번역 엔진</div>
<div class="engine-row">
<button class="fmt-btn active" data-via="ollama" id="sub-via-ollama">🦙 Ollama</button>
<button class="fmt-btn" data-via="openrouter" id="sub-via-or">🌐 OpenRouter</button>
</div>
<div class="sec-label">번역 모델</div>
<select class="sub-select" id="sub-trans-model" style="margin-top:4px">
<option value="">설정 기본 모델 사용</option>
</select>
</div>
</div>
</div>
<!-- Start button; rendered disabled in the initial markup. -->
<button class="btn-start blue" id="sub-btn" disabled style="background:var(--blue);color:#fff;margin-bottom:14px">자막 생성 시작</button>
<!-- Progress: one row per pipeline step (audio extraction, speech recognition,
     optional translation), each with a status icon and a message line, plus an overall bar.
     All three icons start in the "waiting" state. -->
<div class="sub-prog-box" id="sub-prog-box">
<div class="sub-prog-steps">
<div class="sub-step-row">
<div class="sub-step-icon waiting" id="sub-sicon-1"></div>
<div class="sub-step-text">
<div class="sub-step-name">Step 1 — 오디오 추출</div>
<div class="sub-step-msg" id="sub-smsg-1">ffmpeg으로 오디오 트랙 추출</div>
</div>
</div>
<div class="sub-step-row">
<div class="sub-step-icon waiting" id="sub-sicon-2"></div>
<div class="sub-step-text">
<div class="sub-step-name">Step 2 — 음성 인식</div>
<div class="sub-step-msg" id="sub-smsg-2">Whisper로 자막 생성</div>
</div>
</div>
<div class="sub-step-row">
<div class="sub-step-icon waiting" id="sub-sicon-3"></div>
<div class="sub-step-text">
<div class="sub-step-name">Step 3 — 번역 (선택)</div>
<div class="sub-step-msg" id="sub-smsg-3">LLM 번역 (미선택 시 건너뜀)</div>
</div>
</div>
</div>
<div class="sub-prog-bar-wrap"><div class="sub-prog-bar" id="sub-prog-bar"></div></div>
</div>
<!-- Error message container for the subtitle page (empty in the markup). -->
<div class="err-box" id="sub-err"></div>
<!-- Result card: four metadata cells (detected language, duration, subtitle count,
     translation) and a download grid. The value cells and #sub-dl-grid are empty in the
     markup — presumably filled by script once a job finishes. -->
<div class="sub-result-card" id="sub-result-card">
<div class="sub-result-title">✓ 자막 생성 완료</div>
<div class="sub-info-grid">
<div class="sub-info-item"><div class="sub-info-label">감지 언어</div><div class="sub-info-val" id="sub-res-lang"></div></div>
<div class="sub-info-item"><div class="sub-info-label">재생 시간</div><div class="sub-info-val" id="sub-res-dur"></div></div>
<div class="sub-info-item"><div class="sub-info-label">자막 수</div><div class="sub-info-val" id="sub-res-segs"></div></div>
<div class="sub-info-item"><div class="sub-info-label">번역</div><div class="sub-info-val" id="sub-res-trans"></div></div>
</div>
<div class="sub-dl-grid" id="sub-dl-grid"></div>
<button class="btn-act" id="sub-new" style="margin-top:12px;width:100%">새 파일</button>
</div>
</div>
</div>
<!-- ══ ADMIN ══ --> <!-- ══ ADMIN ══ -->
<div class="page" id="page-admin"> <div class="page" id="page-admin">
<div class="admin-wrap"> <div class="admin-wrap">
@@ -742,6 +910,7 @@ function applyUserUI(){
const b=document.getElementById('user-badge');b.textContent=currentUser.role==='admin'?'ADMIN':'USER';b.className='user-badge '+currentUser.role; const b=document.getElementById('user-badge');b.textContent=currentUser.role==='admin'?'ADMIN':'USER';b.className='user-badge '+currentUser.role;
document.getElementById('admin-tab').style.display=currentUser.role==='admin'?'flex':'none'; document.getElementById('admin-tab').style.display=currentUser.role==='admin'?'flex':'none';
document.getElementById('btn-hist-clear').style.display=currentUser.role==='admin'?'block':'none'; document.getElementById('btn-hist-clear').style.display=currentUser.role==='admin'?'block':'none';
if(appSettings.openrouter_api_key_masked)loadOrModels();
} }
const showLogin=()=>{document.getElementById('login-overlay').style.display='flex';stopSysMonitor()}; const showLogin=()=>{document.getElementById('login-overlay').style.display='flex';stopSysMonitor()};
const hideLogin=()=>document.getElementById('login-overlay').style.display='none'; const hideLogin=()=>document.getElementById('login-overlay').style.display='none';
@@ -801,7 +970,6 @@ function populateModelSelects(){
fill(document.getElementById('ocr-ollama-model'),appSettings.ocr_ollama_model,'설정 기본 모델 사용'); fill(document.getElementById('ocr-ollama-model'),appSettings.ocr_ollama_model,'설정 기본 모델 사용');
fill(document.getElementById('setting-stt-model'),appSettings.stt_ollama_model,'(없음)'); fill(document.getElementById('setting-stt-model'),appSettings.stt_ollama_model,'(없음)');
fill(document.getElementById('setting-ocr-model'),appSettings.ocr_ollama_model,'(없음)'); fill(document.getElementById('setting-ocr-model'),appSettings.ocr_ollama_model,'(없음)');
populateOrSelects();
} }
// ══ 설정 ══ // ══ 설정 ══
@@ -810,6 +978,9 @@ async function loadSettings(){
const th=appSettings.cpu_threads||0;cpuSlider.value=th;cpuDisplay.textContent=th===0?'0 (자동)':th+' 스레드'; const th=appSettings.cpu_threads||0;cpuSlider.value=th;cpuDisplay.textContent=th===0?'0 (자동)':th+' 스레드';
document.getElementById('stt-timeout').value=appSettings.stt_timeout||0; document.getElementById('stt-timeout').value=appSettings.stt_timeout||0;
document.getElementById('ollama-timeout').value=appSettings.ollama_timeout||600; document.getElementById('ollama-timeout').value=appSettings.ollama_timeout||600;
if(appSettings.openrouter_url)document.getElementById('or-url').value=appSettings.openrouter_url;
if(appSettings.openrouter_api_key_masked&&document.getElementById('or-api-key'))
document.getElementById('or-api-key').placeholder='저장된 키: '+appSettings.openrouter_api_key_masked;
populateModelSelects()}catch{} populateModelSelects()}catch{}
} }
document.getElementById('btn-save-settings').addEventListener('click',async()=>{ document.getElementById('btn-save-settings').addEventListener('click',async()=>{
@@ -837,64 +1008,19 @@ document.querySelectorAll('.nav-tab').forEach(btn=>{
if(btn.dataset.page==='admin')loadUsers(); if(btn.dataset.page==='admin')loadUsers();
if(btn.dataset.page==='settings'){loadSettings();fetchSysInfo()} if(btn.dataset.page==='settings'){loadSettings();fetchSysInfo()}
if(btn.dataset.page==='history'){histPage=1;loadHistory()} if(btn.dataset.page==='history'){histPage=1;loadHistory()}
if(btn.dataset.page==='subtitle')fillSubTransModels();
}); });
}); });
// ══ STT — batch queue ══
// DOM handles for the STT drop zone and its file input.
const sttDrop=document.getElementById('stt-drop'),sttInput=document.getElementById('stt-input');
// Upload queue: one entry per selected file (file, taskId, outputFile, status).
// Declared exactly once — a second `let sttQueue` in the same scope is a SyntaxError.
let sttQueue=[];
// Subtitle format and translation backend chosen in the STT-panel subtitle options.
let sttSubFmt='srt',sttTransVia='ollama';
// Language code → display name, filled by loadLanguages().
let languages={};
// File extensions (lower-case, no dot) that addSttFiles() accepts.
const AUDIO_EXTS=['mp3','mp4','wav','m4a','ogg','flac','aac','wma','webm','mkv','avi','mov','ts','mts','h264','h265'];
// Load the language list.
/**
 * Fetch GET /api/languages and append one <option> per language ("Name (code)")
 * to both #force-language and #translate-to. Also stores the map in `languages`.
 * Any failure (network, JSON, missing element) is swallowed by the empty catch,
 * leaving the selects with whatever options they already had.
 */
async function loadLanguages(){
try{const r=await api('GET','/api/languages');const d=await r.json();languages=d.languages||{};
const sel1=document.getElementById('force-language');
const sel2=document.getElementById('translate-to');
Object.entries(languages).forEach(([code,name])=>{
sel1.appendChild(Object.assign(document.createElement('option'),{value:code,textContent:`${name} (${code})`}));
sel2.appendChild(Object.assign(document.createElement('option'),{value:code,textContent:`${name} (${code})`}));
});
}catch{}
}
// Fill the translation-model dropdown.
/**
 * Rebuild #translate-model from the model list of the active translation backend
 * (orModels when sttTransVia is 'openrouter', otherwise ollamaModels).
 * The previously selected value is re-selected if it is still in the list.
 */
function fillTranslateModels(){
const sel=document.getElementById('translate-model');
const cur=sel.value;sel.innerHTML='<option value="">STT 엔진과 같은 모델 사용</option>';
const models=sttTransVia==='openrouter'?orModels:ollamaModels;
models.forEach(m=>{const o=document.createElement('option');o.value=m;o.textContent=m;if(m===cur)o.selected=true;sel.appendChild(o)});
}
// Subtitle-mode toggle: show #sub-opts only while the checkbox is ticked.
document.getElementById('subtitle-mode').addEventListener('change',function(){
document.getElementById('sub-opts').style.display=this.checked?'block':'none';
});
// Format buttons: single-select; the chosen data-fmt is stored in sttSubFmt.
// NOTE(review): the selector is document-wide, so it matches every .fmt-btn[data-fmt]
// on the page, not only the ones in the STT panel — confirm that is intended.
document.querySelectorAll('.fmt-btn[data-fmt]').forEach(btn=>{
btn.addEventListener('click',()=>{document.querySelectorAll('.fmt-btn[data-fmt]').forEach(b=>b.classList.remove('active'));btn.classList.add('active');sttSubFmt=btn.dataset.fmt});
});
// Translation target chosen → reveal the engine/model options and refresh the model list.
document.getElementById('translate-to').addEventListener('change',function(){
document.getElementById('trans-model-wrap').style.display=this.value?'block':'none';
if(this.value)fillTranslateModels();
});
// Translation engine buttons: single-select; the chosen data-via is stored in sttTransVia.
// NOTE(review): `button[data-via]` is also document-wide — verify against other pages.
document.querySelectorAll('button[data-via]').forEach(btn=>{
btn.addEventListener('click',()=>{
document.querySelectorAll('button[data-via]').forEach(b=>b.classList.remove('active'));
btn.classList.add('active');sttTransVia=btn.dataset.via;fillTranslateModels();
});
});
// Add files to the STT queue.
/**
 * Queue every file whose extension is listed in AUDIO_EXTS.
 *
 * Files with any other extension are dropped silently. When nothing survives the
 * filter the function returns early and neither the queue nor the start button changes.
 *
 * @param {FileList|File[]} fileList Files to consider, e.g. `sttInput.files`.
 */
function addSttFiles(fileList){
  const accepted=Array.from(fileList).filter(f=>AUDIO_EXTS.includes(f.name.split('.').pop().toLowerCase()));
  if(!accepted.length)return;
  // One entry per file; taskId and outputFile stay null until the server reports them.
  accepted.forEach(f=>sttQueue.push({file:f,taskId:null,outputFile:null,status:'waiting'}));
  renderSttQueue();
  document.getElementById('stt-btn').disabled=false;
}
// Registered once: a second identical registration would queue every selection twice.
sttInput.addEventListener('change',()=>addSttFiles(sttInput.files));
@@ -909,26 +1035,19 @@ function renderSttQueue(){
qEl.style.display='block';list.innerHTML=''; qEl.style.display='block';list.innerHTML='';
sttQueue.forEach((item,i)=>{ sttQueue.forEach((item,i)=>{
const div=document.createElement('div');div.className='batch-item '+item.status; const div=document.createElement('div');div.className='batch-item '+item.status;
const dlBtns=item.status==='done'?[ div.innerHTML=`<div><div class="bi-name">${esc(item.file.name)}</div><div class="bi-prog" id="sbi-${i}" style="width:0%;display:${item.status==='running'?'block':'none'}"></div></div><span class="bi-status ${item.status}">${{waiting:'대기',running:'변환중',done:'완료',failed:'실패'}[item.status]}</span><span>${item.status==='done'&&item.outputFile?`<button class="bi-dl" onclick="dlFile('${esc(item.outputFile)}')">📥 TXT</button>`:''}</span>`;
item.outputFile?`<button class="bi-dl" onclick="dlFile('${esc(item.outputFile)}')">TXT</button>`:'', list.appendChild(div);
item.srtFile?`<button class="bi-dl" onclick="dlFile('${esc(item.srtFile)}')">SRT</button>`:'',
item.vttFile?`<button class="bi-dl" onclick="dlFile('${esc(item.vttFile)}')">VTT</button>`:'',
item.srtOrigFile?`<button class="bi-dl" onclick="dlFile('${esc(item.srtOrigFile)}')">원어SRT</button>`:'',
].filter(Boolean).join(''):''
div.innerHTML=`<div><div class="bi-name">${esc(item.file.name)}</div><div class="bi-prog" id="stt-bp-${i}" style="width:0%;display:${item.status==='running'?'block':'none'}"></div></div><span class="bi-status ${item.status}">${{waiting:'대기',running:'변환중',done:'완료',failed:'실패'}[item.status]}</span><span style="display:flex;gap:3px">${dlBtns}</span>`;
item.el=div;list.appendChild(div);
}); });
const done=sttQueue.filter(i=>i.status==='done').length,failed=sttQueue.filter(i=>i.status==='failed').length,running=sttQueue.filter(i=>i.status==='running').length; const done=sttQueue.filter(i=>i.status==='done').length,failed=sttQueue.filter(i=>i.status==='failed').length,running=sttQueue.filter(i=>i.status==='running').length;
sum.innerHTML=`전체 <span>${sttQueue.length}</span>개 · 완료 <span>${done}</span> · 실패 <span>${failed}</span>${running?` · 진행중 <span>${running}</span>`:''}`; sum.innerHTML=`전체 <span>${sttQueue.length}</span>개 · 완료 <span>${done}</span> · 실패 <span>${failed}</span>${running?` · 진행중 <span>${running}</span>`:''}`;
} }
// Engine buttons: exactly one .engine-btn inside #page-stt is active at a time.
document.querySelectorAll('#page-stt .engine-btn').forEach(btn=>{
  btn.addEventListener('click',()=>{
    document.querySelectorAll('#page-stt .engine-btn').forEach(b=>b.classList.remove('active'));
    btn.classList.add('active');
    sttEngine=btn.dataset.engine;
    // Show only the option panel that belongs to the selected engine.
    document.getElementById('stt-ollama-opts').classList.toggle('visible',sttEngine==='whisper+ollama');
    // Optional chaining only: an unguarded `.classList` here throws when #stt-or-opts
    // is not in the markup and would abort the rest of the handler.
    document.getElementById('stt-or-opts')?.classList.toggle('visible',sttEngine==='whisper+openrouter');
    // Green start button for plain Whisper, purple for every other engine.
    document.getElementById('stt-btn').className='btn-start '+(sttEngine!=='whisper'?'purple':'green');
  });
});
@@ -939,22 +1058,13 @@ document.getElementById('stt-btn').addEventListener('click',async()=>{
document.getElementById('stt-err').style.display='none'; document.getElementById('stt-err').style.display='none';
document.getElementById('stt-btn').disabled=true; document.getElementById('stt-btn').disabled=true;
document.getElementById('stt-prog').style.display='block'; document.getElementById('stt-prog').style.display='block';
setProg('stt',0,`${pending.length} 파일 업로드 중...`); setProg('stt',0,`${pending.length}개 업로드 중...`);
const subMode=document.getElementById('subtitle-mode').checked;
const fd=new FormData(); const fd=new FormData();
pending.forEach(item=>fd.append('files',item.file)); pending.forEach(item=>fd.append('files',item.file));
fd.append('use_ollama',sttEngine==='whisper+ollama'?'true':'false'); fd.append('use_ollama',sttEngine==='whisper+ollama'?'true':'false');
fd.append('ollama_model',document.getElementById('stt-ollama-model')?.value||''); fd.append('ollama_model',document.getElementById('stt-ollama-model')?.value||'');
fd.append('use_openrouter',sttEngine==='whisper+openrouter'?'true':'false'); fd.append('use_openrouter',sttEngine==='whisper+openrouter'?'true':'false');
fd.append('openrouter_model',document.getElementById('stt-or-model')?.value||''); fd.append('openrouter_model',document.getElementById('stt-or-model')?.value||'');
fd.append('subtitle_mode',subMode?'true':'false');
fd.append('subtitle_format',sttSubFmt);
fd.append('force_language',document.getElementById('force-language').value||'');
fd.append('translate_to',document.getElementById('translate-to').value||'');
fd.append('translate_model',document.getElementById('translate-model').value||'');
fd.append('translate_via',sttTransVia);
try{ try{
const url=pending.length===1?'/api/transcribe':'/api/transcribe/batch'; const url=pending.length===1?'/api/transcribe':'/api/transcribe/batch';
const r=await api('POST',url,fd);const d=await r.json(); const r=await api('POST',url,fd);const d=await r.json();
@@ -963,37 +1073,23 @@ document.getElementById('stt-btn').addEventListener('click',async()=>{
let pi=0; let pi=0;
sttQueue.forEach((qItem,qi)=>{ sttQueue.forEach((qItem,qi)=>{
if(qItem.status!=='waiting')return; if(qItem.status!=='waiting')return;
const taskItem=items[pi++];if(!taskItem)return; const ti=items[pi++];if(!ti)return;
if(taskItem.error){qItem.status='failed';return} if(ti.error){qItem.status='failed';return}
qItem.status='running';qItem.taskId=taskItem.task_id;renderSttQueue(); qItem.status='running';qItem.taskId=ti.task_id;renderSttQueue();
pollSttItem(qi,taskItem.task_id);
});
setProg('stt',20,`${items.length}개 변환 중...`);
}catch(e){showErr('stt-err',e.message);document.getElementById('stt-btn').disabled=false;document.getElementById('stt-prog').style.display='none'}
});
function pollSttItem(qi,taskId){
const t=setInterval(async()=>{ const t=setInterval(async()=>{
try{ try{
const r=await api('GET','/api/status/'+taskId);if(r.status===401){clearInterval(t);showLogin();return} const r2=await api('GET','/api/status/'+ti.task_id);if(r2.status===401){clearInterval(t);showLogin();return}
const d=await r.json(); const d2=await r2.json();
if(d.state==='success'){ if(d2.state==='success'){clearInterval(t);qItem.outputFile=d2.output_file||null;qItem.status='done';renderSttQueue();
clearInterval(t); if(sttQueue.filter(i=>i.status==='done').length===1&&!sttQueue.some(i=>i.status==='running'))showSttResult(d2);
const item=sttQueue[qi]; checkSttBatchDone();}
item.outputFile=d.output_file||null;item.srtFile=d.srt_file||null; else if(d2.state==='failure'){clearInterval(t);qItem.status='failed';renderSttQueue();checkSttBatchDone();}
item.vttFile=d.vtt_file||null;item.srtOrigFile=d.srt_original_file||null; else{const done=sttQueue.filter(i=>i.status==='done').length;setProg('stt',20+Math.round((done/sttQueue.length)*75),d2.message||'처리 중...')}
item.status='done';renderSttQueue();checkSttBatchDone();
if(sttQueue.filter(i=>i.status!=='waiting'&&i.status!=='done'&&i.status!=='failed').length===0&&
sttQueue.filter(i=>i.status==='done').length===1) showSttResult(d);
} else if(d.state==='failure'){
clearInterval(t);sttQueue[qi].status='failed';renderSttQueue();checkSttBatchDone();
} else {
const done=sttQueue.filter(i=>i.status==='done').length;
setProg('stt',20+Math.round((done/sttQueue.length)*75),d.message||'처리 중...');
}
}catch{} }catch{}
},1800); },1800);
} });
}catch(e){showErr('stt-err',e.message);document.getElementById('stt-btn').disabled=false;document.getElementById('stt-prog').style.display='none'}
});
function checkSttBatchDone(){ function checkSttBatchDone(){
if(sttQueue.every(i=>['done','failed','waiting'].includes(i.status))){ if(sttQueue.every(i=>['done','failed','waiting'].includes(i.status))){
@@ -1003,48 +1099,32 @@ function checkSttBatchDone(){
document.getElementById('stt-btn').disabled=false; document.getElementById('stt-btn').disabled=false;
} }
} }
/**
 * Switch the STT panel in or out of its busy state.
 * @param {boolean} on When truthy: disable the start button, show the progress box
 *   and reset it to 0%. When falsy: re-enable the button and hide the progress box.
 */
function setSttLoading(on){
  document.getElementById('stt-btn').disabled=on;
  const progBox=document.getElementById('stt-prog');
  if(on){
    progBox.style.display='block';
    setProg('stt',0,'준비 중...');
  }else{
    progBox.style.display='none';
  }
}
/**
 * Render a finished transcription in the STT result panel and leave the busy state.
 *
 * Only the TXT result is handled here; the per-format subtitle download buttons
 * (#stt-dl-srt, #stt-dl-vtt, #stt-dl-srt-orig) are no longer part of the STT markup,
 * so they must not be dereferenced.
 *
 * @param {Object} d Task status payload. Fields read: output_file, language, duration,
 *   segments (array of {start, end, text}), text, ollama_used, ollama_model.
 */
function showSttResult(d){
  sttOutputFile=d.output_file;
  const segments=d.segments||[];

  // Metadata chips.
  document.getElementById('stt-mlang').textContent=(d.language||'').toUpperCase();
  document.getElementById('stt-mdur').textContent=fmtDur(d.duration);
  document.getElementById('stt-msegs').textContent=segments.length+'개';
  const chip=document.getElementById('stt-mollama-chip');
  if(d.ollama_used){
    chip.style.display='block';
    document.getElementById('stt-mollama').textContent=d.ollama_model;
  }else{
    chip.style.display='none';
  }
  document.getElementById('stt-meta').style.display='flex';
  document.getElementById('stt-tabs').style.display='flex';

  // Swap the empty-state placeholder for the transcript text area.
  document.getElementById('stt-empty').style.display='none';
  const resultBox=document.getElementById('stt-result');
  resultBox.style.display='block';
  resultBox.value=d.text||'';

  // Rebuild the per-segment list; segment text goes through esc() before it is inserted as HTML.
  const sl=document.getElementById('stt-seglist');
  sl.innerHTML='';
  segments.forEach(s=>{
    const row=document.createElement('div');
    row.className='seg-item';
    row.innerHTML=`<div class="seg-time">${fmtTime(s.start)}<br>→${fmtTime(s.end)}</div><div class="seg-text">${esc(s.text)}</div>`;
    sl.appendChild(row);
  });

  document.getElementById('stt-actions').style.display='flex';
  setSttLoading(false);
}
document.getElementById('stt-copy').addEventListener('click',()=>copyText(document.getElementById('stt-result').value,document.getElementById('stt-copy'))); document.getElementById('stt-copy').addEventListener('click',()=>copyText(document.getElementById('stt-result').value,document.getElementById('stt-copy')));
document.getElementById('stt-dl').addEventListener('click',()=>dlFile(sttOutputFile)); document.getElementById('stt-dl').addEventListener('click',()=>dlFile(sttOutputFile));
document.getElementById('stt-new').addEventListener('click',()=>{ document.getElementById('stt-new').addEventListener('click',()=>{sttQueue=[];sttInput.value='';sttOutputFile=null;renderSttQueue();['stt-prog','stt-err','stt-meta','stt-tabs','stt-actions'].forEach(id=>document.getElementById(id).style.display='none');document.getElementById('stt-empty').style.display='flex';document.getElementById('stt-result').style.display='none';document.getElementById('stt-result').value='';document.getElementById('stt-seglist').innerHTML='';document.getElementById('stt-btn').disabled=true;resetTabs('stt-tabs')});
sttQueue=[];sttInput.value='';sttOutputFile=null;renderSttQueue();
['stt-prog','stt-err','stt-meta','stt-tabs','stt-actions'].forEach(id=>document.getElementById(id).style.display='none');
document.getElementById('stt-empty').style.display='flex';
document.getElementById('stt-result').style.display='none';document.getElementById('stt-result').value='';
document.getElementById('stt-seglist').innerHTML='';document.getElementById('stt-btn').disabled=true;resetTabs('stt-tabs');
['stt-dl-srt','stt-dl-vtt','stt-dl-srt-orig'].forEach(id=>document.getElementById(id).style.display='none');
});
// ══ OCR — 배치 ══ // ══ OCR — 배치 ══
const ocrDrop=document.getElementById('ocr-drop'),ocrInput=document.getElementById('ocr-input'); const ocrDrop=document.getElementById('ocr-drop'),ocrInput=document.getElementById('ocr-input');
let ocrQueue=[]; let ocrQueue=[];
const IMG_EXTS=['jpg','jpeg','png','bmp','tiff','tif','webp','gif'];
function addOcrFiles(fileList){ function addOcrFiles(fileList){
const IMG=['jpg','jpeg','png','bmp','tiff','tif','webp','gif']; const files=Array.from(fileList).filter(f=>IMG_EXTS.includes(f.name.split('.').pop().toLowerCase()));
const files=Array.from(fileList).filter(f=>IMG.includes(f.name.split('.').pop().toLowerCase()));
if(!files.length)return; if(!files.length)return;
files.forEach(f=>ocrQueue.push({file:f,taskId:null,txtFile:null,xlsxFile:null,status:'waiting',el:null})); files.forEach(f=>ocrQueue.push({file:f,taskId:null,txtFile:null,xlsxFile:null,status:'waiting'}));
renderOcrQueue();document.getElementById('ocr-btn').disabled=false; renderOcrQueue();document.getElementById('ocr-btn').disabled=false;
} }
ocrInput.addEventListener('change',()=>addOcrFiles(ocrInput.files)); ocrInput.addEventListener('change',()=>addOcrFiles(ocrInput.files));
@@ -1059,8 +1139,8 @@ function renderOcrQueue(){
qEl.style.display='block';list.innerHTML=''; qEl.style.display='block';list.innerHTML='';
ocrQueue.forEach((item,i)=>{ ocrQueue.forEach((item,i)=>{
const div=document.createElement('div');div.className='batch-item '+item.status; const div=document.createElement('div');div.className='batch-item '+item.status;
div.innerHTML=`<div><div class="bi-name">${esc(item.file.name)}</div><div class="bi-prog" id="ocr-bp-${i}" style="width:0%;display:${item.status==='running'?'block':'none'}"></div></div><span class="bi-status ${item.status}">${{waiting:'대기',running:'인식중',done:'완료',failed:'실패'}[item.status]}</span><span style="display:flex;gap:3px">${item.status==='done'?[item.txtFile?`<button class="bi-dl" onclick="dlFile('${esc(item.txtFile)}')">TXT</button>`:'',item.xlsxFile?`<button class="bi-dl" onclick="dlFile('${esc(item.xlsxFile)}')">XLS</button>`:''].filter(Boolean).join(''):''}</span>`; div.innerHTML=`<div><div class="bi-name">${esc(item.file.name)}</div><div class="bi-prog" id="obi-${i}" style="width:0%;display:${item.status==='running'?'block':'none'}"></div></div><span class="bi-status ${item.status}">${{waiting:'대기',running:'인식중',done:'완료',failed:'실패'}[item.status]}</span><span style="display:flex;gap:3px">${item.status==='done'?[item.txtFile?`<button class="bi-dl" onclick="dlFile('${esc(item.txtFile)}')">TXT</button>`:'',item.xlsxFile?`<button class="bi-dl" onclick="dlFile('${esc(item.xlsxFile)}')">XLS</button>`:''].filter(Boolean).join(''):''}</span>`;
item.el=div;list.appendChild(div); list.appendChild(div);
}); });
const done=ocrQueue.filter(i=>i.status==='done').length,failed=ocrQueue.filter(i=>i.status==='failed').length,running=ocrQueue.filter(i=>i.status==='running').length; const done=ocrQueue.filter(i=>i.status==='done').length,failed=ocrQueue.filter(i=>i.status==='failed').length,running=ocrQueue.filter(i=>i.status==='running').length;
sum.innerHTML=`전체 <span>${ocrQueue.length}</span>개 · 완료 <span>${done}</span> · 실패 <span>${failed}</span>${running?` · 진행중 <span>${running}</span>`:''}`; sum.innerHTML=`전체 <span>${ocrQueue.length}</span>개 · 완료 <span>${done}</span> · 실패 <span>${failed}</span>${running?` · 진행중 <span>${running}</span>`:''}`;
@@ -1071,11 +1151,11 @@ document.querySelectorAll('#page-ocr .engine-btn').forEach(btn=>{
document.querySelectorAll('#page-ocr .engine-btn').forEach(b=>b.classList.remove('active')); document.querySelectorAll('#page-ocr .engine-btn').forEach(b=>b.classList.remove('active'));
btn.classList.add('active');ocrEngine=btn.dataset.engine; btn.classList.add('active');ocrEngine=btn.dataset.engine;
document.getElementById('ocr-ollama-opts').classList.toggle('visible',ocrEngine==='ollama'); document.getElementById('ocr-ollama-opts').classList.toggle('visible',ocrEngine==='ollama');
document.getElementById('ocr-or-opts').classList.toggle('visible',ocrEngine==='openrouter'); document.getElementById('ocr-or-opts')?.classList.toggle('visible',ocrEngine==='openrouter');
document.getElementById('ocr-btn').className='btn-start '+(ocrEngine!=='paddle'?'purple':'green'); document.getElementById('ocr-btn').className='btn-start '+(ocrEngine!=='paddle'?'purple':'green');
}); });
}); });
document.getElementById('cprompt-toggle').addEventListener('click',()=>{const ta=document.getElementById('custom-prompt');const open=ta.style.display!=='block';ta.style.display=open?'block':'none';document.getElementById('cprompt-toggle').textContent=(open?'▼':'▶')+' 커스텀 프롬프트'}); document.getElementById('cprompt-toggle')?.addEventListener('click',()=>{const ta=document.getElementById('custom-prompt');const open=ta.style.display!=='block';ta.style.display=open?'block':'none';document.getElementById('cprompt-toggle').textContent=(open?'▼':'▶')+' 커스텀 프롬프트'});
document.querySelectorAll('.mode-btn').forEach(btn=>{btn.addEventListener('click',()=>{document.querySelectorAll('.mode-btn').forEach(b=>b.classList.remove('active'));btn.classList.add('active');ocrMode=btn.dataset.mode;document.getElementById('mode-desc').textContent=ocrMode==='structure'?'표 구조를 감지하고 Excel로 저장합니다':'일반 텍스트와 글자를 인식합니다'})}); document.querySelectorAll('.mode-btn').forEach(btn=>{btn.addEventListener('click',()=>{document.querySelectorAll('.mode-btn').forEach(b=>b.classList.remove('active'));btn.classList.add('active');ocrMode=btn.dataset.mode;document.getElementById('mode-desc').textContent=ocrMode==='structure'?'표 구조를 감지하고 Excel로 저장합니다':'일반 텍스트와 글자를 인식합니다'})});
document.getElementById('ocr-btn').addEventListener('click',async()=>{ document.getElementById('ocr-btn').addEventListener('click',async()=>{
@@ -1088,9 +1168,9 @@ document.getElementById('ocr-btn').addEventListener('click',async()=>{
const fd=new FormData(); const fd=new FormData();
pending.forEach(item=>fd.append('files',item.file)); pending.forEach(item=>fd.append('files',item.file));
fd.append('mode',ocrMode);fd.append('backend',ocrEngine); fd.append('mode',ocrMode);fd.append('backend',ocrEngine);
fd.append('ollama_model',ocrEngine==='ollama'?(document.getElementById('ocr-ollama-model').value||''):''); fd.append('ollama_model',ocrEngine==='ollama'?(document.getElementById('ocr-ollama-model')?.value||''):'');
fd.append('openrouter_model',ocrEngine==='openrouter'?(document.getElementById('ocr-or-model')?.value||''):''); fd.append('openrouter_model',ocrEngine==='openrouter'?(document.getElementById('ocr-or-model')?.value||''):'');
const cp=ocrEngine==='openrouter'?(document.getElementById('custom-prompt-or')?.value||''):(document.getElementById('custom-prompt').value||''); const cp=ocrEngine==='openrouter'?(document.getElementById('custom-prompt-or')?.value||''):(document.getElementById('custom-prompt')?.value||'');
fd.append('custom_prompt',cp); fd.append('custom_prompt',cp);
try{ try{
const url=pending.length===1?'/api/ocr':'/api/ocr/batch'; const url=pending.length===1?'/api/ocr':'/api/ocr/batch';
@@ -1099,23 +1179,24 @@ document.getElementById('ocr-btn').addEventListener('click',async()=>{
let pi=0; let pi=0;
ocrQueue.forEach((qItem,qi)=>{ ocrQueue.forEach((qItem,qi)=>{
if(qItem.status!=='waiting')return; if(qItem.status!=='waiting')return;
const taskItem=items[pi++];if(!taskItem)return; const ti=items[pi++];if(!ti)return;
if(taskItem.error){qItem.status='failed';return} if(ti.error){qItem.status='failed';return}
qItem.status='running';qItem.taskId=taskItem.task_id;renderOcrQueue(); qItem.status='running';qItem.taskId=ti.task_id;renderOcrQueue();
const t=setInterval(async()=>{ const t=setInterval(async()=>{
try{ try{
const r2=await api('GET','/api/status/'+taskItem.task_id);if(r2.status===401){clearInterval(t);showLogin();return} const r2=await api('GET','/api/status/'+ti.task_id);if(r2.status===401){clearInterval(t);showLogin();return}
const d2=await r2.json(); const d2=await r2.json();
if(d2.state==='success'){clearInterval(t);qItem.txtFile=d2.txt_file||null;qItem.xlsxFile=d2.xlsx_file||null;qItem.status='done';renderOcrQueue(); if(d2.state==='success'){clearInterval(t);qItem.txtFile=d2.txt_file||null;qItem.xlsxFile=d2.xlsx_file||null;qItem.status='done';renderOcrQueue();
if(ocrQueue.filter(i=>i.status==='done').length===1&&ocrQueue.filter(i=>i.status==='running').length===0)showOcrResult(d2); if(ocrQueue.filter(i=>i.status==='done').length===1&&!ocrQueue.some(i=>i.status==='running'))showOcrResult(d2);
if(ocrQueue.every(i=>['done','failed','waiting'].includes(i.status))){const done=ocrQueue.filter(i=>i.status==='done').length;setProg('ocr',100,`완료 ${done}/${ocrQueue.length}`);setTimeout(()=>document.getElementById('ocr-prog').style.display='none',2000);document.getElementById('ocr-btn').disabled=false;} if(ocrQueue.every(i=>['done','failed','waiting'].includes(i.status))){const done=ocrQueue.filter(i=>i.status==='done').length;setProg('ocr',100,`완료 ${done}/${ocrQueue.length}`);setTimeout(()=>document.getElementById('ocr-prog').style.display='none',2000);document.getElementById('ocr-btn').disabled=false;}}
} else if(d2.state==='failure'){clearInterval(t);qItem.status='failed';renderOcrQueue();} else if(d2.state==='failure'){clearInterval(t);qItem.status='failed';renderOcrQueue();}
else{const done=ocrQueue.filter(i=>i.status==='done').length;setProg('ocr',20+Math.round((done/ocrQueue.length)*75),d2.message||'처리중...')} else{const done=ocrQueue.filter(i=>i.status==='done').length;setProg('ocr',20+Math.round((done/ocrQueue.length)*75),d2.message||'처리 중...')}
}catch{} }catch{}
},1800); },1800);
}); });
}catch(e){showErr('ocr-err',e.message);document.getElementById('ocr-btn').disabled=false;document.getElementById('ocr-prog').style.display='none'} }catch(e){showErr('ocr-err',e.message);document.getElementById('ocr-btn').disabled=false;document.getElementById('ocr-prog').style.display='none'}
}); });
function setOcrLoading(on){const isAI=ocrEngine!=='paddle',c=isAI?'var(--purple)':'var(--accent)';document.getElementById('ocr-btn').disabled=on;document.getElementById('ocr-prog').style.display=on?'block':'none';document.getElementById('ocr-wave').style.display=on?'flex':'none';document.getElementById('ocr-pfill').style.background=c;document.getElementById('ocr-ppct').style.color=c;document.querySelectorAll('#ocr-wave .wave-bar').forEach(b=>b.style.background=c);if(on)setProg('ocr',0,'준비 중...')} function setOcrLoading(on){const isAI=ocrEngine!=='paddle',c=isAI?'var(--purple)':'var(--accent)';document.getElementById('ocr-btn').disabled=on;document.getElementById('ocr-prog').style.display=on?'block':'none';document.getElementById('ocr-wave').style.display=on?'flex':'none';document.getElementById('ocr-pfill').style.background=c;document.getElementById('ocr-ppct').style.color=c;document.querySelectorAll('#ocr-wave .wave-bar').forEach(b=>b.style.background=c);if(on)setProg('ocr',0,'준비 중...')}
function showOcrResult(d){ function showOcrResult(d){
ocrOutputTxt=d.txt_file||null;ocrOutputXlsx=d.xlsx_file||null;const io=d.backend!=='paddle'; ocrOutputTxt=d.txt_file||null;ocrOutputXlsx=d.xlsx_file||null;const io=d.backend!=='paddle';
@@ -1128,12 +1209,184 @@ function showOcrResult(d){
document.getElementById('ocr-copy').addEventListener('click',()=>copyText(document.getElementById('ocr-result').value,document.getElementById('ocr-copy'))); document.getElementById('ocr-copy').addEventListener('click',()=>copyText(document.getElementById('ocr-result').value,document.getElementById('ocr-copy')));
document.getElementById('ocr-dl-txt').addEventListener('click',()=>dlFile(ocrOutputTxt)); document.getElementById('ocr-dl-txt').addEventListener('click',()=>dlFile(ocrOutputTxt));
document.getElementById('ocr-dl-xlsx').addEventListener('click',()=>dlFile(ocrOutputXlsx)); document.getElementById('ocr-dl-xlsx').addEventListener('click',()=>dlFile(ocrOutputXlsx));
document.getElementById('ocr-new').addEventListener('click',()=>{ document.getElementById('ocr-new').addEventListener('click',()=>{ocrQueue=[];ocrInput.value='';ocrOutputTxt=null;ocrOutputXlsx=null;renderOcrQueue();['ocr-prog','ocr-err','ocr-meta','ocr-tabs','ocr-actions'].forEach(id=>document.getElementById(id).style.display='none');document.getElementById('ocr-empty').style.display='flex';document.getElementById('ocr-result').style.display='none';document.getElementById('ocr-result').value='';document.getElementById('ocr-linelist').innerHTML='';document.getElementById('ocr-tablelist').innerHTML='';document.getElementById('ocr-btn').disabled=true;resetTabs('ocr-tabs')});
ocrQueue=[];ocrInput.value='';ocrOutputTxt=null;ocrOutputXlsx=null;renderOcrQueue();
['ocr-prog','ocr-err','ocr-meta','ocr-tabs','ocr-actions'].forEach(id=>document.getElementById(id).style.display='none'); // ══ 자막 ══
document.getElementById('ocr-empty').style.display='flex';document.getElementById('ocr-result').style.display='none';document.getElementById('ocr-result').value='';document.getElementById('ocr-linelist').innerHTML='';document.getElementById('ocr-tablelist').innerHTML='';document.getElementById('ocr-btn').disabled=true;resetTabs('ocr-tabs'); const subDrop=document.getElementById('sub-drop'),subInput=document.getElementById('sub-input');
// Subtitle tab state: the chosen media file, the translation engine and the subtitle format.
let subFile = null;
let subTransVia = 'ollama';
let subFmt = 'srt';

// A file picked through the file input is handed to setSubFile.
subInput.addEventListener('change', function () {
  setSubFile(subInput.files[0]);
});

// Drag-and-drop: highlight the drop zone while a drag hovers over it,
// and take the first dropped file.
subDrop.addEventListener('dragover', function (evt) {
  evt.preventDefault();
  subDrop.classList.add('dragover');
});
subDrop.addEventListener('dragleave', function () {
  subDrop.classList.remove('dragover');
});
subDrop.addEventListener('drop', function (evt) {
  evt.preventDefault();
  subDrop.classList.remove('dragover');
  setSubFile(evt.dataTransfer.files[0]);
});
/**
 * Store the file selected for subtitle generation and reflect it in the UI.
 *
 * Does nothing when no file is given. Otherwise shows the file info panel
 * with the file name and formatted size, enables the start button and hides
 * the error box.
 *
 * @param {File} f file taken from the input or from a drop event
 */
function setSubFile(f) {
  if (!f) {
    return;
  }
  subFile = f;
  const byId = id => document.getElementById(id);
  byId('sub-info').style.display = 'block';
  byId('sub-fname').textContent = f.name;
  byId('sub-fsize').textContent = fmtBytes(f.size);
  byId('sub-btn').disabled = false;
  byId('sub-err').style.display = 'none';
}
// 자막 포맷 버튼
document.querySelectorAll('#page-subtitle .fmt-btn[data-fmt]').forEach(btn=>{
btn.addEventListener('click',()=>{
document.querySelectorAll('#page-subtitle .fmt-btn[data-fmt]').forEach(b=>b.classList.remove('active'));
btn.classList.add('active');subFmt=btn.dataset.fmt;
});
}); });
// Choosing a translation language reveals the engine/model picker and fills
// the model list; clearing the language hides the picker again.
document.getElementById('sub-trans-lang').addEventListener('change', function () {
  const hasTarget = Boolean(this.value);
  const wrap = document.getElementById('sub-trans-engine-wrap');
  wrap.style.display = hasTarget ? 'flex' : 'none';
  if (hasTarget) {
    fillSubTransModels();
  }
});
// Translation engine buttons: the clicked button becomes the only active one,
// its data-via value becomes the current engine, and the model list is rebuilt.
document.querySelectorAll('button[data-via]').forEach(function (viaBtn) {
  viaBtn.addEventListener('click', function () {
    document.querySelectorAll('button[data-via]').forEach(function (other) {
      other.classList.remove('active');
    });
    viaBtn.classList.add('active');
    subTransVia = viaBtn.dataset.via;
    fillSubTransModels();
  });
});
/**
 * Rebuild the translation model <select> for the currently selected engine.
 *
 * The list starts with a blank "use the default model from settings" entry,
 * followed by orModels when the engine is 'openrouter' and ollamaModels
 * otherwise. The previously selected model stays selected if it is still
 * present in the new list. Does nothing when the <select> is not in the page.
 */
function fillSubTransModels() {
  const select = document.getElementById('sub-trans-model');
  if (!select) {
    return;
  }
  // Capture the selection before the options are wiped.
  const previous = select.value;
  select.innerHTML = '<option value="">설정 기본 모델 사용</option>';
  const source = subTransVia === 'openrouter' ? orModels : ollamaModels;
  source.forEach(function (modelName) {
    const option = document.createElement('option');
    option.value = modelName;
    option.textContent = modelName;
    if (modelName === previous) {
      option.selected = true;
    }
    select.appendChild(option);
  });
}
/**
 * Update the visual state of one pipeline step indicator.
 *
 * Sets the class and glyph of the step icon, then the classes of the step
 * dot, the connector line leading into the step (for steps after the first)
 * and the step label. Returns without touching anything when the icon
 * element for the step does not exist; the other elements are optional.
 *
 * @param {number} step   step number (1, 2 or 3)
 * @param {string} status 'waiting', 'running', 'done' or 'failed'
 */
function setSubStep(step, status) {
  const icon = document.getElementById(`sub-sicon-${step}`);
  if (!icon) {
    return;
  }

  let glyph;
  switch (status) {
    case 'done': glyph = '✓'; break;
    case 'failed': glyph = '✗'; break;
    case 'running': glyph = '⚡'; break;
    default: glyph = '⏳';
  }
  icon.className = 'sub-step-icon ' + status;
  icon.textContent = glyph;

  // The dot and the label share one modifier: 'done', 'active' (while running) or none.
  let modifier = '';
  if (status === 'done') {
    modifier = 'done';
  } else if (status === 'running') {
    modifier = 'active';
  }

  const dot = document.getElementById(`sdot-${step}`);
  if (dot) {
    dot.className = 'step-dot ' + modifier;
  }

  // The connector before this step is marked done for every status except 'waiting'.
  if (step > 1) {
    const connector = document.getElementById(`sline-${step - 1}`);
    if (connector) {
      connector.className = 'step-line ' + (status !== 'waiting' ? 'done' : '');
    }
  }

  const label = document.getElementById(`slabel-${step}`);
  if (label) {
    label.className = 'step-label ' + modifier;
  }
}
// Start button: upload the selected file with the chosen options to
// /api/subtitle, reset the progress UI, and begin polling the returned task.
document.getElementById('sub-btn').addEventListener('click', async () => {
  if (!subFile) return;
  const byId = id => document.getElementById(id);
  const transLang = byId('sub-trans-lang').value;

  const fd = new FormData();
  fd.append('file', subFile);
  fd.append('src_language', byId('sub-src-lang').value || '');
  fd.append('subtitle_fmt', subFmt);
  fd.append('translate_to', transLang);
  // A translation model is only sent when a target language is chosen.
  fd.append('trans_model', transLang ? (byId('sub-trans-model')?.value || '') : '');
  fd.append('trans_via', subTransVia);

  byId('sub-btn').disabled = true;
  byId('sub-err').style.display = 'none';
  byId('sub-prog-box').style.display = 'block';
  byId('sub-result-card').style.display = 'none';
  byId('sub-prog-bar').style.width = '0%';
  [1, 2, 3].forEach(s => setSubStep(s, 'waiting'));
  setSubStep(1, 'running');

  try {
    const r = await api('POST', '/api/subtitle', fd);
    // An error response is not guaranteed to carry a JSON body; fall back to
    // an empty object so the user sees the upload error, not a parser error.
    const d = await r.json().catch(() => ({}));
    if (!r.ok) throw new Error(d.detail || '업로드 실패');
    // Without a task id there is nothing to poll; polling would never finish.
    if (!d.task_id) throw new Error('업로드 실패');
    pollSubtitle(d.task_id, transLang);
  } catch (e) {
    showErr('sub-err', e.message);
    byId('sub-btn').disabled = false;
    byId('sub-prog-box').style.display = 'none';
  }
});
/**
 * Poll /api/status/<taskId> every 1.8 s and mirror the subtitle pipeline's
 * progress in the step indicators and the progress bar.
 *
 * Polling stops on a 401 response (the login view is shown), on state
 * 'success' (the result card is shown shortly after) and on state 'failure'
 * (the current step is marked failed and the error is displayed). Any other
 * error during a tick is logged and the next tick retries.
 *
 * @param {string} taskId    task id returned by POST /api/subtitle
 * @param {string} transLang target language code, or '' when no translation was requested
 */
function pollSubtitle(taskId, transLang) {
  let prevStep = 0;
  // Prevents overlapping ticks when a status request takes longer than the
  // interval; otherwise a terminal state could be handled more than once.
  let inFlight = false;
  const t = setInterval(async () => {
    if (inFlight) return;
    inFlight = true;
    try {
      const r = await api('GET', '/api/status/' + taskId);
      if (r.status === 401) { clearInterval(t); showLogin(); return; }
      const d = await r.json();
      if (d.state === 'progress' || d.state === 'success') {
        const step = d.step || 1;
        const prog = d.progress || 0;
        document.getElementById('sub-prog-bar').style.width = prog + '%';
        // Step transition
        if (step !== prevStep) {
          if (prevStep > 0 && prevStep < step) setSubStep(prevStep, 'done');
          if (step <= 3) setSubStep(step, 'running');
          // No translation requested: there is no step 3, so mark it as skipped.
          if (!transLang && step === 2) {
            setSubStep(3, 'done');
          }
          prevStep = step;
        }
        if (d.step_msg) {
          // Guard the lookup: a missing message element must not abort the
          // tick before the terminal-state handling below runs.
          const msgEl = document.getElementById(`sub-smsg-${step}`);
          if (msgEl) msgEl.textContent = d.step_msg;
        }
      }
      if (d.state === 'success') {
        clearInterval(t);
        [1, 2, 3].forEach(s => setSubStep(s, 'done'));
        document.getElementById('sub-prog-bar').style.width = '100%';
        setTimeout(() => showSubResult(d), 400);
      } else if (d.state === 'failure') {
        clearInterval(t);
        if (prevStep > 0) setSubStep(prevStep, 'failed');
        showErr('sub-err', d.message || '자막 생성 실패');
        document.getElementById('sub-btn').disabled = false;
      }
    } catch (err) {
      // Best effort: keep polling, but leave a trace instead of failing silently.
      console.warn('subtitle status poll failed', err);
    } finally {
      inFlight = false;
    }
  }, 1800);
}
// Language code -> display name. Kept in step with the server-side
// LANG_NAMES table, which also lists Bengali ('bn').
const LANG_NAMES = {
  ko: '한국어', en: 'English', ja: '日本語', zh: '中文(简体)',
  'zh-tw': '中文(繁體)', fr: 'Français', de: 'Deutsch', es: 'Español',
  it: 'Italiano', pt: 'Português', ru: 'Русский', ar: 'العربية',
  vi: 'Tiếng Việt', th: 'ไทย', id: 'Bahasa Indonesia', nl: 'Nederlands',
  pl: 'Polski', tr: 'Türkçe', sv: 'Svenska', uk: 'Українська', hi: 'हिन्दी',
  bn: 'বাংলা',
};

/**
 * Return the display name for a language code.
 *
 * Unknown codes are returned unchanged; an empty or missing code yields the
 * "unknown" placeholder.
 *
 * @param {string} code language code such as 'ko' or 'zh-tw'
 * @returns {string} display name, the code itself, or the placeholder
 */
function langName(code) {
  return LANG_NAMES[code] || code || '알 수 없음';
}
/**
 * Show the result card for a finished subtitle task.
 *
 * Hides the progress box, fills in the summary fields (detected language,
 * duration, segment count, translation target) and rebuilds the download
 * grid with one button per subtitle file present in the payload. Finally
 * re-enables the start button.
 *
 * @param {object} d status payload of the finished task; the fields read here are
 *   detected_language, duration, segment_count, translated, translate_to,
 *   srt_orig, vtt_orig, srt_trans and vtt_trans
 */
function showSubResult(d) {
  document.getElementById('sub-prog-box').style.display = 'none';
  const rc = document.getElementById('sub-result-card');
  rc.style.display = 'block';
  document.getElementById('sub-res-lang').textContent = langName(d.detected_language);
  document.getElementById('sub-res-dur').textContent = fmtDur(d.duration);
  document.getElementById('sub-res-segs').textContent = (d.segment_count || 0) + '개';
  document.getElementById('sub-res-trans').textContent = d.translated ? langName(d.translate_to) : '없음';

  const grid = document.getElementById('sub-dl-grid');
  grid.innerHTML = '';

  // Adds one download button; skipped when the payload carries no such file.
  const addBtn = (label, lang, file, cls = '') => {
    if (!file) return;
    const ext = file.split('.').pop().toUpperCase();
    const btn = document.createElement('button');
    btn.className = 'sub-dl-btn ' + (cls);
    // The extension and the language fallback come from the server response,
    // so the spans are filled through textContent rather than innerHTML to
    // keep any markup in those strings inert.
    const parts = [
      ['dl-icon', '📄'],
      ['dl-label', `${ext} ${label}`],
      ['dl-lang', langName(lang)],
    ];
    parts.forEach(([className, text]) => {
      const span = document.createElement('span');
      span.className = className;
      span.textContent = text;
      btn.appendChild(span);
    });
    btn.onclick = () => dlFile(file);
    grid.appendChild(btn);
  };

  addBtn('원어', d.detected_language, d.srt_orig);
  addBtn('원어', d.detected_language, d.vtt_orig);
  addBtn('번역', d.translate_to, d.srt_trans, 'trans');
  addBtn('번역', d.translate_to, d.vtt_trans, 'trans');
  document.getElementById('sub-btn').disabled = false;
}
// "New" button: forget the selected file and return the subtitle tab to its
// initial state (panels hidden, start button disabled, progress and steps reset).
document.getElementById('sub-new').addEventListener('click', function () {
  subFile = null;
  subInput.value = '';
  for (const panelId of ['sub-info', 'sub-prog-box', 'sub-result-card', 'sub-err']) {
    document.getElementById(panelId).style.display = 'none';
  }
  document.getElementById('sub-btn').disabled = true;
  document.getElementById('sub-prog-bar').style.width = '0%';
  for (const stepNo of [1, 2, 3]) {
    setSubStep(stepNo, 'waiting');
  }
});
// ══ HISTORY ══ // ══ HISTORY ══
document.querySelectorAll('.hist-filter-btn').forEach(btn=>{btn.addEventListener('click',()=>{document.querySelectorAll('.hist-filter-btn').forEach(b=>b.classList.remove('active'));btn.classList.add('active');histType=btn.dataset.type;histPage=1;loadHistory()})}); document.querySelectorAll('.hist-filter-btn').forEach(btn=>{btn.addEventListener('click',()=>{document.querySelectorAll('.hist-filter-btn').forEach(b=>b.classList.remove('active'));btn.classList.add('active');histType=btn.dataset.type;histPage=1;loadHistory()})});
@@ -1387,24 +1640,19 @@ function esc(s){return String(s||'').replace(/&/g,'&amp;').replace(/</g,'&lt;').
async function copyText(text,btn){try{await navigator.clipboard.writeText(text);const o=btn.textContent;btn.textContent='복사됨 ✓';setTimeout(()=>btn.textContent=o,1500)}catch{}} async function copyText(text,btn){try{await navigator.clipboard.writeText(text);const o=btn.textContent;btn.textContent='복사됨 ✓';setTimeout(()=>btn.textContent=o,1500)}catch{}}
// ══ OPENROUTER ══ // ══ OPENROUTER ══
let orModels=[],orVisionModels=[];
async function loadOrModels(){ async function loadOrModels(){
try{const r=await api('GET','/api/openrouter/models');const d=await r.json(); try{const r=await api('GET','/api/openrouter/models');const d=await r.json();
const wrap=document.getElementById('or-models-wrap'); if(d.connected){orModels=d.models||[];orVisionModels=d.vision_models||[];populateOrSelects();}
if(d.connected){orModels=d.models||[];orVisionModels=d.vision_models||[];
wrap.style.display='block';document.getElementById('or-connected-badge').textContent=`✓ 연결됨 — Vision ${orVisionModels.length}개 / 전체 ${orModels.length}`;populateOrSelects('vision');}
else wrap.style.display='none';
}catch{} }catch{}
} }
let orFilter='vision'; function populateOrSelects(){
document.querySelectorAll('.or-model-tab').forEach(btn=>{btn.addEventListener('click',()=>{document.querySelectorAll('.or-model-tab').forEach(b=>b.classList.remove('active'));btn.classList.add('active');orFilter=btn.dataset.filter;populateOrSelects(orFilter)})}); const fill=(sel,def,list)=>{if(!sel)return;const cur=sel.value||def||'';sel.innerHTML='<option value="">(없음)</option>';list.forEach(m=>{const o=document.createElement('option');o.value=m;o.textContent=m;if(m===cur)o.selected=true;sel.appendChild(o)})};
function populateOrSelects(filter){ fill(document.getElementById('setting-or-stt-model'),appSettings.openrouter_stt_model,orModels);
filter=filter||orFilter;const list=filter==='vision'?orVisionModels:filter==='text'?orTextModels:orModels; fill(document.getElementById('setting-or-ocr-model'),appSettings.openrouter_ocr_model,orVisionModels);
const fillOr=(sel,def)=>{if(!sel)return;const cur=sel.value||def||'';sel.innerHTML='<option value="">(없음)</option>';list.forEach(m=>{const o=document.createElement('option');o.value=m;o.textContent=m;if(m===cur)o.selected=true;sel.appendChild(o)})}; fill(document.getElementById('stt-or-model'),appSettings.openrouter_stt_model,orModels);
fillOr(document.getElementById('setting-or-stt-model'),appSettings.openrouter_stt_model); fill(document.getElementById('ocr-or-model'),appSettings.openrouter_ocr_model,orVisionModels);
const ocrSel=document.getElementById('setting-or-ocr-model'); fillSubTransModels();
if(ocrSel){const cur=ocrSel.value||appSettings.openrouter_ocr_model||'';ocrSel.innerHTML='<option value="">(없음)</option>';orVisionModels.forEach(m=>{const o=document.createElement('option');o.value=m;o.textContent=m;if(m===cur)o.selected=true;ocrSel.appendChild(o)})}
fillOr(document.getElementById('stt-or-model'),appSettings.openrouter_stt_model);
const ocrPage=document.getElementById('ocr-or-model');if(ocrPage){const cur=ocrPage.value||appSettings.openrouter_ocr_model||'';ocrPage.innerHTML='<option value="">설정 기본 모델 사용</option>';orVisionModels.forEach(m=>{const o=document.createElement('option');o.value=m;o.textContent=m;if(m===cur)o.selected=true;ocrPage.appendChild(o)})}
} }
document.getElementById('btn-or-test')?.addEventListener('click',async()=>{ document.getElementById('btn-or-test')?.addEventListener('click',async()=>{
const key=document.getElementById('or-api-key').value.trim(),url=document.getElementById('or-url').value.trim()||'https://openrouter.ai/api/v1'; const key=document.getElementById('or-api-key').value.trim(),url=document.getElementById('or-url').value.trim()||'https://openrouter.ai/api/v1';
@@ -1416,8 +1664,8 @@ document.getElementById('btn-or-test')?.addEventListener('click',async()=>{
result.style.color=d.ok?'var(--accent)':'var(--warn)';result.textContent=d.message;if(d.ok)loadOrModels();} result.style.color=d.ok?'var(--accent)':'var(--warn)';result.textContent=d.message;if(d.ok)loadOrModels();}
catch{result.style.color='var(--warn)';result.textContent='요청 실패'} catch{result.style.color='var(--warn)';result.textContent='요청 실패'}
}); });
document.getElementById('btn-refresh-models')?.addEventListener('click',()=>{loadOllamaModels();loadOrModels()});
loadLanguages();
checkAuth(); checkAuth();
</script> </script>
</body> </body>

View File

@@ -1,10 +1,12 @@
""" """
STT Celery Tasks STT + Subtitle Pipeline Celery Tasks
- faster-whisper 변환
- Ollama / OpenRouter 후처리 (교정 또는 번역) subtitle_pipeline_task:
- SRT / VTT / TXT 자막 파일 생성 Step 1: ffmpeg → 16kHz WAV 추출
Step 2: Whisper → 원어 SRT / VTT 생성
Step 3: LLM → 번역 SRT / VTT 생성 (선택)
""" """
import os, json import os, json, subprocess, tempfile
import httpx import httpx
from celery import Celery from celery import Celery
from ocr_tasks import ocr_task # noqa: F401 from ocr_tasks import ocr_task # noqa: F401
@@ -29,103 +31,73 @@ celery_app.conf.update(
accept_content=["json"], task_track_started=True, result_expires=3600, accept_content=["json"], task_track_started=True, result_expires=3600,
) )
_model = None _whisper_model = None
def get_model(): def get_model():
global _model global _whisper_model
if _model is None: if _whisper_model is None:
from faster_whisper import WhisperModel from faster_whisper import WhisperModel
kwargs = dict(device=DEVICE, compute_type=COMPUTE_TYPE) kwargs = dict(device=DEVICE, compute_type=COMPUTE_TYPE)
if CPU_THREADS is not None: kwargs["cpu_threads"] = CPU_THREADS if CPU_THREADS is not None: kwargs["cpu_threads"] = CPU_THREADS
print(f"[Whisper] 로딩: {MODEL_SIZE}/{DEVICE}/{COMPUTE_TYPE}/threads={CPU_THREADS or 'auto'}") print(f"[Whisper] 로딩: {MODEL_SIZE}/{DEVICE}/{COMPUTE_TYPE}/threads={CPU_THREADS or 'auto'}")
_model = WhisperModel(MODEL_SIZE, **kwargs) _whisper_model = WhisperModel(MODEL_SIZE, **kwargs)
print("[Whisper] 로드 완료") print("[Whisper] 로드 완료")
return _model return _whisper_model
# ══════════════════════════════════════════════════════════════ # ══════════════════════════════════════════════════════════════
# 언어 코드 매핑 # 언어 코드 → 표시명
# ══════════════════════════════════════════════════════════════ # ══════════════════════════════════════════════════════════════
LANG_NAMES = { LANG_NAMES = {
"ko":"한국어","en":"English","ja":"日本語","zh":"中文","fr":"Français", "ko":"한국어","en":"English","ja":"日本語","zh":"中文(简体)",
"de":"Deutsch","es":"Español","it":"Italiano","pt":"Português","ru":"Русский", "zh-tw":"中文(繁體)","fr":"Français","de":"Deutsch","es":"Español",
"ar":"العربية","vi":"Tiếng Việt","th":"ไทย","id":"Bahasa Indonesia", "it":"Italiano","pt":"Português","ru":"Русский","ar":"العربية",
"nl":"Nederlands","pl":"Polski","tr":"Türkçe","sv":"Svenska","uk":"Українська", "vi":"Tiếng Việt","th":"ไทย","id":"Bahasa Indonesia",
"nl":"Nederlands","pl":"Polski","tr":"Türkçe","sv":"Svenska",
"uk":"Українська","hi":"हिन्दी","bn":"বাংলা",
} }
def _lang_name(code): return LANG_NAMES.get(code, code)
def _lang_name(code: str) -> str:
return LANG_NAMES.get(code, code)
# ══════════════════════════════════════════════════════════════ # ══════════════════════════════════════════════════════════════
# 자막 포맷 생성 # 자막 포맷 생성
# ══════════════════════════════════════════════════════════════ # ══════════════════════════════════════════════════════════════
def _fmt_srt_time(s: float) -> str: def _srt_time(s: float) -> str:
"""초 → SRT 시간 포맷 00:00:00,000"""
ms = int(round(s * 1000)) ms = int(round(s * 1000))
h, rem = divmod(ms, 3600000) h, r = divmod(ms, 3600000); m, r = divmod(r, 60000); sec, ms = divmod(r, 1000)
m, rem = divmod(rem, 60000)
sec, ms = divmod(rem, 1000)
return f"{h:02d}:{m:02d}:{sec:02d},{ms:03d}" return f"{h:02d}:{m:02d}:{sec:02d},{ms:03d}"
def _fmt_vtt_time(s: float) -> str: def _vtt_time(s: float) -> str:
"""초 → VTT 시간 포맷 00:00:00.000""" return _srt_time(s).replace(",", ".")
return _fmt_srt_time(s).replace(",", ".")
def _make_srt(segments: list) -> str: def make_srt(segments: list) -> str:
lines = [] out = []
for i, seg in enumerate(segments, 1): for i, seg in enumerate(segments, 1):
lines.append(str(i)) out += [str(i), f"{_srt_time(seg['start'])} --> {_srt_time(seg['end'])}", seg["text"].strip(), ""]
lines.append(f"{_fmt_srt_time(seg['start'])} --> {_fmt_srt_time(seg['end'])}") return "\n".join(out)
lines.append(seg["text"].strip())
lines.append("")
return "\n".join(lines)
def _make_vtt(segments: list) -> str: def make_vtt(segments: list) -> str:
lines = ["WEBVTT", ""] out = ["WEBVTT", ""]
for i, seg in enumerate(segments, 1): for i, seg in enumerate(segments, 1):
lines.append(f"{i}") out += [str(i), f"{_vtt_time(seg['start'])} --> {_vtt_time(seg['end'])}", seg["text"].strip(), ""]
lines.append(f"{_fmt_vtt_time(seg['start'])} --> {_fmt_vtt_time(seg['end'])}") return "\n".join(out)
lines.append(seg["text"].strip())
lines.append("")
return "\n".join(lines)
# ══════════════════════════════════════════════════════════════ # ══════════════════════════════════════════════════════════════
# 번역 (Ollama / OpenRouter) # LLM 번역 (세그먼트 배치)
# ══════════════════════════════════════════════════════════════ # ══════════════════════════════════════════════════════════════
def _translate_segments(segments: list, target_lang: str, def _translate_batch(texts: list, target_lang: str,
use_openrouter: bool, model: str, use_openrouter: bool, model: str,
openrouter_url: str, openrouter_key: str, openrouter_url: str, openrouter_key: str) -> list:
task_self=None) -> list: """texts 리스트 → 번역된 texts 리스트"""
"""세그먼트 텍스트를 target_lang으로 번역해서 새 세그먼트 리스트 반환""" if not texts or not model: return texts
if not model or not target_lang:
return segments
lang_name = _lang_name(target_lang) lang_name = _lang_name(target_lang)
translated = []
# 세그먼트를 청크로 묶어서 번역 (API 호출 최소화)
# 최대 20개씩 묶음
CHUNK = 20
chunks = [segments[i:i+CHUNK] for i in range(0, len(segments), CHUNK)]
for ci, chunk in enumerate(chunks):
if task_self:
pct = 85 + int((ci / len(chunks)) * 10)
task_self.update_state(state="PROGRESS",
meta={"progress": pct,
"message": f"번역 중... ({ci*CHUNK+1}/{len(segments)})"})
# JSON 배열로 텍스트만 전달
texts = [seg["text"].strip() for seg in chunk]
prompt = ( prompt = (
f"다음 문장들{lang_name}로 번역해줘.\n" f"아래 자막 문장 배열{lang_name}로 번역해줘.\n"
f"JSON 배열 형식으로만 답해. 설명 없이 번역된 문장 배열만 출력.\n" f"반드시 JSON 문자열 배열로만 답해. 설명·마크다운 없이 배열만 출력.\n"
f"입력 배열과 동일한 개수, 동일한 순서로 출력해.\n\n" f"입력과 동일한 개수와 순서를 유지해.\n\n"
f"입력: {json.dumps(texts, ensure_ascii=False)}" f"{json.dumps(texts, ensure_ascii=False)}"
) )
try: try:
if use_openrouter and openrouter_key: if use_openrouter and openrouter_key:
resp = httpx.post( resp = httpx.post(
@@ -149,101 +121,177 @@ def _translate_segments(segments: list, target_lang: str,
resp.raise_for_status() resp.raise_for_status()
raw = resp.json().get("message",{}).get("content","").strip() raw = resp.json().get("message",{}).get("content","").strip()
# JSON 파싱 # 코드블록 제거 후 JSON 파싱
# 코드블록 제거
if "```" in raw: if "```" in raw:
raw = raw.split("```")[1].lstrip("json").strip() raw = raw.split("```")[1].lstrip("json\n").rstrip()
trans_texts = json.loads(raw) result = json.loads(raw)
if not isinstance(trans_texts, list): if isinstance(result, list) and len(result) == len(texts):
trans_texts = texts # 파싱 실패 시 원본 유지 return [str(r) for r in result]
return texts
except Exception as e: except Exception as e:
print(f"[번역 실패 chunk {ci}] {e}") print(f"[번역 실패] {e}")
trans_texts = texts # 실패 시 원본 유지 return texts # 실패 시 원본 유지
# 번역된 텍스트를 세그먼트에 결합
for seg, t_text in zip(chunk, trans_texts):
translated.append({**seg, "text": t_text})
# 남은 세그먼트 (번역 누락)
if len(trans_texts) < len(chunk):
for seg in chunk[len(trans_texts):]:
translated.append(seg)
return translated
# ══════════════════════════════════════════════════════════════
# STT + Ollama/OpenRouter post-processing (for the existing speech-to-text tab)
# ══════════════════════════════════════════════════════════════
def _ollama_postprocess(text: str, model: str) -> str: def _ollama_postprocess(text: str, model: str) -> str:
if not model or not text.strip(): return text if not model or not text.strip(): return text
prompt = ( prompt = ("다음은 음성 인식으로 추출된 텍스트입니다. "
"다음은 음성 인식으로 추출된 텍스트입니다. "
"내용은 절대 변경하지 말고, 문장 부호를 추가하고 자연스럽게 다듬어줘. " "내용은 절대 변경하지 말고, 문장 부호를 추가하고 자연스럽게 다듬어줘. "
"결과 텍스트만 출력하고 설명은 하지 마.\n\n" + text "결과 텍스트만 출력하고 설명은 하지 마.\n\n" + text)
)
try: try:
resp = httpx.post(f"{OLLAMA_URL}/api/chat", resp = httpx.post(f"{OLLAMA_URL}/api/chat",
json={"model":model,"messages":[{"role":"user","content":prompt}], json={"model":model,"messages":[{"role":"user","content":prompt}],
"stream":False,"options":{"temperature":0.1}}, "stream":False,"options":{"temperature":0.1}},
timeout=float(OLLAMA_TIMEOUT)) timeout=float(OLLAMA_TIMEOUT))
resp.raise_for_status() resp.raise_for_status()
result = resp.json().get("message",{}).get("content","").strip() return resp.json().get("message",{}).get("content","").strip() or text
return result if result else text except: return text
except Exception as e:
print(f"[Ollama 후처리 실패] {e}"); return text
def _openrouter_postprocess(text: str, model: str, base_url: str, api_key: str) -> str: def _openrouter_postprocess(text: str, model: str, base_url: str, api_key: str) -> str:
if not model or not api_key or not text.strip(): return text if not model or not api_key or not text.strip(): return text
prompt = ( prompt = ("다음은 음성 인식으로 추출된 텍스트입니다. "
"다음은 음성 인식으로 추출된 텍스트입니다. "
"내용은 절대 변경하지 말고, 문장 부호를 추가하고 자연스럽게 다듬어줘. " "내용은 절대 변경하지 말고, 문장 부호를 추가하고 자연스럽게 다듬어줘. "
"결과 텍스트만 출력하고 설명은 하지 마.\n\n" + text "결과 텍스트만 출력하고 설명은 하지 마.\n\n" + text)
)
try: try:
resp = httpx.post(f"{base_url.rstrip('/')}/chat/completions", resp = httpx.post(f"{base_url.rstrip('/')}/chat/completions",
headers={"Authorization":f"Bearer {api_key}","HTTP-Referer":"https://voicescript.local","Content-Type":"application/json"}, headers={"Authorization":f"Bearer {api_key}","HTTP-Referer":"https://voicescript.local","Content-Type":"application/json"},
json={"model":model,"messages":[{"role":"user","content":prompt}],"temperature":0.1}, json={"model":model,"messages":[{"role":"user","content":prompt}],"temperature":0.1},
timeout=float(OLLAMA_TIMEOUT)) timeout=float(OLLAMA_TIMEOUT))
resp.raise_for_status() resp.raise_for_status()
result = resp.json()["choices"][0]["message"]["content"].strip() return resp.json()["choices"][0]["message"]["content"].strip() or text
return result if result else text except: return text
except Exception as e:
print(f"[OpenRouter 후처리 실패] {e}"); return text
# ══════════════════════════════════════════════════════════════
# Existing STT task (for the speech-to-text tab)
# ══════════════════════════════════════════════════════════════
@celery_app.task(bind=True, name="tasks.transcribe_task", queue="stt")
def transcribe_task(
    self,
    file_id: str, audio_path: str,
    use_ollama: bool = False, ollama_model: str = "",
    use_openrouter: bool = False, openrouter_model: str = "",
    openrouter_url: str = "", openrouter_key: str = "",
):
    """Transcribe an audio file and write the result to ``<file_id>.txt``.

    Steps: run the speech model on ``audio_path``, collect the timestamped
    segments while reporting progress, optionally polish the joined text with
    Ollama (checked first) or OpenRouter, write the report into OUTPUT_DIR,
    then delete the source audio. The audio file is deleted only on success.

    Returns:
        A dict with the final and raw text, the segments, the detected
        language, the duration, the output file name, and flags describing
        which post-processor was requested.

    Raises:
        Exception: any failure, re-raised with a "변환 실패" prefix and the
        original exception chained as the cause.
    """
    self.update_state(state="PROGRESS", meta={"progress":5,"message":"모델 준비 중..."})
    try:
        model = get_model()
        self.update_state(state="PROGRESS", meta={"progress":15,"message":"오디오 분석 중..."})
        segments_gen, info = model.transcribe(
            audio_path, language=LANGUAGE, beam_size=BEAM_SIZE,
            initial_prompt=INITIAL_PROMPT, vad_filter=True,
            vad_parameters=dict(min_silence_duration_ms=500), word_timestamps=False,
        )
        self.update_state(state="PROGRESS", meta={"progress":30,"message":"텍스트 변환 중..."})

        segments, parts = [], []
        duration = info.duration
        for seg in segments_gen:
            segments.append({"start":round(seg.start,3),"end":round(seg.end,3),"text":seg.text.strip()})
            parts.append(seg.text.strip())
            if duration > 0:
                # Map the position in the audio onto the 30-80% progress band.
                pct = 30 + int((seg.end/duration)*50)
                self.update_state(state="PROGRESS",
                    meta={"progress":min(pct,80),"message":f"변환 중... {seg.end:.0f}s / {duration:.0f}s"})

        raw_text = "\n".join(parts)
        full_text = raw_text
        # Ollama takes precedence when both post-processors are requested.
        if use_ollama and ollama_model:
            self.update_state(state="PROGRESS",meta={"progress":85,"message":f"Ollama({ollama_model}) 교정 중..."})
            full_text = _ollama_postprocess(raw_text, ollama_model)
        elif use_openrouter and openrouter_model and openrouter_key:
            self.update_state(state="PROGRESS",meta={"progress":85,"message":f"OpenRouter({openrouter_model}) 교정 중..."})
            full_text = _openrouter_postprocess(raw_text, openrouter_model, openrouter_url, openrouter_key)

        self.update_state(state="PROGRESS",meta={"progress":95,"message":"파일 저장 중..."})
        os.makedirs(OUTPUT_DIR, exist_ok=True)
        output_filename = f"{file_id}.txt"
        with open(os.path.join(OUTPUT_DIR, output_filename),"w",encoding="utf-8") as f:
            f.write(f"# 변환 결과\n# 언어: {info.language} | 재생 시간: {duration:.1f}\n\n## 전체 텍스트\n\n{full_text}\n\n## 타임스탬프별 세그먼트\n\n")
            for seg in segments:
                m,s=divmod(int(seg['start']),60)
                f.write(f"[{m:02d}:{s:02d}] {seg['text']}\n")

        # Best-effort cleanup: a missing or locked source file must not fail
        # a transcription that has already been written out.
        try:
            os.remove(audio_path)
        except OSError:
            pass

        return {
            "text":full_text,"raw_text":raw_text,"segments":segments,
            "language":info.language,"duration":round(duration,1),
            "output_file":output_filename,
            "ollama_used":use_ollama and bool(ollama_model),
            "ollama_model":ollama_model if (use_ollama and ollama_model) else "",
            "openrouter_used":use_openrouter and bool(openrouter_model) and bool(openrouter_key),
            "openrouter_model":openrouter_model if (use_openrouter and openrouter_model) else "",
        }
    except Exception as e:
        # Chain the cause so the worker log keeps the original traceback.
        raise Exception(f"변환 실패: {str(e)}") from e
# ══════════════════════════════════════════════════════════════
# 자막 파이프라인 태스크
# Step 1: ffmpeg → WAV
# Step 2: Whisper → 원어 SRT/VTT
# Step 3: LLM → 번역 SRT/VTT (선택)
# ══════════════════════════════════════════════════════════════
@celery_app.task(bind=True, name="tasks.subtitle_pipeline_task", queue="stt")
def subtitle_pipeline_task(
self,
file_id: str,
video_path: str,
src_language: str = "", # 원어 코드 (빈칸=자동)
subtitle_fmt: str = "srt", # srt | vtt | both
translate_to: str = "", # 번역 대상 (빈칸=번역 안 함)
trans_model: str = "", # 번역 모델
trans_via: str = "ollama",# ollama | openrouter
openrouter_url: str = "",
openrouter_key: str = "",
):
os.makedirs(OUTPUT_DIR, exist_ok=True)
wav_path = os.path.join(os.path.dirname(video_path), f"{file_id}_audio.wav")
result_files = {}
try:
# ── Step 1: ffmpeg 오디오 추출 ────────────────────────
self.update_state(state="PROGRESS", meta={
"progress": 5,
"step": 1,
"step_msg": "오디오 추출 중...",
"message": "Step 1/3 — ffmpeg 오디오 추출 중..."
})
cmd = [
"ffmpeg", "-y",
"-i", video_path,
"-vn", # 비디오 스트림 제거
"-ar", "16000", # 16kHz — Whisper 최적
"-ac", "1", # 모노
"-c:a", "pcm_s16le",# WAV 무손실
wav_path
]
proc = subprocess.run(cmd, capture_output=True, timeout=600)
if proc.returncode != 0:
err = proc.stderr.decode(errors="replace")[-500:]
raise Exception(f"ffmpeg 오디오 추출 실패: {err}")
if not os.path.exists(wav_path) or os.path.getsize(wav_path) < 1000:
raise Exception("ffmpeg가 오디오를 추출하지 못했습니다. 영상에 오디오 트랙이 있는지 확인하세요.")
try: os.remove(video_path)
except: pass
# ── Step 2: Whisper STT → 원어 자막 ───────────────────
self.update_state(state="PROGRESS", meta={
"progress": 15,
"step": 2,
"step_msg": "음성 인식 중...",
"message": "Step 2/3 — Whisper 음성 인식 시작..."
})
whisper = get_model()
lang = src_language.strip() or None
segments_gen, info = whisper.transcribe(
wav_path,
language=lang, language=lang,
beam_size=BEAM_SIZE, beam_size=BEAM_SIZE,
initial_prompt=INITIAL_PROMPT, initial_prompt=INITIAL_PROMPT,
@@ -252,131 +300,115 @@ def transcribe_task(
word_timestamps=False, word_timestamps=False,
) )
self.update_state(state="PROGRESS", meta={"progress":30,"message":"텍스트 변환 중..."}) segments = []
segments, parts = [], []
duration = info.duration duration = info.duration
for seg in segments_gen:
segments.append({"start":round(seg.start,3),"end":round(seg.end,3),"text":seg.text.strip()})
parts.append(seg.text.strip())
if duration > 0:
pct = 30 + int((seg.end/duration)*45)
self.update_state(state="PROGRESS",
meta={"progress":min(pct,75),
"message":f"변환 중... {seg.end:.0f}s / {duration:.0f}s"})
raw_text = "\n".join(parts)
full_text = raw_text
detected_lang = info.language detected_lang = info.language
# ── 텍스트 후처리 (교정) ────────────────────────────── for seg in segments_gen:
if use_ollama and ollama_model and not subtitle_mode: segments.append({
self.update_state(state="PROGRESS", "start": round(seg.start, 3),
meta={"progress":80,"message":f"Ollama({ollama_model}) 교정 중..."}) "end": round(seg.end, 3),
full_text = _ollama_postprocess(raw_text, ollama_model) "text": seg.text.strip(),
})
if duration > 0:
pct = 15 + int((seg.end / duration) * 55)
self.update_state(state="PROGRESS", meta={
"progress": min(pct, 70),
"step": 2,
"step_msg": f"{seg.end:.0f}s / {duration:.0f}s 인식 완료",
"message": f"Step 2/3 — {seg.end:.0f}s / {duration:.0f}s",
})
elif use_openrouter and openrouter_model and openrouter_key and not subtitle_mode: try: os.remove(wav_path)
self.update_state(state="PROGRESS",
meta={"progress":80,"message":f"OpenRouter({openrouter_model}) 교정 중..."})
full_text = _openrouter_postprocess(raw_text, openrouter_model, openrouter_url, openrouter_key)
# ── 자막 모드: 번역 ──────────────────────────────────
translated_segments = segments
is_translated = False
if subtitle_mode and translate_to and translate_to != detected_lang:
t_model = translate_model or (ollama_model if translate_via=="ollama" else openrouter_model)
t_via_or = (translate_via == "openrouter" and bool(openrouter_key))
self.update_state(state="PROGRESS",
meta={"progress":82,
"message":f"{_lang_name(translate_to)}로 번역 중..."})
translated_segments = _translate_segments(
segments, translate_to,
use_openrouter=t_via_or,
model=t_model,
openrouter_url=openrouter_url,
openrouter_key=openrouter_key,
task_self=self,
)
is_translated = True
# 번역된 전체 텍스트
full_text = "\n".join(s["text"] for s in translated_segments)
self.update_state(state="PROGRESS", meta={"progress":93,"message":"파일 저장 중..."})
os.makedirs(OUTPUT_DIR, exist_ok=True)
result_files = {}
# ── TXT 저장 ─────────────────────────────────────────
txt_filename = f"{file_id}.txt"
with open(os.path.join(OUTPUT_DIR, txt_filename), "w", encoding="utf-8") as f:
f.write(f"# 변환 결과\n")
f.write(f"# 언어: {detected_lang} | 재생 시간: {duration:.1f}\n")
if is_translated:
f.write(f"# 번역: {_lang_name(translate_to)}\n")
f.write(f"\n## 전체 텍스트\n\n{full_text}\n\n")
f.write(f"## 타임스탬프별 세그먼트\n\n")
for seg in (translated_segments if is_translated else segments):
f.write(f"[{_fmt_ts(seg['start'])}{_fmt_ts(seg['end'])}] {seg['text']}\n")
result_files["txt"] = txt_filename
# ── 자막 파일 저장 ────────────────────────────────────
if subtitle_mode:
sub_segs = translated_segments if is_translated else segments
lang_suffix = f".{translate_to}" if is_translated else f".{detected_lang}"
if subtitle_format in ("srt", "both"):
srt_fn = f"{file_id}{lang_suffix}.srt"
with open(os.path.join(OUTPUT_DIR, srt_fn), "w", encoding="utf-8") as f:
f.write(_make_srt(sub_segs))
result_files["srt"] = srt_fn
if subtitle_format in ("vtt", "both"):
vtt_fn = f"{file_id}{lang_suffix}.vtt"
with open(os.path.join(OUTPUT_DIR, vtt_fn), "w", encoding="utf-8") as f:
f.write(_make_vtt(sub_segs))
result_files["vtt"] = vtt_fn
# 원본 언어 SRT도 함께 (번역 시)
if is_translated and subtitle_format in ("srt","both"):
orig_fn = f"{file_id}.{detected_lang}.srt"
with open(os.path.join(OUTPUT_DIR, orig_fn), "w", encoding="utf-8") as f:
f.write(_make_srt(segments))
result_files["srt_original"] = orig_fn
try: os.remove(audio_path)
except: pass except: pass
if not segments:
raise Exception("음성이 감지되지 않았습니다. 영상에 음성이 있는지 확인하세요.")
# 원어 자막 저장
lang_suffix = detected_lang
if subtitle_fmt in ("srt", "both"):
fn = f"{file_id}.{lang_suffix}.srt"
with open(os.path.join(OUTPUT_DIR, fn), "w", encoding="utf-8") as f:
f.write(make_srt(segments))
result_files["srt_orig"] = fn
if subtitle_fmt in ("vtt", "both"):
fn = f"{file_id}.{lang_suffix}.vtt"
with open(os.path.join(OUTPUT_DIR, fn), "w", encoding="utf-8") as f:
f.write(make_vtt(segments))
result_files["vtt_orig"] = fn
# ── Step 3: LLM 번역 (선택) ───────────────────────────
translated_segments = None
if translate_to and translate_to != detected_lang and trans_model:
target_name = _lang_name(translate_to)
use_or = (trans_via == "openrouter" and bool(openrouter_key))
total = len(segments)
CHUNK = 25 # 한 번에 25개씩 번역
translated_texts = []
for ci, start in enumerate(range(0, total, CHUNK)):
chunk = segments[start:start+CHUNK]
pct = 72 + int((ci * CHUNK / total) * 22)
self.update_state(state="PROGRESS", meta={
"progress": min(pct, 94),
"step": 3,
"step_msg": f"{min(start+CHUNK, total)}/{total}개 번역 완료",
"message": f"Step 3/3 — {target_name}로 번역 중... ({min(start+CHUNK,total)}/{total})",
})
batch_texts = [s["text"] for s in chunk]
translated = _translate_batch(
batch_texts, translate_to,
use_openrouter=use_or,
model=trans_model,
openrouter_url=openrouter_url,
openrouter_key=openrouter_key,
)
translated_texts.extend(translated)
# 번역된 텍스트 → 세그먼트 조합 (타임스탬프 유지)
translated_segments = [
{**seg, "text": translated_texts[i] if i < len(translated_texts) else seg["text"]}
for i, seg in enumerate(segments)
]
# 번역 자막 저장
trans_suffix = translate_to
if subtitle_fmt in ("srt", "both"):
fn = f"{file_id}.{trans_suffix}.srt"
with open(os.path.join(OUTPUT_DIR, fn), "w", encoding="utf-8") as f:
f.write(make_srt(translated_segments))
result_files["srt_trans"] = fn
if subtitle_fmt in ("vtt", "both"):
fn = f"{file_id}.{trans_suffix}.vtt"
with open(os.path.join(OUTPUT_DIR, fn), "w", encoding="utf-8") as f:
f.write(make_vtt(translated_segments))
result_files["vtt_trans"] = fn
self.update_state(state="PROGRESS", meta={
"progress": 98, "step": 3,
"step_msg": "완료", "message": "자막 파일 저장 완료"
})
return { return {
# 기본 STT 결과 "detected_language": detected_lang,
"text": full_text,
"raw_text": raw_text,
"segments": translated_segments if is_translated else segments,
"orig_segments": segments,
"language": detected_lang,
"duration": round(duration, 1), "duration": round(duration, 1),
# 후처리 "segment_count": len(segments),
"ollama_used": use_ollama and bool(ollama_model) and not subtitle_mode, "translated": bool(translated_segments),
"ollama_model": ollama_model if (use_ollama and not subtitle_mode) else "", "translate_to": translate_to if translated_segments else "",
"openrouter_used": use_openrouter and bool(openrouter_model) and not subtitle_mode, "subtitle_fmt": subtitle_fmt,
"openrouter_model": openrouter_model if (use_openrouter and not subtitle_mode) else "",
# 자막
"subtitle_mode": subtitle_mode,
"subtitle_format": subtitle_format,
"translated": is_translated,
"translate_to": translate_to if is_translated else "",
"translate_model": translate_model if is_translated else "",
# 파일 # 파일
"output_file": result_files.get("txt",""), "srt_orig": result_files.get("srt_orig", ""),
"srt_file": result_files.get("srt",""), "vtt_orig": result_files.get("vtt_orig", ""),
"vtt_file": result_files.get("vtt",""), "srt_trans": result_files.get("srt_trans", ""),
"srt_original_file": result_files.get("srt_original",""), "vtt_trans": result_files.get("vtt_trans", ""),
} }
except Exception as e: except Exception as e:
raise Exception(f"변환 실패: {str(e)}") # 임시 파일 정리
for p in [video_path, wav_path]:
try: os.remove(p)
def _fmt_ts(s: float) -> str: except: pass
m, sec = divmod(int(s), 60) raise Exception(f"자막 생성 실패: {str(e)}")
return f"{m:02d}:{sec:02d}"