feat: 복수 파일 배치 변환 (STT/OCR)

This commit is contained in:
root
2026-05-02 02:14:44 +09:00
parent 4af20f72e0
commit 4fc3da1a2d
6 changed files with 1252 additions and 1339 deletions

View File

@@ -9,6 +9,8 @@ RUN apt-get update && apt-get install -y \
libxext6 \
libxrender1 \
libgl1 \
libgles2 \
libegl1 \
wget \
curl \
&& rm -rf /var/lib/apt/lists/*
@@ -17,9 +19,8 @@ WORKDIR /app
COPY requirements.txt .
# PaddlePaddle CPU (AMD64) — compatible with paddleocr 3.x
RUN pip install --no-cache-dir paddlepaddle==3.0.0 \
-i https://pypi.tuna.tsinghua.edu.cn/simple
# PaddlePaddle CPU — official PyPI (Tsinghua mirror removed because access to it was unstable)
RUN pip install --no-cache-dir paddlepaddle==3.0.0
# Remaining packages
RUN pip install --no-cache-dir -r requirements.txt

View File

@@ -5,6 +5,7 @@ from datetime import datetime
from fastapi import FastAPI, UploadFile, File, HTTPException, Depends, Form, Request
from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse
from typing import List
from auth import (authenticate, create_access_token, init_users,
require_auth, require_admin, require_stt, require_ocr,
@@ -28,20 +29,25 @@ HISTORY_MAX = 300
os.makedirs(UPLOAD_DIR, exist_ok=True)
os.makedirs(OUTPUT_DIR, exist_ok=True)
# Extensions accepted by the STT upload endpoints.
AUDIO_EXT = {
    "mp3", "mp4", "wav", "m4a", "ogg", "flac", "aac", "wma", "webm",
    "mkv", "avi", "mov", "ts", "mts", "m2ts", "wmv", "flv", "rmvb",
    "h264", "h265", "hevc", "264", "265",
}

# Extensions accepted by the OCR upload endpoints.
IMAGE_EXT = {"jpg", "jpeg", "png", "bmp", "tiff", "tif", "webp", "gif"}

# Language code -> display name.
SUPPORTED_LANGS = {
    "ko": "한국어",
    "en": "English",
    "ja": "日本語",
    "zh": "中文(简体)",
    "zh-tw": "中文(繁體)",
    "fr": "Français",
    "de": "Deutsch",
    "es": "Español",
    "it": "Italiano",
    "pt": "Português",
    "ru": "Русский",
    "ar": "العربية",
    "vi": "Tiếng Việt",
    "th": "ไทย",
    "id": "Bahasa Indonesia",
    "nl": "Nederlands",
    "pl": "Polski",
    "tr": "Türkçe",
    "sv": "Svenska",
    "uk": "Українська",
    "hi": "हिन्दी",
    "bn": "বাংলা",
}

# Fallback values used for any key the settings file does not define.
_DEFAULT_SETTINGS = {
    "stt_ollama_model": "",
    "ocr_ollama_model": "granite3.2-vision:latest",
    "cpu_threads": 0,
    "stt_timeout": 0,
    "ollama_timeout": 600,
    # OpenRouter
    "openrouter_url": "https://openrouter.ai/api/v1",
    "openrouter_api_key": "",
    "openrouter_stt_model": "",
    "openrouter_ocr_model": "",
}
_hist_lock = threading.Lock()
@@ -49,91 +55,93 @@ _hist_lock = threading.Lock()
# ── 설정 I/O ─────────────────────────────────────────────────
def _load_settings() -> dict:
    """Return the saved settings, filling any missing key from ``_DEFAULT_SETTINGS``.

    When the settings file does not exist, a copy of the defaults is returned.
    """
    if not SETTINGS_FILE.exists():
        return dict(_DEFAULT_SETTINGS)
    with open(SETTINGS_FILE, "r", encoding="utf-8") as fh:
        settings = json.load(fh)
    for key, default in _DEFAULT_SETTINGS.items():
        if key not in settings:
            settings[key] = default
    return settings
def _save_settings(data: dict):
    """Write *data* to the settings file as indented UTF-8 JSON.

    The parent directory is created first if it is missing.
    """
    target_dir = SETTINGS_FILE.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    with open(SETTINGS_FILE, "w", encoding="utf-8") as fh:
        json.dump(data, fh, ensure_ascii=False, indent=2)
# ── 이력 I/O ─────────────────────────────────────────────────
def _load_history() -> list:
    """Return the stored history records, or ``[]`` when none can be read.

    Reading is best-effort: a missing, unreadable or malformed history file
    yields an empty list rather than an error. The read happens under
    ``_hist_lock``.
    """
    with _hist_lock:
        if not HISTORY_FILE.exists():
            return []
        try:
            with open(HISTORY_FILE, "r", encoding="utf-8") as fh:
                return json.load(fh)
        except Exception:
            # Previously a bare ``except``; ``Exception`` keeps the fallback
            # without also swallowing KeyboardInterrupt / SystemExit.
            return []
def _write_history(h: list):
    """Overwrite the history file with *h* as indented UTF-8 JSON.

    Creates the parent directory when needed. This function does not take
    ``_hist_lock`` itself.
    """
    folder = HISTORY_FILE.parent
    folder.mkdir(parents=True, exist_ok=True)
    with open(HISTORY_FILE, "w", encoding="utf-8") as fh:
        json.dump(h, fh, ensure_ascii=False, indent=2)
def append_history(record: dict):
    """Insert *record* at the front of the history file.

    The stored list is trimmed to the first ``HISTORY_MAX`` entries. Recording
    is best-effort: a failure is logged and never raised to the caller.
    """
    import logging  # local import so this block does not depend on file-level imports

    with _hist_lock:
        try:
            history = []
            if HISTORY_FILE.exists():
                with open(HISTORY_FILE, "r", encoding="utf-8") as fh:
                    history = json.load(fh)
            history.insert(0, record)
            _write_history(history[:HISTORY_MAX])
        except Exception:
            # Previously ``except: pass``: the record vanished without a trace
            # and even KeyboardInterrupt / SystemExit were swallowed.
            logging.getLogger(__name__).exception("failed to append history record")
def _stt_history_output(result: dict) -> dict:
    """Build the history ``output`` summary for a finished STT task result."""
    text = result.get("text", "")
    return {
        "filename": result.get("output_file", ""),
        "language": result.get("language", ""),
        "duration_s": result.get("duration", 0),
        "segments": len(result.get("segments", [])),
        # NOTE(review): the original appended "" in both branches of a
        # ``len(text) > 200`` check, i.e. no truncation marker was ever added.
        "text_preview": text[:200],
        "ollama_used": result.get("ollama_used", False),
        "ollama_model": result.get("ollama_model", ""),
        "openrouter_used": result.get("openrouter_used", False),
        "openrouter_model": result.get("openrouter_model", ""),
        "subtitle_mode": result.get("subtitle_mode", False),
        "translated": result.get("translated", False),
        "translate_to": result.get("translate_to", ""),
        "srt_file": result.get("srt_file", ""),
        "vtt_file": result.get("vtt_file", ""),
    }


def _ocr_history_output(result: dict) -> dict:
    """Build the history ``output`` summary for a finished OCR task result."""
    full_text = result.get("full_text", "")
    return {
        "txt_file": result.get("txt_file", ""),
        "xlsx_file": result.get("xlsx_file", ""),
        "line_count": result.get("line_count", 0),
        "table_count": len(result.get("tables", [])),
        "backend": result.get("backend", ""),
        "ollama_model": result.get("ollama_model", ""),
        "openrouter_model": result.get("openrouter_model", ""),
        "text_preview": full_text[:200],
    }


def _update_history_by_task(task_id: str, result: dict, success: bool, error_msg: str = ""):
    """Finalize the history record that belongs to *task_id*.

    Only the first record with a matching ``task_id`` is considered, and only
    while its status is still ``"processing"``. On failure the record becomes
    ``"failed"`` with the first 300 characters of *error_msg*; on success it
    becomes ``"success"`` with an output summary built from *result*.

    The history file is rewritten only when a record was actually changed.
    Errors are logged and never raised to the caller.
    """
    import logging  # local import so this block does not depend on file-level imports

    with _hist_lock:
        if not HISTORY_FILE.exists():
            return
        try:
            with open(HISTORY_FILE, "r", encoding="utf-8") as fh:
                history = json.load(fh)
            record = next((h for h in history if h.get("task_id") == task_id), None)
            if record is None or record.get("status") != "processing":
                # Nothing to update; skip the pointless rewrite of the file.
                return
            if not success:
                record["status"] = "failed"
                record["output"] = {"error": error_msg[:300]}
            else:
                record["status"] = "success"
                build = _stt_history_output if record["type"] == "stt" else _ocr_history_output
                record["output"] = build(result)
            _write_history(history)
        except Exception:
            # Previously ``except: pass``; keep best-effort semantics but leave a trace.
            logging.getLogger(__name__).exception("failed to update history for task %s", task_id)
def delete_history_item(history_id: str) -> bool:
    """Remove every history record whose ``id`` equals *history_id*.

    Returns ``True`` when at least one record was removed and the file was
    rewritten. Returns ``False`` when the file is missing, no record matched,
    or reading/writing failed.
    """
    with _hist_lock:
        if not HISTORY_FILE.exists():
            return False
        try:
            with open(HISTORY_FILE, "r", encoding="utf-8") as fh:
                history = json.load(fh)
            remaining = [h for h in history if h.get("id") != history_id]
            if len(remaining) == len(history):
                return False
            _write_history(remaining)
            return True
        except Exception:
            # Previously a bare ``except``; ``Exception`` keeps the False
            # result without swallowing KeyboardInterrupt / SystemExit.
            return False
def clear_history():
    """Reset the history file to an empty JSON list, if the file exists."""
    with _hist_lock:
        if not HISTORY_FILE.exists():
            return
        HISTORY_FILE.write_text("[]", encoding="utf-8")
# ════════════════════════════════════════════════════════════════
@@ -148,177 +156,207 @@ async def on_startup():
# 인증
# ════════════════════════════════════════════════════════════════
@app.post("/api/login")
def login(username: str = Form(...), password: str = Form(...)):
    """Check the submitted credentials and return a bearer access token.

    Raises HTTP 401 when ``authenticate`` rejects the credentials.
    """
    if not authenticate(username, password):
        raise HTTPException(401, "아이디 또는 비밀번호가 올바르지 않습니다")
    token = create_access_token(username)
    return {"access_token": token, "token_type": "bearer"}
@app.get("/api/me")
def me(user: dict = Depends(require_auth)):
    """Return the authenticated user's name, role and permissions.

    ``role`` defaults to ``"user"`` and ``permissions`` to both features off
    when the user record does not carry them.
    """
    role = user.get("role", "user")
    permissions = user.get("permissions", {"stt": False, "ocr": False})
    return {"username": user["username"], "role": role, "permissions": permissions}
@app.get("/api/languages")
def get_languages(user: dict = Depends(require_auth)):
    """Return the ``SUPPORTED_LANGS`` mapping to an authenticated caller."""
    payload = {"languages": SUPPORTED_LANGS}
    return payload
# ════════════════════════════════════════════════════════════════
# 시스템 정보
# ════════════════════════════════════════════════════════════════
@app.get("/api/system")
def system_info(user: dict = Depends(require_auth)):
    """Report RAM, swap and CPU figures from psutil plus the CPU/timeout settings.

    Memory sizes are converted from bytes by dividing by 1024**3 and rounded
    to one decimal. ``cpu_percent`` is sampled over a 0.3 s interval.
    """
    mem = psutil.virtual_memory()
    swap = psutil.swap_memory()
    settings = _load_settings()

    def to_gb(num_bytes):
        return round(num_bytes / 1024**3, 1)

    return {
        "ram_total_gb": to_gb(mem.total),
        "ram_used_gb": to_gb(mem.used),
        "ram_avail_gb": to_gb(mem.available),
        "ram_percent": mem.percent,
        "swap_total_gb": to_gb(swap.total),
        "swap_used_gb": to_gb(swap.used),
        "cpu_logical": psutil.cpu_count(logical=True),
        "cpu_physical": psutil.cpu_count(logical=False),
        "cpu_percent": psutil.cpu_percent(interval=0.3),
        "cpu_threads_setting": settings.get("cpu_threads", 0),
        "stt_timeout": settings.get("stt_timeout", 0),
        "ollama_timeout": settings.get("ollama_timeout", 600),
    }
# ════════════════════════════════════════════════════════════════
# STT
# STT 공통 디스패치
# ════════════════════════════════════════════════════════════════
async def _dispatch_stt(
    request, files,
    use_ollama, ollama_model,
    use_openrouter, openrouter_model,
    subtitle_mode, subtitle_format,
    force_language,
    translate_to, translate_model, translate_via,
    user,
):
    """Save each upload, queue a transcription task for it and record it in history.

    The boolean-like form values (``use_ollama``, ``use_openrouter``,
    ``subtitle_mode``) are treated as true only when they equal ``"true"``
    case-insensitively. Blank model names fall back to the saved settings, and
    a blank ``translate_model`` falls back to the Ollama or OpenRouter model
    depending on ``translate_via``.

    Returns one dict per input file, in order: ``task_id``/``file_id``/
    ``filename`` for queued files, or ``error``/``filename`` for files whose
    extension is not in ``AUDIO_EXT`` (those are skipped, not raised).
    """
    settings = _load_settings()
    ollama_on = use_ollama.lower() == "true"
    openrouter_on = use_openrouter.lower() == "true"
    subtitles_on = subtitle_mode.lower() == "true"

    if ollama_on and not ollama_model.strip():
        ollama_model = settings.get("stt_ollama_model", "")
    if openrouter_on and not openrouter_model.strip():
        openrouter_model = settings.get("openrouter_stt_model", "")
    if not translate_model.strip():
        if translate_via == "ollama":
            translate_model = ollama_model
        else:
            translate_model = openrouter_model

    outcomes = []
    for upload in files:
        _check_size(request)
        ext = _ext(upload.filename)
        if ext not in AUDIO_EXT:
            outcomes.append({
                "error": f"{upload.filename}: 지원하지 않는 형식",
                "filename": upload.filename,
            })
            continue

        file_id = str(uuid.uuid4())
        save_path = os.path.join(UPLOAD_DIR, f"{file_id}.{ext}")
        await _save_upload(upload, save_path)
        size_bytes = os.path.getsize(save_path)

        task = transcribe_task.delay(
            file_id, save_path,
            ollama_on, ollama_model,
            openrouter_on, openrouter_model,
            settings.get("openrouter_url", ""), settings.get("openrouter_api_key", ""),
            subtitles_on, subtitle_format or "srt",
            translate_to or "",
            translate_model or "",
            translate_via or "ollama",
            force_language or "",
        )

        stt_settings = {
            "model": os.getenv("WHISPER_MODEL", "medium"),
            "language": force_language or os.getenv("WHISPER_LANGUAGE", "auto"),
            "compute_type": os.getenv("WHISPER_COMPUTE_TYPE", "int8"),
            "cpu_threads": settings.get("cpu_threads", 0),
            "subtitle_mode": subtitles_on,
            "subtitle_format": subtitle_format,
            "translate_to": translate_to,
            "translate_model": translate_model,
            "use_ollama": ollama_on,
            "ollama_model": ollama_model if ollama_on else "",
            "use_openrouter": openrouter_on,
            "openrouter_model": openrouter_model if openrouter_on else "",
        }
        append_history({
            "id": file_id,
            "task_id": task.id,
            "type": "stt",
            "status": "processing",
            "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "username": user["username"],
            "input": {"filename": upload.filename, "size_bytes": size_bytes, "format": ext.upper()},
            "settings": stt_settings,
            "output": None,
        })
        outcomes.append({"task_id": task.id, "file_id": file_id, "filename": upload.filename})
    return outcomes
# ════════════════════════════════════════════════════════════════
# OCR
# STT — 단일 / 배치
# ════════════════════════════════════════════════════════════════
@app.post("/api/transcribe")
async def transcribe(
    request: Request, file: UploadFile = File(...),
    use_ollama: str = Form("false"), ollama_model: str = Form(""),
    use_openrouter: str = Form("false"), openrouter_model: str = Form(""),
    subtitle_mode: str = Form("false"), subtitle_format: str = Form("srt"),
    force_language: str = Form(""),
    translate_to: str = Form(""), translate_model: str = Form(""), translate_via: str = Form("ollama"),
    user: dict = Depends(require_stt),
):
    """Queue a single file for transcription via ``_dispatch_stt``.

    Returns the ``task_id``/``file_id``/``filename`` dict for the queued file.

    Raises HTTP 400 when the dispatcher rejected the file. The dispatcher
    reports rejections as an ``error`` dict so a batch can continue; for a
    single upload that dict would otherwise be returned with HTTP 200 and no
    ``task_id``, whereas this endpoint previously answered 400 for an
    unsupported format.
    """
    items = await _dispatch_stt(
        request, [file],
        use_ollama, ollama_model,
        use_openrouter, openrouter_model,
        subtitle_mode, subtitle_format,
        force_language,
        translate_to, translate_model, translate_via,
        user,
    )
    item = items[0]
    if "error" in item:
        raise HTTPException(400, item["error"])
    return item
@app.post("/api/transcribe/batch")
async def transcribe_batch(
    request: Request, files: List[UploadFile] = File(...),
    use_ollama: str = Form("false"), ollama_model: str = Form(""),
    use_openrouter: str = Form("false"), openrouter_model: str = Form(""),
    subtitle_mode: str = Form("false"), subtitle_format: str = Form("srt"),
    force_language: str = Form(""),
    translate_to: str = Form(""), translate_model: str = Form(""), translate_via: str = Form("ollama"),
    user: dict = Depends(require_stt),
):
    """Queue up to 20 files for transcription in one request.

    Raises HTTP 400 when no file is supplied or more than 20 are supplied.
    Returns the per-file results from ``_dispatch_stt`` under ``items`` along
    with their count under ``total``.
    """
    if not files:
        raise HTTPException(400, "파일이 없습니다")
    if len(files) > 20:
        raise HTTPException(400, "한 번에 최대 20개까지 업로드할 수 있습니다")
    dispatched = await _dispatch_stt(
        request, files,
        use_ollama, ollama_model,
        use_openrouter, openrouter_model,
        subtitle_mode, subtitle_format,
        force_language,
        translate_to, translate_model, translate_via,
        user,
    )
    return {"items": dispatched, "total": len(dispatched)}
# ════════════════════════════════════════════════════════════════
# 상태
# OCR 공통 디스패치
# ════════════════════════════════════════════════════════════════
async def _dispatch_ocr(request, files, mode, backend, ollama_model, openrouter_model, custom_prompt, user):
    """Save each upload, queue an OCR task for it and record it in history.

    An unknown ``mode`` falls back to ``"text"`` and an unknown ``backend`` to
    ``"paddle"``. A blank model name for the chosen backend falls back to the
    saved settings.

    Returns one dict per input file, in order: ``task_id``/``file_id``/
    ``filename`` for queued files, or ``error``/``filename`` for files whose
    extension is not in ``IMAGE_EXT`` (those are skipped, not raised).
    """
    if mode not in ("text", "structure"):
        mode = "text"
    if backend not in ("paddle", "ollama", "openrouter"):
        backend = "paddle"

    settings = _load_settings()
    if backend == "ollama" and not ollama_model.strip():
        ollama_model = settings.get("ocr_ollama_model", "granite3.2-vision:latest")
    if backend == "openrouter" and not openrouter_model.strip():
        openrouter_model = settings.get("openrouter_ocr_model", "")

    outcomes = []
    for upload in files:
        _check_size(request)
        ext = _ext(upload.filename)
        if ext not in IMAGE_EXT:
            outcomes.append({
                "error": f"{upload.filename}: 지원하지 않는 형식",
                "filename": upload.filename,
            })
            continue

        file_id = str(uuid.uuid4())
        save_path = os.path.join(UPLOAD_DIR, f"{file_id}.{ext}")
        await _save_upload(upload, save_path)
        size_bytes = os.path.getsize(save_path)

        task = ocr_task.delay(
            file_id, save_path, mode, backend,
            ollama_model, openrouter_model,
            settings.get("openrouter_url", ""), settings.get("openrouter_api_key", ""),
            custom_prompt,
        )

        ocr_settings = {
            "backend": backend,
            "mode": mode,
            "ocr_lang": os.getenv("OCR_LANG", "korean"),
            "ollama_model": ollama_model if backend == "ollama" else "",
            "openrouter_model": openrouter_model if backend == "openrouter" else "",
            "ollama_timeout": settings.get("ollama_timeout", 600),
            # Only the first 200 characters of the prompt are kept in history.
            "custom_prompt": custom_prompt[:200] if custom_prompt else "",
        }
        append_history({
            "id": file_id,
            "task_id": task.id,
            "type": "ocr",
            "status": "processing",
            "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "username": user["username"],
            "input": {"filename": upload.filename, "size_bytes": size_bytes, "format": ext.upper()},
            "settings": ocr_settings,
            "output": None,
        })
        outcomes.append({"task_id": task.id, "file_id": file_id, "filename": upload.filename})
    return outcomes
@app.post("/api/ocr")
async def ocr(
    request: Request, file: UploadFile = File(...),
    mode: str = Form("text"), backend: str = Form("paddle"),
    ollama_model: str = Form(""), openrouter_model: str = Form(""), custom_prompt: str = Form(""),
    user: dict = Depends(require_ocr),
):
    """Queue a single image for OCR via ``_dispatch_ocr``.

    Returns the ``task_id``/``file_id``/``filename`` dict for the queued file.

    Raises HTTP 400 when the dispatcher rejected the file. The dispatcher
    reports rejections as an ``error`` dict so a batch can continue; for a
    single upload that dict would otherwise be returned with HTTP 200 and no
    ``task_id``.
    """
    items = await _dispatch_ocr(
        request, [file], mode, backend,
        ollama_model, openrouter_model, custom_prompt, user,
    )
    item = items[0]
    if "error" in item:
        raise HTTPException(400, item["error"])
    return item
@app.post("/api/ocr/batch")
async def ocr_batch(
    request: Request, files: List[UploadFile] = File(...),
    mode: str = Form("text"), backend: str = Form("paddle"),
    ollama_model: str = Form(""), openrouter_model: str = Form(""), custom_prompt: str = Form(""),
    user: dict = Depends(require_ocr),
):
    """Queue up to 20 images for OCR in one request.

    Raises HTTP 400 when no file is supplied or more than 20 are supplied.
    Returns the per-file results from ``_dispatch_ocr`` under ``items`` along
    with their count under ``total``.
    """
    if not files:
        raise HTTPException(400, "파일이 없습니다")
    if len(files) > 20:
        raise HTTPException(400, "한 번에 최대 20개까지")
    dispatched = await _dispatch_ocr(
        request, files, mode, backend,
        ollama_model, openrouter_model, custom_prompt, user,
    )
    return {"items": dispatched, "total": len(dispatched)}
# ════════════════════════════════════════════════════════════════
# 상태 / 이력 / 다운로드 / Ollama / OpenRouter / 설정 / 관리자
# ════════════════════════════════════════════════════════════════
@app.get("/api/status/{task_id}")
def get_status(task_id: str, user: dict = Depends(require_auth)):
    """Translate a Celery task's state into the status payload for polling clients.

    On SUCCESS and FAILURE the matching history record is updated before the
    response is returned. Any other state is reported lower-cased with
    progress 0.
    """
    r = celery_app.AsyncResult(task_id)

    if r.state == "PENDING":
        return {"state": "pending", "progress": 0, "message": "대기 중..."}

    if r.state == "PROGRESS":
        meta = r.info or {}
        return {
            "state": "progress",
            "progress": meta.get("progress", 0),
            "message": meta.get("message", "처리 중..."),
        }

    if r.state == "SUCCESS":
        _update_history_by_task(task_id, r.result or {}, True)
        return {"state": "success", "progress": 100, **(r.result or {})}

    if r.state == "FAILURE":
        _update_history_by_task(task_id, {}, False, str(r.info))
        return {"state": "failure", "progress": 0, "message": str(r.info)}

    return {"state": r.state.lower(), "progress": 0}
# ════════════════════════════════════════════════════════════════
# 이력
# ════════════════════════════════════════════════════════════════
@app.get("/api/history")
def get_history(page: int = 1, per_page: int = 15, type_: str = "", user: dict = Depends(require_auth)):
    """Return one page of history records visible to the caller.

    Non-admin users only see their own records. ``type_`` filters by record
    type when it is ``"stt"`` or ``"ocr"``; any other value applies no filter.

    ``page`` and ``per_page`` are clamped to at least 1. Without the clamp a
    zero or negative value produced a negative slice start, which selects
    records from the end of the list or nothing at all.
    """
    page = max(1, page)
    per_page = max(1, per_page)

    records = _load_history()
    if user.get("role") != "admin":
        records = [h for h in records if h.get("username") == user["username"]]
    if type_ in ("stt", "ocr"):
        records = [h for h in records if h.get("type") == type_]

    start = (page - 1) * per_page
    return {
        "total": len(records),
        "page": page,
        "per_page": per_page,
        "items": records[start:start + per_page],
    }
@app.delete("/api/history/{history_id}")
@@ -328,161 +366,87 @@ def delete_history(history_id:str,user:dict=Depends(require_auth)):
@app.delete("/api/history")
def clear_all_history(user: dict = Depends(require_admin)):
    """Clear the whole history file; guarded by ``require_admin``."""
    clear_history()
    return {"ok": True}
# ════════════════════════════════════════════════════════════════
# 다운로드
# ════════════════════════════════════════════════════════════════
@app.get("/api/download/{filename}")
def download(filename: str, user: dict = Depends(require_auth)):
    """Serve a file from ``OUTPUT_DIR`` by name.

    Raises HTTP 400 when the name contains ``..`` or ``/``, and HTTP 404 when
    no such file exists. The media type is the spreadsheet type for ``.xlsx``,
    ``text/vtt`` for ``.vtt`` and ``text/plain`` for everything else
    (including ``.srt``).
    """
    if ".." in filename or "/" in filename:
        raise HTTPException(400, "잘못된 파일명")

    path = os.path.join(OUTPUT_DIR, filename)
    if not os.path.exists(path):
        raise HTTPException(404, "파일을 찾을 수 없습니다")

    if filename.endswith(".xlsx"):
        media = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
    elif filename.endswith(".vtt"):
        media = "text/vtt"
    else:
        media = "text/plain"
    return FileResponse(path, media_type=media, filename=filename)
# ════════════════════════════════════════════════════════════════
# Ollama 모델 목록
# ════════════════════════════════════════════════════════════════
@app.get("/api/ollama/models")
def ollama_models(user: dict = Depends(require_auth)):
    """List the model names reported by the Ollama ``/api/tags`` endpoint.

    Any failure is reported as ``connected: False`` with the error text rather
    than raised.
    """
    try:
        resp = httpx.get(f"{OLLAMA_URL}/api/tags", timeout=8.0)
        resp.raise_for_status()
        names = [entry["name"] for entry in resp.json().get("models", [])]
        return {"models": names, "connected": True}
    except Exception as e:
        return {"models": [], "connected": False, "error": str(e)}
# ════════════════════════════════════════════════════════════════
# OpenRouter 모델 목록 & 연결 테스트
# ════════════════════════════════════════════════════════════════
@app.get("/api/openrouter/models")
def openrouter_models(user: dict = Depends(require_auth)):
    """List the model ids returned by the configured OpenRouter ``/models`` endpoint.

    ``vision_models`` is the subset whose id contains one of a fixed set of
    substrings. Without a stored API key, or on any request failure, the
    result has ``connected: False`` and an ``error`` text instead of raising.
    """
    settings = _load_settings()
    api_key = settings.get("openrouter_api_key", "")
    base_url = settings.get("openrouter_url", "https://openrouter.ai/api/v1").rstrip("/")
    if not api_key:
        return {"models": [], "connected": False, "error": "API 키가 설정되지 않았습니다"}

    vision_markers = ["vision", "claude-3", "gemini", "gpt-4o", "llava", "pixtral", "qwen-vl", "deepseek-vl"]
    headers = {
        "Authorization": f"Bearer {api_key}",
        "HTTP-Referer": "https://voicescript.local",
    }
    try:
        resp = httpx.get(f"{base_url}/models", headers=headers, timeout=12.0)
        resp.raise_for_status()
        catalog = resp.json().get("data", [])
        vision = []
        for model in catalog:
            model_id = model["id"]
            if any(marker in model_id.lower() for marker in vision_markers):
                vision.append(model_id)
        return {
            "models": [model["id"] for model in catalog],
            "vision_models": vision,
            "connected": True,
            "total": len(catalog),
        }
    except httpx.HTTPStatusError as e:
        return {"models": [], "connected": False, "error": f"HTTP {e.response.status_code}"}
    except Exception as e:
        return {"models": [], "connected": False, "error": str(e)}
@app.post("/api/openrouter/test")
def openrouter_test(
    api_key: str = Form(...),
    base_url: str = Form("https://openrouter.ai/api/v1"),
    user: dict = Depends(require_auth),
):
    """Test an API key by requesting ``<base_url>/models``.

    Always returns an ``ok`` flag and a message; HTTP errors and other
    failures are reported in the message rather than raised.
    """
    endpoint = f"{base_url.rstrip('/')}/models"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "HTTP-Referer": "https://voicescript.local",
    }
    try:
        resp = httpx.get(endpoint, headers=headers, timeout=10.0)
        resp.raise_for_status()
        count = len(resp.json().get("data", []))
        return {"ok": True, "message": f"연결 성공 — {count}개 모델 사용 가능"}
    except httpx.HTTPStatusError as e:
        return {"ok": False, "message": f"인증 실패 (HTTP {e.response.status_code})"}
    except Exception as e:
        return {"ok": False, "message": f"연결 실패: {str(e)}"}
# ════════════════════════════════════════════════════════════════
# 설정
# ════════════════════════════════════════════════════════════════
@app.get("/api/settings")
def get_settings(user: dict = Depends(require_auth)):
    """Return the current settings with the OpenRouter API key hidden.

    The plain key is always blanked in the response. ``openrouter_api_key_masked``
    carries the first 8 and last 4 characters for keys longer than 12
    characters, ``"****"`` for shorter keys, and ``""`` when no key is stored.
    """
    result = dict(_load_settings())
    key = result.get("openrouter_api_key")
    if not key:
        masked = ""
    elif len(key) > 12:
        masked = key[:8] + "..." + key[-4:]
    else:
        masked = "****"
    result["openrouter_api_key_masked"] = masked
    result["openrouter_api_key"] = ""  # never return the plain key
    return result
@app.post("/api/settings")
def save_settings_endpoint(
    stt_ollama_model: str = Form(""), ocr_ollama_model: str = Form(""),
    cpu_threads: str = Form("0"), stt_timeout: str = Form("0"), ollama_timeout: str = Form("600"),
    openrouter_url: str = Form("https://openrouter.ai/api/v1"), openrouter_api_key: str = Form(""),
    openrouter_stt_model: str = Form(""), openrouter_ocr_model: str = Form(""),
    user: dict = Depends(require_auth),
):
    """Persist the submitted settings and echo them back without the API key.

    Numeric fields are parsed as non-negative integers, falling back to their
    default when the text is not an integer. A blank ``openrouter_api_key``
    keeps the stored key; a blank ``openrouter_url`` falls back to the default
    URL.
    """
    def _int(v, d):
        try:
            return max(0, int(v))
        except (TypeError, ValueError):
            # Was a bare ``except``; only conversion failures should select the default.
            return d

    current = _load_settings()
    submitted_key = openrouter_api_key.strip()
    # An empty key field means "leave the stored key unchanged".
    final_key = submitted_key if submitted_key else current.get("openrouter_api_key", "")

    data = {
        "stt_ollama_model": stt_ollama_model,
        "ocr_ollama_model": ocr_ollama_model,
        "cpu_threads": _int(cpu_threads, 0),
        "stt_timeout": _int(stt_timeout, 0),
        "ollama_timeout": _int(ollama_timeout, 600),
        "openrouter_url": openrouter_url.strip() or "https://openrouter.ai/api/v1",
        "openrouter_api_key": final_key,
        "openrouter_stt_model": openrouter_stt_model,
        "openrouter_ocr_model": openrouter_ocr_model,
    }
    _save_settings(data)
    return {"ok": True, "settings": {k: v for k, v in data.items() if k != "openrouter_api_key"}}
# ════════════════════════════════════════════════════════════════
# 관리자
# ════════════════════════════════════════════════════════════════
@app.get("/api/admin/users")
def admin_list_users(user: dict = Depends(require_admin)):
    """Return the result of ``list_users()`` under ``users``; guarded by ``require_admin``."""
    users = list_users()
    return {"users": users}
@app.post("/api/admin/users")
def admin_create_user(
username:str=Form(...),password:str=Form(...),
def admin_create_user(username:str=Form(...),password:str=Form(...),
perm_stt:str=Form("false"),perm_ocr:str=Form("false"),
allowed_stt_models:str=Form(""),allowed_ocr_models:str=Form(""),
user:dict=Depends(require_admin),
):
allowed_stt_models:str=Form(""),allowed_ocr_models:str=Form(""),user:dict=Depends(require_admin)):
def _p(s): return [m.strip() for m in s.split(",") if m.strip()]
perms={"stt":perm_stt.lower()=="true","ocr":perm_ocr.lower()=="true",
"allowed_stt_models":_p(allowed_stt_models),"allowed_ocr_models":_p(allowed_ocr_models)}
@@ -491,11 +455,8 @@ def admin_create_user(
return {"ok":True,"message":msg}
@app.put("/api/admin/users/{username}")
def admin_update_user(
username:str,perm_stt:str=Form("false"),perm_ocr:str=Form("false"),
password:str=Form(""),allowed_stt_models:str=Form(""),allowed_ocr_models:str=Form(""),
user:dict=Depends(require_admin),
):
def admin_update_user(username:str,perm_stt:str=Form("false"),perm_ocr:str=Form("false"),
password:str=Form(""),allowed_stt_models:str=Form(""),allowed_ocr_models:str=Form(""),user:dict=Depends(require_admin)):
def _p(s): return [m.strip() for m in s.split(",") if m.strip()]
perms={"stt":perm_stt.lower()=="true","ocr":perm_ocr.lower()=="true",
"allowed_stt_models":_p(allowed_stt_models),"allowed_ocr_models":_p(allowed_ocr_models)}
@@ -505,7 +466,7 @@ def admin_update_user(
@app.delete("/api/admin/users/{username}")
def admin_delete_user(username: str, user: dict = Depends(require_admin)):
    """Delete the account named *username* via ``delete_user``.

    Raises:
        HTTPException: 400 carrying the helper's message when it reports failure.
    """
    succeeded, message = delete_user(username)
    if succeeded:
        return {"ok": True, "message": message}
    raise HTTPException(400, message)
@@ -516,30 +477,29 @@ def cleanup(user:dict=Depends(require_auth)): return {"removed":_cleanup_outputs
# ════════════════════════════════════════════════════════════════
# 유틸
# ════════════════════════════════════════════════════════════════
def _check_size(request: Request):
    """Reject a request early when its declared Content-Length is too large.

    Only the ``content-length`` header is inspected; a request without the
    header passes through unchecked.

    Raises:
        HTTPException: 400 when the header is not an integer, 413 when the
            declared size exceeds ``MAX_UPLOAD_BYTES * 20``.
    """
    declared = request.headers.get("content-length")
    if not declared:
        return
    try:
        size = int(declared)
    except ValueError:
        # A malformed header previously escaped as an unhandled ValueError.
        raise HTTPException(400, "잘못된 Content-Length 헤더입니다") from None
    # NOTE(review): the 20x multiplier presumably leaves room for multi-file
    # batch requests — confirm against the upload endpoints.
    if size > MAX_UPLOAD_BYTES * 20:
        raise HTTPException(413, "파일이 너무 큽니다")
def _cleanup_outputs():
    """Delete entries in OUTPUT_DIR whose mtime is older than OUTPUT_KEEP_SECS.

    Returns:
        The number of entries removed; 0 when OUTPUT_KEEP_SECS is 0, in which
        case nothing is scanned.
    """
    if OUTPUT_KEEP_SECS == 0:
        return 0
    cutoff = time.time() - OUTPUT_KEEP_SECS
    removed = 0
    for entry in glob.glob(os.path.join(OUTPUT_DIR, "*")):
        try:
            if os.path.getmtime(entry) < cutoff:
                os.remove(entry)
                removed += 1
        except OSError:
            # Best effort: the entry may have vanished, be a directory, or be
            # unremovable. Only filesystem errors are ignored, so interrupts
            # and programming errors are no longer swallowed.
            continue
    return removed
def _ext(fn): return fn.rsplit(".",1)[-1].lower() if "." in fn else ""
async def _save_upload(file: UploadFile, path: str):
    """Stream *file* to *path* in 1 MiB chunks, enforcing MAX_UPLOAD_BYTES.

    Raises:
        HTTPException: 413 once the bytes read exceed MAX_UPLOAD_BYTES; the
            partially written file is removed first.
    """
    written = 0
    too_large = False
    async with aiofiles.open(path, "wb") as out:
        while chunk := await file.read(1024 * 1024):
            written += len(chunk)
            if written > MAX_UPLOAD_BYTES:
                too_large = True
                break
            await out.write(chunk)
    # Clean up after the context manager has closed the handle, so the file
    # is closed exactly once before it is deleted.
    if too_large:
        os.remove(path)
        raise HTTPException(413, f"파일이 너무 큽니다. 최대 {MAX_UPLOAD_BYTES//1024//1024}MB")
app.mount("/", StaticFiles(directory="static", html=True), name="static")

View File

@@ -1,8 +1,12 @@
"""
OCR Celery Tasks
backend: paddle | ollama | openrouter
OCR Celery Tasks — PaddleOCR 3.x + Ollama Vision + OpenRouter Vision
backend:
paddle → PaddleOCR 3.x 로컬 (PPStructure 제거됨, 표는 마크다운 파싱)
ollama → Ollama Vision API
openrouter → OpenRouter Vision API (OpenAI 호환)
"""
import os, base64
import os, base64, json
import httpx
from celery import Celery
import openpyxl
@@ -16,12 +20,15 @@ OLLAMA_TIMEOUT = int(os.getenv("OLLAMA_TIMEOUT", "600"))
celery_app = Celery("ocr_tasks", broker=REDIS_URL, backend=REDIS_URL)
celery_app.conf.update(
task_serializer="json", result_serializer="json",
accept_content=["json"], task_track_started=True, result_expires=3600,
task_serializer="json",
result_serializer="json",
accept_content=["json"],
task_track_started=True,
result_expires=3600,
)
_ocr_engine = None
_struct_engine = None
# PaddleOCR 싱글톤
_ocr_engine = None
def get_ocr():
global _ocr_engine
@@ -32,44 +39,45 @@ def get_ocr():
print("[PaddleOCR] 완료")
return _ocr_engine
def get_structure():
global _struct_engine
if _struct_engine is None:
from paddleocr import PPStructure
print("[PPStructure] 로딩")
_struct_engine = PPStructure(table=True, ocr=True, lang=OCR_LANG)
print("[PPStructure] 완료")
return _struct_engine
# ════════════════════════════════════════════════════════════════
# 메인 Task
# 메인 Celery Task
# 인자: file_id, image_path, mode, backend,
# ollama_model, openrouter_model,
# openrouter_url, openrouter_key,
# custom_prompt
# ════════════════════════════════════════════════════════════════
@celery_app.task(bind=True, name="tasks.ocr_task", queue="ocr")
def ocr_task(
self,
file_id: str,
image_path: str,
mode: str = "text",
backend: str = "paddle",
ollama_model: str = "granite3.2-vision",
openrouter_model: str = "",
openrouter_url: str = "",
openrouter_key: str = "",
custom_prompt: str = "",
mode: str = "text",
backend: str = "paddle",
ollama_model: str = "granite3.2-vision",
openrouter_model: str = "",
openrouter_url: str = "",
openrouter_key: str = "",
custom_prompt: str = "",
):
self.update_state(state="PROGRESS", meta={"progress":8,"message":"엔진 준비 중..."})
self.update_state(state="PROGRESS", meta={"progress": 8, "message": "엔진 준비 중..."})
try:
if backend == "openrouter":
result = _run_openrouter(self, file_id, image_path, mode,
openrouter_model, openrouter_url, openrouter_key, custom_prompt)
result = _run_openrouter(
self, file_id, image_path, mode,
openrouter_model, openrouter_url, openrouter_key, custom_prompt
)
elif backend == "ollama":
result = _run_ollama(self, file_id, image_path, mode, ollama_model, custom_prompt)
result = _run_ollama(
self, file_id, image_path, mode, ollama_model, custom_prompt
)
else:
result = _run_paddle(self, file_id, image_path, mode)
try: os.remove(image_path)
except: pass
return result
except Exception as e:
try: os.remove(image_path)
except: pass
@@ -77,36 +85,50 @@ def ocr_task(
# ════════════════════════════════════════════════════════════════
# OpenRouter Vision 백엔드 (OpenAI 호환)
# 공통 프롬프트
# ════════════════════════════════════════════════════════════════
_PROMPTS = {
"text": "이 이미지에서 모든 텍스트를 정확하게 추출해줘. 원본의 줄 구분과 단락 구조를 유지해줘.",
"structure": "이 이미지를 분석해서 표는 마크다운 표 형식으로, 나머지 텍스트는 원본 구조를 유지하며 추출해줘.",
}
_PROMPT_TEXT = (
"이 이미지에서 모든 텍스트를 정확하게 추출해줘. "
"원본의 줄 구분과 단락 구조를 최대한 유지해줘. "
"이미지에 없는 내용은 절대 추가하지 마."
)
_PROMPT_STRUCTURE = (
"이 이미지를 분석해서 다음을 수행해줘:\n"
"1. 표(table)가 있으면 반드시 마크다운 표 형식(| col | col |)으로 변환\n"
"2. 나머지 텍스트는 원본 구조를 유지하며 추출\n"
"3. 표와 텍스트를 구분해서 순서대로 출력\n"
"이미지에 없는 내용은 추가하지 마."
)
def _get_prompt(mode, custom_prompt):
if custom_prompt and custom_prompt.strip():
return custom_prompt.strip()
return _PROMPT_STRUCTURE if mode == "structure" else _PROMPT_TEXT
# ════════════════════════════════════════════════════════════════
# OpenRouter Vision 백엔드
# ════════════════════════════════════════════════════════════════
def _run_openrouter(task, file_id, image_path, mode,
model, base_url, api_key, custom_prompt):
if not api_key:
raise Exception("OpenRouter API 키가 설정되지 않았습니다")
raise Exception("OpenRouter API 키가 설정되지 않았습니다. 설정 → OpenRouter에서 저장하세요.")
if not model:
raise Exception("OpenRouter 모델이 선택되지 않았습니다")
raise Exception("OpenRouter 모델이 선택되지 않았습니다.")
task.update_state(state="PROGRESS",
meta={"progress":15,"message":f"OpenRouter ({model}) 연결 중..."})
meta={"progress": 15, "message": f"OpenRouter ({model}) 연결 중..."})
# 이미지 → base64 data URL
with open(image_path, "rb") as f:
raw = f.read()
# 이미지 MIME 타입 감지
ext = image_path.rsplit(".", 1)[-1].lower()
ext = image_path.rsplit(".", 1)[-1].lower()
mime = {"jpg":"image/jpeg","jpeg":"image/jpeg","png":"image/png",
"bmp":"image/bmp","gif":"image/gif","webp":"image/webp"}.get(ext, "image/jpeg")
b64 = base64.b64encode(raw).decode()
data_url = f"data:{mime};base64,{b64}"
data_url = f"data:{mime};base64,{base64.b64encode(raw).decode()}"
prompt = custom_prompt.strip() or _PROMPTS.get(mode, _PROMPTS["text"])
task.update_state(state="PROGRESS", meta={"progress":30,"message":"모델 추론 중..."})
prompt = _get_prompt(mode, custom_prompt)
task.update_state(state="PROGRESS", meta={"progress": 30, "message": "모델 추론 중..."})
try:
resp = httpx.post(
@@ -133,40 +155,31 @@ def _run_openrouter(task, file_id, image_path, mode,
resp.raise_for_status()
except httpx.HTTPStatusError as e:
body = ""
try: body = e.response.json().get("error",{}).get("message","")
try: body = e.response.json().get("error", {}).get("message", "")
except: pass
if e.response.status_code == 400:
raise Exception(f"이 모델은 이미지를 지원하지 않습니다 — Vision 모델을 선택하세요\n({model})")
raise Exception(
f"이 모델은 이미지를 지원하지 않습니다.\n"
f"Vision 기능을 지원하는 모델을 선택하세요 (Claude-3, GPT-4o, Gemini 등)\n"
f"모델: {model}"
)
raise Exception(f"OpenRouter 오류 ({e.response.status_code}): {body or str(e)}")
except httpx.TimeoutException:
raise Exception(f"OpenRouter 응답 시간 초과. OLLAMA_TIMEOUT 값을 늘려주세요.")
raise Exception(f"OpenRouter 응답 시간 초과 ({OLLAMA_TIMEOUT}초). OLLAMA_TIMEOUT 값을 늘려주세요.")
task.update_state(state="PROGRESS", meta={"progress":85,"message":"결과 저장 중..."})
task.update_state(state="PROGRESS", meta={"progress": 85, "message": "결과 저장 중..."})
full_text = resp.json()["choices"][0]["message"]["content"].strip()
choices = resp.json().get("choices", [])
if not choices:
raise Exception("OpenRouter 빈 응답")
full_text = choices[0]["message"]["content"].strip()
if not full_text:
raise Exception("OpenRouter 빈 응답")
tables = _parse_md_tables(full_text) if mode == "structure" else []
os.makedirs(OUTPUT_DIR, exist_ok=True)
txt_file = f"{file_id}_ocr.txt"
with open(os.path.join(OUTPUT_DIR, txt_file), "w", encoding="utf-8") as f:
f.write(f"# OCR 결과 (OpenRouter / {model})\n\n{full_text}")
xlsx_file = None
if tables:
xlsx_file = f"{file_id}_tables.xlsx"
_save_excel(tables, os.path.join(OUTPUT_DIR, xlsx_file))
tables_html = [_md_table_to_html(t) for t in tables]
lines = [{"text":l,"confidence":1.0,"bbox":[]} for l in full_text.splitlines() if l.strip()]
return {
"mode": mode, "backend": "openrouter", "openrouter_model": model,
"ollama_model": "",
"full_text": full_text, "lines": lines, "line_count": len(lines),
"txt_file": txt_file,
"tables": [{"html":h,"rows":len(t),"cols":max(len(r) for r in t) if t else 0}
for h, t in zip(tables_html, tables)],
"xlsx_file": xlsx_file,
}
return _build_result(
task, file_id, full_text, mode,
backend="openrouter", ollama_model="", openrouter_model=model
)
# ════════════════════════════════════════════════════════════════
@@ -174,166 +187,226 @@ def _run_openrouter(task, file_id, image_path, mode,
# ════════════════════════════════════════════════════════════════
def _run_ollama(task, file_id, image_path, mode, ollama_model, custom_prompt):
    """Run OCR by sending the image to an Ollama vision model.

    The image is base64-encoded and posted to ``{OLLAMA_URL}/api/chat``
    together with the prompt chosen by ``_get_prompt``. The reply text is
    handed to ``_build_result``, whose return value is returned unchanged.

    Raises:
        Exception: on connection failure, timeout, a non-2xx HTTP status,
            or an empty reply from the model.
    """
    task.update_state(state="PROGRESS",
                      meta={"progress": 15, "message": f"Ollama ({ollama_model}) 연결 중..."})

    with open(image_path, "rb") as f:
        img_b64 = base64.b64encode(f.read()).decode()

    prompt = _get_prompt(mode, custom_prompt)
    task.update_state(state="PROGRESS", meta={"progress": 30, "message": "모델 추론 중..."})

    try:
        resp = httpx.post(
            f"{OLLAMA_URL}/api/chat",
            json={
                "model": ollama_model,
                "messages": [{"role": "user", "content": prompt, "images": [img_b64]}],
                "stream": False,
                "options": {"temperature": 0.1},
            },
            timeout=float(OLLAMA_TIMEOUT),
        )
        resp.raise_for_status()
    except httpx.ConnectError:
        raise Exception(f"Ollama 서버 연결 실패 ({OLLAMA_URL})")
    except httpx.TimeoutException:
        raise Exception(f"Ollama 응답 시간 초과 ({OLLAMA_TIMEOUT}초). 설정에서 타임아웃을 늘려주세요.")
    except httpx.HTTPStatusError as e:
        # Translate non-2xx replies the same way the OpenRouter backend does,
        # instead of leaking the raw httpx error.
        # NOTE(review): assumes Ollama error bodies look like {"error": "..."}
        # — confirm against the Ollama API reference.
        detail = ""
        try:
            detail = e.response.json().get("error", "")
        except (ValueError, AttributeError):
            pass
        raise Exception(f"Ollama 오류 ({e.response.status_code}): {detail or str(e)}")

    task.update_state(state="PROGRESS", meta={"progress": 85, "message": "결과 저장 중..."})

    full_text = resp.json().get("message", {}).get("content", "").strip()
    if not full_text:
        raise Exception(
            f"Ollama 빈 응답.\n"
            f"이 모델이 Vision(이미지)을 지원하는지 확인하세요: {ollama_model}\n"
            f"Vision 지원 모델: granite3.2-vision, llava 등"
        )

    return _build_result(
        task, file_id, full_text, mode,
        backend="ollama", ollama_model=ollama_model, openrouter_model=""
    )
# ════════════════════════════════════════════════════════════════
# PaddleOCR 백엔드
# PaddleOCR 백엔드 (3.x — PPStructure 미사용)
# ════════════════════════════════════════════════════════════════
def _run_paddle(task, file_id, image_path, mode):
    """Run local PaddleOCR on an image and persist the recognised text.

    The first element of the ``get_ocr().ocr(...)`` result is accepted in two
    shapes: a dict exposing ``rec_texts`` / ``rec_scores`` / ``rec_polys``,
    or a list of ``[bbox, (text, conf)]`` items. In "structure" mode the
    joined text is additionally scanned for Markdown tables, which are
    exported to an .xlsx file when any are found.

    Returns:
        The result dict (mode, backend, text, lines, output file names,
        table metadata).

    Raises:
        ValueError: when OpenCV cannot read the image.
    """
    import cv2

    image = cv2.imread(image_path)
    if image is None:
        raise ValueError("이미지를 읽을 수 없습니다. 지원 형식: jpg, png, bmp, tiff, webp")

    task.update_state(state="PROGRESS", meta={"progress": 30, "message": "텍스트 인식 중..."})
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    result = get_ocr().ocr(image)
    task.update_state(state="PROGRESS", meta={"progress": 80, "message": "결과 정리 중..."})

    lines = []
    first = result[0] if result and len(result) > 0 else None
    if isinstance(first, dict):
        # Dict-shaped result (PaddleOCR 3.x).
        texts = first.get("rec_texts", [])
        scores = first.get("rec_scores", [])
        polys = first.get("rec_polys", [None] * len(texts))
        for text, score, poly in zip(texts, scores, polys):
            if not text.strip():
                continue
            if poly is not None and hasattr(poly, 'tolist'):
                bbox = poly.tolist()
            else:
                bbox = []
            lines.append({
                "text": text,
                "confidence": round(float(score), 3),
                "bbox": bbox,
            })
    elif isinstance(first, list):
        # Legacy shape: [[bbox, (text, conf)], ...]
        for item in first:
            if not item or len(item) != 2:
                continue
            _bbox, (text, score) = item
            if text.strip():
                lines.append({"text": text, "confidence": round(float(score), 3), "bbox": []})

    full_text = "\n".join(entry["text"] for entry in lines)

    # Structure mode: try to parse Markdown tables out of the plain text.
    tables = []
    xlsx_file = None
    if mode == "structure":
        tables = _parse_md_tables(full_text)
        if tables:
            xlsx_file = f"{file_id}_tables.xlsx"
            _save_excel(tables, os.path.join(OUTPUT_DIR, xlsx_file))

    txt_file = f"{file_id}_ocr.txt"
    with open(os.path.join(OUTPUT_DIR, txt_file), "w", encoding="utf-8") as out:
        out.write(full_text)

    tables_meta = [
        {
            "html": _md_table_to_html(table),
            "rows": len(table),
            "cols": max(len(row) for row in table) if table else 0,
        }
        for table in tables
    ]

    return {
        "mode": mode,
        "backend": "paddle",
        "ollama_model": "",
        "openrouter_model": "",
        "full_text": full_text,
        "lines": lines,
        "line_count": len(lines),
        "txt_file": txt_file,
        "tables": tables_meta,
        "xlsx_file": xlsx_file,
    }
# ════════════════════════════════════════════════════════════════
# 공통 유틸
# 공통 결과 빌더 (Ollama / OpenRouter 공용)
# ════════════════════════════════════════════════════════════════
def _parse_md_tables(text):
def _build_result(task, file_id, full_text, mode,
                  backend, ollama_model, openrouter_model):
    """Persist model output and assemble the OCR result dict.

    Writes ``{file_id}_ocr.txt`` to OUTPUT_DIR with a header naming the
    backend and model. In "structure" mode, Markdown tables found in
    *full_text* are also exported to ``{file_id}_tables.xlsx``.
    """
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    if mode == "structure":
        tables = _parse_md_tables(full_text)
    else:
        tables = []

    txt_file = f"{file_id}_ocr.txt"
    if backend == "ollama":
        label = ollama_model
    else:
        label = openrouter_model
    txt_path = os.path.join(OUTPUT_DIR, txt_file)
    with open(txt_path, "w", encoding="utf-8") as out:
        out.write(f"# OCR 결과 ({backend} / {label})\n\n{full_text}")

    xlsx_file = None
    if tables:
        task.update_state(state="PROGRESS", meta={"progress": 92, "message": "Excel 생성 중..."})
        xlsx_file = f"{file_id}_tables.xlsx"
        _save_excel(tables, os.path.join(OUTPUT_DIR, xlsx_file))

    tables_meta = [
        {
            "html": _md_table_to_html(table),
            "rows": len(table),
            "cols": max(len(row) for row in table) if table else 0,
        }
        for table in tables
    ]

    # One entry per non-blank line; confidence is fixed at 1.0 and no bbox is set.
    lines = []
    for raw in full_text.splitlines():
        if raw.strip():
            lines.append({"text": raw, "confidence": 1.0, "bbox": []})

    return {
        "mode": mode,
        "backend": backend,
        "ollama_model": ollama_model,
        "openrouter_model": openrouter_model,
        "full_text": full_text,
        "lines": lines,
        "line_count": len(lines),
        "txt_file": txt_file,
        "tables": tables_meta,
        "xlsx_file": xlsx_file,
    }
# ════════════════════════════════════════════════════════════════
# 마크다운 표 파싱
# ════════════════════════════════════════════════════════════════
def _parse_md_tables(text: str) -> list:
"""텍스트에서 마크다운 표 추출 → [[row, row, ...], ...]"""
tables, current = [], []
for line in text.splitlines():
s = line.strip()
if s.startswith("|") and s.endswith("|"):
if all(c in "| -:" for c in s): continue
current.append([c.strip() for c in s.strip("|").split("|")])
# 구분선 (|---|---) 건너뜀
if all(c in "| -:" for c in s):
continue
cells = [c.strip() for c in s.strip("|").split("|")]
current.append(cells)
else:
if len(current) >= 2: tables.append(current)
if len(current) >= 2:
tables.append(current)
current = []
if len(current) >= 2: tables.append(current)
if len(current) >= 2:
tables.append(current)
return tables
def _md_table_to_html(table):
def _md_table_to_html(table: list) -> str:
if not table: return ""
rows = ""
for i, row in enumerate(table):
tag = "th" if i==0 else "td"
rows += "<tr>"+"".join(f"<{tag}>{c}</{tag}>" for c in row)+"</tr>"
tag = "th" if i == 0 else "td"
rows += "<tr>" + "".join(f"<{tag}>{c}</{tag}>" for c in row) + "</tr>"
return f"<table>{rows}</table>"
def _html_table_to_list(html):
from html.parser import HTMLParser
class P(HTMLParser):
def __init__(self):
super().__init__()
self.rows,self._row,self._cell,self._in=[],[],[],False
def handle_starttag(self,tag,attrs):
if tag=="tr": self._row=[]
elif tag in("td","th"): self._cell=[];self._in=True
def handle_endtag(self,tag):
if tag in("td","th"): self._row.append("".join(self._cell).strip());self._in=False
elif tag=="tr":
if self._row: self.rows.append(self._row)
def handle_data(self,data):
if self._in: self._cell.append(data)
p=P();p.feed(html);return p.rows
def _save_excel(tables, path):
wb=openpyxl.Workbook();wb.remove(wb.active)
for i,table in enumerate(tables,1):
ws=wb.create_sheet(f"{i}")
thin=Side(style="thin",color="2A2A33");bdr=Border(left=thin,right=thin,top=thin,bottom=thin)
for r_idx,row in enumerate(table,1):
for c_idx,val in enumerate(row,1):
cell=ws.cell(row=r_idx,column=c_idx,value=val)
cell.border=bdr;cell.alignment=Alignment(horizontal="center",vertical="center",wrap_text=True)
if r_idx==1: cell.fill=PatternFill("solid",fgColor="1A1A2E");cell.font=Font(color="00E5A0",bold=True,size=10)
else: cell.font=Font(size=10)
# ════════════════════════════════════════════════════════════════
# Excel 저장
# ════════════════════════════════════════════════════════════════
def _save_excel(tables: list, path: str):
    """Write each table to its own worksheet and save the workbook to *path*.

    Sheets are titled "1", "2", ... in table order. The first row of each
    sheet gets header styling, even-numbered rows below it get a zebra fill,
    and each column width follows its longest cell text (capped at 40). If
    *tables* is empty, a single "Sheet1" is created.
    """
    wb = openpyxl.Workbook()
    wb.remove(wb.active)

    hfill = PatternFill("solid", fgColor="1A1A2E")
    hfont = Font(color="00E5A0", bold=True, size=10)
    cfont = Font(size=10)
    # Loop-invariant, so built once instead of once per striped cell.
    zebra = PatternFill("solid", fgColor="0F0F14")
    center = Alignment(horizontal="center", vertical="center", wrap_text=True)
    thin = Side(style="thin", color="2A2A33")
    bdr = Border(left=thin, right=thin, top=thin, bottom=thin)

    for i, table in enumerate(tables, 1):
        ws = wb.create_sheet(f"{i}")
        if not table:
            continue
        for r_idx, row in enumerate(table, 1):
            for c_idx, val in enumerate(row, 1):
                # NOTE(review): cell text comes from OCR/model output; strings
                # starting with "=" may be stored as formulas by openpyxl —
                # confirm and neutralise if so.
                cell = ws.cell(row=r_idx, column=c_idx, value=val)
                cell.border = bdr
                cell.alignment = center
                if r_idx == 1:
                    cell.fill = hfill
                    cell.font = hfont
                else:
                    cell.font = cfont
                    if r_idx % 2 == 0:
                        cell.fill = zebra
        for col in ws.columns:
            w = max((len(str(c.value or "")) for c in col), default=8)
            ws.column_dimensions[col[0].column_letter].width = min(w + 4, 40)

    if not wb.sheetnames:
        wb.create_sheet("Sheet1")
    wb.save(path)

View File

@@ -251,17 +251,29 @@ textarea.cprompt{width:100%;background:var(--surf);border:1px solid var(--border
.ollama-status{font-family:var(--mono);font-size:.63rem;padding:4px 9px;border-radius:2px}
.ollama-status.ok{background:rgba(0,229,160,.1);color:var(--accent);border:1px solid rgba(0,229,160,.2)}
.ollama-status.fail{background:rgba(255,107,53,.1);color:var(--warn);border:1px solid rgba(255,107,53,.2)}
.openrouter-status.ok{background:rgba(77,166,255,.1);color:var(--blue);border:1px solid rgba(77,166,255,.2)}
.openrouter-status.fail{background:rgba(255,107,53,.1);color:var(--warn);border:1px solid rgba(255,107,53,.2)}
.or-section{margin-top:10px;padding:12px;background:var(--surf2);border:1px solid #1c2840;border-radius:4px}
.key-input-wrap{display:flex;gap:6px;margin-top:6px}
.key-input-wrap input{flex:1;background:var(--surf);border:1px solid var(--border2);color:var(--text);padding:9px 10px;border-radius:3px;font-family:var(--mono);font-size:.78rem;outline:none;-webkit-appearance:none}
.key-input-wrap input:focus{border-color:var(--blue)}
.btn-test{padding:9px 14px;background:none;border:1px solid #3a7cc4;color:var(--blue);border-radius:3px;font-family:var(--mono);font-size:.68rem;cursor:pointer;white-space:nowrap;transition:all .15s}
.btn-test:hover{background:rgba(77,166,255,.08)}
.or-model-tabs{display:flex;gap:5px;margin-top:8px;flex-wrap:wrap}
.or-model-tab{font-family:var(--mono);font-size:.6rem;padding:4px 10px;border:1px solid var(--border2);background:none;color:var(--muted);border-radius:2px;cursor:pointer;transition:all .12s;text-transform:uppercase}
.or-model-tab.active{border-color:var(--blue);color:var(--blue);background:rgba(77,166,255,.07)}
/* ── 자막 모드 ── */
.sub-section{margin-top:12px;padding:12px;background:var(--surf2);border:1px solid #1c2840;border-radius:4px}
.sub-section-title{font-family:var(--mono);font-size:.6rem;letter-spacing:.1em;color:var(--blue);text-transform:uppercase;margin-bottom:10px;display:flex;align-items:center;gap:6px}
.lang-select{width:100%;background:var(--surf);border:1px solid var(--border2);color:var(--text);padding:9px 10px;border-radius:3px;font-family:var(--mono);font-size:.78rem;outline:none;cursor:pointer;appearance:none;-webkit-appearance:none;background-image:url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='10' height='6'%3E%3Cpath d='M0 0l5 6 5-6z' fill='%2352526a'/%3E%3C/svg%3E");background-repeat:no-repeat;background-position:right 10px center;margin-top:4px}
.lang-select:focus{border-color:var(--blue)}
.fmt-btns{display:grid;grid-template-columns:1fr 1fr 1fr;gap:6px;margin-top:6px}
.fmt-btn{padding:7px;background:var(--surf);border:1px solid var(--border2);color:var(--muted);border-radius:3px;font-family:var(--mono);font-size:.68rem;cursor:pointer;transition:all .15s;text-align:center;text-transform:uppercase}
.fmt-btn.active{background:rgba(77,166,255,.08);border-color:#3a7cc4;color:var(--blue)}
.sub-dl-btn{flex:1;padding:8px;background:rgba(77,166,255,.07);border:1px solid #3a7cc4;color:var(--blue);border-radius:3px;font-family:var(--mono);font-size:.66rem;cursor:pointer;transition:all .15s;text-transform:uppercase}
.sub-dl-btn:hover{background:rgba(77,166,255,.15)}
/* ── 배치 큐 ── */
.batch-queue{margin-top:14px;display:flex;flex-direction:column;gap:6px;max-height:280px;overflow-y:auto}
.batch-item{display:grid;grid-template-columns:1fr auto auto;align-items:center;gap:8px;padding:9px 12px;background:var(--surf);border:1px solid var(--border2);border-radius:4px;transition:border-color .2s}
.batch-item.running{border-color:var(--accent2)}.batch-item.done{border-color:rgba(0,229,160,.3)}.batch-item.failed{border-color:rgba(255,107,53,.3)}.batch-item.waiting{opacity:.6}
.bi-name{font-family:var(--mono);font-size:.72rem;overflow:hidden;text-overflow:ellipsis;white-space:nowrap}
.bi-status{font-family:var(--mono);font-size:.6rem;padding:3px 7px;border-radius:2px;white-space:nowrap}
.bi-status.waiting{background:rgba(255,255,255,.04);color:var(--muted);border:1px solid var(--border)}.bi-status.running{background:rgba(0,229,160,.07);color:var(--accent);border:1px solid rgba(0,229,160,.2)}.bi-status.done{background:rgba(0,229,160,.07);color:var(--accent2);border:1px solid rgba(0,229,160,.2)}.bi-status.failed{background:rgba(255,107,53,.07);color:var(--warn);border:1px solid rgba(255,107,53,.2)}
.bi-dl{font-family:var(--mono);font-size:.6rem;padding:3px 8px;border:1px solid var(--border2);background:none;color:var(--text);border-radius:2px;cursor:pointer;white-space:nowrap}.bi-dl:hover{border-color:var(--accent);color:var(--accent)}
.bi-prog{height:2px;background:var(--accent);border-radius:1px;transition:width .4s;margin-top:3px}
.batch-summary{font-family:var(--mono);font-size:.68rem;color:var(--muted);margin-top:8px;display:flex;gap:12px;flex-wrap:wrap}.batch-summary span{color:var(--text)}
.batch-add-btn{margin-top:8px;padding:7px 14px;background:none;border:1px dashed var(--border2);color:var(--muted);border-radius:3px;font-family:var(--mono);font-size:.68rem;cursor:pointer;width:100%;transition:all .15s}.batch-add-btn:hover{border-color:var(--accent);color:var(--accent)}
.batch-clear-btn{padding:7px 14px;background:none;border:1px solid var(--border2);color:var(--muted);border-radius:3px;font-family:var(--mono);font-size:.68rem;cursor:pointer;transition:all .15s}.batch-clear-btn:hover{border-color:var(--warn);color:var(--warn)}
.batch-actions{display:flex;gap:8px;margin-top:10px}
/* ── ADMIN ── */
#page-admin{display:none;flex-direction:column}
@@ -373,26 +385,65 @@ textarea.cprompt{width:100%;background:var(--surf);border:1px solid var(--border
<section class="panel">
<div class="panel-title">파일 업로드</div>
<div class="dropzone" id="stt-drop">
<input type="file" id="stt-input" accept=".mp3,.mp4,.wav,.m4a,.ogg,.flac,.aac,.wma,.webm,.mkv,.avi,.mov">
<input type="file" id="stt-input" accept=".mp3,.mp4,.wav,.m4a,.ogg,.flac,.aac,.wma,.webm,.mkv,.avi,.mov,.ts,.mts,.h264,.h265" multiple>
<span class="drop-icon">🎵</span>
<div class="drop-label"><strong>탭하거나 드래그하여 선택</strong><br>음성 또는 영상 파일</div>
<div class="drop-label"><strong>탭하거나 드래그하여 선택</strong><br>영상(mp4/mkv/h265 등) · 음성 복수 선택 가능</div>
<div class="drop-formats">mp3 · wav · m4a · ogg · flac · mp4 · webm</div>
</div>
<div class="file-info" id="stt-info"><div class="fname" id="stt-fname"></div><div class="fsize" id="stt-fsize"></div></div>
<!-- 배치 큐 -->
<div id="stt-queue" style="display:none">
<div class="batch-queue" id="stt-queue-list"></div>
<div class="batch-summary" id="stt-queue-summary"></div>
<div class="batch-actions">
<button class="batch-add-btn" onclick="document.getElementById('stt-input').click()">+ 파일 더 추가</button>
<button class="batch-clear-btn" id="stt-queue-clear">큐 초기화</button>
</div>
</div>
<div class="sec-label">STT 엔진</div>
<div class="engine-btns">
<button class="engine-btn active" data-engine="whisper"><span class="e-icon"></span><span class="e-name">faster-whisper</span><span class="e-desc">로컬 CPU 변환<br>빠르고 안정적</span></button>
<button class="engine-btn" data-engine="whisper+ollama"><span class="e-icon">🦙</span><span class="e-name">+ Ollama 교정</span><span class="e-desc">Whisper 후<br>Ollama 교정</span></button>
<button class="engine-btn" data-engine="whisper+openrouter" style="grid-column:1/-1"><span class="e-icon">🌐</span><span class="e-name">+ OpenRouter 교정</span><span class="e-desc">외부 AI 모델로 문장 부호·맞춤법 교정 (텍스트 전용 모델도 사용 가능)</span></button>
</div>
<div class="ollama-opts" id="stt-ollama-opts">
<div class="sec-label" style="margin-top:0">후처리 모델</div>
<select class="model-select" id="stt-ollama-model"><option value="">설정 기본 모델 사용</option></select>
</div>
<div class="ollama-opts" id="stt-or-opts">
<div class="sec-label" style="margin-top:0">OpenRouter 후처리 모델</div>
<select class="model-select" id="stt-or-model"><option value="">설정 기본 모델 사용</option></select>
<div style="font-family:var(--mono);font-size:.6rem;color:var(--muted);margin-top:5px">⚙️ 설정 → OpenRouter에서 API 키 및 기본 모델을 설정하세요</div>
<!-- 자막 모드 옵션 -->
<div class="sub-section" id="sub-section">
<div class="sub-section-title">🎬 자막 모드 (영상/음성 → 자막 파일)</div>
<label style="display:flex;align-items:center;gap:8px;cursor:pointer;font-family:var(--mono);font-size:.75rem;color:var(--text)">
<input type="checkbox" id="subtitle-mode" style="accent-color:var(--blue);width:15px;height:15px">
자막 파일 생성 (SRT / VTT)
</label>
<div id="sub-opts" style="display:none;margin-top:10px">
<div class="sec-label">음성 언어 (원어)</div>
<select class="lang-select" id="force-language">
<option value="">자동 감지</option>
</select>
<div class="sec-label">자막 포맷</div>
<div class="fmt-btns">
<button class="fmt-btn active" data-fmt="srt">SRT</button>
<button class="fmt-btn" data-fmt="vtt">VTT</button>
<button class="fmt-btn" data-fmt="both">둘 다</button>
</div>
<div class="sec-label">번역 (선택 — 빈칸이면 원어 자막)</div>
<select class="lang-select" id="translate-to">
<option value="">번역 안 함 (원어 자막)</option>
</select>
<div id="trans-model-wrap" style="display:none;margin-top:8px">
<div class="sec-label">번역 엔진</div>
<div style="display:grid;grid-template-columns:1fr 1fr;gap:6px;margin-top:6px">
<button class="fmt-btn active" data-via="ollama" id="trans-via-ollama">🦙 Ollama</button>
<button class="fmt-btn" data-via="openrouter" id="trans-via-or">🌐 OpenRouter</button>
</div>
<div class="sec-label">번역 모델</div>
<select class="lang-select" id="translate-model">
<option value="">STT 엔진과 같은 모델 사용</option>
</select>
</div>
</div>
</div>
<button class="btn-start green" id="stt-btn" disabled>변환 시작</button>
<div class="prog-box" id="stt-prog">
@@ -421,7 +472,10 @@ textarea.cprompt{width:100%;background:var(--surf);border:1px solid var(--border
<div class="tab-content" id="stt-segs"><div class="segments-list" id="stt-seglist"></div></div>
<div class="result-actions" id="stt-actions">
<button class="btn-act" id="stt-copy">복사</button>
<button class="btn-act primary" id="stt-dl">TXT 저장</button>
<button class="btn-act primary" id="stt-dl">TXT</button>
<button class="sub-dl-btn" id="stt-dl-srt" style="display:none">SRT 저장</button>
<button class="sub-dl-btn" id="stt-dl-vtt" style="display:none">VTT 저장</button>
<button class="sub-dl-btn" id="stt-dl-srt-orig" style="display:none">원어 SRT</button>
<button class="btn-act" id="stt-new">새 파일</button>
</div>
</section>
@@ -434,18 +488,26 @@ textarea.cprompt{width:100%;background:var(--surf);border:1px solid var(--border
<section class="panel">
<div class="panel-title">이미지 업로드</div>
<div class="dropzone" id="ocr-drop">
<input type="file" id="ocr-input" accept=".jpg,.jpeg,.png,.bmp,.tiff,.tif,.webp,.gif">
<input type="file" id="ocr-input" accept=".jpg,.jpeg,.png,.bmp,.tiff,.tif,.webp,.gif" multiple>
<span class="drop-icon">🖼</span>
<div class="drop-label"><strong>탭하거나 드래그하여 선택</strong><br>이미지 파일</div>
<div class="drop-label"><strong>탭하거나 드래그하여 선택</strong><br>복수 이미지 동시 선택 가능</div>
<div class="drop-formats">jpg · png · bmp · tiff · webp · gif</div>
</div>
<div class="file-info" id="ocr-info"><div class="fname" id="ocr-fname"></div><div class="fsize" id="ocr-fsize"></div></div>
<div id="ocr-preview-wrap"><img id="ocr-preview"></div>
<!-- 배치 큐 -->
<div id="ocr-queue" style="display:none">
<div class="batch-queue" id="ocr-queue-list"></div>
<div class="batch-summary" id="ocr-queue-summary"></div>
<div class="batch-actions">
<button class="batch-add-btn" onclick="document.getElementById('ocr-input').click()">+ 파일 더 추가</button>
<button class="batch-clear-btn" id="ocr-queue-clear">큐 초기화</button>
</div>
</div>
<div class="sec-label">OCR 엔진</div>
<div class="engine-btns">
<button class="engine-btn active" data-engine="paddle"><span class="e-icon">🐾</span><span class="e-name">PaddleOCR</span><span class="e-desc">로컬 실행<br>표 구조 분석</span></button>
<button class="engine-btn" data-engine="ollama"><span class="e-icon">🦙</span><span class="e-name">Ollama Vision</span><span class="e-desc">자연어 지시<br>커스텀 프롬프트</span></button>
<button class="engine-btn" data-engine="openrouter" style="grid-column:1/-1"><span class="e-icon">🌐</span><span class="e-name">OpenRouter Vision</span><span class="e-desc">Claude / GPT-4o / Gemini 등 외부 Vision 모델 사용</span></button>
</div>
<div class="ollama-opts" id="ocr-ollama-opts">
<div class="sec-label" style="margin-top:0">Vision 모델</div>
@@ -453,13 +515,6 @@ textarea.cprompt{width:100%;background:var(--surf);border:1px solid var(--border
<span class="cprompt-toggle" id="cprompt-toggle">▶ 커스텀 프롬프트</span>
<textarea class="cprompt" id="custom-prompt" placeholder="예: 이 영수증의 품목과 금액을 JSON으로 추출해줘"></textarea>
</div>
<div class="ollama-opts" id="ocr-or-opts">
<div class="sec-label" style="margin-top:0">OpenRouter Vision 모델</div>
<select class="model-select" id="ocr-or-model"><option value="">설정 기본 모델 사용</option></select>
<span class="cprompt-toggle" id="cprompt-toggle-or">▶ 커스텀 프롬프트</span>
<textarea class="cprompt" id="custom-prompt-or" placeholder="예: 이 영수증의 품목과 금액을 JSON으로 추출해줘"></textarea>
<div style="font-family:var(--mono);font-size:.6rem;color:var(--muted);margin-top:5px">⚠️ Vision 기능을 지원하는 모델만 이미지 처리 가능 (Claude-3, GPT-4o, Gemini 등)</div>
</div>
<div class="sec-label">인식 모드</div>
<div class="mode-btns">
<button class="mode-btn active" data-mode="text">📄 텍스트 추출</button>
@@ -585,40 +640,6 @@ textarea.cprompt{width:100%;background:var(--surf);border:1px solid var(--border
<select class="settings-select" id="setting-ocr-model" style="margin-top:8px"><option value="">(없음)</option></select>
</div>
<!-- OpenRouter -->
<div class="settings-section">
<h3>🌐 OpenRouter 외부 AI 연동</h3>
<label class="settings-label">API 키<small>openrouter.ai에서 발급 — 저장 후 "연결 테스트"로 확인</small></label>
<div class="key-input-wrap">
<input type="password" id="or-api-key" placeholder="sk-or-v1-..." autocomplete="off">
<button class="btn-test" id="btn-or-test">연결 테스트</button>
</div>
<div id="or-test-result" style="font-family:var(--mono);font-size:.68rem;margin-top:6px;display:none"></div>
<label class="settings-label" style="margin-top:12px">API URL<small>기본값 사용 권장</small></label>
<input type="text" id="or-url" value="https://openrouter.ai/api/v1"
style="width:100%;background:var(--surf2);border:1px solid var(--border2);color:var(--text);padding:9px 10px;border-radius:3px;font-family:var(--mono);font-size:.75rem;outline:none;margin-top:6px">
<div id="or-models-wrap" style="display:none;margin-top:14px">
<div style="display:flex;align-items:center;gap:8px;margin-bottom:8px;flex-wrap:wrap">
<span id="or-connected-badge" class="openrouter-status ok"></span>
<div class="or-model-tabs">
<button class="or-model-tab active" data-filter="vision">Vision 모델</button>
<button class="or-model-tab" data-filter="text">텍스트 모델</button>
<button class="or-model-tab" data-filter="all">전체</button>
</div>
</div>
<label class="settings-label">STT 교정 기본 모델<small>텍스트 전용 모델도 사용 가능</small></label>
<select class="settings-select" id="setting-or-stt-model" style="margin-top:6px">
<option value="">(없음)</option>
</select>
<label class="settings-label" style="margin-top:10px">OCR 기본 Vision 모델<small>반드시 Vision 지원 모델 선택</small></label>
<select class="settings-select" id="setting-or-ocr-model" style="margin-top:6px">
<option value="">(없음)</option>
</select>
</div>
</div>
<div style="display:flex;gap:10px;justify-content:flex-end;align-items:center">
<div id="settings-msg" style="font-family:var(--mono);font-size:.68rem;color:var(--accent);display:none">✓ 저장됨 (CPU·타임아웃: worker 재시작 후 반영)</div>
<button class="btn-settings blue" id="btn-save-settings">저장</button>
@@ -702,7 +723,6 @@ textarea.cprompt{width:100%;background:var(--surf);border:1px solid var(--border
// ══ STATE ══
let token=null,currentUser=null,ollamaModels=[],appSettings={};
let sttFile=null,sttOutputFile=null,sttEngine='whisper';
let orModels=[],orVisionModels=[],orTextModels=[];
let ocrFile=null,ocrOutputTxt=null,ocrOutputXlsx=null,ocrEngine='paddle',ocrMode='text';
let editTarget=null,sysTimer=null;
let histPage=1,histType='',histTotal=0;
@@ -781,7 +801,6 @@ function populateModelSelects(){
fill(document.getElementById('ocr-ollama-model'),appSettings.ocr_ollama_model,'설정 기본 모델 사용');
fill(document.getElementById('setting-stt-model'),appSettings.stt_ollama_model,'(없음)');
fill(document.getElementById('setting-ocr-model'),appSettings.ocr_ollama_model,'(없음)');
// OpenRouter 드롭다운
populateOrSelects();
}
@@ -791,11 +810,7 @@ async function loadSettings(){
const th=appSettings.cpu_threads||0;cpuSlider.value=th;cpuDisplay.textContent=th===0?'0 (자동)':th+' 스레드';
document.getElementById('stt-timeout').value=appSettings.stt_timeout||0;
document.getElementById('ollama-timeout').value=appSettings.ollama_timeout||600;
if(appSettings.openrouter_url)document.getElementById('or-url').value=appSettings.openrouter_url;
if(appSettings.openrouter_api_key_masked)document.getElementById('or-api-key').placeholder='저장된 키: '+appSettings.openrouter_api_key_masked;
populateModelSelects();
// 기존 OR 모델 로드
if(appSettings.openrouter_api_key_masked)loadOrModels();}catch{}
populateModelSelects()}catch{}
}
document.getElementById('btn-save-settings').addEventListener('click',async()=>{
const fd=new FormData();
@@ -804,10 +819,10 @@ document.getElementById('btn-save-settings').addEventListener('click',async()=>{
fd.append('cpu_threads',cpuSlider.value);
fd.append('stt_timeout',document.getElementById('stt-timeout').value||'0');
fd.append('ollama_timeout',document.getElementById('ollama-timeout').value||'600');
fd.append('openrouter_url',document.getElementById('or-url').value||'https://openrouter.ai/api/v1');
const orKey=document.getElementById('or-api-key').value.trim();if(orKey)fd.append('openrouter_api_key',orKey);
fd.append('openrouter_stt_model',document.getElementById('setting-or-stt-model').value);
fd.append('openrouter_ocr_model',document.getElementById('setting-or-ocr-model').value);
fd.append('openrouter_url',document.getElementById('or-url')?.value||'https://openrouter.ai/api/v1');
const orKey=document.getElementById('or-api-key')?.value?.trim();if(orKey)fd.append('openrouter_api_key',orKey);
fd.append('openrouter_stt_model',document.getElementById('setting-or-stt-model')?.value||'');
fd.append('openrouter_ocr_model',document.getElementById('setting-or-ocr-model')?.value||'');
try{const r=await api('POST','/api/settings',fd);if(r.ok){appSettings=(await r.json()).settings;const msg=document.getElementById('settings-msg');msg.style.display='block';setTimeout(()=>msg.style.display='none',3500)}}catch{}
});
document.getElementById('btn-refresh-models').addEventListener('click',loadOllamaModels);
@@ -825,71 +840,286 @@ document.querySelectorAll('.nav-tab').forEach(btn=>{
});
});
// ══ STT ══
// ══ STT — 배치 + 자막 ══
const sttDrop=document.getElementById('stt-drop'),sttInput=document.getElementById('stt-input');
sttInput.addEventListener('change',()=>setSttFile(sttInput.files[0]));
let sttQueue=[],sttSubFmt='srt',sttTransVia='ollama';
let languages={};
// Fetch the supported-language table from the backend and append one <option>
// per language to both the "spoken language" and the "translate to" selects.
// Any failure (network, JSON, missing element) is swallowed: the selects then
// simply keep their default entry.
async function loadLanguages(){
  try{
    const resp=await api('GET','/api/languages');
    const payload=await resp.json();
    languages=payload.languages||{};
    const targets=[
      document.getElementById('force-language'),
      document.getElementById('translate-to'),
    ];
    for(const [code,name] of Object.entries(languages)){
      for(const select of targets){
        const opt=document.createElement('option');
        opt.value=code;
        opt.textContent=`${name} (${code})`;
        select.appendChild(opt);
      }
    }
  }catch{}
}
// Rebuild the translation-model dropdown for the currently selected
// translation engine (sttTransVia), keeping the previous choice selected when
// the new model list still contains it.
function fillTranslateModels(){
  const select=document.getElementById('translate-model');
  const previous=select.value;
  select.innerHTML='<option value="">STT 엔진과 같은 모델 사용</option>';
  let source=ollamaModels;
  if(sttTransVia==='openrouter')source=orModels;
  source.forEach(modelName=>{
    const opt=document.createElement('option');
    opt.value=modelName;
    opt.textContent=modelName;
    if(modelName===previous)opt.selected=true;
    select.appendChild(opt);
  });
}
// Subtitle-mode toggle: the subtitle option panel (#sub-opts) is shown only
// while the checkbox is ticked.
document.getElementById('subtitle-mode').addEventListener('change',function(){
document.getElementById('sub-opts').style.display=this.checked?'block':'none';
});
// Subtitle format buttons (those carrying data-fmt): mark the clicked button
// active and remember its format in sttSubFmt.
document.querySelectorAll('.fmt-btn[data-fmt]').forEach(btn=>{
btn.addEventListener('click',()=>{document.querySelectorAll('.fmt-btn[data-fmt]').forEach(b=>b.classList.remove('active'));btn.classList.add('active');sttSubFmt=btn.dataset.fmt});
});
// Translation target language: the engine/model picker (#trans-model-wrap) is
// shown only when a language is chosen, and its model list is refreshed then.
document.getElementById('translate-to').addEventListener('change',function(){
document.getElementById('trans-model-wrap').style.display=this.value?'block':'none';
if(this.value)fillTranslateModels();
});
// Translation engine buttons (those carrying data-via): mark the clicked
// button active, store the engine in sttTransVia and reload the model list.
document.querySelectorAll('button[data-via]').forEach(btn=>{
btn.addEventListener('click',()=>{
document.querySelectorAll('button[data-via]').forEach(b=>b.classList.remove('active'));
btn.classList.add('active');sttTransVia=btn.dataset.via;fillTranslateModels();
});
});
// Append user-selected audio/video files to the STT batch queue.
//   fileList: a FileList (file input / drop event) or an array of File objects.
// Files whose extension is not accepted are dropped; if nothing usable remains
// the queue and the start button are left untouched.
function addSttFiles(fileList){
  // Must mirror the server-side AUDIO_EXT set. A shorter list here silently
  // rejects files the backend is able to transcribe.
  const AUDIO=new Set([
    'mp3','mp4','wav','m4a','ogg','flac','aac','wma','webm',
    'mkv','avi','mov','ts','mts','m2ts','wmv','flv','rmvb',
    'h264','h265','hevc','264','265',
  ]);
  // Tolerate a missing list instead of throwing inside the event handler.
  const files=Array.from(fileList||[]).filter(f=>AUDIO.has(f.name.split('.').pop().toLowerCase()));
  if(!files.length)return;
  files.forEach(f=>sttQueue.push({file:f,taskId:null,outputFile:null,srtFile:null,vttFile:null,srtOrigFile:null,status:'waiting',el:null}));
  renderSttQueue();
  document.getElementById('stt-btn').disabled=false;
}
sttInput.addEventListener('change',()=>addSttFiles(sttInput.files));
sttDrop.addEventListener('dragover',e=>{e.preventDefault();sttDrop.classList.add('dragover')});
sttDrop.addEventListener('dragleave',()=>sttDrop.classList.remove('dragover'));
sttDrop.addEventListener('drop',e=>{e.preventDefault();sttDrop.classList.remove('dragover');setSttFile(e.dataTransfer.files[0])});
function setSttFile(f){if(!f)return;sttFile=f;showFileInfo('stt',f);document.getElementById('stt-btn').disabled=false;document.getElementById('stt-err').style.display='none'}
document.querySelectorAll('#page-stt .engine-btn').forEach(btn=>{btn.addEventListener('click',()=>{document.querySelectorAll('#page-stt .engine-btn').forEach(b=>b.classList.remove('active'));btn.classList.add('active');sttEngine=btn.dataset.engine;document.getElementById('stt-ollama-opts').classList.toggle('visible',sttEngine==='whisper+ollama');document.getElementById('stt-or-opts').classList.toggle('visible',sttEngine==='whisper+openrouter');const isOr=sttEngine==='whisper+openrouter',isOllama=sttEngine==='whisper+ollama';document.getElementById('stt-btn').className='btn-start '+(isOr||isOllama?'purple':'green')})});
document.getElementById('stt-btn').addEventListener('click',async()=>{
if(!sttFile)return;document.getElementById('stt-err').style.display='none';setSttLoading(true);
const fd=new FormData();fd.append('file',sttFile);fd.append('use_ollama',sttEngine==='whisper+ollama'?'true':'false');fd.append('ollama_model',document.getElementById('stt-ollama-model').value||'');
fd.append('use_openrouter',sttEngine==='whisper+openrouter'?'true':'false');fd.append('openrouter_model',document.getElementById('stt-or-model').value||'');
try{const r=await api('POST','/api/transcribe',fd);const d=await r.json();if(!r.ok)throw new Error(d.detail||'업로드 실패');pollTask(d.task_id,dt=>setProg('stt',dt.progress||0,dt.message||'처리 중...'),showSttResult,e=>{showErr('stt-err',e);setSttLoading(false)})}
catch(e){showErr('stt-err',e.message);setSttLoading(false)}
sttDrop.addEventListener('drop',e=>{e.preventDefault();sttDrop.classList.remove('dragover');addSttFiles(e.dataTransfer.files)});
document.getElementById('stt-queue-clear').addEventListener('click',()=>{sttQueue=[];renderSttQueue();document.getElementById('stt-btn').disabled=true});
// Redraw the STT batch queue: one row per queued file (name, progress bar
// placeholder, status badge, download buttons once finished) plus the summary
// line. Hides the whole queue box when the queue is empty.
function renderSttQueue(){
  const qEl=document.getElementById('stt-queue'),
        list=document.getElementById('stt-queue-list'),
        sum=document.getElementById('stt-queue-summary');
  if(!sttQueue.length){qEl.style.display='none';return}
  qEl.style.display='block';
  list.innerHTML='';
  const STATUS_LABEL={waiting:'대기',running:'변환중',done:'완료',failed:'실패'};
  sttQueue.forEach((item,i)=>{
    const row=document.createElement('div');
    row.className='batch-item '+item.status;
    row.innerHTML=`<div><div class="bi-name">${esc(item.file.name)}</div><div class="bi-prog" id="stt-bp-${i}" style="width:0%;display:${item.status==='running'?'block':'none'}"></div></div><span class="bi-status ${item.status}">${STATUS_LABEL[item.status]}</span><span style="display:flex;gap:3px"></span>`;
    if(item.status==='done'){
      // Download buttons are wired with listeners rather than inline
      // onclick="dlFile('...')" strings: HTML-escaping does not protect a
      // JavaScript string inside an attribute, so a quote in a file name
      // would otherwise break out of the call.
      const actions=row.lastElementChild;
      [
        ['TXT',item.outputFile],
        ['SRT',item.srtFile],
        ['VTT',item.vttFile],
        ['원어SRT',item.srtOrigFile],
      ].forEach(([label,fileName])=>{
        if(!fileName)return;
        const btn=document.createElement('button');
        btn.className='bi-dl';
        btn.textContent=label;
        btn.addEventListener('click',()=>dlFile(fileName));
        actions.appendChild(btn);
      });
    }
    item.el=row;
    list.appendChild(row);
  });
  const done=sttQueue.filter(i=>i.status==='done').length,
        failed=sttQueue.filter(i=>i.status==='failed').length,
        running=sttQueue.filter(i=>i.status==='running').length;
  sum.innerHTML=`전체 <span>${sttQueue.length}</span>개 · 완료 <span>${done}</span> · 실패 <span>${failed}</span>${running?` · 진행중 <span>${running}</span>`:''}`;
}
// STT engine buttons: clicking one makes it the only active button, stores its
// data-engine value in sttEngine, shows the option panel that belongs to that
// engine (Ollama / OpenRouter post-processing) and recolours the start button
// (purple for any engine other than plain 'whisper', green otherwise).
document.querySelectorAll('#page-stt .engine-btn').forEach(btn=>{
btn.addEventListener('click',()=>{
document.querySelectorAll('#page-stt .engine-btn').forEach(b=>b.classList.remove('active'));
btn.classList.add('active');sttEngine=btn.dataset.engine;
document.getElementById('stt-ollama-opts').classList.toggle('visible',sttEngine==='whisper+ollama');
document.getElementById('stt-or-opts').classList.toggle('visible',sttEngine==='whisper+openrouter');
// Assigning className replaces the whole class list, not just the colour class.
document.getElementById('stt-btn').className='btn-start '+(sttEngine!=='whisper'?'purple':'green');
});
});
// Start button: upload every queue item that is still 'waiting' in a single
// request (single-file or batch endpoint), then start one status poller per
// task the server accepted.
document.getElementById('stt-btn').addEventListener('click',async()=>{
  const pending=sttQueue.filter(i=>i.status==='waiting');
  if(!pending.length){showErr('stt-err','변환할 파일이 없습니다');return}
  document.getElementById('stt-err').style.display='none';
  document.getElementById('stt-btn').disabled=true;
  document.getElementById('stt-prog').style.display='block';
  setProg('stt',0,`${pending.length}개 파일 업로드 중...`);
  const subMode=document.getElementById('subtitle-mode').checked;
  const fd=new FormData();
  pending.forEach(item=>fd.append('files',item.file));
  fd.append('use_ollama',sttEngine==='whisper+ollama'?'true':'false');
  fd.append('ollama_model',document.getElementById('stt-ollama-model')?.value||'');
  fd.append('use_openrouter',sttEngine==='whisper+openrouter'?'true':'false');
  fd.append('openrouter_model',document.getElementById('stt-or-model')?.value||'');
  fd.append('subtitle_mode',subMode?'true':'false');
  fd.append('subtitle_format',sttSubFmt);
  fd.append('force_language',document.getElementById('force-language').value||'');
  fd.append('translate_to',document.getElementById('translate-to').value||'');
  fd.append('translate_model',document.getElementById('translate-model').value||'');
  fd.append('translate_via',sttTransVia);
  try{
    const url=pending.length===1?'/api/transcribe':'/api/transcribe/batch';
    const r=await api('POST',url,fd);
    const d=await r.json();
    if(!r.ok)throw new Error(d.detail||'업로드 실패');
    // The single-file endpoint answers with one task object, the batch
    // endpoint with {items:[...]} in upload order.
    const items=pending.length===1?[d]:(d.items||[]);
    let started=0;
    // Map results onto the items that were actually uploaded, not onto
    // whatever is 'waiting' now: files added while the upload was in flight
    // must not receive someone else's task id.
    pending.forEach((qItem,pi)=>{
      const qi=sttQueue.indexOf(qItem);
      if(qi<0)return;                 // queue was cleared during the upload
      const taskItem=items[pi];
      if(!taskItem)return;            // no answer for this file: stays 'waiting' for a retry
      if(taskItem.error){qItem.status='failed';return}
      qItem.status='running';
      qItem.taskId=taskItem.task_id;
      started++;
      pollSttItem(qi,taskItem.task_id);
    });
    // Always redraw so items rejected at upload time show as failed.
    renderSttQueue();
    if(started){
      setProg('stt',20,`${started}개 변환 중...`);
    }else{
      // Nothing is running, so no poller will ever finish the batch:
      // release the button and the progress box right away.
      checkSttBatchDone();
    }
  }catch(e){
    showErr('stt-err',e.message);
    document.getElementById('stt-btn').disabled=false;
    document.getElementById('stt-prog').style.display='none';
  }
});
// Poll /api/status/<taskId> every 1.8 s until the task succeeds or fails and
// reflect the outcome on the queue item at index qi.
//   qi:     index of the item in sttQueue at the time polling starts
//   taskId: server-side task identifier
function pollSttItem(qi,taskId){
  // Pin the item object itself. The queue can be cleared or rebuilt while the
  // task is running, after which index qi may point at a different file.
  const item=sttQueue[qi];
  const stale=()=>!item||sttQueue[qi]!==item;
  const t=setInterval(async()=>{
    if(stale()){clearInterval(t);return}
    try{
      const r=await api('GET','/api/status/'+taskId);
      if(r.status===401){clearInterval(t);showLogin();return}
      const d=await r.json();
      // Re-check after the awaits: the queue may have been reset meanwhile,
      // or an overlapping tick may already have finished this item.
      if(stale()||item.status==='done'||item.status==='failed'){clearInterval(t);return}
      if(d.state==='success'){
        clearInterval(t);
        item.outputFile=d.output_file||null;
        item.srtFile=d.srt_file||null;
        item.vttFile=d.vtt_file||null;
        item.srtOrigFile=d.srt_original_file||null;
        item.status='done';
        renderSttQueue();
        checkSttBatchDone();
        // Show the detailed result panel only when exactly one file has
        // finished and nothing else is still in flight.
        const settled=['waiting','done','failed'];
        const nothingRunning=!sttQueue.some(i=>!settled.includes(i.status));
        if(nothingRunning&&sttQueue.filter(i=>i.status==='done').length===1)showSttResult(d);
      }else if(d.state==='failure'){
        clearInterval(t);
        item.status='failed';
        renderSttQueue();
        checkSttBatchDone();
      }else{
        const done=sttQueue.filter(i=>i.status==='done').length;
        setProg('stt',20+Math.round((done/sttQueue.length)*75),d.message||'처리 중...');
      }
    }catch{
      // Best effort: a transient network/JSON error is retried on the next tick.
    }
  },1800);
}
// When no STT queue item is in flight any more (every item is done, failed or
// still waiting), show the final "done x/total" progress, hide the progress
// box two seconds later and re-enable the start button.
function checkSttBatchDone(){
  const SETTLED=['done','failed','waiting'];
  for(const entry of sttQueue){
    if(!SETTLED.includes(entry.status))return;
  }
  let finished=0;
  for(const entry of sttQueue){
    if(entry.status==='done')finished+=1;
  }
  setProg('stt',100,`완료 ${finished}/${sttQueue.length}`);
  setTimeout(()=>{document.getElementById('stt-prog').style.display='none'},2000);
  document.getElementById('stt-btn').disabled=false;
}
// Switch the STT panel between idle and busy: the start button is disabled
// and the progress box shown while `on` is true; entering the busy state also
// resets the progress display.
function setSttLoading(on){
  const startBtn=document.getElementById('stt-btn');
  startBtn.disabled=on;
  const progBox=document.getElementById('stt-prog');
  if(on){
    progBox.style.display='block';
    setProg('stt',0,'준비 중...');
  }else{
    progBox.style.display='none';
  }
}
// Fill the STT result panel from a finished task's status payload `d`:
// metadata chips, full text, per-segment list and the download buttons.
// Fields read from d: output_file, language, translated, translate_to,
// duration, segments[{start,end,text}], ollama_used, ollama_model, text,
// srt_file, vtt_file, srt_original_file.
function showSttResult(d){
  sttOutputFile=d.output_file;
  // Language chip: source language, plus " → target" when the text was translated.
  document.getElementById('stt-mlang').textContent=((d.language||'')+(d.translated?' → '+d.translate_to:'')).toUpperCase();
  document.getElementById('stt-mdur').textContent=fmtDur(d.duration);
  document.getElementById('stt-msegs').textContent=(d.segments||[]).length+'개';
  // Declared once: a second `const chip` in this scope is a SyntaxError.
  const chip=document.getElementById('stt-mollama-chip');
  if(d.ollama_used){
    chip.style.display='block';
    document.getElementById('stt-mollama').textContent=d.ollama_model;
  }else{
    chip.style.display='none';
  }
  document.getElementById('stt-meta').style.display='flex';
  document.getElementById('stt-tabs').style.display='flex';
  document.getElementById('stt-empty').style.display='none';
  const resultBox=document.getElementById('stt-result');
  resultBox.style.display='block';
  resultBox.value=d.text||'';
  const sl=document.getElementById('stt-seglist');
  sl.innerHTML='';
  (d.segments||[]).forEach(s=>{
    const row=document.createElement('div');
    row.className='seg-item';
    row.innerHTML=`<div class="seg-time">${fmtTime(s.start)}<br>→${fmtTime(s.end)}</div><div class="seg-text">${esc(s.text)}</div>`;
    sl.appendChild(row);
  });
  document.getElementById('stt-actions').style.display='flex';
  // Subtitle download buttons: visible only for files the task produced.
  [
    ['stt-dl-srt',d.srt_file],
    ['stt-dl-vtt',d.vtt_file],
    ['stt-dl-srt-orig',d.srt_original_file],
  ].forEach(([id,fileName])=>{
    const btn=document.getElementById(id);
    btn.style.display=fileName?'flex':'none';
    if(fileName)btn.onclick=()=>dlFile(fileName);
  });
  // Leave the busy state exactly once, after the panel is fully populated.
  setSttLoading(false);
}
document.getElementById('stt-copy').addEventListener('click',()=>copyText(document.getElementById('stt-result').value,document.getElementById('stt-copy')));
document.getElementById('stt-dl').addEventListener('click',()=>dlFile(sttOutputFile));
document.getElementById('stt-new').addEventListener('click',()=>{sttFile=null;sttInput.value='';sttOutputFile=null;['stt-info','stt-prog','stt-err','stt-meta','stt-tabs','stt-actions'].forEach(id=>document.getElementById(id).style.display='none');document.getElementById('stt-empty').style.display='flex';document.getElementById('stt-result').style.display='none';document.getElementById('stt-result').value='';document.getElementById('stt-seglist').innerHTML='';document.getElementById('stt-btn').disabled=true;resetTabs('stt-tabs')});
document.getElementById('stt-new').addEventListener('click',()=>{
sttQueue=[];sttInput.value='';sttOutputFile=null;renderSttQueue();
['stt-prog','stt-err','stt-meta','stt-tabs','stt-actions'].forEach(id=>document.getElementById(id).style.display='none');
document.getElementById('stt-empty').style.display='flex';
document.getElementById('stt-result').style.display='none';document.getElementById('stt-result').value='';
document.getElementById('stt-seglist').innerHTML='';document.getElementById('stt-btn').disabled=true;resetTabs('stt-tabs');
['stt-dl-srt','stt-dl-vtt','stt-dl-srt-orig'].forEach(id=>document.getElementById(id).style.display='none');
});
// ══ OCR ══
// ══ OCR — 배치 ══
const ocrDrop=document.getElementById('ocr-drop'),ocrInput=document.getElementById('ocr-input');
ocrInput.addEventListener('change',()=>setOcrFile(ocrInput.files[0]));
let ocrQueue=[];
// Append user-selected image files to the OCR batch queue. Files with an
// extension outside the accepted image list are ignored; if none remain the
// queue and the start button are left as they are.
function addOcrFiles(fileList){
  const IMG=['jpg','jpeg','png','bmp','tiff','tif','webp','gif'];
  const accepted=[];
  for(const candidate of Array.from(fileList)){
    const ext=candidate.name.split('.').pop().toLowerCase();
    if(IMG.includes(ext))accepted.push(candidate);
  }
  if(accepted.length===0)return;
  for(const img of accepted){
    ocrQueue.push({file:img,taskId:null,txtFile:null,xlsxFile:null,status:'waiting',el:null});
  }
  renderOcrQueue();
  document.getElementById('ocr-btn').disabled=false;
}
ocrInput.addEventListener('change',()=>addOcrFiles(ocrInput.files));
ocrDrop.addEventListener('dragover',e=>{e.preventDefault();ocrDrop.classList.add('dragover')});
ocrDrop.addEventListener('dragleave',()=>ocrDrop.classList.remove('dragover'));
ocrDrop.addEventListener('drop',e=>{e.preventDefault();ocrDrop.classList.remove('dragover');setOcrFile(e.dataTransfer.files[0])});
function setOcrFile(f){if(!f)return;ocrFile=f;showFileInfo('ocr',f);document.getElementById('ocr-btn').disabled=false;document.getElementById('ocr-err').style.display='none';const p=document.getElementById('ocr-preview'),w=document.getElementById('ocr-preview-wrap');p.src=URL.createObjectURL(f);w.style.display='block'}
document.querySelectorAll('#page-ocr .engine-btn').forEach(btn=>{btn.addEventListener('click',()=>{document.querySelectorAll('#page-ocr .engine-btn').forEach(b=>b.classList.remove('active'));btn.classList.add('active');ocrEngine=btn.dataset.engine;document.getElementById('ocr-ollama-opts').classList.toggle('visible',ocrEngine==='ollama');document.getElementById('ocr-or-opts').classList.toggle('visible',ocrEngine==='openrouter');const isOr=ocrEngine==='openrouter',isOllama=ocrEngine==='ollama';document.getElementById('ocr-btn').className='btn-start '+(isOr||isOllama?'purple':'green')})});
document.getElementById('cprompt-toggle').addEventListener('click',()=>{const ta=document.getElementById('custom-prompt');const open=ta.style.display!=='block';ta.style.display=open?'block':'none';document.getElementById('cprompt-toggle').textContent=(open?'▼':'▶')+' 커스텀 프롬프트'});
document.getElementById('cprompt-toggle-or').addEventListener('click',()=>{const ta=document.getElementById('custom-prompt-or');const open=ta.style.display!=='block';ta.style.display=open?'block':'none';document.getElementById('cprompt-toggle-or').textContent=(open?'▼':'▶')+' 커스텀 프롬프트'});
document.querySelectorAll('.mode-btn').forEach(btn=>{btn.addEventListener('click',()=>{document.querySelectorAll('.mode-btn').forEach(b=>b.classList.remove('active'));btn.classList.add('active');ocrMode=btn.dataset.mode;document.getElementById('mode-desc').textContent=ocrMode==='structure'?'표 구조를 감지하고 Excel로 저장합니다':'일반 텍스트와 글자를 인식합니다'})});
document.getElementById('ocr-btn').addEventListener('click',async()=>{
if(!ocrFile)return;document.getElementById('ocr-err').style.display='none';setOcrLoading(true);
const fd=new FormData();
fd.append('file',ocrFile);
fd.append('mode',ocrMode);
fd.append('backend',ocrEngine);
// Ollama 모델
fd.append('ollama_model', ocrEngine==='ollama' ? (document.getElementById('ocr-ollama-model').value||'') : '');
// OpenRouter 모델
fd.append('openrouter_model', ocrEngine==='openrouter' ? (document.getElementById('ocr-or-model').value||'') : '');
// 커스텀 프롬프트 (엔진별)
const cp = ocrEngine==='openrouter'
? (document.getElementById('custom-prompt-or').value||'')
: (document.getElementById('custom-prompt').value||'');
fd.append('custom_prompt', cp);
try{const r=await api('POST','/api/ocr',fd);const d=await r.json();if(!r.ok)throw new Error(d.detail||'업로드 실패');pollTask(d.task_id,dt=>setProg('ocr',dt.progress||0,dt.message||'처리 중...'),showOcrResult,e=>{showErr('ocr-err',e);setOcrLoading(false)})}
catch(e){showErr('ocr-err',e.message);setOcrLoading(false)}
ocrDrop.addEventListener('drop',e=>{e.preventDefault();ocrDrop.classList.remove('dragover');addOcrFiles(e.dataTransfer.files)});
document.getElementById('ocr-queue-clear').addEventListener('click',()=>{ocrQueue=[];renderOcrQueue();document.getElementById('ocr-btn').disabled=true});
// Redraw the OCR batch queue: one row per queued image (name, progress bar
// placeholder, status badge, download buttons once finished) plus the summary
// line. Hides the whole queue box when the queue is empty.
function renderOcrQueue(){
  const qEl=document.getElementById('ocr-queue'),
        list=document.getElementById('ocr-queue-list'),
        sum=document.getElementById('ocr-queue-summary');
  if(!ocrQueue.length){qEl.style.display='none';return}
  qEl.style.display='block';
  list.innerHTML='';
  const STATUS_LABEL={waiting:'대기',running:'인식중',done:'완료',failed:'실패'};
  ocrQueue.forEach((item,i)=>{
    const row=document.createElement('div');
    row.className='batch-item '+item.status;
    row.innerHTML=`<div><div class="bi-name">${esc(item.file.name)}</div><div class="bi-prog" id="ocr-bp-${i}" style="width:0%;display:${item.status==='running'?'block':'none'}"></div></div><span class="bi-status ${item.status}">${STATUS_LABEL[item.status]}</span><span style="display:flex;gap:3px"></span>`;
    if(item.status==='done'){
      // Buttons get real listeners instead of inline onclick="dlFile('...')":
      // HTML-escaping cannot protect a JavaScript string inside an attribute,
      // so a quote in a file name would otherwise break out of the call.
      const actions=row.lastElementChild;
      [['TXT',item.txtFile],['XLS',item.xlsxFile]].forEach(([label,fileName])=>{
        if(!fileName)return;
        const btn=document.createElement('button');
        btn.className='bi-dl';
        btn.textContent=label;
        btn.addEventListener('click',()=>dlFile(fileName));
        actions.appendChild(btn);
      });
    }
    item.el=row;
    list.appendChild(row);
  });
  const done=ocrQueue.filter(i=>i.status==='done').length,
        failed=ocrQueue.filter(i=>i.status==='failed').length,
        running=ocrQueue.filter(i=>i.status==='running').length;
  sum.innerHTML=`전체 <span>${ocrQueue.length}</span>개 · 완료 <span>${done}</span> · 실패 <span>${failed}</span>${running?` · 진행중 <span>${running}</span>`:''}`;
}
// OCR engine buttons: clicking one makes it the only active button, stores its
// data-engine value in ocrEngine, shows the option panel of that engine and
// recolours the start button (purple for any engine other than 'paddle').
document.querySelectorAll('#page-ocr .engine-btn').forEach(btn=>{
  btn.addEventListener('click',()=>{
    document.querySelectorAll('#page-ocr .engine-btn').forEach(b=>b.classList.remove('active'));
    btn.classList.add('active');
    ocrEngine=btn.dataset.engine;
    // The option panels are looked up defensively, in line with the submit
    // handler which already treats the OpenRouter controls as optional. A
    // missing panel must not abort the handler before the start button below
    // is updated.
    document.getElementById('ocr-ollama-opts')?.classList.toggle('visible',ocrEngine==='ollama');
    document.getElementById('ocr-or-opts')?.classList.toggle('visible',ocrEngine==='openrouter');
    document.getElementById('ocr-btn').className='btn-start '+(ocrEngine!=='paddle'?'purple':'green');
  });
});
function setOcrLoading(on){const io=(ocrEngine==='ollama'||ocrEngine==='openrouter'),c=io?'var(--purple)':'var(--accent)';document.getElementById('ocr-btn').disabled=on;document.getElementById('ocr-prog').style.display=on?'block':'none';document.getElementById('ocr-wave').style.display=on?'flex':'none';document.getElementById('ocr-pfill').style.background=c;document.getElementById('ocr-ppct').style.color=c;document.querySelectorAll('#ocr-wave .wave-bar').forEach(b=>b.style.background=c);if(on)setProg('ocr',0,'준비 중...')}
document.getElementById('cprompt-toggle').addEventListener('click',()=>{const ta=document.getElementById('custom-prompt');const open=ta.style.display!=='block';ta.style.display=open?'block':'none';document.getElementById('cprompt-toggle').textContent=(open?'▼':'▶')+' 커스텀 프롬프트'});
document.querySelectorAll('.mode-btn').forEach(btn=>{btn.addEventListener('click',()=>{document.querySelectorAll('.mode-btn').forEach(b=>b.classList.remove('active'));btn.classList.add('active');ocrMode=btn.dataset.mode;document.getElementById('mode-desc').textContent=ocrMode==='structure'?'표 구조를 감지하고 Excel로 저장합니다':'일반 텍스트와 글자를 인식합니다'})});
// Start button: upload every queue item that is still 'waiting' in a single
// request (single-file or batch endpoint), then poll each accepted task every
// 1.8 s until it succeeds or fails.
document.getElementById('ocr-btn').addEventListener('click',async()=>{
  const pending=ocrQueue.filter(i=>i.status==='waiting');
  if(!pending.length){showErr('ocr-err','인식할 파일이 없습니다');return}
  document.getElementById('ocr-err').style.display='none';
  document.getElementById('ocr-btn').disabled=true;
  document.getElementById('ocr-prog').style.display='block';
  setProg('ocr',0,`${pending.length}개 업로드 중...`);
  const fd=new FormData();
  pending.forEach(item=>fd.append('files',item.file));
  fd.append('mode',ocrMode);
  fd.append('backend',ocrEngine);
  fd.append('ollama_model',ocrEngine==='ollama'?(document.getElementById('ocr-ollama-model').value||''):'');
  fd.append('openrouter_model',ocrEngine==='openrouter'?(document.getElementById('ocr-or-model')?.value||''):'');
  const cp=ocrEngine==='openrouter'?(document.getElementById('custom-prompt-or')?.value||''):(document.getElementById('custom-prompt').value||'');
  fd.append('custom_prompt',cp);

  // Close out the batch once nothing is in flight any more. Runs after every
  // terminal outcome (success and failure alike) so a failing last task
  // cannot leave the button disabled and the progress box visible.
  const finishIfIdle=()=>{
    if(!ocrQueue.every(i=>['done','failed','waiting'].includes(i.status)))return;
    const done=ocrQueue.filter(i=>i.status==='done').length;
    setProg('ocr',100,`완료 ${done}/${ocrQueue.length}`);
    setTimeout(()=>document.getElementById('ocr-prog').style.display='none',2000);
    document.getElementById('ocr-btn').disabled=false;
  };

  try{
    const url=pending.length===1?'/api/ocr':'/api/ocr/batch';
    const r=await api('POST',url,fd);
    const d=await r.json();
    if(!r.ok)throw new Error(d.detail||'업로드 실패');
    // The single-file endpoint answers with one task object, the batch
    // endpoint with {items:[...]} in upload order.
    const items=pending.length===1?[d]:(d.items||[]);
    // Map results onto the items that were actually uploaded, so files added
    // while the upload was in flight are not handed someone else's task.
    pending.forEach((qItem,pi)=>{
      if(!ocrQueue.includes(qItem))return;   // queue was cleared during the upload
      const taskItem=items[pi];
      if(!taskItem)return;                   // no answer for this file: stays 'waiting'
      if(taskItem.error){qItem.status='failed';return}
      qItem.status='running';
      qItem.taskId=taskItem.task_id;
      const t=setInterval(async()=>{
        // Stop polling for items that are no longer part of the queue.
        if(!ocrQueue.includes(qItem)){clearInterval(t);return}
        try{
          const r2=await api('GET','/api/status/'+taskItem.task_id);
          if(r2.status===401){clearInterval(t);showLogin();return}
          const d2=await r2.json();
          if(d2.state==='success'){
            clearInterval(t);
            qItem.txtFile=d2.txt_file||null;
            qItem.xlsxFile=d2.xlsx_file||null;
            qItem.status='done';
            renderOcrQueue();
            // Detailed result panel only for a lone finished image.
            if(ocrQueue.filter(i=>i.status==='done').length===1&&ocrQueue.filter(i=>i.status==='running').length===0)showOcrResult(d2);
            finishIfIdle();
          }else if(d2.state==='failure'){
            clearInterval(t);
            qItem.status='failed';
            renderOcrQueue();
            finishIfIdle();
          }else{
            const done=ocrQueue.filter(i=>i.status==='done').length;
            setProg('ocr',20+Math.round((done/ocrQueue.length)*75),d2.message||'처리중...');
          }
        }catch{
          // Best effort: a transient network/JSON error is retried on the next tick.
        }
      },1800);
    });
    // Redraw once so items rejected at upload time show as failed, and release
    // the UI immediately when no task was started at all.
    renderOcrQueue();
    finishIfIdle();
  }catch(e){
    showErr('ocr-err',e.message);
    document.getElementById('ocr-btn').disabled=false;
    document.getElementById('ocr-prog').style.display='none';
  }
});
// Switch the OCR panel in or out of its loading state.
// `on` (boolean): true disables the run button and shows the progress bar and wave
// animation; false reverses that. The accent color is purple for every engine
// other than 'paddle'.
function setOcrLoading(on){
  const byId=id=>document.getElementById(id);
  const color=ocrEngine!=='paddle'?'var(--purple)':'var(--accent)';
  byId('ocr-btn').disabled=on;
  byId('ocr-prog').style.display=on?'block':'none';
  byId('ocr-wave').style.display=on?'flex':'none';
  byId('ocr-pfill').style.background=color;
  byId('ocr-ppct').style.color=color;
  for(const bar of document.querySelectorAll('#ocr-wave .wave-bar')){
    bar.style.background=color;
  }
  if(on){
    setProg('ocr',0,'준비 중...');
  }
}
function showOcrResult(d){
ocrOutputTxt=d.txt_file||null;ocrOutputXlsx=d.xlsx_file||null;const io=d.backend==='ollama';
document.getElementById('ocr-mlines').textContent=(d.line_count||0)+'줄';document.getElementById('ocr-mmode').textContent=d.mode==='structure'?'구조분석':'텍스트';document.getElementById('ocr-mbackend').textContent=io?`Ollama`:'Paddle';document.getElementById('ocr-mtables').textContent=(d.tables||[]).length+'개';
ocrOutputTxt=d.txt_file||null;ocrOutputXlsx=d.xlsx_file||null;const io=d.backend!=='paddle';
document.getElementById('ocr-mlines').textContent=(d.line_count||0)+'줄';document.getElementById('ocr-mmode').textContent=d.mode==='structure'?'구조분석':'텍스트';document.getElementById('ocr-mbackend').textContent=d.backend==='openrouter'?'OpenRouter':d.backend==='ollama'?'Ollama':'Paddle';document.getElementById('ocr-mtables').textContent=(d.tables||[]).length+'개';
document.getElementById('ocr-meta').style.display='flex';document.getElementById('ocr-tabs').style.display='flex';document.getElementById('ocr-empty').style.display='none';document.getElementById('ocr-result').style.display='block';document.getElementById('ocr-result').value=d.full_text||'';
const ll=document.getElementById('ocr-linelist');ll.innerHTML='';(d.lines||[]).forEach(line=>{const c=line.confidence||0,cls=c>=.9?'high':c>=.7?'mid':'low';const row=document.createElement('div');row.className='line-item';row.innerHTML=`<div class="line-conf ${cls}">${io?'AI':Math.round(c*100)+'%'}</div><div class="line-text">${esc(line.text)}</div>`;ll.appendChild(row)});
const tl=document.getElementById('ocr-tablelist'),te=document.getElementById('ocr-tableempty');tl.innerHTML='';const tables=d.tables||[];te.style.display=tables.length?'none':'flex';tables.forEach((t,i)=>{const w=document.createElement('div');w.innerHTML=`<div class="table-title">표 ${i+1}${t.rows||0}× ${t.cols||0}열</div><div class="table-wrapper">${(t.html||'').replace(/<table/g,'<table class="ocr-table"')}</div>`;tl.appendChild(w)});
@@ -898,7 +1128,12 @@ function showOcrResult(d){
// Result-panel actions: copy the recognized text, or download the TXT / XLSX output.
document.getElementById('ocr-copy').addEventListener('click',()=>{
  copyText(document.getElementById('ocr-result').value,document.getElementById('ocr-copy'));
});
document.getElementById('ocr-dl-txt').addEventListener('click',()=>{
  dlFile(ocrOutputTxt);
});
document.getElementById('ocr-dl-xlsx').addEventListener('click',()=>{
  dlFile(ocrOutputXlsx);
});
document.getElementById('ocr-new').addEventListener('click',()=>{ocrFile=null;ocrInput.value='';ocrOutputTxt=null;ocrOutputXlsx=null;['ocr-info','ocr-preview-wrap','ocr-prog','ocr-err','ocr-meta','ocr-tabs','ocr-actions'].forEach(id=>document.getElementById(id).style.display='none');document.getElementById('ocr-empty').style.display='flex';document.getElementById('ocr-result').style.display='none';document.getElementById('ocr-result').value='';document.getElementById('ocr-linelist').innerHTML='';document.getElementById('ocr-tablelist').innerHTML='';document.getElementById('ocr-btn').disabled=true;resetTabs('ocr-tabs')});
// "New" button: drop the whole OCR queue and return the panel to its empty state.
document.getElementById('ocr-new').addEventListener('click',()=>{
  const byId=id=>document.getElementById(id);

  // Reset state first so the re-rendered queue is empty.
  ocrQueue=[];
  ocrInput.value='';
  ocrOutputTxt=null;
  ocrOutputXlsx=null;
  renderOcrQueue();

  // Hide everything that belongs to a previous run.
  for(const id of ['ocr-prog','ocr-err','ocr-meta','ocr-tabs','ocr-actions']){
    byId(id).style.display='none';
  }

  byId('ocr-empty').style.display='flex';
  const resultBox=byId('ocr-result');
  resultBox.style.display='none';
  resultBox.value='';
  byId('ocr-linelist').innerHTML='';
  byId('ocr-tablelist').innerHTML='';
  byId('ocr-btn').disabled=true;
  resetTabs('ocr-tabs');
});
// ══ HISTORY ══
// History type filter: activate the clicked button, store its data-type as the
// current filter, go back to page 1 and reload the list.
document.querySelectorAll('.hist-filter-btn').forEach(btn=>{
  btn.addEventListener('click',()=>{
    for(const other of document.querySelectorAll('.hist-filter-btn')){
      other.classList.remove('active');
    }
    btn.classList.add('active');
    histType=btn.dataset.type;
    histPage=1;
    loadHistory();
  });
});
@@ -1153,70 +1388,36 @@ async function copyText(text,btn){try{await navigator.clipboard.writeText(text);
// ══ OPENROUTER ══
async function loadOrModels(){
try{
const r=await api('GET','/api/openrouter/models');const d=await r.json();
try{const r=await api('GET','/api/openrouter/models');const d=await r.json();
const wrap=document.getElementById('or-models-wrap');
if(d.connected){
orModels=d.models||[];orVisionModels=d.vision_models||[];orTextModels=d.text_models||[];
wrap.style.display='block';
document.getElementById('or-connected-badge').textContent=`✓ 연결됨 — Vision ${orVisionModels.length}개 / 전체 ${orModels.length}`;
populateOrSelects('vision');
} else {
wrap.style.display='none';
}
if(d.connected){orModels=d.models||[];orVisionModels=d.vision_models||[];
wrap.style.display='block';document.getElementById('or-connected-badge').textContent=`✓ 연결됨 — Vision ${orVisionModels.length}개 / 전체 ${orModels.length}`;populateOrSelects('vision');}
else wrap.style.display='none';
}catch{}
}
let orFilter='vision';
document.querySelectorAll('.or-model-tab').forEach(btn=>{
btn.addEventListener('click',()=>{
document.querySelectorAll('.or-model-tab').forEach(b=>b.classList.remove('active'));
btn.classList.add('active');orFilter=btn.dataset.filter;populateOrSelects(orFilter);
});
});
document.querySelectorAll('.or-model-tab').forEach(btn=>{btn.addEventListener('click',()=>{document.querySelectorAll('.or-model-tab').forEach(b=>b.classList.remove('active'));btn.classList.add('active');orFilter=btn.dataset.filter;populateOrSelects(orFilter)})});
function populateOrSelects(filter){
filter=filter||orFilter;
const list = filter==='vision'?orVisionModels:filter==='text'?orTextModels:orModels;
const fillOr=(sel,def)=>{
const cur=sel.value||def||'';
sel.innerHTML='<option value="">(없음)</option>';
list.forEach(m=>{const o=document.createElement('option');o.value=m;o.textContent=m;if(m===cur)o.selected=true;sel.appendChild(o)});
};
const sttSel=document.getElementById('setting-or-stt-model');
filter=filter||orFilter;const list=filter==='vision'?orVisionModels:filter==='text'?orTextModels:orModels;
const fillOr=(sel,def)=>{if(!sel)return;const cur=sel.value||def||'';sel.innerHTML='<option value="">(없음)</option>';list.forEach(m=>{const o=document.createElement('option');o.value=m;o.textContent=m;if(m===cur)o.selected=true;sel.appendChild(o)})};
fillOr(document.getElementById('setting-or-stt-model'),appSettings.openrouter_stt_model);
const ocrSel=document.getElementById('setting-or-ocr-model');
const sttPage=document.getElementById('stt-or-model');
const ocrPage=document.getElementById('ocr-or-model');
if(sttSel)fillOr(sttSel,appSettings.openrouter_stt_model);
if(ocrSel){
// OCR은 Vision만
const vlist=filter==='text'?[]:orVisionModels;
const cur=ocrSel.value||appSettings.openrouter_ocr_model||'';
ocrSel.innerHTML='<option value="">(없음)</option>';
vlist.forEach(m=>{const o=document.createElement('option');o.value=m;o.textContent=m;if(m===cur)o.selected=true;ocrSel.appendChild(o)});
}
if(sttPage)fillOr(sttPage,appSettings.openrouter_stt_model);
if(ocrPage){
const cur=ocrPage.value||appSettings.openrouter_ocr_model||'';
ocrPage.innerHTML='<option value="">설정 기본 모델 사용</option>';
orVisionModels.forEach(m=>{const o=document.createElement('option');o.value=m;o.textContent=m;if(m===cur)o.selected=true;ocrPage.appendChild(o)});
}
if(ocrSel){const cur=ocrSel.value||appSettings.openrouter_ocr_model||'';ocrSel.innerHTML='<option value="">(없음)</option>';orVisionModels.forEach(m=>{const o=document.createElement('option');o.value=m;o.textContent=m;if(m===cur)o.selected=true;ocrSel.appendChild(o)})}
fillOr(document.getElementById('stt-or-model'),appSettings.openrouter_stt_model);
const ocrPage=document.getElementById('ocr-or-model');if(ocrPage){const cur=ocrPage.value||appSettings.openrouter_ocr_model||'';ocrPage.innerHTML='<option value="">설정 기본 모델 사용</option>';orVisionModels.forEach(m=>{const o=document.createElement('option');o.value=m;o.textContent=m;if(m===cur)o.selected=true;ocrPage.appendChild(o)})}
}
document.getElementById('btn-or-test').addEventListener('click',async()=>{
const key=document.getElementById('or-api-key').value.trim();
const url=document.getElementById('or-url').value.trim()||'https://openrouter.ai/api/v1';
document.getElementById('btn-or-test')?.addEventListener('click',async()=>{
const key=document.getElementById('or-api-key').value.trim(),url=document.getElementById('or-url').value.trim()||'https://openrouter.ai/api/v1';
const result=document.getElementById('or-test-result');
if(!key){result.style.display='block';result.style.color='var(--warn)';result.textContent='API 키를 입력하세요';return}
result.style.display='block';result.style.color='var(--muted)';result.textContent='연결 중...';
try{
const fd=new FormData();fd.append('api_key',key);fd.append('base_url',url);
try{const fd=new FormData();fd.append('api_key',key);fd.append('base_url',url);
const r=await api('POST','/api/openrouter/test',fd);const d=await r.json();
result.style.color=d.ok?'var(--accent)':'var(--warn)';result.textContent=d.message;
if(d.ok)loadOrModels();
}catch{result.style.color='var(--warn)';result.textContent='요청 실패'}
result.style.color=d.ok?'var(--accent)':'var(--warn)';result.textContent=d.message;if(d.ok)loadOrModels();}
catch{result.style.color='var(--warn)';result.textContent='요청 실패'}
});
loadLanguages();
checkAuth();
</script>
</body>

View File

@@ -1,4 +1,10 @@
import os
"""
STT Celery Tasks
- faster-whisper 변환
- Ollama / OpenRouter 후처리 (교정 또는 번역)
- SRT / VTT / TXT 자막 파일 생성
"""
import os, json
import httpx
from celery import Celery
from ocr_tasks import ocr_task # noqa: F401
@@ -31,13 +37,144 @@ def get_model():
from faster_whisper import WhisperModel
kwargs = dict(device=DEVICE, compute_type=COMPUTE_TYPE)
if CPU_THREADS is not None: kwargs["cpu_threads"] = CPU_THREADS
print(f"[Whisper] 로딩: {MODEL_SIZE} / {DEVICE} / {COMPUTE_TYPE} / threads={CPU_THREADS or 'auto'}")
print(f"[Whisper] 로딩: {MODEL_SIZE}/{DEVICE}/{COMPUTE_TYPE}/threads={CPU_THREADS or 'auto'}")
_model = WhisperModel(MODEL_SIZE, **kwargs)
print("[Whisper] 로드 완료")
return _model
# ── 후처리: Ollama ────────────────────────────────────────────
# ══════════════════════════════════════════════════════════════
# 언어 코드 매핑
# ══════════════════════════════════════════════════════════════
# Display names for language codes; the name is inserted into the translation
# prompt. Covers every code offered by SUPPORTED_LANGS in the FastAPI app so that
# no selectable target language falls back to its bare code in the prompt.
LANG_NAMES = {
    "ko": "한국어", "en": "English", "ja": "日本語", "zh": "中文",
    "zh-tw": "中文(繁體)", "fr": "Français", "de": "Deutsch", "es": "Español",
    "it": "Italiano", "pt": "Português", "ru": "Русский", "ar": "العربية",
    "vi": "Tiếng Việt", "th": "ไทย", "id": "Bahasa Indonesia",
    "nl": "Nederlands", "pl": "Polski", "tr": "Türkçe", "sv": "Svenska",
    "uk": "Українська", "hi": "हिन्दी", "bn": "বাংলা",
}


def _lang_name(code: str) -> str:
    """Return the display name for a language code.

    Unknown codes are returned unchanged so callers always get a usable string.
    """
    return LANG_NAMES.get(code, code)
# ══════════════════════════════════════════════════════════════
# 자막 포맷 생성
# ══════════════════════════════════════════════════════════════
def _fmt_srt_time(s: float) -> str:
    """Format an offset in seconds as an SRT timestamp, ``HH:MM:SS,mmm``.

    The value is rounded to the nearest millisecond before being split into
    hours, minutes, seconds and milliseconds.
    """
    remaining = int(round(s * 1000))
    hours, remaining = divmod(remaining, 3_600_000)
    minutes, remaining = divmod(remaining, 60_000)
    seconds, millis = divmod(remaining, 1_000)
    return "{:02d}:{:02d}:{:02d},{:03d}".format(hours, minutes, seconds, millis)
def _fmt_vtt_time(s: float) -> str:
    """Format an offset in seconds as a WebVTT timestamp, ``HH:MM:SS.mmm``.

    Same digits as the SRT form; only the millisecond separator differs.
    """
    srt_stamp = _fmt_srt_time(s)
    return srt_stamp.replace(",", ".")
def _make_srt(segments: list) -> str:
    """Render segments as the text of an SRT subtitle file.

    Each segment is a dict with ``start``, ``end`` and ``text`` keys. Every cue
    is written as its 1-based index, the time range, the stripped text and a
    blank separator line.
    """
    out = []
    for number, seg in enumerate(segments, 1):
        time_range = f"{_fmt_srt_time(seg['start'])} --> {_fmt_srt_time(seg['end'])}"
        out.extend((str(number), time_range, seg["text"].strip(), ""))
    return "\n".join(out)
def _make_vtt(segments: list) -> str:
    """Render segments as the text of a WebVTT subtitle file.

    Output starts with the ``WEBVTT`` header and a blank line, followed by one
    cue per segment: 1-based cue identifier, time range, stripped text, blank
    separator line.
    """
    out = ["WEBVTT", ""]
    for number, seg in enumerate(segments, 1):
        time_range = f"{_fmt_vtt_time(seg['start'])} --> {_fmt_vtt_time(seg['end'])}"
        out.extend((f"{number}", time_range, seg["text"].strip(), ""))
    return "\n".join(out)
# ══════════════════════════════════════════════════════════════
# 번역 (Ollama / OpenRouter)
# ══════════════════════════════════════════════════════════════
def _parse_translation_reply(raw: str, originals: list) -> list:
    """Turn a model reply into exactly one translated string per original text.

    Raises ValueError (json.JSONDecodeError) when no JSON can be parsed; the
    caller treats that as a failed chunk.
    """
    if "```" in raw:
        # Keep the first fenced block and drop an optional "json" language tag.
        # str.lstrip("json") would strip a character SET, not the prefix, and
        # misses an upper-case tag.
        raw = raw.split("```")[1].strip()
        if raw[:4].lower() == "json":
            raw = raw[4:]
    # Models often wrap the array in prose; keep only the outermost [...] span.
    start, end = raw.find("["), raw.rfind("]")
    if start != -1 and end > start:
        raw = raw[start:end + 1]
    parsed = json.loads(raw)
    if not isinstance(parsed, list):
        return list(originals)
    merged = []
    for idx, original in enumerate(originals):
        candidate = parsed[idx] if idx < len(parsed) else None
        # A missing, empty or non-string item keeps the source text, so the
        # subtitle writers (which call .strip() on it) never see a non-string.
        if isinstance(candidate, str) and candidate.strip():
            merged.append(candidate.strip())
        else:
            merged.append(original)
    return merged


def _translate_segments(segments: list, target_lang: str,
                        use_openrouter: bool, model: str,
                        openrouter_url: str, openrouter_key: str,
                        task_self=None) -> list:
    """Translate segment texts into ``target_lang`` and return new segments.

    Args:
        segments: dicts with at least a ``text`` key; other keys are copied.
        target_lang: language code of the translation target.
        use_openrouter: send requests to OpenRouter (only if a key is given);
            otherwise the Ollama chat endpoint is used.
        model: model name; an empty value disables translation.
        openrouter_url: OpenRouter base URL.
        openrouter_key: OpenRouter API key.
        task_self: optional bound Celery task used to report progress.

    Returns:
        ``segments`` itself when ``model`` or ``target_lang`` is empty;
        otherwise a new list with one segment per input segment, in order.
        A segment whose translation is unavailable keeps its original text.
    """
    if not model or not target_lang:
        return segments

    lang_name = _lang_name(target_lang)
    translated = []

    # Segments are sent in groups of up to 20 to keep the number of API calls low.
    CHUNK = 20
    chunks = [segments[i:i+CHUNK] for i in range(0, len(segments), CHUNK)]

    for ci, chunk in enumerate(chunks):
        if task_self:
            pct = 85 + int((ci / len(chunks)) * 10)
            task_self.update_state(state="PROGRESS",
                meta={"progress": pct,
                      "message": f"번역 중... ({ci*CHUNK+1}/{len(segments)})"})

        # Only the texts are sent, as a JSON array.
        texts = [seg["text"].strip() for seg in chunk]
        prompt = (
            f"다음 문장들을 {lang_name}로 번역해줘.\n"
            f"JSON 배열 형식으로만 답해줘. 설명 없이 번역된 문장 배열만 출력해.\n"
            f"입력 배열과 동일한 개수, 동일한 순서로 출력해.\n\n"
            f"입력: {json.dumps(texts, ensure_ascii=False)}"
        )

        try:
            if use_openrouter and openrouter_key:
                resp = httpx.post(
                    f"{openrouter_url.rstrip('/')}/chat/completions",
                    headers={"Authorization": f"Bearer {openrouter_key}",
                             "HTTP-Referer": "https://voicescript.local",
                             "Content-Type": "application/json"},
                    json={"model": model,
                          "messages": [{"role":"user","content":prompt}],
                          "temperature": 0.2},
                    timeout=float(OLLAMA_TIMEOUT),
                )
                resp.raise_for_status()
                raw = resp.json()["choices"][0]["message"]["content"].strip()
            else:
                resp = httpx.post(f"{OLLAMA_URL}/api/chat",
                    json={"model": model,
                          "messages": [{"role":"user","content":prompt}],
                          "stream": False, "options": {"temperature": 0.2}},
                    timeout=float(OLLAMA_TIMEOUT))
                resp.raise_for_status()
                raw = resp.json().get("message",{}).get("content","").strip()

            trans_texts = _parse_translation_reply(raw, texts)
        except Exception as e:
            # Deliberate best-effort: a failed chunk keeps its source text
            # instead of failing the whole transcription task.
            print(f"[번역 실패 chunk {ci}] {e}")
            trans_texts = texts

        # trans_texts always has len(chunk) entries, so zip() drops nothing.
        translated.extend({**seg, "text": t_text}
                          for seg, t_text in zip(chunk, trans_texts))

    return translated
# ══════════════════════════════════════════════════════════════
# Ollama 텍스트 후처리 (교정)
# ══════════════════════════════════════════════════════════════
def _ollama_postprocess(text: str, model: str) -> str:
if not model or not text.strip(): return text
prompt = (
@@ -46,19 +183,16 @@ def _ollama_postprocess(text: str, model: str) -> str:
"결과 텍스트만 출력하고 설명은 하지 마.\n\n" + text
)
try:
resp = httpx.post(f"{OLLAMA_URL}/api/chat", json={
"model": model,
"messages": [{"role":"user","content":prompt}],
"stream": False, "options": {"temperature": 0.1},
}, timeout=float(OLLAMA_TIMEOUT))
resp = httpx.post(f"{OLLAMA_URL}/api/chat",
json={"model":model,"messages":[{"role":"user","content":prompt}],
"stream":False,"options":{"temperature":0.1}},
timeout=float(OLLAMA_TIMEOUT))
resp.raise_for_status()
result = resp.json().get("message",{}).get("content","").strip()
return result if result else text
except Exception as e:
print(f"[Ollama 후처리 실패] {e}"); return text
# ── 후처리: OpenRouter (OpenAI 호환) ─────────────────────────
def _openrouter_postprocess(text: str, model: str, base_url: str, api_key: str) -> str:
if not model or not api_key or not text.strip(): return text
prompt = (
@@ -67,21 +201,10 @@ def _openrouter_postprocess(text: str, model: str, base_url: str, api_key: str)
"결과 텍스트만 출력하고 설명은 하지 마.\n\n" + text
)
try:
resp = httpx.post(
f"{base_url.rstrip('/')}/chat/completions",
headers={
"Authorization": f"Bearer {api_key}",
"HTTP-Referer": "https://voicescript.local",
"X-Title": "VoiceScript",
"Content-Type": "application/json",
},
json={
"model": model,
"messages": [{"role":"user","content":prompt}],
"temperature": 0.1,
},
timeout=float(OLLAMA_TIMEOUT),
)
resp = httpx.post(f"{base_url.rstrip('/')}/chat/completions",
headers={"Authorization":f"Bearer {api_key}","HTTP-Referer":"https://voicescript.local","Content-Type":"application/json"},
json={"model":model,"messages":[{"role":"user","content":prompt}],"temperature":0.1},
timeout=float(OLLAMA_TIMEOUT))
resp.raise_for_status()
result = resp.json()["choices"][0]["message"]["content"].strip()
return result if result else text
@@ -89,30 +212,44 @@ def _openrouter_postprocess(text: str, model: str, base_url: str, api_key: str)
print(f"[OpenRouter 후처리 실패] {e}"); return text
# ════════════════════════════════════════════════════════════════
# STT Task
# ════════════════════════════════════════════════════════════════
# ══════════════════════════════════════════════════════════════
# 메인 STT Task
# ══════════════════════════════════════════════════════════════
@celery_app.task(bind=True, name="tasks.transcribe_task", queue="stt")
def transcribe_task(
self,
file_id: str,
audio_path: str,
use_ollama: bool = False,
ollama_model: str = "",
use_openrouter: bool = False,
openrouter_model: str = "",
openrouter_url: str = "",
openrouter_key: str = "",
file_id: str,
audio_path: str,
# 후처리
use_ollama: bool = False,
ollama_model: str = "",
use_openrouter: bool = False,
openrouter_model: str = "",
openrouter_url: str = "",
openrouter_key: str = "",
# 자막
subtitle_mode: bool = False, # True → 자막 파일 생성
subtitle_format: str = "srt", # srt | vtt | both
translate_to: str = "", # 번역 대상 언어 코드 (없으면 원어 자막)
translate_model: str = "", # 번역에 쓸 모델
translate_via: str = "ollama",# ollama | openrouter
# 원본 언어 강제 지정 (없으면 auto)
force_language: str = "",
):
self.update_state(state="PROGRESS", meta={"progress":5,"message":"모델 준비 중..."})
try:
model = get_model()
self.update_state(state="PROGRESS", meta={"progress":15,"message":"오디오 분석 중..."})
lang = force_language.strip() or LANGUAGE
segments_gen, info = model.transcribe(
audio_path, language=LANGUAGE, beam_size=BEAM_SIZE,
initial_prompt=INITIAL_PROMPT, vad_filter=True,
vad_parameters=dict(min_silence_duration_ms=500), word_timestamps=False,
audio_path,
language=lang,
beam_size=BEAM_SIZE,
initial_prompt=INITIAL_PROMPT,
vad_filter=True,
vad_parameters=dict(min_silence_duration_ms=500),
word_timestamps=False,
)
self.update_state(state="PROGRESS", meta={"progress":30,"message":"텍스트 변환 중..."})
@@ -120,63 +257,126 @@ def transcribe_task(
duration = info.duration
for seg in segments_gen:
segments.append({"start":round(seg.start,2),"end":round(seg.end,2),"text":seg.text.strip()})
segments.append({"start":round(seg.start,3),"end":round(seg.end,3),"text":seg.text.strip()})
parts.append(seg.text.strip())
if duration > 0:
pct = 30 + int((seg.end/duration)*50)
pct = 30 + int((seg.end/duration)*45)
self.update_state(state="PROGRESS",
meta={"progress":min(pct,80),"message":f"변환 중... {seg.end:.0f}s / {duration:.0f}s"})
meta={"progress":min(pct,75),
"message":f"변환 중... {seg.end:.0f}s / {duration:.0f}s"})
raw_text = "\n".join(parts)
full_text = raw_text
detected_lang = info.language
# Ollama 후처리
if use_ollama and ollama_model:
# ── 텍스트 후처리 (교정) ──────────────────────────────
if use_ollama and ollama_model and not subtitle_mode:
self.update_state(state="PROGRESS",
meta={"progress":85,"message":f"Ollama({ollama_model}) 후처리 중..."})
meta={"progress":80,"message":f"Ollama({ollama_model}) 교정 중..."})
full_text = _ollama_postprocess(raw_text, ollama_model)
# OpenRouter 후처리
elif use_openrouter and openrouter_model and openrouter_key:
elif use_openrouter and openrouter_model and openrouter_key and not subtitle_mode:
self.update_state(state="PROGRESS",
meta={"progress":85,"message":f"OpenRouter({openrouter_model}) 후처리 중..."})
meta={"progress":80,"message":f"OpenRouter({openrouter_model}) 교정 중..."})
full_text = _openrouter_postprocess(raw_text, openrouter_model, openrouter_url, openrouter_key)
self.update_state(state="PROGRESS", meta={"progress":95,"message":"파일 저장 중..."})
os.makedirs(OUTPUT_DIR, exist_ok=True)
output_filename = f"{file_id}.txt"
# ── 자막 모드: 번역 ──────────────────────────────────
translated_segments = segments
is_translated = False
with open(os.path.join(OUTPUT_DIR, output_filename), "w", encoding="utf-8") as f:
f.write(f"# 변환 결과\n# 언어: {info.language} | 재생 시간: {duration:.1f}")
if use_ollama and ollama_model:
f.write(f" | Ollama: {ollama_model}")
elif use_openrouter and openrouter_model:
f.write(f" | OpenRouter: {openrouter_model}")
f.write("\n\n## 전체 텍스트\n\n" + full_text + "\n\n")
f.write("## 타임스탬프별 세그먼트\n\n")
for seg in segments:
f.write(f"[{_fmt(seg['start'])}{_fmt(seg['end'])}] {seg['text']}\n")
if subtitle_mode and translate_to and translate_to != detected_lang:
t_model = translate_model or (ollama_model if translate_via=="ollama" else openrouter_model)
t_via_or = (translate_via == "openrouter" and bool(openrouter_key))
self.update_state(state="PROGRESS",
meta={"progress":82,
"message":f"{_lang_name(translate_to)}로 번역 중..."})
translated_segments = _translate_segments(
segments, translate_to,
use_openrouter=t_via_or,
model=t_model,
openrouter_url=openrouter_url,
openrouter_key=openrouter_key,
task_self=self,
)
is_translated = True
# 번역된 전체 텍스트
full_text = "\n".join(s["text"] for s in translated_segments)
self.update_state(state="PROGRESS", meta={"progress":93,"message":"파일 저장 중..."})
os.makedirs(OUTPUT_DIR, exist_ok=True)
result_files = {}
# ── TXT 저장 ─────────────────────────────────────────
txt_filename = f"{file_id}.txt"
with open(os.path.join(OUTPUT_DIR, txt_filename), "w", encoding="utf-8") as f:
f.write(f"# 변환 결과\n")
f.write(f"# 언어: {detected_lang} | 재생 시간: {duration:.1f}\n")
if is_translated:
f.write(f"# 번역: {_lang_name(translate_to)}\n")
f.write(f"\n## 전체 텍스트\n\n{full_text}\n\n")
f.write(f"## 타임스탬프별 세그먼트\n\n")
for seg in (translated_segments if is_translated else segments):
f.write(f"[{_fmt_ts(seg['start'])}{_fmt_ts(seg['end'])}] {seg['text']}\n")
result_files["txt"] = txt_filename
# ── 자막 파일 저장 ────────────────────────────────────
if subtitle_mode:
sub_segs = translated_segments if is_translated else segments
lang_suffix = f".{translate_to}" if is_translated else f".{detected_lang}"
if subtitle_format in ("srt", "both"):
srt_fn = f"{file_id}{lang_suffix}.srt"
with open(os.path.join(OUTPUT_DIR, srt_fn), "w", encoding="utf-8") as f:
f.write(_make_srt(sub_segs))
result_files["srt"] = srt_fn
if subtitle_format in ("vtt", "both"):
vtt_fn = f"{file_id}{lang_suffix}.vtt"
with open(os.path.join(OUTPUT_DIR, vtt_fn), "w", encoding="utf-8") as f:
f.write(_make_vtt(sub_segs))
result_files["vtt"] = vtt_fn
# 원본 언어 SRT도 함께 (번역 시)
if is_translated and subtitle_format in ("srt","both"):
orig_fn = f"{file_id}.{detected_lang}.srt"
with open(os.path.join(OUTPUT_DIR, orig_fn), "w", encoding="utf-8") as f:
f.write(_make_srt(segments))
result_files["srt_original"] = orig_fn
try: os.remove(audio_path)
except: pass
return {
# 기본 STT 결과
"text": full_text,
"raw_text": raw_text,
"segments": segments,
"language": info.language,
"segments": translated_segments if is_translated else segments,
"orig_segments": segments,
"language": detected_lang,
"duration": round(duration, 1),
"output_file": output_filename,
"ollama_used": use_ollama and bool(ollama_model),
"ollama_model": ollama_model if (use_ollama and ollama_model) else "",
"openrouter_used": use_openrouter and bool(openrouter_model) and bool(openrouter_key),
"openrouter_model": openrouter_model if (use_openrouter and openrouter_model) else "",
# 후처리
"ollama_used": use_ollama and bool(ollama_model) and not subtitle_mode,
"ollama_model": ollama_model if (use_ollama and not subtitle_mode) else "",
"openrouter_used": use_openrouter and bool(openrouter_model) and not subtitle_mode,
"openrouter_model": openrouter_model if (use_openrouter and not subtitle_mode) else "",
# 자막
"subtitle_mode": subtitle_mode,
"subtitle_format": subtitle_format,
"translated": is_translated,
"translate_to": translate_to if is_translated else "",
"translate_model": translate_model if is_translated else "",
# 파일
"output_file": result_files.get("txt",""),
"srt_file": result_files.get("srt",""),
"vtt_file": result_files.get("vtt",""),
"srt_original_file": result_files.get("srt_original",""),
}
except Exception as e:
raise Exception(f"변환 실패: {str(e)}")
def _fmt(s):
def _fmt_ts(s: float) -> str:
    """Format seconds as ``MM:SS`` for the per-segment lines of the TXT output.

    The fractional part is truncated, and minutes are not wrapped at 60.
    """
    whole = int(s)
    return "{:02d}:{:02d}".format(whole // 60, whole % 60)