feat: OpenRouter 외부 AI 연동 (STT 교정 + OCR Vision)
This commit is contained in:
@@ -9,8 +9,6 @@ RUN apt-get update && apt-get install -y \
|
|||||||
libxext6 \
|
libxext6 \
|
||||||
libxrender1 \
|
libxrender1 \
|
||||||
libgl1 \
|
libgl1 \
|
||||||
libgles2 \
|
|
||||||
libegl1 \
|
|
||||||
wget \
|
wget \
|
||||||
curl \
|
curl \
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
@@ -19,8 +17,11 @@ WORKDIR /app
|
|||||||
|
|
||||||
COPY requirements.txt .
|
COPY requirements.txt .
|
||||||
|
|
||||||
RUN pip install --no-cache-dir paddlepaddle==3.0.0
|
# PaddlePaddle CPU (AMD64) — paddleocr 3.x 호환
|
||||||
|
RUN pip install --no-cache-dir paddlepaddle==3.0.0 \
|
||||||
|
-i https://pypi.tuna.tsinghua.edu.cn/simple
|
||||||
|
|
||||||
|
# 나머지 패키지
|
||||||
RUN pip install --no-cache-dir -r requirements.txt
|
RUN pip install --no-cache-dir -r requirements.txt
|
||||||
|
|
||||||
COPY . .
|
COPY . .
|
||||||
|
|||||||
391
app/main.py
391
app/main.py
@@ -32,11 +32,16 @@ AUDIO_EXT = {"mp3","mp4","wav","m4a","ogg","flac","aac","wma","webm","mkv","avi"
|
|||||||
IMAGE_EXT = {"jpg","jpeg","png","bmp","tiff","tif","webp","gif"}
|
IMAGE_EXT = {"jpg","jpeg","png","bmp","tiff","tif","webp","gif"}
|
||||||
|
|
||||||
_DEFAULT_SETTINGS = {
|
_DEFAULT_SETTINGS = {
|
||||||
"stt_ollama_model": "",
|
"stt_ollama_model": "",
|
||||||
"ocr_ollama_model": "granite3.2-vision:latest",
|
"ocr_ollama_model": "granite3.2-vision:latest",
|
||||||
"cpu_threads": 0,
|
"cpu_threads": 0,
|
||||||
"stt_timeout": 0,
|
"stt_timeout": 0,
|
||||||
"ollama_timeout": 600,
|
"ollama_timeout": 600,
|
||||||
|
# OpenRouter
|
||||||
|
"openrouter_url": "https://openrouter.ai/api/v1",
|
||||||
|
"openrouter_api_key": "",
|
||||||
|
"openrouter_stt_model": "",
|
||||||
|
"openrouter_ocr_model": "",
|
||||||
}
|
}
|
||||||
_hist_lock = threading.Lock()
|
_hist_lock = threading.Lock()
|
||||||
|
|
||||||
@@ -50,7 +55,8 @@ def _load_settings() -> dict:
|
|||||||
|
|
||||||
def _save_settings(data: dict):
|
def _save_settings(data: dict):
|
||||||
SETTINGS_FILE.parent.mkdir(parents=True, exist_ok=True)
|
SETTINGS_FILE.parent.mkdir(parents=True, exist_ok=True)
|
||||||
with open(SETTINGS_FILE, "w", encoding="utf-8") as f: json.dump(data, f, ensure_ascii=False, indent=2)
|
with open(SETTINGS_FILE, "w", encoding="utf-8") as f:
|
||||||
|
json.dump(data, f, ensure_ascii=False, indent=2)
|
||||||
|
|
||||||
|
|
||||||
# ── 이력 I/O ─────────────────────────────────────────────────
|
# ── 이력 I/O ─────────────────────────────────────────────────
|
||||||
@@ -77,19 +83,15 @@ def append_history(record: dict):
|
|||||||
except: pass
|
except: pass
|
||||||
|
|
||||||
def _update_history_by_task(task_id: str, result: dict, success: bool, error_msg: str = ""):
|
def _update_history_by_task(task_id: str, result: dict, success: bool, error_msg: str = ""):
|
||||||
"""task_id로 이력을 찾아 결과 업데이트 — 핵심 버그 수정"""
|
|
||||||
with _hist_lock:
|
with _hist_lock:
|
||||||
if not HISTORY_FILE.exists(): return
|
if not HISTORY_FILE.exists(): return
|
||||||
try:
|
try:
|
||||||
with open(HISTORY_FILE, "r", encoding="utf-8") as f: history = json.load(f)
|
with open(HISTORY_FILE, "r", encoding="utf-8") as f: history = json.load(f)
|
||||||
for h in history:
|
for h in history:
|
||||||
# task_id 필드로 매칭
|
|
||||||
if h.get("task_id") != task_id: continue
|
if h.get("task_id") != task_id: continue
|
||||||
if h.get("status") != "processing": break
|
if h.get("status") != "processing": break
|
||||||
if not success:
|
if not success:
|
||||||
h["status"] = "failed"
|
h["status"] = "failed"; h["output"] = {"error": error_msg[:300]}; break
|
||||||
h["output"] = {"error": error_msg[:300]}
|
|
||||||
break
|
|
||||||
h["status"] = "success"
|
h["status"] = "success"
|
||||||
if h["type"] == "stt":
|
if h["type"] == "stt":
|
||||||
h["output"] = {
|
h["output"] = {
|
||||||
@@ -97,12 +99,14 @@ def _update_history_by_task(task_id: str, result: dict, success: bool, error_msg
|
|||||||
"language": result.get("language", ""),
|
"language": result.get("language", ""),
|
||||||
"duration_s": result.get("duration", 0),
|
"duration_s": result.get("duration", 0),
|
||||||
"segments": len(result.get("segments", [])),
|
"segments": len(result.get("segments", [])),
|
||||||
"text_preview": result.get("text", "")[:200] + ("…" if len(result.get("text",""))>200 else ""),
|
"text_preview": result.get("text","")[:200] + ("…" if len(result.get("text",""))>200 else ""),
|
||||||
"ollama_used": result.get("ollama_used", False),
|
"ollama_used": result.get("ollama_used", False),
|
||||||
"ollama_model": result.get("ollama_model", ""),
|
"ollama_model": result.get("ollama_model", ""),
|
||||||
|
"openrouter_used": result.get("openrouter_used", False),
|
||||||
|
"openrouter_model": result.get("openrouter_model", ""),
|
||||||
}
|
}
|
||||||
else:
|
else:
|
||||||
full_text = result.get("full_text", "")
|
ft = result.get("full_text", "")
|
||||||
h["output"] = {
|
h["output"] = {
|
||||||
"txt_file": result.get("txt_file", ""),
|
"txt_file": result.get("txt_file", ""),
|
||||||
"xlsx_file": result.get("xlsx_file", ""),
|
"xlsx_file": result.get("xlsx_file", ""),
|
||||||
@@ -110,7 +114,8 @@ def _update_history_by_task(task_id: str, result: dict, success: bool, error_msg
|
|||||||
"table_count": len(result.get("tables", [])),
|
"table_count": len(result.get("tables", [])),
|
||||||
"backend": result.get("backend", ""),
|
"backend": result.get("backend", ""),
|
||||||
"ollama_model": result.get("ollama_model", ""),
|
"ollama_model": result.get("ollama_model", ""),
|
||||||
"text_preview": full_text[:200] + ("…" if len(full_text)>200 else ""),
|
"openrouter_model": result.get("openrouter_model", ""),
|
||||||
|
"text_preview": ft[:200] + ("…" if len(ft)>200 else ""),
|
||||||
}
|
}
|
||||||
break
|
break
|
||||||
_write_history(history)
|
_write_history(history)
|
||||||
@@ -132,12 +137,11 @@ def clear_history():
|
|||||||
|
|
||||||
|
|
||||||
# ════════════════════════════════════════════════════════════════
|
# ════════════════════════════════════════════════════════════════
|
||||||
# 시작 이벤트
|
# 시작
|
||||||
# ════════════════════════════════════════════════════════════════
|
# ════════════════════════════════════════════════════════════════
|
||||||
@app.on_event("startup")
|
@app.on_event("startup")
|
||||||
async def on_startup():
|
async def on_startup():
|
||||||
init_users()
|
init_users(); _cleanup_outputs()
|
||||||
_cleanup_outputs()
|
|
||||||
|
|
||||||
|
|
||||||
# ════════════════════════════════════════════════════════════════
|
# ════════════════════════════════════════════════════════════════
|
||||||
@@ -183,7 +187,10 @@ def system_info(user: dict = Depends(require_auth)):
|
|||||||
@app.post("/api/transcribe")
|
@app.post("/api/transcribe")
|
||||||
async def transcribe(
|
async def transcribe(
|
||||||
request: Request, file: UploadFile = File(...),
|
request: Request, file: UploadFile = File(...),
|
||||||
use_ollama: str = Form("false"), ollama_model: str = Form(""),
|
use_ollama: str = Form("false"),
|
||||||
|
ollama_model: str = Form(""),
|
||||||
|
use_openrouter: str = Form("false"),
|
||||||
|
openrouter_model: str = Form(""),
|
||||||
user: dict = Depends(require_stt),
|
user: dict = Depends(require_stt),
|
||||||
):
|
):
|
||||||
_check_size(request)
|
_check_size(request)
|
||||||
@@ -193,29 +200,39 @@ async def transcribe(
|
|||||||
save_path = os.path.join(UPLOAD_DIR, f"{file_id}.{ext}")
|
save_path = os.path.join(UPLOAD_DIR, f"{file_id}.{ext}")
|
||||||
await _save(file, save_path)
|
await _save(file, save_path)
|
||||||
file_size = os.path.getsize(save_path)
|
file_size = os.path.getsize(save_path)
|
||||||
_use_ollama = use_ollama.lower() == "true"
|
|
||||||
s = _load_settings()
|
s = _load_settings()
|
||||||
if _use_ollama and not ollama_model.strip(): ollama_model = s.get("stt_ollama_model", "")
|
_use_ollama = use_ollama.lower() == "true"
|
||||||
|
_use_openrouter = use_openrouter.lower() == "true"
|
||||||
|
|
||||||
task = transcribe_task.delay(file_id, save_path, _use_ollama, ollama_model)
|
if _use_ollama and not ollama_model.strip():
|
||||||
|
ollama_model = s.get("stt_ollama_model", "")
|
||||||
|
if _use_openrouter and not openrouter_model.strip():
|
||||||
|
openrouter_model = s.get("openrouter_stt_model", "")
|
||||||
|
|
||||||
|
task = transcribe_task.delay(
|
||||||
|
file_id, save_path,
|
||||||
|
_use_ollama, ollama_model,
|
||||||
|
_use_openrouter, openrouter_model,
|
||||||
|
s.get("openrouter_url", ""), s.get("openrouter_api_key", ""),
|
||||||
|
)
|
||||||
|
|
||||||
# ★ task_id를 이력에 함께 저장
|
|
||||||
append_history({
|
append_history({
|
||||||
"id": file_id,
|
"id": file_id, "task_id": task.id, "type": "stt",
|
||||||
"task_id": task.id, # ← 업데이트 매칭 키
|
"status": "processing",
|
||||||
"type": "stt",
|
|
||||||
"status": "processing",
|
|
||||||
"timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
|
"timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
|
||||||
"username": user["username"],
|
"username": user["username"],
|
||||||
"input": {"filename": file.filename, "size_bytes": file_size, "format": ext.upper()},
|
"input": {"filename": file.filename, "size_bytes": file_size, "format": ext.upper()},
|
||||||
"settings": {
|
"settings": {
|
||||||
"model": os.getenv("WHISPER_MODEL", "medium"),
|
"model": os.getenv("WHISPER_MODEL","medium"),
|
||||||
"language": os.getenv("WHISPER_LANGUAGE", "ko"),
|
"language": os.getenv("WHISPER_LANGUAGE","ko"),
|
||||||
"compute_type": os.getenv("WHISPER_COMPUTE_TYPE", "int8"),
|
"compute_type": os.getenv("WHISPER_COMPUTE_TYPE","int8"),
|
||||||
"cpu_threads": s.get("cpu_threads", 0),
|
"cpu_threads": s.get("cpu_threads",0),
|
||||||
"stt_timeout": s.get("stt_timeout", 0),
|
"stt_timeout": s.get("stt_timeout",0),
|
||||||
"use_ollama": _use_ollama,
|
"use_ollama": _use_ollama,
|
||||||
"ollama_model": ollama_model if _use_ollama else "",
|
"ollama_model": ollama_model if _use_ollama else "",
|
||||||
|
"use_openrouter": _use_openrouter,
|
||||||
|
"openrouter_model": openrouter_model if _use_openrouter else "",
|
||||||
},
|
},
|
||||||
"output": None,
|
"output": None,
|
||||||
})
|
})
|
||||||
@@ -228,211 +245,297 @@ async def transcribe(
|
|||||||
@app.post("/api/ocr")
|
@app.post("/api/ocr")
|
||||||
async def ocr(
|
async def ocr(
|
||||||
request: Request, file: UploadFile = File(...),
|
request: Request, file: UploadFile = File(...),
|
||||||
mode: str = Form("text"), backend: str = Form("paddle"),
|
mode: str = Form("text"),
|
||||||
ollama_model: str = Form(""), custom_prompt: str = Form(""),
|
backend: str = Form("paddle"), # paddle | ollama | openrouter
|
||||||
|
ollama_model: str = Form(""),
|
||||||
|
openrouter_model: str = Form(""),
|
||||||
|
custom_prompt: str = Form(""),
|
||||||
user: dict = Depends(require_ocr),
|
user: dict = Depends(require_ocr),
|
||||||
):
|
):
|
||||||
_check_size(request)
|
_check_size(request)
|
||||||
ext = _ext(file.filename)
|
ext = _ext(file.filename)
|
||||||
if ext not in IMAGE_EXT: raise HTTPException(400, f"지원하지 않는 형식: {', '.join(sorted(IMAGE_EXT))}")
|
if ext not in IMAGE_EXT: raise HTTPException(400, f"지원하지 않는 형식: {', '.join(sorted(IMAGE_EXT))}")
|
||||||
if mode not in ("text","structure"): mode = "text"
|
if mode not in ("text","structure"): mode = "text"
|
||||||
if backend not in ("paddle","ollama"): backend = "paddle"
|
if backend not in ("paddle","ollama","openrouter"): backend = "paddle"
|
||||||
|
|
||||||
s = _load_settings()
|
s = _load_settings()
|
||||||
if backend == "ollama" and not ollama_model.strip(): ollama_model = s.get("ocr_ollama_model","granite3.2-vision:latest")
|
if backend == "ollama" and not ollama_model.strip():
|
||||||
|
ollama_model = s.get("ocr_ollama_model","granite3.2-vision:latest")
|
||||||
|
if backend == "openrouter" and not openrouter_model.strip():
|
||||||
|
openrouter_model = s.get("openrouter_ocr_model","")
|
||||||
|
|
||||||
file_id = str(uuid.uuid4())
|
file_id = str(uuid.uuid4())
|
||||||
save_path = os.path.join(UPLOAD_DIR, f"{file_id}.{ext}")
|
save_path = os.path.join(UPLOAD_DIR, f"{file_id}.{ext}")
|
||||||
await _save(file, save_path)
|
await _save(file, save_path)
|
||||||
file_size = os.path.getsize(save_path)
|
file_size = os.path.getsize(save_path)
|
||||||
|
|
||||||
task = ocr_task.delay(file_id, save_path, mode, backend, ollama_model, custom_prompt)
|
task = ocr_task.delay(
|
||||||
|
file_id, save_path, mode, backend,
|
||||||
|
ollama_model, openrouter_model,
|
||||||
|
s.get("openrouter_url",""), s.get("openrouter_api_key",""),
|
||||||
|
custom_prompt,
|
||||||
|
)
|
||||||
|
|
||||||
# ★ task_id를 이력에 함께 저장
|
|
||||||
append_history({
|
append_history({
|
||||||
"id": file_id,
|
"id": file_id, "task_id": task.id, "type": "ocr",
|
||||||
"task_id": task.id, # ← 업데이트 매칭 키
|
"status": "processing",
|
||||||
"type": "ocr",
|
|
||||||
"status": "processing",
|
|
||||||
"timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
|
"timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
|
||||||
"username": user["username"],
|
"username": user["username"],
|
||||||
"input": {"filename": file.filename, "size_bytes": file_size, "format": ext.upper()},
|
"input": {"filename": file.filename, "size_bytes": file_size, "format": ext.upper()},
|
||||||
"settings": {
|
"settings": {
|
||||||
"backend": backend,
|
"backend": backend,
|
||||||
"mode": mode,
|
"mode": mode,
|
||||||
"ocr_lang": os.getenv("OCR_LANG", "korean"),
|
"ocr_lang": os.getenv("OCR_LANG","korean"),
|
||||||
"ollama_model": ollama_model if backend == "ollama" else "",
|
"ollama_model": ollama_model if backend=="ollama" else "",
|
||||||
"ollama_timeout":s.get("ollama_timeout", 600),
|
"openrouter_model": openrouter_model if backend=="openrouter" else "",
|
||||||
"custom_prompt": custom_prompt[:200] if custom_prompt else "",
|
"ollama_timeout": s.get("ollama_timeout",600),
|
||||||
|
"custom_prompt": custom_prompt[:200] if custom_prompt else "",
|
||||||
},
|
},
|
||||||
"output": None,
|
"output": None,
|
||||||
})
|
})
|
||||||
return {"task_id": task.id, "file_id": file_id, "filename": file.filename, "mode": mode, "backend": backend}
|
return {"task_id": task.id, "file_id": file_id,
|
||||||
|
"filename": file.filename, "mode": mode, "backend": backend}
|
||||||
|
|
||||||
|
|
||||||
# ════════════════════════════════════════════════════════════════
|
# ════════════════════════════════════════════════════════════════
|
||||||
# 상태 — task_id 기준으로 이력 업데이트
|
# 상태
|
||||||
# ════════════════════════════════════════════════════════════════
|
# ════════════════════════════════════════════════════════════════
|
||||||
@app.get("/api/status/{task_id}")
|
@app.get("/api/status/{task_id}")
|
||||||
def get_status(task_id: str, user: dict = Depends(require_auth)):
|
def get_status(task_id: str, user: dict = Depends(require_auth)):
|
||||||
r = celery_app.AsyncResult(task_id)
|
r = celery_app.AsyncResult(task_id)
|
||||||
if r.state == "PENDING":
|
if r.state == "PENDING": return {"state":"pending", "progress":0, "message":"대기 중..."}
|
||||||
return {"state": "pending", "progress": 0, "message": "대기 중..."}
|
if r.state == "PROGRESS": m=r.info or {}; return {"state":"progress","progress":m.get("progress",0),"message":m.get("message","처리 중...")}
|
||||||
if r.state == "PROGRESS":
|
if r.state == "SUCCESS": _update_history_by_task(task_id, r.result or {}, True); return {"state":"success","progress":100,**(r.result or {})}
|
||||||
m = r.info or {}
|
if r.state == "FAILURE": _update_history_by_task(task_id, {}, False, str(r.info)); return {"state":"failure","progress":0,"message":str(r.info)}
|
||||||
return {"state": "progress", "progress": m.get("progress",0), "message": m.get("message","처리 중...")}
|
return {"state":r.state.lower(),"progress":0}
|
||||||
if r.state == "SUCCESS":
|
|
||||||
result = r.result or {}
|
|
||||||
# ★ task_id로 이력 업데이트 (file_id 아님)
|
|
||||||
_update_history_by_task(task_id, result, success=True)
|
|
||||||
return {"state": "success", "progress": 100, **result}
|
|
||||||
if r.state == "FAILURE":
|
|
||||||
_update_history_by_task(task_id, {}, success=False, error_msg=str(r.info))
|
|
||||||
return {"state": "failure", "progress": 0, "message": str(r.info)}
|
|
||||||
return {"state": r.state.lower(), "progress": 0}
|
|
||||||
|
|
||||||
|
|
||||||
# ════════════════════════════════════════════════════════════════
|
# ════════════════════════════════════════════════════════════════
|
||||||
# 이력
|
# 이력
|
||||||
# ════════════════════════════════════════════════════════════════
|
# ════════════════════════════════════════════════════════════════
|
||||||
@app.get("/api/history")
|
@app.get("/api/history")
|
||||||
def get_history(page: int=1, per_page: int=15, type_: str="", user: dict=Depends(require_auth)):
|
def get_history(page:int=1,per_page:int=15,type_:str="",user:dict=Depends(require_auth)):
|
||||||
history = _load_history()
|
history = _load_history()
|
||||||
if user.get("role") != "admin": history = [h for h in history if h.get("username")==user["username"]]
|
if user.get("role") != "admin": history = [h for h in history if h.get("username")==user["username"]]
|
||||||
if type_ in ("stt","ocr"): history = [h for h in history if h.get("type")==type_]
|
if type_ in ("stt","ocr"): history = [h for h in history if h.get("type")==type_]
|
||||||
total = len(history); start = (page-1)*per_page
|
total = len(history); start = (page-1)*per_page
|
||||||
return {"total": total, "page": page, "per_page": per_page, "items": history[start:start+per_page]}
|
return {"total":total,"page":page,"per_page":per_page,"items":history[start:start+per_page]}
|
||||||
|
|
||||||
@app.delete("/api/history/{history_id}")
|
@app.delete("/api/history/{history_id}")
|
||||||
def delete_history(history_id: str, user: dict=Depends(require_auth)):
|
def delete_history(history_id:str,user:dict=Depends(require_auth)):
|
||||||
if not delete_history_item(history_id): raise HTTPException(404, "이력을 찾을 수 없습니다")
|
if not delete_history_item(history_id): raise HTTPException(404,"이력을 찾을 수 없습니다")
|
||||||
return {"ok": True}
|
return {"ok":True}
|
||||||
|
|
||||||
@app.delete("/api/history")
|
@app.delete("/api/history")
|
||||||
def clear_all_history(user: dict=Depends(require_admin)):
|
def clear_all_history(user:dict=Depends(require_admin)):
|
||||||
clear_history(); return {"ok": True}
|
clear_history(); return {"ok":True}
|
||||||
|
|
||||||
|
|
||||||
# ════════════════════════════════════════════════════════════════
|
# ════════════════════════════════════════════════════════════════
|
||||||
# 다운로드 / Ollama / 설정 / 관리자
|
# 다운로드
|
||||||
# ════════════════════════════════════════════════════════════════
|
# ════════════════════════════════════════════════════════════════
|
||||||
@app.get("/api/download/{filename}")
|
@app.get("/api/download/{filename}")
|
||||||
def download(filename: str, user: dict=Depends(require_auth)):
|
def download(filename:str,user:dict=Depends(require_auth)):
|
||||||
if ".." in filename or "/" in filename: raise HTTPException(400, "잘못된 파일명")
|
if ".." in filename or "/" in filename: raise HTTPException(400,"잘못된 파일명")
|
||||||
path = os.path.join(OUTPUT_DIR, filename)
|
path = os.path.join(OUTPUT_DIR, filename)
|
||||||
if not os.path.exists(path): raise HTTPException(404, "파일을 찾을 수 없습니다")
|
if not os.path.exists(path): raise HTTPException(404,"파일을 찾을 수 없습니다")
|
||||||
media = ("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
media = ("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
||||||
if filename.endswith(".xlsx") else "text/plain")
|
if filename.endswith(".xlsx") else "text/plain")
|
||||||
return FileResponse(path, media_type=media, filename=filename)
|
return FileResponse(path, media_type=media, filename=filename)
|
||||||
|
|
||||||
|
|
||||||
|
# ════════════════════════════════════════════════════════════════
|
||||||
|
# Ollama 모델 목록
|
||||||
|
# ════════════════════════════════════════════════════════════════
|
||||||
@app.get("/api/ollama/models")
|
@app.get("/api/ollama/models")
|
||||||
def ollama_models(user: dict=Depends(require_auth)):
|
def ollama_models(user:dict=Depends(require_auth)):
|
||||||
try:
|
try:
|
||||||
resp = httpx.get(f"{OLLAMA_URL}/api/tags", timeout=8.0); resp.raise_for_status()
|
resp = httpx.get(f"{OLLAMA_URL}/api/tags", timeout=8.0); resp.raise_for_status()
|
||||||
return {"models": [m["name"] for m in resp.json().get("models",[])], "connected": True}
|
return {"models":[m["name"] for m in resp.json().get("models",[])], "connected":True}
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return {"models": [], "connected": False, "error": str(e)}
|
return {"models":[], "connected":False, "error":str(e)}
|
||||||
|
|
||||||
|
|
||||||
|
# ════════════════════════════════════════════════════════════════
|
||||||
|
# OpenRouter 모델 목록 & 연결 테스트
|
||||||
|
# ════════════════════════════════════════════════════════════════
|
||||||
|
@app.get("/api/openrouter/models")
|
||||||
|
def openrouter_models(user: dict = Depends(require_auth)):
|
||||||
|
s = _load_settings()
|
||||||
|
api_key = s.get("openrouter_api_key", "")
|
||||||
|
base_url = s.get("openrouter_url", "https://openrouter.ai/api/v1").rstrip("/")
|
||||||
|
if not api_key:
|
||||||
|
return {"models": [], "connected": False, "error": "API 키가 설정되지 않았습니다"}
|
||||||
|
try:
|
||||||
|
resp = httpx.get(
|
||||||
|
f"{base_url}/models",
|
||||||
|
headers={"Authorization": f"Bearer {api_key}",
|
||||||
|
"HTTP-Referer": "https://voicescript.local"},
|
||||||
|
timeout=12.0,
|
||||||
|
)
|
||||||
|
resp.raise_for_status()
|
||||||
|
data = resp.json()
|
||||||
|
# Vision 모델 필터링 (multimodal 지원 모델)
|
||||||
|
all_models = data.get("data", [])
|
||||||
|
vision = [m["id"] for m in all_models
|
||||||
|
if any(k in str(m.get("architecture", {}).get("modality","")).lower()
|
||||||
|
for k in ["image","vision","multimodal"])
|
||||||
|
or any(k in m["id"].lower()
|
||||||
|
for k in ["vision","claude-3","gemini","gpt-4o","llava","pixtral","qwen-vl","intern","deepseek-vl"])]
|
||||||
|
text = [m["id"] for m in all_models if m["id"] not in vision]
|
||||||
|
return {
|
||||||
|
"models": [m["id"] for m in all_models],
|
||||||
|
"vision_models": vision,
|
||||||
|
"text_models": text,
|
||||||
|
"connected": True,
|
||||||
|
"total": len(all_models),
|
||||||
|
}
|
||||||
|
except httpx.HTTPStatusError as e:
|
||||||
|
return {"models":[], "connected":False, "error":f"HTTP {e.response.status_code}: API 키를 확인하세요"}
|
||||||
|
except Exception as e:
|
||||||
|
return {"models":[], "connected":False, "error":str(e)}
|
||||||
|
|
||||||
|
@app.post("/api/openrouter/test")
|
||||||
|
def openrouter_test(
|
||||||
|
api_key: str = Form(...),
|
||||||
|
base_url: str = Form("https://openrouter.ai/api/v1"),
|
||||||
|
user: dict = Depends(require_auth),
|
||||||
|
):
|
||||||
|
"""API 키 연결 테스트"""
|
||||||
|
try:
|
||||||
|
resp = httpx.get(
|
||||||
|
f"{base_url.rstrip('/')}/models",
|
||||||
|
headers={"Authorization": f"Bearer {api_key}",
|
||||||
|
"HTTP-Referer": "https://voicescript.local"},
|
||||||
|
timeout=10.0,
|
||||||
|
)
|
||||||
|
resp.raise_for_status()
|
||||||
|
count = len(resp.json().get("data", []))
|
||||||
|
return {"ok": True, "message": f"연결 성공 — {count}개 모델 사용 가능"}
|
||||||
|
except httpx.HTTPStatusError as e:
|
||||||
|
return {"ok": False, "message": f"인증 실패 (HTTP {e.response.status_code}) — API 키를 확인하세요"}
|
||||||
|
except Exception as e:
|
||||||
|
return {"ok": False, "message": f"연결 실패: {str(e)}"}
|
||||||
|
|
||||||
|
|
||||||
|
# ════════════════════════════════════════════════════════════════
|
||||||
|
# 설정
|
||||||
|
# ════════════════════════════════════════════════════════════════
|
||||||
@app.get("/api/settings")
|
@app.get("/api/settings")
|
||||||
def get_settings(user: dict=Depends(require_auth)): return _load_settings()
|
def get_settings(user: dict = Depends(require_auth)):
|
||||||
|
s = _load_settings()
|
||||||
|
# API 키는 마스킹해서 반환
|
||||||
|
result = dict(s)
|
||||||
|
if result.get("openrouter_api_key"):
|
||||||
|
key = result["openrouter_api_key"]
|
||||||
|
result["openrouter_api_key_masked"] = key[:8] + "..." + key[-4:] if len(key) > 12 else "****"
|
||||||
|
else:
|
||||||
|
result["openrouter_api_key_masked"] = ""
|
||||||
|
result["openrouter_api_key"] = "" # 평문은 반환 안 함
|
||||||
|
return result
|
||||||
|
|
||||||
@app.post("/api/settings")
|
@app.post("/api/settings")
|
||||||
def save_settings_endpoint(
|
def save_settings_endpoint(
|
||||||
stt_ollama_model: str = Form(""),
|
stt_ollama_model: str = Form(""),
|
||||||
ocr_ollama_model: str = Form(""),
|
ocr_ollama_model: str = Form(""),
|
||||||
cpu_threads: str = Form("0"),
|
cpu_threads: str = Form("0"),
|
||||||
stt_timeout: str = Form("0"),
|
stt_timeout: str = Form("0"),
|
||||||
ollama_timeout: str = Form("600"),
|
ollama_timeout: str = Form("600"),
|
||||||
|
openrouter_url: str = Form("https://openrouter.ai/api/v1"),
|
||||||
|
openrouter_api_key: str = Form(""),
|
||||||
|
openrouter_stt_model: str = Form(""),
|
||||||
|
openrouter_ocr_model: str = Form(""),
|
||||||
user: dict = Depends(require_auth),
|
user: dict = Depends(require_auth),
|
||||||
):
|
):
|
||||||
def _int(v, d):
|
def _int(v, d):
|
||||||
try: return max(0, int(v))
|
try: return max(0, int(v))
|
||||||
except: return d
|
except: return d
|
||||||
|
|
||||||
|
current = _load_settings()
|
||||||
|
# API 키가 비어있으면 기존 값 유지
|
||||||
|
final_key = openrouter_api_key.strip() if openrouter_api_key.strip() else current.get("openrouter_api_key","")
|
||||||
|
|
||||||
data = {
|
data = {
|
||||||
"stt_ollama_model": stt_ollama_model,
|
"stt_ollama_model": stt_ollama_model,
|
||||||
"ocr_ollama_model": ocr_ollama_model,
|
"ocr_ollama_model": ocr_ollama_model,
|
||||||
"cpu_threads": _int(cpu_threads, 0),
|
"cpu_threads": _int(cpu_threads, 0),
|
||||||
"stt_timeout": _int(stt_timeout, 0),
|
"stt_timeout": _int(stt_timeout, 0),
|
||||||
"ollama_timeout": _int(ollama_timeout, 600),
|
"ollama_timeout": _int(ollama_timeout, 600),
|
||||||
|
"openrouter_url": openrouter_url.strip() or "https://openrouter.ai/api/v1",
|
||||||
|
"openrouter_api_key": final_key,
|
||||||
|
"openrouter_stt_model": openrouter_stt_model,
|
||||||
|
"openrouter_ocr_model": openrouter_ocr_model,
|
||||||
}
|
}
|
||||||
_save_settings(data)
|
_save_settings(data)
|
||||||
return {"ok": True, "settings": data}
|
return {"ok": True, "settings": {k: v for k, v in data.items() if k != "openrouter_api_key"}}
|
||||||
|
|
||||||
|
|
||||||
|
# ════════════════════════════════════════════════════════════════
|
||||||
|
# 관리자
|
||||||
|
# ════════════════════════════════════════════════════════════════
|
||||||
@app.get("/api/admin/users")
|
@app.get("/api/admin/users")
|
||||||
def admin_list_users(user: dict=Depends(require_admin)): return {"users": list_users()}
|
def admin_list_users(user:dict=Depends(require_admin)): return {"users":list_users()}
|
||||||
|
|
||||||
@app.post("/api/admin/users")
|
@app.post("/api/admin/users")
|
||||||
def admin_create_user(
|
def admin_create_user(
|
||||||
username: str = Form(...),
|
username:str=Form(...),password:str=Form(...),
|
||||||
password: str = Form(...),
|
perm_stt:str=Form("false"),perm_ocr:str=Form("false"),
|
||||||
perm_stt: str = Form("false"),
|
allowed_stt_models:str=Form(""),allowed_ocr_models:str=Form(""),
|
||||||
perm_ocr: str = Form("false"),
|
user:dict=Depends(require_admin),
|
||||||
allowed_stt_models: str = Form(""), # 콤마 구분 모델명
|
|
||||||
allowed_ocr_models: str = Form(""),
|
|
||||||
user: dict = Depends(require_admin),
|
|
||||||
):
|
):
|
||||||
def _parse_models(s): return [m.strip() for m in s.split(",") if m.strip()]
|
def _p(s): return [m.strip() for m in s.split(",") if m.strip()]
|
||||||
perms = {
|
perms={"stt":perm_stt.lower()=="true","ocr":perm_ocr.lower()=="true",
|
||||||
"stt": perm_stt.lower() == "true",
|
"allowed_stt_models":_p(allowed_stt_models),"allowed_ocr_models":_p(allowed_ocr_models)}
|
||||||
"ocr": perm_ocr.lower() == "true",
|
ok,msg=create_user(username,password,perms)
|
||||||
"allowed_stt_models": _parse_models(allowed_stt_models),
|
if not ok: raise HTTPException(400,msg)
|
||||||
"allowed_ocr_models": _parse_models(allowed_ocr_models),
|
return {"ok":True,"message":msg}
|
||||||
}
|
|
||||||
ok, msg = create_user(username, password, perms)
|
|
||||||
if not ok: raise HTTPException(400, msg)
|
|
||||||
return {"ok": True, "message": msg}
|
|
||||||
|
|
||||||
@app.put("/api/admin/users/{username}")
|
@app.put("/api/admin/users/{username}")
|
||||||
def admin_update_user(
|
def admin_update_user(
|
||||||
username: str,
|
username:str,perm_stt:str=Form("false"),perm_ocr:str=Form("false"),
|
||||||
perm_stt: str = Form("false"),
|
password:str=Form(""),allowed_stt_models:str=Form(""),allowed_ocr_models:str=Form(""),
|
||||||
perm_ocr: str = Form("false"),
|
user:dict=Depends(require_admin),
|
||||||
password: str = Form(""),
|
|
||||||
allowed_stt_models: str = Form(""),
|
|
||||||
allowed_ocr_models: str = Form(""),
|
|
||||||
user: dict = Depends(require_admin),
|
|
||||||
):
|
):
|
||||||
def _parse_models(s): return [m.strip() for m in s.split(",") if m.strip()]
|
def _p(s): return [m.strip() for m in s.split(",") if m.strip()]
|
||||||
perms = {
|
perms={"stt":perm_stt.lower()=="true","ocr":perm_ocr.lower()=="true",
|
||||||
"stt": perm_stt.lower() == "true",
|
"allowed_stt_models":_p(allowed_stt_models),"allowed_ocr_models":_p(allowed_ocr_models)}
|
||||||
"ocr": perm_ocr.lower() == "true",
|
ok,msg=update_user(username,perms,password or None)
|
||||||
"allowed_stt_models": _parse_models(allowed_stt_models),
|
if not ok: raise HTTPException(400,msg)
|
||||||
"allowed_ocr_models": _parse_models(allowed_ocr_models),
|
return {"ok":True,"message":msg}
|
||||||
}
|
|
||||||
ok, msg = update_user(username, perms, password or None)
|
|
||||||
if not ok: raise HTTPException(400, msg)
|
|
||||||
return {"ok": True, "message": msg}
|
|
||||||
|
|
||||||
@app.delete("/api/admin/users/{username}")
|
@app.delete("/api/admin/users/{username}")
|
||||||
def admin_delete_user(username: str, user: dict=Depends(require_admin)):
|
def admin_delete_user(username:str,user:dict=Depends(require_admin)):
|
||||||
ok, msg = delete_user(username)
|
ok,msg=delete_user(username)
|
||||||
if not ok: raise HTTPException(400, msg)
|
if not ok: raise HTTPException(400,msg)
|
||||||
return {"ok": True, "message": msg}
|
return {"ok":True,"message":msg}
|
||||||
|
|
||||||
@app.post("/api/cleanup")
|
@app.post("/api/cleanup")
|
||||||
def cleanup(user: dict=Depends(require_auth)): return {"removed": _cleanup_outputs()}
|
def cleanup(user:dict=Depends(require_auth)): return {"removed":_cleanup_outputs()}
|
||||||
|
|
||||||
|
|
||||||
# ════════════════════════════════════════════════════════════════
|
# ════════════════════════════════════════════════════════════════
|
||||||
# 유틸
|
# 유틸
|
||||||
# ════════════════════════════════════════════════════════════════
|
# ════════════════════════════════════════════════════════════════
|
||||||
def _check_size(request: Request):
|
def _check_size(request):
|
||||||
cl = request.headers.get("content-length")
|
cl = request.headers.get("content-length")
|
||||||
if cl and int(cl) > MAX_UPLOAD_BYTES: raise HTTPException(413, f"파일이 너무 큽니다. 최대 {MAX_UPLOAD_BYTES//1024//1024}MB")
|
if cl and int(cl) > MAX_UPLOAD_BYTES:
|
||||||
|
raise HTTPException(413, f"파일이 너무 큽니다. 최대 {MAX_UPLOAD_BYTES//1024//1024}MB")
|
||||||
|
|
||||||
def _cleanup_outputs() -> int:
|
def _cleanup_outputs():
|
||||||
if OUTPUT_KEEP_SECS == 0: return 0
|
if OUTPUT_KEEP_SECS == 0: return 0
|
||||||
cutoff = time.time() - OUTPUT_KEEP_SECS; removed = 0
|
cutoff = time.time() - OUTPUT_KEEP_SECS; removed = 0
|
||||||
for f in glob.glob(os.path.join(OUTPUT_DIR, "*")):
|
for f in glob.glob(os.path.join(OUTPUT_DIR,"*")):
|
||||||
try:
|
try:
|
||||||
if os.path.getmtime(f) < cutoff: os.remove(f); removed += 1
|
if os.path.getmtime(f) < cutoff: os.remove(f); removed += 1
|
||||||
except: pass
|
except: pass
|
||||||
return removed
|
return removed
|
||||||
|
|
||||||
def _ext(fn): return fn.rsplit(".", 1)[-1].lower() if "." in fn else ""
|
def _ext(fn): return fn.rsplit(".",1)[-1].lower() if "." in fn else ""
|
||||||
|
|
||||||
async def _save(file, path):
|
async def _save(file, path):
|
||||||
written = 0
|
written = 0
|
||||||
async with aiofiles.open(path, "wb") as f:
|
async with aiofiles.open(path,"wb") as f:
|
||||||
while chunk := await file.read(1024 * 1024):
|
while chunk := await file.read(1024*1024):
|
||||||
written += len(chunk)
|
written += len(chunk)
|
||||||
if written > MAX_UPLOAD_BYTES:
|
if written > MAX_UPLOAD_BYTES:
|
||||||
await f.close(); os.remove(path)
|
await f.close(); os.remove(path)
|
||||||
|
|||||||
281
app/ocr_tasks.py
281
app/ocr_tasks.py
@@ -1,9 +1,8 @@
|
|||||||
"""
|
"""
|
||||||
OCR Celery Tasks — PaddleOCR 3.x + Ollama Vision
|
OCR Celery Tasks
|
||||||
|
backend: paddle | ollama | openrouter
|
||||||
"""
|
"""
|
||||||
import os
|
import os, base64
|
||||||
import base64
|
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
from celery import Celery
|
from celery import Celery
|
||||||
import openpyxl
|
import openpyxl
|
||||||
@@ -17,11 +16,8 @@ OLLAMA_TIMEOUT = int(os.getenv("OLLAMA_TIMEOUT", "600"))
|
|||||||
|
|
||||||
celery_app = Celery("ocr_tasks", broker=REDIS_URL, backend=REDIS_URL)
|
celery_app = Celery("ocr_tasks", broker=REDIS_URL, backend=REDIS_URL)
|
||||||
celery_app.conf.update(
|
celery_app.conf.update(
|
||||||
task_serializer="json",
|
task_serializer="json", result_serializer="json",
|
||||||
result_serializer="json",
|
accept_content=["json"], task_track_started=True, result_expires=3600,
|
||||||
accept_content=["json"],
|
|
||||||
task_track_started=True,
|
|
||||||
result_expires=3600,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
_ocr_engine = None
|
_ocr_engine = None
|
||||||
@@ -46,12 +42,28 @@ def get_structure():
|
|||||||
return _struct_engine
|
return _struct_engine
|
||||||
|
|
||||||
|
|
||||||
|
# ════════════════════════════════════════════════════════════════
|
||||||
|
# 메인 Task
|
||||||
|
# ════════════════════════════════════════════════════════════════
|
||||||
@celery_app.task(bind=True, name="tasks.ocr_task", queue="ocr")
|
@celery_app.task(bind=True, name="tasks.ocr_task", queue="ocr")
|
||||||
def ocr_task(self, file_id, image_path, mode="text",
|
def ocr_task(
|
||||||
backend="paddle", ollama_model="granite3.2-vision", custom_prompt=""):
|
self,
|
||||||
self.update_state(state="PROGRESS", meta={"progress": 8, "message": "엔진 준비 중..."})
|
file_id: str,
|
||||||
|
image_path: str,
|
||||||
|
mode: str = "text",
|
||||||
|
backend: str = "paddle",
|
||||||
|
ollama_model: str = "granite3.2-vision",
|
||||||
|
openrouter_model: str = "",
|
||||||
|
openrouter_url: str = "",
|
||||||
|
openrouter_key: str = "",
|
||||||
|
custom_prompt: str = "",
|
||||||
|
):
|
||||||
|
self.update_state(state="PROGRESS", meta={"progress":8,"message":"엔진 준비 중..."})
|
||||||
try:
|
try:
|
||||||
if backend == "ollama":
|
if backend == "openrouter":
|
||||||
|
result = _run_openrouter(self, file_id, image_path, mode,
|
||||||
|
openrouter_model, openrouter_url, openrouter_key, custom_prompt)
|
||||||
|
elif backend == "ollama":
|
||||||
result = _run_ollama(self, file_id, image_path, mode, ollama_model, custom_prompt)
|
result = _run_ollama(self, file_id, image_path, mode, ollama_model, custom_prompt)
|
||||||
else:
|
else:
|
||||||
result = _run_paddle(self, file_id, image_path, mode)
|
result = _run_paddle(self, file_id, image_path, mode)
|
||||||
@@ -64,34 +76,124 @@ def ocr_task(self, file_id, image_path, mode="text",
|
|||||||
raise Exception(f"OCR 실패: {str(e)}")
|
raise Exception(f"OCR 실패: {str(e)}")
|
||||||
|
|
||||||
|
|
||||||
_OLLAMA_PROMPTS = {
|
# ════════════════════════════════════════════════════════════════
|
||||||
|
# OpenRouter Vision 백엔드 (OpenAI 호환)
|
||||||
|
# ════════════════════════════════════════════════════════════════
|
||||||
|
_PROMPTS = {
|
||||||
"text": "이 이미지에서 모든 텍스트를 정확하게 추출해줘. 원본의 줄 구분과 단락 구조를 유지해줘.",
|
"text": "이 이미지에서 모든 텍스트를 정확하게 추출해줘. 원본의 줄 구분과 단락 구조를 유지해줘.",
|
||||||
"structure": "이 이미지를 분석해서 표는 마크다운 표 형식으로, 나머지 텍스트는 원본 구조를 유지하며 추출해줘.",
|
"structure": "이 이미지를 분석해서 표는 마크다운 표 형식으로, 나머지 텍스트는 원본 구조를 유지하며 추출해줘.",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def _run_openrouter(task, file_id, image_path, mode,
|
||||||
|
model, base_url, api_key, custom_prompt):
|
||||||
|
if not api_key:
|
||||||
|
raise Exception("OpenRouter API 키가 설정되지 않았습니다")
|
||||||
|
if not model:
|
||||||
|
raise Exception("OpenRouter 모델이 선택되지 않았습니다")
|
||||||
|
|
||||||
|
task.update_state(state="PROGRESS",
|
||||||
|
meta={"progress":15,"message":f"OpenRouter ({model}) 연결 중..."})
|
||||||
|
|
||||||
|
with open(image_path, "rb") as f:
|
||||||
|
raw = f.read()
|
||||||
|
|
||||||
|
# 이미지 MIME 타입 감지
|
||||||
|
ext = image_path.rsplit(".", 1)[-1].lower()
|
||||||
|
mime = {"jpg":"image/jpeg","jpeg":"image/jpeg","png":"image/png",
|
||||||
|
"bmp":"image/bmp","gif":"image/gif","webp":"image/webp"}.get(ext, "image/jpeg")
|
||||||
|
b64 = base64.b64encode(raw).decode()
|
||||||
|
data_url = f"data:{mime};base64,{b64}"
|
||||||
|
|
||||||
|
prompt = custom_prompt.strip() or _PROMPTS.get(mode, _PROMPTS["text"])
|
||||||
|
|
||||||
|
task.update_state(state="PROGRESS", meta={"progress":30,"message":"모델 추론 중..."})
|
||||||
|
|
||||||
|
try:
|
||||||
|
resp = httpx.post(
|
||||||
|
f"{base_url.rstrip('/')}/chat/completions",
|
||||||
|
headers={
|
||||||
|
"Authorization": f"Bearer {api_key}",
|
||||||
|
"HTTP-Referer": "https://voicescript.local",
|
||||||
|
"X-Title": "VoiceScript",
|
||||||
|
"Content-Type": "application/json",
|
||||||
|
},
|
||||||
|
json={
|
||||||
|
"model": model,
|
||||||
|
"messages": [{
|
||||||
|
"role": "user",
|
||||||
|
"content": [
|
||||||
|
{"type": "image_url", "image_url": {"url": data_url}},
|
||||||
|
{"type": "text", "text": prompt},
|
||||||
|
],
|
||||||
|
}],
|
||||||
|
"temperature": 0.1,
|
||||||
|
},
|
||||||
|
timeout=float(OLLAMA_TIMEOUT),
|
||||||
|
)
|
||||||
|
resp.raise_for_status()
|
||||||
|
except httpx.HTTPStatusError as e:
|
||||||
|
body = ""
|
||||||
|
try: body = e.response.json().get("error",{}).get("message","")
|
||||||
|
except: pass
|
||||||
|
if e.response.status_code == 400:
|
||||||
|
raise Exception(f"이 모델은 이미지를 지원하지 않습니다 — Vision 모델을 선택하세요\n({model})")
|
||||||
|
raise Exception(f"OpenRouter 오류 ({e.response.status_code}): {body or str(e)}")
|
||||||
|
except httpx.TimeoutException:
|
||||||
|
raise Exception(f"OpenRouter 응답 시간 초과. OLLAMA_TIMEOUT 값을 늘려주세요.")
|
||||||
|
|
||||||
|
task.update_state(state="PROGRESS", meta={"progress":85,"message":"결과 저장 중..."})
|
||||||
|
|
||||||
|
full_text = resp.json()["choices"][0]["message"]["content"].strip()
|
||||||
|
if not full_text:
|
||||||
|
raise Exception("OpenRouter 빈 응답")
|
||||||
|
|
||||||
|
tables = _parse_md_tables(full_text) if mode == "structure" else []
|
||||||
|
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
||||||
|
txt_file = f"{file_id}_ocr.txt"
|
||||||
|
with open(os.path.join(OUTPUT_DIR, txt_file), "w", encoding="utf-8") as f:
|
||||||
|
f.write(f"# OCR 결과 (OpenRouter / {model})\n\n{full_text}")
|
||||||
|
xlsx_file = None
|
||||||
|
if tables:
|
||||||
|
xlsx_file = f"{file_id}_tables.xlsx"
|
||||||
|
_save_excel(tables, os.path.join(OUTPUT_DIR, xlsx_file))
|
||||||
|
tables_html = [_md_table_to_html(t) for t in tables]
|
||||||
|
lines = [{"text":l,"confidence":1.0,"bbox":[]} for l in full_text.splitlines() if l.strip()]
|
||||||
|
return {
|
||||||
|
"mode": mode, "backend": "openrouter", "openrouter_model": model,
|
||||||
|
"ollama_model": "",
|
||||||
|
"full_text": full_text, "lines": lines, "line_count": len(lines),
|
||||||
|
"txt_file": txt_file,
|
||||||
|
"tables": [{"html":h,"rows":len(t),"cols":max(len(r) for r in t) if t else 0}
|
||||||
|
for h, t in zip(tables_html, tables)],
|
||||||
|
"xlsx_file": xlsx_file,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# ════════════════════════════════════════════════════════════════
|
||||||
|
# Ollama Vision 백엔드
|
||||||
|
# ════════════════════════════════════════════════════════════════
|
||||||
def _run_ollama(task, file_id, image_path, mode, ollama_model, custom_prompt):
|
def _run_ollama(task, file_id, image_path, mode, ollama_model, custom_prompt):
|
||||||
task.update_state(state="PROGRESS",
|
task.update_state(state="PROGRESS",
|
||||||
meta={"progress": 15, "message": f"Ollama ({ollama_model}) 연결 중..."})
|
meta={"progress":15,"message":f"Ollama ({ollama_model}) 연결 중..."})
|
||||||
with open(image_path, "rb") as f:
|
with open(image_path, "rb") as f:
|
||||||
img_b64 = base64.b64encode(f.read()).decode()
|
img_b64 = base64.b64encode(f.read()).decode()
|
||||||
prompt = custom_prompt.strip() or _OLLAMA_PROMPTS.get(mode, _OLLAMA_PROMPTS["text"])
|
prompt = custom_prompt.strip() or _PROMPTS.get(mode, _PROMPTS["text"])
|
||||||
task.update_state(state="PROGRESS", meta={"progress": 30, "message": "모델 추론 중..."})
|
task.update_state(state="PROGRESS", meta={"progress":30,"message":"모델 추론 중..."})
|
||||||
try:
|
try:
|
||||||
resp = httpx.post(f"{OLLAMA_URL}/api/chat", json={
|
resp = httpx.post(f"{OLLAMA_URL}/api/chat", json={
|
||||||
"model": ollama_model,
|
"model": ollama_model,
|
||||||
"messages": [{"role": "user", "content": prompt, "images": [img_b64]}],
|
"messages": [{"role":"user","content":prompt,"images":[img_b64]}],
|
||||||
"stream": False, "options": {"temperature": 0.1},
|
"stream": False, "options": {"temperature":0.1},
|
||||||
}, timeout=float(OLLAMA_TIMEOUT))
|
}, timeout=float(OLLAMA_TIMEOUT))
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
except httpx.ConnectError:
|
except httpx.ConnectError:
|
||||||
raise Exception(f"Ollama 서버 연결 실패 ({OLLAMA_URL})")
|
raise Exception(f"Ollama 서버 연결 실패 ({OLLAMA_URL})")
|
||||||
except httpx.TimeoutException:
|
except httpx.TimeoutException:
|
||||||
raise Exception(f"Ollama 응답 시간 초과 ({OLLAMA_TIMEOUT}초). OLLAMA_TIMEOUT 값을 늘려주세요.")
|
raise Exception(f"Ollama 응답 시간 초과 ({OLLAMA_TIMEOUT}초)")
|
||||||
|
|
||||||
task.update_state(state="PROGRESS", meta={"progress": 85, "message": "결과 저장 중..."})
|
task.update_state(state="PROGRESS", meta={"progress":85,"message":"결과 저장 중..."})
|
||||||
full_text = resp.json().get("message", {}).get("content", "").strip()
|
full_text = resp.json().get("message",{}).get("content","").strip()
|
||||||
if not full_text:
|
if not full_text: raise Exception("Ollama 빈 응답. 모델이 Vision을 지원하는지 확인하세요.")
|
||||||
raise Exception("Ollama 빈 응답. 모델이 설치되어 있는지 확인하세요.")
|
|
||||||
|
|
||||||
tables = _parse_md_tables(full_text) if mode == "structure" else []
|
tables = _parse_md_tables(full_text) if mode == "structure" else []
|
||||||
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
||||||
@@ -103,74 +205,66 @@ def _run_ollama(task, file_id, image_path, mode, ollama_model, custom_prompt):
|
|||||||
xlsx_file = f"{file_id}_tables.xlsx"
|
xlsx_file = f"{file_id}_tables.xlsx"
|
||||||
_save_excel(tables, os.path.join(OUTPUT_DIR, xlsx_file))
|
_save_excel(tables, os.path.join(OUTPUT_DIR, xlsx_file))
|
||||||
tables_html = [_md_table_to_html(t) for t in tables]
|
tables_html = [_md_table_to_html(t) for t in tables]
|
||||||
lines = [{"text": l, "confidence": 1.0, "bbox": []}
|
lines = [{"text":l,"confidence":1.0,"bbox":[]} for l in full_text.splitlines() if l.strip()]
|
||||||
for l in full_text.splitlines() if l.strip()]
|
|
||||||
return {
|
return {
|
||||||
"mode": mode, "backend": "ollama", "ollama_model": ollama_model,
|
"mode": mode, "backend": "ollama", "ollama_model": ollama_model,
|
||||||
|
"openrouter_model": "",
|
||||||
"full_text": full_text, "lines": lines, "line_count": len(lines),
|
"full_text": full_text, "lines": lines, "line_count": len(lines),
|
||||||
"txt_file": txt_file,
|
"txt_file": txt_file,
|
||||||
"tables": [{"html": h, "rows": len(t), "cols": max(len(r) for r in t) if t else 0}
|
"tables": [{"html":h,"rows":len(t),"cols":max(len(r) for r in t) if t else 0}
|
||||||
for h, t in zip(tables_html, tables)],
|
for h, t in zip(tables_html, tables)],
|
||||||
"xlsx_file": xlsx_file,
|
"xlsx_file": xlsx_file,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# ════════════════════════════════════════════════════════════════
|
||||||
|
# PaddleOCR 백엔드
|
||||||
|
# ════════════════════════════════════════════════════════════════
|
||||||
def _run_paddle(task, file_id, image_path, mode):
|
def _run_paddle(task, file_id, image_path, mode):
|
||||||
import cv2
|
import cv2
|
||||||
img = cv2.imread(image_path)
|
img = cv2.imread(image_path)
|
||||||
if img is None:
|
if img is None: raise ValueError("이미지를 읽을 수 없습니다")
|
||||||
raise ValueError("이미지를 읽을 수 없습니다")
|
|
||||||
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
||||||
return _paddle_structure(task, file_id, img) if mode == "structure" \
|
return _paddle_structure(task, file_id, img) if mode == "structure" else _paddle_text(task, file_id, img)
|
||||||
else _paddle_text(task, file_id, img)
|
|
||||||
|
|
||||||
def _paddle_text(task, file_id, img):
|
def _paddle_text(task, file_id, img):
|
||||||
task.update_state(state="PROGRESS", meta={"progress": 30, "message": "텍스트 인식 중..."})
|
task.update_state(state="PROGRESS", meta={"progress":30,"message":"텍스트 인식 중..."})
|
||||||
result = get_ocr().ocr(img)
|
result = get_ocr().ocr(img)
|
||||||
task.update_state(state="PROGRESS", meta={"progress": 80, "message": "결과 정리 중..."})
|
task.update_state(state="PROGRESS", meta={"progress":80,"message":"결과 정리 중..."})
|
||||||
lines = []
|
lines = []
|
||||||
if result and len(result) > 0:
|
if result and len(result) > 0:
|
||||||
r = result[0]
|
r = result[0]
|
||||||
if isinstance(r, dict):
|
if isinstance(r, dict):
|
||||||
texts = r.get("rec_texts", [])
|
for text, conf in zip(r.get("rec_texts",[]), r.get("rec_scores",[])):
|
||||||
scores = r.get("rec_scores", [])
|
if text.strip(): lines.append({"text":text,"confidence":round(float(conf),3),"bbox":[]})
|
||||||
for text, conf in zip(texts, scores):
|
|
||||||
if text.strip():
|
|
||||||
lines.append({"text": text, "confidence": round(float(conf), 3), "bbox": []})
|
|
||||||
elif isinstance(r, list):
|
elif isinstance(r, list):
|
||||||
for item in r:
|
for item in r:
|
||||||
if item and len(item) == 2:
|
if item and len(item)==2:
|
||||||
_, (text, conf) = item
|
_, (text, conf) = item
|
||||||
if text.strip():
|
if text.strip(): lines.append({"text":text,"confidence":round(float(conf),3),"bbox":[]})
|
||||||
lines.append({"text": text, "confidence": round(float(conf), 3), "bbox": []})
|
|
||||||
full_text = "\n".join(l["text"] for l in lines)
|
full_text = "\n".join(l["text"] for l in lines)
|
||||||
txt_file = f"{file_id}_ocr.txt"
|
txt_file = f"{file_id}_ocr.txt"
|
||||||
with open(os.path.join(OUTPUT_DIR, txt_file), "w", encoding="utf-8") as f:
|
with open(os.path.join(OUTPUT_DIR, txt_file), "w", encoding="utf-8") as f: f.write(full_text)
|
||||||
f.write(full_text)
|
return {"mode":"text","backend":"paddle","ollama_model":"","openrouter_model":"",
|
||||||
return {"mode": "text", "backend": "paddle", "ollama_model": "",
|
"full_text":full_text,"lines":lines,"line_count":len(lines),
|
||||||
"full_text": full_text, "lines": lines,
|
"txt_file":txt_file,"tables":[],"xlsx_file":None}
|
||||||
"line_count": len(lines), "txt_file": txt_file,
|
|
||||||
"tables": [], "xlsx_file": None}
|
|
||||||
|
|
||||||
def _paddle_structure(task, file_id, img):
|
def _paddle_structure(task, file_id, img):
|
||||||
task.update_state(state="PROGRESS", meta={"progress": 20, "message": "레이아웃 분석 중..."})
|
task.update_state(state="PROGRESS", meta={"progress":20,"message":"레이아웃 분석 중..."})
|
||||||
result = get_structure()(img)
|
result = get_structure()(img)
|
||||||
task.update_state(state="PROGRESS", meta={"progress": 60, "message": "표 구조 추출 중..."})
|
task.update_state(state="PROGRESS", meta={"progress":60,"message":"표 구조 추출 중..."})
|
||||||
text_blocks, tables_html, tables_data = [], [], []
|
text_blocks, tables_html, tables_data = [], [], []
|
||||||
for region in result:
|
for region in result:
|
||||||
rtype = region.get("type", "").lower()
|
rtype = region.get("type","").lower()
|
||||||
if rtype == "table":
|
if rtype == "table":
|
||||||
html = region.get("res", {}).get("html", "")
|
html = region.get("res",{}).get("html","")
|
||||||
if html:
|
if html: tables_html.append(html); tables_data.append(_html_table_to_list(html))
|
||||||
tables_html.append(html)
|
elif rtype in ("text","title","figure_caption"):
|
||||||
tables_data.append(_html_table_to_list(html))
|
for line in (region.get("res",[]) or []):
|
||||||
elif rtype in ("text", "title", "figure_caption"):
|
if isinstance(line,(list,tuple)) and len(line)==2:
|
||||||
for line in (region.get("res", []) or []):
|
_, (text, _conf) = line; text_blocks.append(text)
|
||||||
if isinstance(line, (list, tuple)) and len(line) == 2:
|
|
||||||
_, (text, _conf) = line
|
|
||||||
text_blocks.append(text)
|
|
||||||
full_text = "\n".join(text_blocks)
|
full_text = "\n".join(text_blocks)
|
||||||
task.update_state(state="PROGRESS", meta={"progress": 80, "message": "Excel 생성 중..."})
|
task.update_state(state="PROGRESS", meta={"progress":80,"message":"Excel 생성 중..."})
|
||||||
xlsx_file = None
|
xlsx_file = None
|
||||||
if tables_data:
|
if tables_data:
|
||||||
xlsx_file = f"{file_id}_tables.xlsx"
|
xlsx_file = f"{file_id}_tables.xlsx"
|
||||||
@@ -178,15 +272,17 @@ def _paddle_structure(task, file_id, img):
|
|||||||
txt_file = f"{file_id}_ocr.txt"
|
txt_file = f"{file_id}_ocr.txt"
|
||||||
with open(os.path.join(OUTPUT_DIR, txt_file), "w", encoding="utf-8") as f:
|
with open(os.path.join(OUTPUT_DIR, txt_file), "w", encoding="utf-8") as f:
|
||||||
f.write("# 텍스트\n\n" + full_text)
|
f.write("# 텍스트\n\n" + full_text)
|
||||||
lines = [{"text": t, "confidence": 1.0, "bbox": []} for t in text_blocks]
|
lines = [{"text":t,"confidence":1.0,"bbox":[]} for t in text_blocks]
|
||||||
tables_meta = [{"html": h, "rows": len(d), "cols": max(len(r) for r in d) if d else 0}
|
tables_meta = [{"html":h,"rows":len(d),"cols":max(len(r) for r in d) if d else 0}
|
||||||
for h, d in zip(tables_html, tables_data)]
|
for h, d in zip(tables_html, tables_data)]
|
||||||
return {"mode": "structure", "backend": "paddle", "ollama_model": "",
|
return {"mode":"structure","backend":"paddle","ollama_model":"","openrouter_model":"",
|
||||||
"full_text": full_text, "lines": lines,
|
"full_text":full_text,"lines":lines,"line_count":len(lines),
|
||||||
"line_count": len(lines), "txt_file": txt_file,
|
"txt_file":txt_file,"tables":tables_meta,"xlsx_file":xlsx_file}
|
||||||
"tables": tables_meta, "xlsx_file": xlsx_file}
|
|
||||||
|
|
||||||
|
|
||||||
|
# ════════════════════════════════════════════════════════════════
|
||||||
|
# 공통 유틸
|
||||||
|
# ════════════════════════════════════════════════════════════════
|
||||||
def _parse_md_tables(text):
|
def _parse_md_tables(text):
|
||||||
tables, current = [], []
|
tables, current = [], []
|
||||||
for line in text.splitlines():
|
for line in text.splitlines():
|
||||||
@@ -204,8 +300,8 @@ def _md_table_to_html(table):
|
|||||||
if not table: return ""
|
if not table: return ""
|
||||||
rows = ""
|
rows = ""
|
||||||
for i, row in enumerate(table):
|
for i, row in enumerate(table):
|
||||||
tag = "th" if i == 0 else "td"
|
tag = "th" if i==0 else "td"
|
||||||
rows += "<tr>" + "".join(f"<{tag}>{c}</{tag}>" for c in row) + "</tr>"
|
rows += "<tr>"+"".join(f"<{tag}>{c}</{tag}>" for c in row)+"</tr>"
|
||||||
return f"<table>{rows}</table>"
|
return f"<table>{rows}</table>"
|
||||||
|
|
||||||
def _html_table_to_list(html):
|
def _html_table_to_list(html):
|
||||||
@@ -213,36 +309,31 @@ def _html_table_to_list(html):
|
|||||||
class P(HTMLParser):
|
class P(HTMLParser):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.rows, self._row, self._cell, self._in = [], [], [], False
|
self.rows,self._row,self._cell,self._in=[],[],[],False
|
||||||
def handle_starttag(self, tag, attrs):
|
def handle_starttag(self,tag,attrs):
|
||||||
if tag == "tr": self._row = []
|
if tag=="tr": self._row=[]
|
||||||
elif tag in ("td","th"): self._cell = []; self._in = True
|
elif tag in("td","th"): self._cell=[];self._in=True
|
||||||
def handle_endtag(self, tag):
|
def handle_endtag(self,tag):
|
||||||
if tag in ("td","th"): self._row.append("".join(self._cell).strip()); self._in = False
|
if tag in("td","th"): self._row.append("".join(self._cell).strip());self._in=False
|
||||||
elif tag == "tr":
|
elif tag=="tr":
|
||||||
if self._row: self.rows.append(self._row)
|
if self._row: self.rows.append(self._row)
|
||||||
def handle_data(self, data):
|
def handle_data(self,data):
|
||||||
if self._in: self._cell.append(data)
|
if self._in: self._cell.append(data)
|
||||||
p = P(); p.feed(html); return p.rows
|
p=P();p.feed(html);return p.rows
|
||||||
|
|
||||||
def _save_excel(tables, path):
|
def _save_excel(tables, path):
|
||||||
wb = openpyxl.Workbook(); wb.remove(wb.active)
|
wb=openpyxl.Workbook();wb.remove(wb.active)
|
||||||
for i, table in enumerate(tables, 1):
|
for i,table in enumerate(tables,1):
|
||||||
ws = wb.create_sheet(f"표 {i}")
|
ws=wb.create_sheet(f"표 {i}")
|
||||||
thin = Side(style="thin", color="2A2A33")
|
thin=Side(style="thin",color="2A2A33");bdr=Border(left=thin,right=thin,top=thin,bottom=thin)
|
||||||
bdr = Border(left=thin, right=thin, top=thin, bottom=thin)
|
for r_idx,row in enumerate(table,1):
|
||||||
for r_idx, row in enumerate(table, 1):
|
for c_idx,val in enumerate(row,1):
|
||||||
for c_idx, val in enumerate(row, 1):
|
cell=ws.cell(row=r_idx,column=c_idx,value=val)
|
||||||
cell = ws.cell(row=r_idx, column=c_idx, value=val)
|
cell.border=bdr;cell.alignment=Alignment(horizontal="center",vertical="center",wrap_text=True)
|
||||||
cell.border = bdr
|
if r_idx==1: cell.fill=PatternFill("solid",fgColor="1A1A2E");cell.font=Font(color="00E5A0",bold=True,size=10)
|
||||||
cell.alignment = Alignment(horizontal="center", vertical="center", wrap_text=True)
|
else: cell.font=Font(size=10)
|
||||||
if r_idx == 1:
|
|
||||||
cell.fill = PatternFill("solid", fgColor="1A1A2E")
|
|
||||||
cell.font = Font(color="00E5A0", bold=True, size=10)
|
|
||||||
else:
|
|
||||||
cell.font = Font(size=10)
|
|
||||||
for col in ws.columns:
|
for col in ws.columns:
|
||||||
w = max((len(str(c.value or "")) for c in col), default=8)
|
w=max((len(str(c.value or "")) for c in col),default=8)
|
||||||
ws.column_dimensions[col[0].column_letter].width = min(w + 4, 40)
|
ws.column_dimensions[col[0].column_letter].width=min(w+4,40)
|
||||||
if not wb.sheetnames: wb.create_sheet("Sheet1")
|
if not wb.sheetnames: wb.create_sheet("Sheet1")
|
||||||
wb.save(path)
|
wb.save(path)
|
||||||
|
|||||||
@@ -251,6 +251,17 @@ textarea.cprompt{width:100%;background:var(--surf);border:1px solid var(--border
|
|||||||
.ollama-status{font-family:var(--mono);font-size:.63rem;padding:4px 9px;border-radius:2px}
|
.ollama-status{font-family:var(--mono);font-size:.63rem;padding:4px 9px;border-radius:2px}
|
||||||
.ollama-status.ok{background:rgba(0,229,160,.1);color:var(--accent);border:1px solid rgba(0,229,160,.2)}
|
.ollama-status.ok{background:rgba(0,229,160,.1);color:var(--accent);border:1px solid rgba(0,229,160,.2)}
|
||||||
.ollama-status.fail{background:rgba(255,107,53,.1);color:var(--warn);border:1px solid rgba(255,107,53,.2)}
|
.ollama-status.fail{background:rgba(255,107,53,.1);color:var(--warn);border:1px solid rgba(255,107,53,.2)}
|
||||||
|
.openrouter-status.ok{background:rgba(77,166,255,.1);color:var(--blue);border:1px solid rgba(77,166,255,.2)}
|
||||||
|
.openrouter-status.fail{background:rgba(255,107,53,.1);color:var(--warn);border:1px solid rgba(255,107,53,.2)}
|
||||||
|
.or-section{margin-top:10px;padding:12px;background:var(--surf2);border:1px solid #1c2840;border-radius:4px}
|
||||||
|
.key-input-wrap{display:flex;gap:6px;margin-top:6px}
|
||||||
|
.key-input-wrap input{flex:1;background:var(--surf);border:1px solid var(--border2);color:var(--text);padding:9px 10px;border-radius:3px;font-family:var(--mono);font-size:.78rem;outline:none;-webkit-appearance:none}
|
||||||
|
.key-input-wrap input:focus{border-color:var(--blue)}
|
||||||
|
.btn-test{padding:9px 14px;background:none;border:1px solid #3a7cc4;color:var(--blue);border-radius:3px;font-family:var(--mono);font-size:.68rem;cursor:pointer;white-space:nowrap;transition:all .15s}
|
||||||
|
.btn-test:hover{background:rgba(77,166,255,.08)}
|
||||||
|
.or-model-tabs{display:flex;gap:5px;margin-top:8px;flex-wrap:wrap}
|
||||||
|
.or-model-tab{font-family:var(--mono);font-size:.6rem;padding:4px 10px;border:1px solid var(--border2);background:none;color:var(--muted);border-radius:2px;cursor:pointer;transition:all .12s;text-transform:uppercase}
|
||||||
|
.or-model-tab.active{border-color:var(--blue);color:var(--blue);background:rgba(77,166,255,.07)}
|
||||||
|
|
||||||
/* ── ADMIN ── */
|
/* ── ADMIN ── */
|
||||||
#page-admin{display:none;flex-direction:column}
|
#page-admin{display:none;flex-direction:column}
|
||||||
@@ -372,11 +383,17 @@ textarea.cprompt{width:100%;background:var(--surf);border:1px solid var(--border
|
|||||||
<div class="engine-btns">
|
<div class="engine-btns">
|
||||||
<button class="engine-btn active" data-engine="whisper"><span class="e-icon">⚡</span><span class="e-name">faster-whisper</span><span class="e-desc">로컬 CPU 변환<br>빠르고 안정적</span></button>
|
<button class="engine-btn active" data-engine="whisper"><span class="e-icon">⚡</span><span class="e-name">faster-whisper</span><span class="e-desc">로컬 CPU 변환<br>빠르고 안정적</span></button>
|
||||||
<button class="engine-btn" data-engine="whisper+ollama"><span class="e-icon">🦙</span><span class="e-name">+ Ollama 교정</span><span class="e-desc">Whisper 후<br>Ollama 교정</span></button>
|
<button class="engine-btn" data-engine="whisper+ollama"><span class="e-icon">🦙</span><span class="e-name">+ Ollama 교정</span><span class="e-desc">Whisper 후<br>Ollama 교정</span></button>
|
||||||
|
<button class="engine-btn" data-engine="whisper+openrouter" style="grid-column:1/-1"><span class="e-icon">🌐</span><span class="e-name">+ OpenRouter 교정</span><span class="e-desc">외부 AI 모델로 문장 부호·맞춤법 교정 (텍스트 전용 모델도 사용 가능)</span></button>
|
||||||
</div>
|
</div>
|
||||||
<div class="ollama-opts" id="stt-ollama-opts">
|
<div class="ollama-opts" id="stt-ollama-opts">
|
||||||
<div class="sec-label" style="margin-top:0">후처리 모델</div>
|
<div class="sec-label" style="margin-top:0">후처리 모델</div>
|
||||||
<select class="model-select" id="stt-ollama-model"><option value="">설정 기본 모델 사용</option></select>
|
<select class="model-select" id="stt-ollama-model"><option value="">설정 기본 모델 사용</option></select>
|
||||||
</div>
|
</div>
|
||||||
|
<div class="ollama-opts" id="stt-or-opts">
|
||||||
|
<div class="sec-label" style="margin-top:0">OpenRouter 후처리 모델</div>
|
||||||
|
<select class="model-select" id="stt-or-model"><option value="">설정 기본 모델 사용</option></select>
|
||||||
|
<div style="font-family:var(--mono);font-size:.6rem;color:var(--muted);margin-top:5px">⚙️ 설정 → OpenRouter에서 API 키 및 기본 모델을 설정하세요</div>
|
||||||
|
</div>
|
||||||
<button class="btn-start green" id="stt-btn" disabled>변환 시작</button>
|
<button class="btn-start green" id="stt-btn" disabled>변환 시작</button>
|
||||||
<div class="prog-box" id="stt-prog">
|
<div class="prog-box" id="stt-prog">
|
||||||
<div class="prog-header"><span class="prog-msg" id="stt-pmsg">처리 중...</span><span class="prog-pct" id="stt-ppct">0%</span></div>
|
<div class="prog-header"><span class="prog-msg" id="stt-pmsg">처리 중...</span><span class="prog-pct" id="stt-ppct">0%</span></div>
|
||||||
@@ -428,6 +445,7 @@ textarea.cprompt{width:100%;background:var(--surf);border:1px solid var(--border
|
|||||||
<div class="engine-btns">
|
<div class="engine-btns">
|
||||||
<button class="engine-btn active" data-engine="paddle"><span class="e-icon">🐾</span><span class="e-name">PaddleOCR</span><span class="e-desc">로컬 실행<br>표 구조 분석</span></button>
|
<button class="engine-btn active" data-engine="paddle"><span class="e-icon">🐾</span><span class="e-name">PaddleOCR</span><span class="e-desc">로컬 실행<br>표 구조 분석</span></button>
|
||||||
<button class="engine-btn" data-engine="ollama"><span class="e-icon">🦙</span><span class="e-name">Ollama Vision</span><span class="e-desc">자연어 지시<br>커스텀 프롬프트</span></button>
|
<button class="engine-btn" data-engine="ollama"><span class="e-icon">🦙</span><span class="e-name">Ollama Vision</span><span class="e-desc">자연어 지시<br>커스텀 프롬프트</span></button>
|
||||||
|
<button class="engine-btn" data-engine="openrouter" style="grid-column:1/-1"><span class="e-icon">🌐</span><span class="e-name">OpenRouter Vision</span><span class="e-desc">Claude / GPT-4o / Gemini 등 외부 Vision 모델 사용</span></button>
|
||||||
</div>
|
</div>
|
||||||
<div class="ollama-opts" id="ocr-ollama-opts">
|
<div class="ollama-opts" id="ocr-ollama-opts">
|
||||||
<div class="sec-label" style="margin-top:0">Vision 모델</div>
|
<div class="sec-label" style="margin-top:0">Vision 모델</div>
|
||||||
@@ -435,6 +453,13 @@ textarea.cprompt{width:100%;background:var(--surf);border:1px solid var(--border
|
|||||||
<span class="cprompt-toggle" id="cprompt-toggle">▶ 커스텀 프롬프트</span>
|
<span class="cprompt-toggle" id="cprompt-toggle">▶ 커스텀 프롬프트</span>
|
||||||
<textarea class="cprompt" id="custom-prompt" placeholder="예: 이 영수증의 품목과 금액을 JSON으로 추출해줘"></textarea>
|
<textarea class="cprompt" id="custom-prompt" placeholder="예: 이 영수증의 품목과 금액을 JSON으로 추출해줘"></textarea>
|
||||||
</div>
|
</div>
|
||||||
|
<div class="ollama-opts" id="ocr-or-opts">
|
||||||
|
<div class="sec-label" style="margin-top:0">OpenRouter Vision 모델</div>
|
||||||
|
<select class="model-select" id="ocr-or-model"><option value="">설정 기본 모델 사용</option></select>
|
||||||
|
<span class="cprompt-toggle" id="cprompt-toggle-or">▶ 커스텀 프롬프트</span>
|
||||||
|
<textarea class="cprompt" id="custom-prompt-or" placeholder="예: 이 영수증의 품목과 금액을 JSON으로 추출해줘"></textarea>
|
||||||
|
<div style="font-family:var(--mono);font-size:.6rem;color:var(--muted);margin-top:5px">⚠️ Vision 기능을 지원하는 모델만 이미지 처리 가능 (Claude-3, GPT-4o, Gemini 등)</div>
|
||||||
|
</div>
|
||||||
<div class="sec-label">인식 모드</div>
|
<div class="sec-label">인식 모드</div>
|
||||||
<div class="mode-btns">
|
<div class="mode-btns">
|
||||||
<button class="mode-btn active" data-mode="text">📄 텍스트 추출</button>
|
<button class="mode-btn active" data-mode="text">📄 텍스트 추출</button>
|
||||||
@@ -560,6 +585,40 @@ textarea.cprompt{width:100%;background:var(--surf);border:1px solid var(--border
|
|||||||
<select class="settings-select" id="setting-ocr-model" style="margin-top:8px"><option value="">(없음)</option></select>
|
<select class="settings-select" id="setting-ocr-model" style="margin-top:8px"><option value="">(없음)</option></select>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<!-- OpenRouter -->
|
||||||
|
<div class="settings-section">
|
||||||
|
<h3>🌐 OpenRouter 외부 AI 연동</h3>
|
||||||
|
<label class="settings-label">API 키<small>openrouter.ai에서 발급 — 저장 후 "연결 테스트"로 확인</small></label>
|
||||||
|
<div class="key-input-wrap">
|
||||||
|
<input type="password" id="or-api-key" placeholder="sk-or-v1-..." autocomplete="off">
|
||||||
|
<button class="btn-test" id="btn-or-test">연결 테스트</button>
|
||||||
|
</div>
|
||||||
|
<div id="or-test-result" style="font-family:var(--mono);font-size:.68rem;margin-top:6px;display:none"></div>
|
||||||
|
|
||||||
|
<label class="settings-label" style="margin-top:12px">API URL<small>기본값 사용 권장</small></label>
|
||||||
|
<input type="text" id="or-url" value="https://openrouter.ai/api/v1"
|
||||||
|
style="width:100%;background:var(--surf2);border:1px solid var(--border2);color:var(--text);padding:9px 10px;border-radius:3px;font-family:var(--mono);font-size:.75rem;outline:none;margin-top:6px">
|
||||||
|
|
||||||
|
<div id="or-models-wrap" style="display:none;margin-top:14px">
|
||||||
|
<div style="display:flex;align-items:center;gap:8px;margin-bottom:8px;flex-wrap:wrap">
|
||||||
|
<span id="or-connected-badge" class="openrouter-status ok"></span>
|
||||||
|
<div class="or-model-tabs">
|
||||||
|
<button class="or-model-tab active" data-filter="vision">Vision 모델</button>
|
||||||
|
<button class="or-model-tab" data-filter="text">텍스트 모델</button>
|
||||||
|
<button class="or-model-tab" data-filter="all">전체</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<label class="settings-label">STT 교정 기본 모델<small>텍스트 전용 모델도 사용 가능</small></label>
|
||||||
|
<select class="settings-select" id="setting-or-stt-model" style="margin-top:6px">
|
||||||
|
<option value="">(없음)</option>
|
||||||
|
</select>
|
||||||
|
<label class="settings-label" style="margin-top:10px">OCR 기본 Vision 모델<small>반드시 Vision 지원 모델 선택</small></label>
|
||||||
|
<select class="settings-select" id="setting-or-ocr-model" style="margin-top:6px">
|
||||||
|
<option value="">(없음)</option>
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
<div style="display:flex;gap:10px;justify-content:flex-end;align-items:center">
|
<div style="display:flex;gap:10px;justify-content:flex-end;align-items:center">
|
||||||
<div id="settings-msg" style="font-family:var(--mono);font-size:.68rem;color:var(--accent);display:none">✓ 저장됨 (CPU·타임아웃: worker 재시작 후 반영)</div>
|
<div id="settings-msg" style="font-family:var(--mono);font-size:.68rem;color:var(--accent);display:none">✓ 저장됨 (CPU·타임아웃: worker 재시작 후 반영)</div>
|
||||||
<button class="btn-settings blue" id="btn-save-settings">저장</button>
|
<button class="btn-settings blue" id="btn-save-settings">저장</button>
|
||||||
@@ -643,6 +702,7 @@ textarea.cprompt{width:100%;background:var(--surf);border:1px solid var(--border
|
|||||||
// ══ STATE ══
|
// ══ STATE ══
|
||||||
let token=null,currentUser=null,ollamaModels=[],appSettings={};
|
let token=null,currentUser=null,ollamaModels=[],appSettings={};
|
||||||
let sttFile=null,sttOutputFile=null,sttEngine='whisper';
|
let sttFile=null,sttOutputFile=null,sttEngine='whisper';
|
||||||
|
let orModels=[],orVisionModels=[],orTextModels=[];
|
||||||
let ocrFile=null,ocrOutputTxt=null,ocrOutputXlsx=null,ocrEngine='paddle',ocrMode='text';
|
let ocrFile=null,ocrOutputTxt=null,ocrOutputXlsx=null,ocrEngine='paddle',ocrMode='text';
|
||||||
let editTarget=null,sysTimer=null;
|
let editTarget=null,sysTimer=null;
|
||||||
let histPage=1,histType='',histTotal=0;
|
let histPage=1,histType='',histTotal=0;
|
||||||
@@ -721,6 +781,8 @@ function populateModelSelects(){
|
|||||||
fill(document.getElementById('ocr-ollama-model'),appSettings.ocr_ollama_model,'설정 기본 모델 사용');
|
fill(document.getElementById('ocr-ollama-model'),appSettings.ocr_ollama_model,'설정 기본 모델 사용');
|
||||||
fill(document.getElementById('setting-stt-model'),appSettings.stt_ollama_model,'(없음)');
|
fill(document.getElementById('setting-stt-model'),appSettings.stt_ollama_model,'(없음)');
|
||||||
fill(document.getElementById('setting-ocr-model'),appSettings.ocr_ollama_model,'(없음)');
|
fill(document.getElementById('setting-ocr-model'),appSettings.ocr_ollama_model,'(없음)');
|
||||||
|
// OpenRouter 드롭다운
|
||||||
|
populateOrSelects();
|
||||||
}
|
}
|
||||||
|
|
||||||
// ══ 설정 ══
|
// ══ 설정 ══
|
||||||
@@ -729,7 +791,11 @@ async function loadSettings(){
|
|||||||
const th=appSettings.cpu_threads||0;cpuSlider.value=th;cpuDisplay.textContent=th===0?'0 (자동)':th+' 스레드';
|
const th=appSettings.cpu_threads||0;cpuSlider.value=th;cpuDisplay.textContent=th===0?'0 (자동)':th+' 스레드';
|
||||||
document.getElementById('stt-timeout').value=appSettings.stt_timeout||0;
|
document.getElementById('stt-timeout').value=appSettings.stt_timeout||0;
|
||||||
document.getElementById('ollama-timeout').value=appSettings.ollama_timeout||600;
|
document.getElementById('ollama-timeout').value=appSettings.ollama_timeout||600;
|
||||||
populateModelSelects()}catch{}
|
if(appSettings.openrouter_url)document.getElementById('or-url').value=appSettings.openrouter_url;
|
||||||
|
if(appSettings.openrouter_api_key_masked)document.getElementById('or-api-key').placeholder='저장된 키: '+appSettings.openrouter_api_key_masked;
|
||||||
|
populateModelSelects();
|
||||||
|
// 기존 OR 모델 로드
|
||||||
|
if(appSettings.openrouter_api_key_masked)loadOrModels();}catch{}
|
||||||
}
|
}
|
||||||
document.getElementById('btn-save-settings').addEventListener('click',async()=>{
|
document.getElementById('btn-save-settings').addEventListener('click',async()=>{
|
||||||
const fd=new FormData();
|
const fd=new FormData();
|
||||||
@@ -738,6 +804,10 @@ document.getElementById('btn-save-settings').addEventListener('click',async()=>{
|
|||||||
fd.append('cpu_threads',cpuSlider.value);
|
fd.append('cpu_threads',cpuSlider.value);
|
||||||
fd.append('stt_timeout',document.getElementById('stt-timeout').value||'0');
|
fd.append('stt_timeout',document.getElementById('stt-timeout').value||'0');
|
||||||
fd.append('ollama_timeout',document.getElementById('ollama-timeout').value||'600');
|
fd.append('ollama_timeout',document.getElementById('ollama-timeout').value||'600');
|
||||||
|
fd.append('openrouter_url',document.getElementById('or-url').value||'https://openrouter.ai/api/v1');
|
||||||
|
const orKey=document.getElementById('or-api-key').value.trim();if(orKey)fd.append('openrouter_api_key',orKey);
|
||||||
|
fd.append('openrouter_stt_model',document.getElementById('setting-or-stt-model').value);
|
||||||
|
fd.append('openrouter_ocr_model',document.getElementById('setting-or-ocr-model').value);
|
||||||
try{const r=await api('POST','/api/settings',fd);if(r.ok){appSettings=(await r.json()).settings;const msg=document.getElementById('settings-msg');msg.style.display='block';setTimeout(()=>msg.style.display='none',3500)}}catch{}
|
try{const r=await api('POST','/api/settings',fd);if(r.ok){appSettings=(await r.json()).settings;const msg=document.getElementById('settings-msg');msg.style.display='block';setTimeout(()=>msg.style.display='none',3500)}}catch{}
|
||||||
});
|
});
|
||||||
document.getElementById('btn-refresh-models').addEventListener('click',loadOllamaModels);
|
document.getElementById('btn-refresh-models').addEventListener('click',loadOllamaModels);
|
||||||
@@ -762,10 +832,11 @@ sttDrop.addEventListener('dragover',e=>{e.preventDefault();sttDrop.classList.add
|
|||||||
sttDrop.addEventListener('dragleave',()=>sttDrop.classList.remove('dragover'));
|
sttDrop.addEventListener('dragleave',()=>sttDrop.classList.remove('dragover'));
|
||||||
sttDrop.addEventListener('drop',e=>{e.preventDefault();sttDrop.classList.remove('dragover');setSttFile(e.dataTransfer.files[0])});
|
sttDrop.addEventListener('drop',e=>{e.preventDefault();sttDrop.classList.remove('dragover');setSttFile(e.dataTransfer.files[0])});
|
||||||
function setSttFile(f){if(!f)return;sttFile=f;showFileInfo('stt',f);document.getElementById('stt-btn').disabled=false;document.getElementById('stt-err').style.display='none'}
|
function setSttFile(f){if(!f)return;sttFile=f;showFileInfo('stt',f);document.getElementById('stt-btn').disabled=false;document.getElementById('stt-err').style.display='none'}
|
||||||
document.querySelectorAll('#page-stt .engine-btn').forEach(btn=>{btn.addEventListener('click',()=>{document.querySelectorAll('#page-stt .engine-btn').forEach(b=>b.classList.remove('active'));btn.classList.add('active');sttEngine=btn.dataset.engine;document.getElementById('stt-ollama-opts').classList.toggle('visible',sttEngine==='whisper+ollama');document.getElementById('stt-btn').className='btn-start '+(sttEngine==='whisper+ollama'?'purple':'green')})});
|
document.querySelectorAll('#page-stt .engine-btn').forEach(btn=>{btn.addEventListener('click',()=>{document.querySelectorAll('#page-stt .engine-btn').forEach(b=>b.classList.remove('active'));btn.classList.add('active');sttEngine=btn.dataset.engine;document.getElementById('stt-ollama-opts').classList.toggle('visible',sttEngine==='whisper+ollama');document.getElementById('stt-or-opts').classList.toggle('visible',sttEngine==='whisper+openrouter');const isOr=sttEngine==='whisper+openrouter',isOllama=sttEngine==='whisper+ollama';document.getElementById('stt-btn').className='btn-start '+(isOr||isOllama?'purple':'green')})});
|
||||||
document.getElementById('stt-btn').addEventListener('click',async()=>{
|
document.getElementById('stt-btn').addEventListener('click',async()=>{
|
||||||
if(!sttFile)return;document.getElementById('stt-err').style.display='none';setSttLoading(true);
|
if(!sttFile)return;document.getElementById('stt-err').style.display='none';setSttLoading(true);
|
||||||
const fd=new FormData();fd.append('file',sttFile);fd.append('use_ollama',sttEngine==='whisper+ollama'?'true':'false');fd.append('ollama_model',document.getElementById('stt-ollama-model').value||'');
|
const fd=new FormData();fd.append('file',sttFile);fd.append('use_ollama',sttEngine==='whisper+ollama'?'true':'false');fd.append('ollama_model',document.getElementById('stt-ollama-model').value||'');
|
||||||
|
fd.append('use_openrouter',sttEngine==='whisper+openrouter'?'true':'false');fd.append('openrouter_model',document.getElementById('stt-or-model').value||'');
|
||||||
try{const r=await api('POST','/api/transcribe',fd);const d=await r.json();if(!r.ok)throw new Error(d.detail||'업로드 실패');pollTask(d.task_id,dt=>setProg('stt',dt.progress||0,dt.message||'처리 중...'),showSttResult,e=>{showErr('stt-err',e);setSttLoading(false)})}
|
try{const r=await api('POST','/api/transcribe',fd);const d=await r.json();if(!r.ok)throw new Error(d.detail||'업로드 실패');pollTask(d.task_id,dt=>setProg('stt',dt.progress||0,dt.message||'처리 중...'),showSttResult,e=>{showErr('stt-err',e);setSttLoading(false)})}
|
||||||
catch(e){showErr('stt-err',e.message);setSttLoading(false)}
|
catch(e){showErr('stt-err',e.message);setSttLoading(false)}
|
||||||
});
|
});
|
||||||
@@ -793,12 +864,13 @@ ocrDrop.addEventListener('dragover',e=>{e.preventDefault();ocrDrop.classList.add
|
|||||||
ocrDrop.addEventListener('dragleave',()=>ocrDrop.classList.remove('dragover'));
|
ocrDrop.addEventListener('dragleave',()=>ocrDrop.classList.remove('dragover'));
|
||||||
ocrDrop.addEventListener('drop',e=>{e.preventDefault();ocrDrop.classList.remove('dragover');setOcrFile(e.dataTransfer.files[0])});
|
ocrDrop.addEventListener('drop',e=>{e.preventDefault();ocrDrop.classList.remove('dragover');setOcrFile(e.dataTransfer.files[0])});
|
||||||
function setOcrFile(f){if(!f)return;ocrFile=f;showFileInfo('ocr',f);document.getElementById('ocr-btn').disabled=false;document.getElementById('ocr-err').style.display='none';const p=document.getElementById('ocr-preview'),w=document.getElementById('ocr-preview-wrap');p.src=URL.createObjectURL(f);w.style.display='block'}
|
function setOcrFile(f){if(!f)return;ocrFile=f;showFileInfo('ocr',f);document.getElementById('ocr-btn').disabled=false;document.getElementById('ocr-err').style.display='none';const p=document.getElementById('ocr-preview'),w=document.getElementById('ocr-preview-wrap');p.src=URL.createObjectURL(f);w.style.display='block'}
|
||||||
document.querySelectorAll('#page-ocr .engine-btn').forEach(btn=>{btn.addEventListener('click',()=>{document.querySelectorAll('#page-ocr .engine-btn').forEach(b=>b.classList.remove('active'));btn.classList.add('active');ocrEngine=btn.dataset.engine;document.getElementById('ocr-ollama-opts').classList.toggle('visible',ocrEngine==='ollama');document.getElementById('ocr-btn').className='btn-start '+(ocrEngine==='ollama'?'purple':'green')})});
|
document.querySelectorAll('#page-ocr .engine-btn').forEach(btn=>{btn.addEventListener('click',()=>{document.querySelectorAll('#page-ocr .engine-btn').forEach(b=>b.classList.remove('active'));btn.classList.add('active');ocrEngine=btn.dataset.engine;document.getElementById('ocr-ollama-opts').classList.toggle('visible',ocrEngine==='ollama');document.getElementById('ocr-or-opts').classList.toggle('visible',ocrEngine==='openrouter');const isOr=ocrEngine==='openrouter',isOllama=ocrEngine==='ollama';document.getElementById('ocr-btn').className='btn-start '+(isOr||isOllama?'purple':'green')})});
|
||||||
document.getElementById('cprompt-toggle').addEventListener('click',()=>{const ta=document.getElementById('custom-prompt');const open=ta.style.display!=='block';ta.style.display=open?'block':'none';document.getElementById('cprompt-toggle').textContent=(open?'▼':'▶')+' 커스텀 프롬프트'});
|
document.getElementById('cprompt-toggle').addEventListener('click',()=>{const ta=document.getElementById('custom-prompt');const open=ta.style.display!=='block';ta.style.display=open?'block':'none';document.getElementById('cprompt-toggle').textContent=(open?'▼':'▶')+' 커스텀 프롬프트'});
|
||||||
|
document.getElementById('cprompt-toggle-or').addEventListener('click',()=>{const ta=document.getElementById('custom-prompt-or');const open=ta.style.display!=='block';ta.style.display=open?'block':'none';document.getElementById('cprompt-toggle-or').textContent=(open?'▼':'▶')+' 커스텀 프롬프트'});
|
||||||
document.querySelectorAll('.mode-btn').forEach(btn=>{btn.addEventListener('click',()=>{document.querySelectorAll('.mode-btn').forEach(b=>b.classList.remove('active'));btn.classList.add('active');ocrMode=btn.dataset.mode;document.getElementById('mode-desc').textContent=ocrMode==='structure'?'표 구조를 감지하고 Excel로 저장합니다':'일반 텍스트와 글자를 인식합니다'})});
|
document.querySelectorAll('.mode-btn').forEach(btn=>{btn.addEventListener('click',()=>{document.querySelectorAll('.mode-btn').forEach(b=>b.classList.remove('active'));btn.classList.add('active');ocrMode=btn.dataset.mode;document.getElementById('mode-desc').textContent=ocrMode==='structure'?'표 구조를 감지하고 Excel로 저장합니다':'일반 텍스트와 글자를 인식합니다'})});
|
||||||
document.getElementById('ocr-btn').addEventListener('click',async()=>{
|
document.getElementById('ocr-btn').addEventListener('click',async()=>{
|
||||||
if(!ocrFile)return;document.getElementById('ocr-err').style.display='none';setOcrLoading(true);
|
if(!ocrFile)return;document.getElementById('ocr-err').style.display='none';setOcrLoading(true);
|
||||||
const fd=new FormData();fd.append('file',ocrFile);fd.append('mode',ocrMode);fd.append('backend',ocrEngine);fd.append('ollama_model',document.getElementById('ocr-ollama-model').value||'');fd.append('custom_prompt',document.getElementById('custom-prompt').value||'');
|
const fd=new FormData();fd.append('file',ocrFile);fd.append('mode',ocrMode);fd.append('ollama_model',document.getElementById('ocr-ollama-model').value||'');fd.append('custom_prompt',document.getElementById('custom-prompt').value||'');
|
||||||
try{const r=await api('POST','/api/ocr',fd);const d=await r.json();if(!r.ok)throw new Error(d.detail||'업로드 실패');pollTask(d.task_id,dt=>setProg('ocr',dt.progress||0,dt.message||'처리 중...'),showOcrResult,e=>{showErr('ocr-err',e);setOcrLoading(false)})}
|
try{const r=await api('POST','/api/ocr',fd);const d=await r.json();if(!r.ok)throw new Error(d.detail||'업로드 실패');pollTask(d.task_id,dt=>setProg('ocr',dt.progress||0,dt.message||'처리 중...'),showOcrResult,e=>{showErr('ocr-err',e);setOcrLoading(false)})}
|
||||||
catch(e){showErr('ocr-err',e.message);setOcrLoading(false)}
|
catch(e){showErr('ocr-err',e.message);setOcrLoading(false)}
|
||||||
});
|
});
|
||||||
@@ -1067,6 +1139,72 @@ function fmtTime(s){const m=Math.floor(s/60),ss=Math.floor(s%60);return String(m
|
|||||||
// Escape the HTML metacharacters &, < and > as entities; falsy input yields ''.
// '&' is replaced first so the entities emitted for '<' and '>' are not escaped a second time.
function esc(s){return String(s||'').replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;')}
|
// Escape the HTML metacharacters &, < and > as entities; falsy input yields ''.
// '&' is replaced first so the entities emitted for '<' and '>' are not escaped a second time.
function esc(s){return String(s||'').replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;')}
|
||||||
async function copyText(text,btn){try{await navigator.clipboard.writeText(text);const o=btn.textContent;btn.textContent='복사됨 ✓';setTimeout(()=>btn.textContent=o,1500)}catch{}}
|
async function copyText(text,btn){try{await navigator.clipboard.writeText(text);const o=btn.textContent;btn.textContent='복사됨 ✓';setTimeout(()=>btn.textContent=o,1500)}catch{}}
|
||||||
|
|
||||||
|
// ══ OPENROUTER ══
|
||||||
|
// Fetch the OpenRouter model catalogue from the backend and refresh the settings UI.
// Response fields read here: connected, models, vision_models, text_models.
async function loadOrModels(){
  try{
    const r=await api('GET','/api/openrouter/models');
    const d=await r.json();
    const wrap=document.getElementById('or-models-wrap');
    if(!d.connected){
      // Not connected: hide the model panel and leave the cached lists untouched.
      wrap.style.display='none';
      return;
    }
    orModels=d.models||[];
    orVisionModels=d.vision_models||[];
    orTextModels=d.text_models||[];
    wrap.style.display='block';
    document.getElementById('or-connected-badge').textContent=`✓ 연결됨 — Vision ${orVisionModels.length}개 / 전체 ${orModels.length}개`;
    // No argument: populateOrSelects falls back to the currently active tab filter,
    // so a reload never makes the dropdowns disagree with the highlighted tab.
    populateOrSelects();
  }catch(e){
    // Best-effort refresh: keep the UI usable, but leave a trace instead of failing silently.
    console.warn('[OpenRouter] model list load failed',e);
  }
}
|
||||||
|
|
||||||
|
// Filter currently applied to the OpenRouter dropdowns; takes the data-filter value of the active tab.
let orFilter='vision';
// Clicking a tab highlights it, stores its data-filter value and refills the dropdowns.
for(const tab of document.querySelectorAll('.or-model-tab')){
  tab.addEventListener('click',()=>{
    for(const other of document.querySelectorAll('.or-model-tab'))other.classList.remove('active');
    tab.classList.add('active');
    orFilter=tab.dataset.filter;
    populateOrSelects(orFilter);
  });
}
|
||||||
|
|
||||||
|
// Rebuild the four OpenRouter model dropdowns (two in settings, one each on the STT and OCR pages).
//   filter: 'vision' | 'text' | anything else (= all models); defaults to the active tab (orFilter).
function populateOrSelects(filter){
  filter=filter||orFilter;
  const filtered=filter==='vision'?orVisionModels:filter==='text'?orTextModels:orModels;
  // Refill one <select>: placeholder first, then the model ids.
  // The current selection (or the saved default) is kept as an option even when it is not in
  // `models` -- list not loaded yet, or excluded by the active filter -- so that saving the
  // settings cannot silently overwrite a stored model with an empty value.
  const fillOr=(sel,models,def,placeholder)=>{
    if(!sel)return;
    const cur=sel.value||def||'';
    sel.innerHTML='';
    const first=document.createElement('option');
    first.value='';
    first.textContent=placeholder;
    sel.appendChild(first);
    const ids=cur&&!models.includes(cur)?[cur,...models]:models;
    ids.forEach(m=>{
      const o=document.createElement('option');
      o.value=m;
      o.textContent=m;
      if(m===cur)o.selected=true;
      sel.appendChild(o);
    });
  };
  // Settings dropdowns: an empty value is shown as "(없음)".
  fillOr(document.getElementById('setting-or-stt-model'),filtered,appSettings.openrouter_stt_model,'(없음)');
  // OCR accepts Vision models only; under the text filter no additional models are offered.
  fillOr(document.getElementById('setting-or-ocr-model'),filter==='text'?[]:orVisionModels,appSettings.openrouter_ocr_model,'(없음)');
  // Page dropdowns use the same placeholder as the OCR page markup.
  // NOTE(review): assumes an empty value means "use the default from settings" on the STT page too -- confirm against the backend.
  fillOr(document.getElementById('stt-or-model'),filtered,appSettings.openrouter_stt_model,'설정 기본 모델 사용');
  fillOr(document.getElementById('ocr-or-model'),orVisionModels,appSettings.openrouter_ocr_model,'설정 기본 모델 사용');
}
|
||||||
|
|
||||||
|
// "Connection test" button: posts the typed API key and base URL to the backend
// and shows the outcome in #or-test-result; on success the model list is reloaded.
document.getElementById('btn-or-test').addEventListener('click',async()=>{
  const apiKey=document.getElementById('or-api-key').value.trim();
  const baseUrl=document.getElementById('or-url').value.trim()||'https://openrouter.ai/api/v1';
  const out=document.getElementById('or-test-result');
  // Show a status line in the given colour.
  const report=(color,text)=>{out.style.display='block';out.style.color=color;out.textContent=text};
  if(!apiKey){
    report('var(--warn)','API 키를 입력하세요');
    return;
  }
  report('var(--muted)','연결 중...');
  try{
    const form=new FormData();
    form.append('api_key',apiKey);
    form.append('base_url',baseUrl);
    const resp=await api('POST','/api/openrouter/test',form);
    const body=await resp.json();
    report(body.ok?'var(--accent)':'var(--warn)',body.message);
    if(body.ok)loadOrModels();
  }catch{
    report('var(--warn)','요청 실패');
  }
});
|
||||||
|
|
||||||
checkAuth();
|
checkAuth();
|
||||||
</script>
|
</script>
|
||||||
</body>
|
</body>
|
||||||
|
|||||||
141
app/tasks.py
141
app/tasks.py
@@ -15,15 +15,12 @@ OLLAMA_URL = os.getenv("OLLAMA_URL", "http://192.168.0.126:11434")
|
|||||||
OLLAMA_TIMEOUT = int(os.getenv("OLLAMA_TIMEOUT", "600"))
|
OLLAMA_TIMEOUT = int(os.getenv("OLLAMA_TIMEOUT", "600"))
|
||||||
|
|
||||||
_cpu_threads_env = int(os.getenv("CPU_THREADS", "0"))
|
_cpu_threads_env = int(os.getenv("CPU_THREADS", "0"))
|
||||||
CPU_THREADS = _cpu_threads_env if _cpu_threads_env > 0 else None # None = auto
|
CPU_THREADS = _cpu_threads_env if _cpu_threads_env > 0 else None
|
||||||
|
|
||||||
celery_app = Celery("whisper_tasks", broker=REDIS_URL, backend=REDIS_URL)
|
celery_app = Celery("whisper_tasks", broker=REDIS_URL, backend=REDIS_URL)
|
||||||
celery_app.conf.update(
|
celery_app.conf.update(
|
||||||
task_serializer="json",
|
task_serializer="json", result_serializer="json",
|
||||||
result_serializer="json",
|
accept_content=["json"], task_track_started=True, result_expires=3600,
|
||||||
accept_content=["json"],
|
|
||||||
task_track_started=True,
|
|
||||||
result_expires=3600,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
_model = None
|
_model = None
|
||||||
@@ -33,92 +30,128 @@ def get_model():
|
|||||||
if _model is None:
|
if _model is None:
|
||||||
from faster_whisper import WhisperModel
|
from faster_whisper import WhisperModel
|
||||||
kwargs = dict(device=DEVICE, compute_type=COMPUTE_TYPE)
|
kwargs = dict(device=DEVICE, compute_type=COMPUTE_TYPE)
|
||||||
if CPU_THREADS is not None:
|
if CPU_THREADS is not None: kwargs["cpu_threads"] = CPU_THREADS
|
||||||
kwargs["cpu_threads"] = CPU_THREADS
|
|
||||||
print(f"[Whisper] 로딩: {MODEL_SIZE} / {DEVICE} / {COMPUTE_TYPE} / threads={CPU_THREADS or 'auto'}")
|
print(f"[Whisper] 로딩: {MODEL_SIZE} / {DEVICE} / {COMPUTE_TYPE} / threads={CPU_THREADS or 'auto'}")
|
||||||
_model = WhisperModel(MODEL_SIZE, **kwargs)
|
_model = WhisperModel(MODEL_SIZE, **kwargs)
|
||||||
print("[Whisper] 로드 완료")
|
print("[Whisper] 로드 완료")
|
||||||
return _model
|
return _model
|
||||||
|
|
||||||
|
|
||||||
|
# ── 후처리: Ollama ────────────────────────────────────────────
|
||||||
def _ollama_postprocess(text: str, model: str) -> str:
|
def _ollama_postprocess(text: str, model: str) -> str:
|
||||||
if not model or not text.strip():
|
if not model or not text.strip(): return text
|
||||||
return text
|
|
||||||
prompt = (
|
prompt = (
|
||||||
"다음은 음성 인식으로 추출된 텍스트입니다. "
|
"다음은 음성 인식으로 추출된 텍스트입니다. "
|
||||||
"내용은 절대 변경하지 말고, 문장 부호를 추가하고 자연스럽게 다듬어줘. "
|
"내용은 절대 변경하지 말고, 문장 부호를 추가하고 자연스럽게 다듬어줘. "
|
||||||
"결과 텍스트만 출력하고 설명은 하지 마.\n\n"
|
"결과 텍스트만 출력하고 설명은 하지 마.\n\n" + text
|
||||||
f"{text}"
|
)
|
||||||
|
try:
|
||||||
|
resp = httpx.post(f"{OLLAMA_URL}/api/chat", json={
|
||||||
|
"model": model,
|
||||||
|
"messages": [{"role":"user","content":prompt}],
|
||||||
|
"stream": False, "options": {"temperature": 0.1},
|
||||||
|
}, timeout=float(OLLAMA_TIMEOUT))
|
||||||
|
resp.raise_for_status()
|
||||||
|
result = resp.json().get("message",{}).get("content","").strip()
|
||||||
|
return result if result else text
|
||||||
|
except Exception as e:
|
||||||
|
print(f"[Ollama 후처리 실패] {e}"); return text
|
||||||
|
|
||||||
|
|
||||||
|
# ── 후처리: OpenRouter (OpenAI 호환) ─────────────────────────
|
||||||
|
def _openrouter_postprocess(text: str, model: str, base_url: str, api_key: str) -> str:
|
||||||
|
if not model or not api_key or not text.strip(): return text
|
||||||
|
prompt = (
|
||||||
|
"다음은 음성 인식으로 추출된 텍스트입니다. "
|
||||||
|
"내용은 절대 변경하지 말고, 문장 부호를 추가하고 자연스럽게 다듬어줘. "
|
||||||
|
"결과 텍스트만 출력하고 설명은 하지 마.\n\n" + text
|
||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
resp = httpx.post(
|
resp = httpx.post(
|
||||||
f"{OLLAMA_URL}/api/chat",
|
f"{base_url.rstrip('/')}/chat/completions",
|
||||||
json={"model": model,
|
headers={
|
||||||
"messages": [{"role": "user", "content": prompt}],
|
"Authorization": f"Bearer {api_key}",
|
||||||
"stream": False, "options": {"temperature": 0.1}},
|
"HTTP-Referer": "https://voicescript.local",
|
||||||
|
"X-Title": "VoiceScript",
|
||||||
|
"Content-Type": "application/json",
|
||||||
|
},
|
||||||
|
json={
|
||||||
|
"model": model,
|
||||||
|
"messages": [{"role":"user","content":prompt}],
|
||||||
|
"temperature": 0.1,
|
||||||
|
},
|
||||||
timeout=float(OLLAMA_TIMEOUT),
|
timeout=float(OLLAMA_TIMEOUT),
|
||||||
)
|
)
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
result = resp.json().get("message", {}).get("content", "").strip()
|
result = resp.json()["choices"][0]["message"]["content"].strip()
|
||||||
return result if result else text
|
return result if result else text
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"[Ollama 후처리 실패] {e}")
|
print(f"[OpenRouter 후처리 실패] {e}"); return text
|
||||||
return text
|
|
||||||
|
|
||||||
|
|
||||||
|
# ════════════════════════════════════════════════════════════════
|
||||||
|
# STT Task
|
||||||
|
# ════════════════════════════════════════════════════════════════
|
||||||
@celery_app.task(bind=True, name="tasks.transcribe_task", queue="stt")
|
@celery_app.task(bind=True, name="tasks.transcribe_task", queue="stt")
|
||||||
def transcribe_task(self, file_id: str, audio_path: str,
|
def transcribe_task(
|
||||||
use_ollama: bool = False, ollama_model: str = ""):
|
self,
|
||||||
self.update_state(state="PROGRESS", meta={"progress": 5, "message": "모델 준비 중..."})
|
file_id: str,
|
||||||
|
audio_path: str,
|
||||||
|
use_ollama: bool = False,
|
||||||
|
ollama_model: str = "",
|
||||||
|
use_openrouter: bool = False,
|
||||||
|
openrouter_model: str = "",
|
||||||
|
openrouter_url: str = "",
|
||||||
|
openrouter_key: str = "",
|
||||||
|
):
|
||||||
|
self.update_state(state="PROGRESS", meta={"progress":5,"message":"모델 준비 중..."})
|
||||||
try:
|
try:
|
||||||
model = get_model()
|
model = get_model()
|
||||||
self.update_state(state="PROGRESS", meta={"progress": 15, "message": "오디오 분석 중..."})
|
self.update_state(state="PROGRESS", meta={"progress":15,"message":"오디오 분석 중..."})
|
||||||
|
|
||||||
segments_gen, info = model.transcribe(
|
segments_gen, info = model.transcribe(
|
||||||
audio_path,
|
audio_path, language=LANGUAGE, beam_size=BEAM_SIZE,
|
||||||
language=LANGUAGE,
|
initial_prompt=INITIAL_PROMPT, vad_filter=True,
|
||||||
beam_size=BEAM_SIZE,
|
vad_parameters=dict(min_silence_duration_ms=500), word_timestamps=False,
|
||||||
initial_prompt=INITIAL_PROMPT,
|
|
||||||
vad_filter=True,
|
|
||||||
vad_parameters=dict(min_silence_duration_ms=500),
|
|
||||||
word_timestamps=False,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
self.update_state(state="PROGRESS", meta={"progress": 30, "message": "텍스트 변환 중..."})
|
self.update_state(state="PROGRESS", meta={"progress":30,"message":"텍스트 변환 중..."})
|
||||||
|
|
||||||
segments, parts = [], []
|
segments, parts = [], []
|
||||||
duration = info.duration
|
duration = info.duration
|
||||||
|
|
||||||
for seg in segments_gen:
|
for seg in segments_gen:
|
||||||
segments.append({"start": round(seg.start, 2),
|
segments.append({"start":round(seg.start,2),"end":round(seg.end,2),"text":seg.text.strip()})
|
||||||
"end": round(seg.end, 2),
|
|
||||||
"text": seg.text.strip()})
|
|
||||||
parts.append(seg.text.strip())
|
parts.append(seg.text.strip())
|
||||||
if duration > 0:
|
if duration > 0:
|
||||||
pct = 30 + int((seg.end / duration) * 50)
|
pct = 30 + int((seg.end/duration)*50)
|
||||||
self.update_state(
|
self.update_state(state="PROGRESS",
|
||||||
state="PROGRESS",
|
meta={"progress":min(pct,80),"message":f"변환 중... {seg.end:.0f}s / {duration:.0f}s"})
|
||||||
meta={"progress": min(pct, 80),
|
|
||||||
"message": f"변환 중... {seg.end:.0f}s / {duration:.0f}s"},
|
|
||||||
)
|
|
||||||
|
|
||||||
raw_text = "\n".join(parts)
|
raw_text = "\n".join(parts)
|
||||||
full_text = raw_text
|
full_text = raw_text
|
||||||
|
|
||||||
|
# Ollama 후처리
|
||||||
if use_ollama and ollama_model:
|
if use_ollama and ollama_model:
|
||||||
self.update_state(state="PROGRESS",
|
self.update_state(state="PROGRESS",
|
||||||
meta={"progress": 85,
|
meta={"progress":85,"message":f"Ollama({ollama_model}) 후처리 중..."})
|
||||||
"message": f"Ollama({ollama_model}) 후처리 중..."})
|
|
||||||
full_text = _ollama_postprocess(raw_text, ollama_model)
|
full_text = _ollama_postprocess(raw_text, ollama_model)
|
||||||
|
|
||||||
self.update_state(state="PROGRESS", meta={"progress": 95, "message": "파일 저장 중..."})
|
# OpenRouter 후처리
|
||||||
|
elif use_openrouter and openrouter_model and openrouter_key:
|
||||||
|
self.update_state(state="PROGRESS",
|
||||||
|
meta={"progress":85,"message":f"OpenRouter({openrouter_model}) 후처리 중..."})
|
||||||
|
full_text = _openrouter_postprocess(raw_text, openrouter_model, openrouter_url, openrouter_key)
|
||||||
|
|
||||||
|
self.update_state(state="PROGRESS", meta={"progress":95,"message":"파일 저장 중..."})
|
||||||
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
||||||
output_filename = f"{file_id}.txt"
|
output_filename = f"{file_id}.txt"
|
||||||
|
|
||||||
with open(os.path.join(OUTPUT_DIR, output_filename), "w", encoding="utf-8") as f:
|
with open(os.path.join(OUTPUT_DIR, output_filename), "w", encoding="utf-8") as f:
|
||||||
f.write(f"# 변환 결과\n# 언어: {info.language} | 재생 시간: {duration:.1f}초")
|
f.write(f"# 변환 결과\n# 언어: {info.language} | 재생 시간: {duration:.1f}초")
|
||||||
if use_ollama and ollama_model:
|
if use_ollama and ollama_model:
|
||||||
f.write(f" | Ollama 후처리: {ollama_model}")
|
f.write(f" | Ollama: {ollama_model}")
|
||||||
|
elif use_openrouter and openrouter_model:
|
||||||
|
f.write(f" | OpenRouter: {openrouter_model}")
|
||||||
f.write("\n\n## 전체 텍스트\n\n" + full_text + "\n\n")
|
f.write("\n\n## 전체 텍스트\n\n" + full_text + "\n\n")
|
||||||
f.write("## 타임스탬프별 세그먼트\n\n")
|
f.write("## 타임스탬프별 세그먼트\n\n")
|
||||||
for seg in segments:
|
for seg in segments:
|
||||||
@@ -128,14 +161,16 @@ def transcribe_task(self, file_id: str, audio_path: str,
|
|||||||
except: pass
|
except: pass
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"text": full_text,
|
"text": full_text,
|
||||||
"raw_text": raw_text,
|
"raw_text": raw_text,
|
||||||
"segments": segments,
|
"segments": segments,
|
||||||
"language": info.language,
|
"language": info.language,
|
||||||
"duration": round(duration, 1),
|
"duration": round(duration, 1),
|
||||||
"output_file": output_filename,
|
"output_file": output_filename,
|
||||||
"ollama_used": use_ollama and bool(ollama_model),
|
"ollama_used": use_ollama and bool(ollama_model),
|
||||||
"ollama_model": ollama_model if (use_ollama and ollama_model) else "",
|
"ollama_model": ollama_model if (use_ollama and ollama_model) else "",
|
||||||
|
"openrouter_used": use_openrouter and bool(openrouter_model) and bool(openrouter_key),
|
||||||
|
"openrouter_model": openrouter_model if (use_openrouter and openrouter_model) else "",
|
||||||
}
|
}
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|||||||
Reference in New Issue
Block a user