443 lines
21 KiB
Python
443 lines
21 KiB
Python
import glob
import json
import logging
import os
import threading
import time
import uuid
from datetime import datetime
from pathlib import Path

import aiofiles
import httpx
import psutil
from fastapi import FastAPI, UploadFile, File, HTTPException, Depends, Form, Request
from fastapi.responses import FileResponse
from fastapi.staticfiles import StaticFiles

from auth import (authenticate, create_access_token, init_users,
                  require_auth, require_admin, require_stt, require_ocr,
                  list_users, create_user, update_user, delete_user)
from tasks import celery_app, transcribe_task
from ocr_tasks import ocr_task
|
|
|
|
# Application object; the route decorators below register handlers on it.
app = FastAPI(title="VoiceScript API")

# ── Environment-driven configuration (defaults apply when a variable is unset) ──
UPLOAD_DIR = os.getenv("UPLOAD_DIR", "/data/uploads")
OUTPUT_DIR = os.getenv("OUTPUT_DIR", "/data/outputs")
OLLAMA_URL = os.getenv("OLLAMA_URL", "http://192.168.0.126:11434")
# MAX_UPLOAD_MB is given in megabytes, OUTPUT_KEEP_HOURS in hours.
MAX_UPLOAD_BYTES = int(os.getenv("MAX_UPLOAD_MB", "500")) * 1024 ** 2
OUTPUT_KEEP_SECS = int(os.getenv("OUTPUT_KEEP_HOURS", "48")) * 60 * 60

# Settings and history live next to the upload directory.
DATA_DIR = Path(UPLOAD_DIR).parent
SETTINGS_FILE = DATA_DIR / "settings.json"
HISTORY_FILE = DATA_DIR / "history.json"
HISTORY_MAX = 300  # append_history keeps at most this many records

os.makedirs(UPLOAD_DIR, exist_ok=True)
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Accepted upload extensions (lower-case, without the dot).
AUDIO_EXT = {
    "mp3", "mp4", "wav", "m4a", "ogg", "flac",
    "aac", "wma", "webm", "mkv", "avi", "mov",
}
IMAGE_EXT = {
    "jpg", "jpeg", "png", "bmp",
    "tiff", "tif", "webp", "gif",
}

# Values used for any key missing from settings.json.
_DEFAULT_SETTINGS = {
    "stt_ollama_model": "",
    "ocr_ollama_model": "granite3.2-vision:latest",
    "cpu_threads": 0,
    "stt_timeout": 0,
    "ollama_timeout": 600,
}

# Serialises access to HISTORY_FILE between threads of this process.
_hist_lock = threading.Lock()
|
|
|
|
|
|
# ── 설정 I/O ─────────────────────────────────────────────────
|
|
def _load_settings() -> dict:
    """Return the persisted settings with defaults filled in.

    Reads SETTINGS_FILE as JSON and adds every key of _DEFAULT_SETTINGS that
    the file does not define. A missing, unreadable, malformed or non-object
    settings file yields a fresh copy of the defaults instead of raising, so
    one bad file cannot take down every endpoint that consults the settings.

    Returns:
        A new dict; mutating it does not affect _DEFAULT_SETTINGS.
    """
    try:
        with open(SETTINGS_FILE, "r", encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        return dict(_DEFAULT_SETTINGS)
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        logging.getLogger(__name__).warning(
            "settings file %s is unreadable; using defaults", SETTINGS_FILE, exc_info=True
        )
        return dict(_DEFAULT_SETTINGS)

    if not isinstance(data, dict):
        logging.getLogger(__name__).warning(
            "settings file %s does not contain a JSON object; using defaults", SETTINGS_FILE
        )
        return dict(_DEFAULT_SETTINGS)

    for key, default in _DEFAULT_SETTINGS.items():
        data.setdefault(key, default)
    return data
|
|
|
|
def _save_settings(data: dict):
    """Write *data* to SETTINGS_FILE as indented UTF-8 JSON.

    The parent directory is created first if it does not exist.
    """
    target_dir = SETTINGS_FILE.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    with open(SETTINGS_FILE, "w", encoding="utf-8") as fh:
        json.dump(data, fh, ensure_ascii=False, indent=2)
|
|
|
|
|
|
# ── 이력 I/O ─────────────────────────────────────────────────
|
|
def _load_history() -> list:
    """Return the history records stored in HISTORY_FILE.

    The read happens under _hist_lock. A missing, unreadable or malformed
    file, or one whose top-level JSON value is not a list, yields an empty
    list; unreadable/malformed files are logged instead of being silently
    ignored.
    """
    with _hist_lock:
        try:
            with open(HISTORY_FILE, "r", encoding="utf-8") as f:
                data = json.load(f)
        except FileNotFoundError:
            return []
        except (OSError, ValueError):
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            logging.getLogger(__name__).warning(
                "history file %s is unreadable", HISTORY_FILE, exc_info=True
            )
            return []
    return data if isinstance(data, list) else []
|
|
|
|
def _write_history(history: list):
    """Persist *history* to HISTORY_FILE as indented UTF-8 JSON.

    The data is written to a uniquely named temporary file in the same
    directory and then moved over HISTORY_FILE with os.replace, so a crash
    or serialisation error part-way through never leaves a truncated history
    file behind. Every caller in this module invokes it while holding
    _hist_lock.

    Raises:
        OSError: the file could not be written or replaced.
        TypeError / ValueError: *history* is not JSON-serialisable.
    """
    HISTORY_FILE.parent.mkdir(parents=True, exist_ok=True)
    tmp_path = HISTORY_FILE.with_name(f"{HISTORY_FILE.name}.{uuid.uuid4().hex}.tmp")
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(history, f, ensure_ascii=False, indent=2)
        os.replace(tmp_path, HISTORY_FILE)
    except BaseException:
        # Do not leave the temporary file around when the write failed.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise
|
|
|
|
def append_history(record: dict):
    """Insert *record* at the front of the history, keeping HISTORY_MAX entries.

    This is best-effort: failures are logged and never propagate to the
    caller. Unlike a blanket ``except: pass``, an unreadable or malformed
    history file no longer blocks all future appends -- the history is
    restarted with *record* as its only entry.
    """
    log = logging.getLogger(__name__)
    with _hist_lock:
        history = []
        try:
            with open(HISTORY_FILE, "r", encoding="utf-8") as f:
                loaded = json.load(f)
            if isinstance(loaded, list):
                history = loaded
        except FileNotFoundError:
            pass  # first record ever written
        except (OSError, ValueError):
            log.warning("history file %s is unreadable; starting a new one",
                        HISTORY_FILE, exc_info=True)

        history.insert(0, record)
        try:
            _write_history(history[:HISTORY_MAX])
        except (OSError, TypeError, ValueError):
            log.warning("could not append history record", exc_info=True)
|
|
|
|
def _text_preview(text, limit: int = 200) -> str:
    """Return the first *limit* characters of *text*, with an ellipsis if cut."""
    text = text or ""
    return text[:limit] + ("…" if len(text) > limit else "")


def _build_history_output(kind, result: dict) -> dict:
    """Build the ``output`` summary stored in a history record.

    Records whose type is "stt" get the transcription summary; every other
    type gets the OCR summary.
    """
    if kind == "stt":
        return {
            "filename": result.get("output_file", ""),
            "language": result.get("language", ""),
            "duration_s": result.get("duration", 0),
            "segments": len(result.get("segments") or []),
            "text_preview": _text_preview(result.get("text", "")),
            "ollama_used": result.get("ollama_used", False),
            "ollama_model": result.get("ollama_model", ""),
        }
    return {
        "txt_file": result.get("txt_file", ""),
        "xlsx_file": result.get("xlsx_file", ""),
        "line_count": result.get("line_count", 0),
        "table_count": len(result.get("tables") or []),
        "backend": result.get("backend", ""),
        "ollama_model": result.get("ollama_model", ""),
        "text_preview": _text_preview(result.get("full_text", "")),
    }


def _update_history_by_task(task_id: str, result: dict, success: bool, error_msg: str = ""):
    """Find the history record for *task_id* and store the task outcome in it.

    Only the first record whose ``task_id`` matches is considered, and it is
    updated only while its status is still "processing"; repeated calls for
    an already finished task therefore change nothing and no longer rewrite
    the history file. Best-effort: I/O and serialisation failures are logged
    and never raised.

    Args:
        task_id: id stored in the record when the task was enqueued.
        result: task result dict (used when *success* is true).
        success: whether the task finished successfully.
        error_msg: failure description; truncated to 300 characters.
    """
    log = logging.getLogger(__name__)
    with _hist_lock:
        try:
            with open(HISTORY_FILE, "r", encoding="utf-8") as f:
                history = json.load(f)
        except FileNotFoundError:
            return
        except (OSError, ValueError):
            log.warning("history file %s is unreadable", HISTORY_FILE, exc_info=True)
            return
        if not isinstance(history, list):
            return

        # Match on the task_id field.
        entry = next(
            (h for h in history if isinstance(h, dict) and h.get("task_id") == task_id),
            None,
        )
        if entry is None or entry.get("status") != "processing":
            return  # unknown task or already finalised: nothing to write

        if success:
            entry["status"] = "success"
            entry["output"] = _build_history_output(entry.get("type"), result or {})
        else:
            entry["status"] = "failed"
            entry["output"] = {"error": error_msg[:300]}

        try:
            _write_history(history)
        except (OSError, TypeError, ValueError):
            log.warning("could not update history for task %s", task_id, exc_info=True)
|
|
|
|
def delete_history_item(history_id: str) -> bool:
    """Remove every history record whose ``id`` equals *history_id*.

    Returns:
        True when at least one record was removed and the file was rewritten;
        False when the history file is missing or unreadable, no record
        matches, or the rewrite failed (failures are logged).
    """
    log = logging.getLogger(__name__)
    with _hist_lock:
        try:
            with open(HISTORY_FILE, "r", encoding="utf-8") as f:
                history = json.load(f)
        except FileNotFoundError:
            return False
        except (OSError, ValueError):
            log.warning("history file %s is unreadable", HISTORY_FILE, exc_info=True)
            return False
        if not isinstance(history, list):
            return False

        remaining = [
            h for h in history
            if not (isinstance(h, dict) and h.get("id") == history_id)
        ]
        if len(remaining) == len(history):
            return False

        try:
            _write_history(remaining)
        except (OSError, TypeError, ValueError):
            log.warning("could not delete history item %s", history_id, exc_info=True)
            return False
        return True
|
|
|
|
def clear_history():
    """Overwrite the history file with an empty JSON list, if the file exists."""
    with _hist_lock:
        if not HISTORY_FILE.exists():
            return
        HISTORY_FILE.write_text("[]", encoding="utf-8")
|
|
|
|
|
|
# ════════════════════════════════════════════════════════════════
|
|
# 시작 이벤트
|
|
# ════════════════════════════════════════════════════════════════
|
|
@app.on_event("startup")
async def on_startup():
    """Startup hook: call init_users(), then purge expired output files."""
    init_users()
    # The number of removed files is not needed here.
    _cleanup_outputs()
|
|
|
|
|
|
# ════════════════════════════════════════════════════════════════
|
|
# 인증
|
|
# ════════════════════════════════════════════════════════════════
|
|
@app.post("/api/login")
def login(username: str = Form(...), password: str = Form(...)):
    """Exchange form credentials for a bearer access token.

    Raises:
        HTTPException: 401 when authenticate() rejects the credentials.
    """
    account = authenticate(username, password)
    if account:
        token = create_access_token(username)
        return {"access_token": token, "token_type": "bearer"}
    raise HTTPException(401, "아이디 또는 비밀번호가 올바르지 않습니다")
|
|
|
|
@app.get("/api/me")
def me(user: dict = Depends(require_auth)):
    """Return the authenticated user's name, role and permission flags."""
    profile = {"username": user["username"]}
    # Fall back to an unprivileged profile for missing fields.
    profile["role"] = user.get("role", "user")
    profile["permissions"] = user.get("permissions", {"stt": False, "ocr": False})
    return profile
|
|
|
|
|
|
# ════════════════════════════════════════════════════════════════
|
|
# 시스템 정보
|
|
# ════════════════════════════════════════════════════════════════
|
|
@app.get("/api/system")
def system_info(user: dict = Depends(require_auth)):
    """Report host RAM / swap / CPU figures plus the related settings values."""

    def to_gb(n_bytes):
        # Bytes -> GiB, one decimal place.
        return round(n_bytes / 1024**3, 1)

    ram = psutil.virtual_memory()
    swp = psutil.swap_memory()
    settings = _load_settings()

    return {
        "ram_total_gb": to_gb(ram.total),
        "ram_used_gb": to_gb(ram.used),
        "ram_avail_gb": to_gb(ram.available),
        "ram_percent": ram.percent,
        "swap_total_gb": to_gb(swp.total),
        "swap_used_gb": to_gb(swp.used),
        "cpu_logical": psutil.cpu_count(logical=True),
        "cpu_physical": psutil.cpu_count(logical=False),
        # Samples CPU usage over a 0.3 s window.
        "cpu_percent": psutil.cpu_percent(interval=0.3),
        "cpu_threads_setting": settings.get("cpu_threads", 0),
        "stt_timeout": settings.get("stt_timeout", 0),
        "ollama_timeout": settings.get("ollama_timeout", 600),
    }
|
|
|
|
|
|
# ════════════════════════════════════════════════════════════════
|
|
# STT
|
|
# ════════════════════════════════════════════════════════════════
|
|
@app.post("/api/transcribe")
async def transcribe(
    request: Request, file: UploadFile = File(...),
    use_ollama: str = Form("false"), ollama_model: str = Form(""),
    user: dict = Depends(require_stt),
):
    """Accept an audio/video upload and enqueue a transcription task.

    The upload is stored under UPLOAD_DIR with a fresh UUID as its name, the
    task is enqueued via ``transcribe_task.delay`` and a "processing" record
    is appended to the history.

    Args:
        request: incoming request, used for the Content-Length pre-check.
        file: uploaded media file; its extension must be in AUDIO_EXT.
        use_ollama: "true" (case-insensitive) enables Ollama for the task.
        ollama_model: model name; when empty and Ollama is enabled, the
            ``stt_ollama_model`` setting is used.
        user: authenticated user (dependency).

    Returns:
        Dict with the task id, the generated file id and the original filename.

    Raises:
        HTTPException: 400 for a missing/unsupported extension, 413 when the
            upload exceeds the size limit.
    """
    _check_size(request)
    # An upload may arrive without a filename; treat it as "no extension".
    ext = _ext(file.filename or "")
    if ext not in AUDIO_EXT:
        raise HTTPException(400, f"지원하지 않는 형식: {', '.join(sorted(AUDIO_EXT))}")

    file_id = str(uuid.uuid4())
    save_path = os.path.join(UPLOAD_DIR, f"{file_id}.{ext}")
    await _save(file, save_path)

    try:
        file_size = os.path.getsize(save_path)
        _use_ollama = use_ollama.lower() == "true"
        s = _load_settings()
        if _use_ollama and not ollama_model.strip():
            ollama_model = s.get("stt_ollama_model", "")
        task = transcribe_task.delay(file_id, save_path, _use_ollama, ollama_model)
    except Exception:
        # Nothing was enqueued, so nobody will ever consume the stored upload.
        try:
            os.remove(save_path)
        except OSError:
            pass
        raise

    # Store the task_id with the record: it is the key used to match updates.
    append_history({
        "id": file_id,
        "task_id": task.id,
        "type": "stt",
        "status": "processing",
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "username": user["username"],
        "input": {"filename": file.filename, "size_bytes": file_size, "format": ext.upper()},
        "settings": {
            "model": os.getenv("WHISPER_MODEL", "medium"),
            "language": os.getenv("WHISPER_LANGUAGE", "ko"),
            "compute_type": os.getenv("WHISPER_COMPUTE_TYPE", "int8"),
            "cpu_threads": s.get("cpu_threads", 0),
            "stt_timeout": s.get("stt_timeout", 0),
            "use_ollama": _use_ollama,
            "ollama_model": ollama_model if _use_ollama else "",
        },
        "output": None,
    })
    return {"task_id": task.id, "file_id": file_id, "filename": file.filename}
|
|
|
|
|
|
# ════════════════════════════════════════════════════════════════
|
|
# OCR
|
|
# ════════════════════════════════════════════════════════════════
|
|
@app.post("/api/ocr")
async def ocr(
    request: Request, file: UploadFile = File(...),
    mode: str = Form("text"), backend: str = Form("paddle"),
    ollama_model: str = Form(""), custom_prompt: str = Form(""),
    user: dict = Depends(require_ocr),
):
    """Accept an image upload and enqueue an OCR task.

    The upload is stored under UPLOAD_DIR with a fresh UUID as its name, the
    task is enqueued via ``ocr_task.delay`` and a "processing" record is
    appended to the history.

    Args:
        request: incoming request, used for the Content-Length pre-check.
        file: uploaded image; its extension must be in IMAGE_EXT.
        mode: "text" or "structure"; any other value falls back to "text".
        backend: "paddle" or "ollama"; any other value falls back to "paddle".
        ollama_model: model name; when empty and the backend is "ollama", the
            ``ocr_ollama_model`` setting is used.
        custom_prompt: passed to the task unchanged; only its first 200
            characters are stored in the history record.
        user: authenticated user (dependency).

    Returns:
        Dict with task id, file id, original filename, effective mode/backend.

    Raises:
        HTTPException: 400 for a missing/unsupported extension, 413 when the
            upload exceeds the size limit.
    """
    _check_size(request)
    # An upload may arrive without a filename; treat it as "no extension".
    ext = _ext(file.filename or "")
    if ext not in IMAGE_EXT:
        raise HTTPException(400, f"지원하지 않는 형식: {', '.join(sorted(IMAGE_EXT))}")
    if mode not in ("text", "structure"):
        mode = "text"
    if backend not in ("paddle", "ollama"):
        backend = "paddle"

    s = _load_settings()
    if backend == "ollama" and not ollama_model.strip():
        ollama_model = s.get("ocr_ollama_model", "granite3.2-vision:latest")

    file_id = str(uuid.uuid4())
    save_path = os.path.join(UPLOAD_DIR, f"{file_id}.{ext}")
    await _save(file, save_path)

    try:
        file_size = os.path.getsize(save_path)
        task = ocr_task.delay(file_id, save_path, mode, backend, ollama_model, custom_prompt)
    except Exception:
        # Nothing was enqueued, so nobody will ever consume the stored upload.
        try:
            os.remove(save_path)
        except OSError:
            pass
        raise

    # Store the task_id with the record: it is the key used to match updates.
    append_history({
        "id": file_id,
        "task_id": task.id,
        "type": "ocr",
        "status": "processing",
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "username": user["username"],
        "input": {"filename": file.filename, "size_bytes": file_size, "format": ext.upper()},
        "settings": {
            "backend": backend,
            "mode": mode,
            "ocr_lang": os.getenv("OCR_LANG", "korean"),
            "ollama_model": ollama_model if backend == "ollama" else "",
            "ollama_timeout": s.get("ollama_timeout", 600),
            "custom_prompt": custom_prompt[:200] if custom_prompt else "",
        },
        "output": None,
    })
    return {"task_id": task.id, "file_id": file_id, "filename": file.filename,
            "mode": mode, "backend": backend}
|
|
|
|
|
|
# ════════════════════════════════════════════════════════════════
|
|
# 상태 — task_id 기준으로 이력 업데이트
|
|
# ════════════════════════════════════════════════════════════════
|
|
@app.get("/api/status/{task_id}")
def get_status(task_id: str, user: dict = Depends(require_auth)):
    """Report the state of a Celery task and sync its outcome into the history.

    The task state is read once and reused for every branch, so a state
    change in the middle of the request cannot make the checks disagree with
    each other. On SUCCESS / FAILURE the matching history record (looked up
    by task_id, not file id) is updated.

    Returns:
        Dict with ``state`` and ``progress``; PENDING / PROGRESS / FAILURE
        add ``message``; on success the task's result dict is merged in.
    """
    r = celery_app.AsyncResult(task_id)
    state = r.state  # snapshot: each attribute read may query the result backend

    if state == "PENDING":
        return {"state": "pending", "progress": 0, "message": "대기 중..."}

    if state == "PROGRESS":
        m = r.info
        if not isinstance(m, dict):
            # The task may have finished since the snapshot; info is then not
            # the progress-meta dict any more.
            m = {}
        return {"state": "progress", "progress": m.get("progress", 0),
                "message": m.get("message", "처리 중...")}

    if state == "SUCCESS":
        result = r.result or {}
        _update_history_by_task(task_id, result, success=True)
        return {"state": "success", "progress": 100, **result}

    if state == "FAILURE":
        message = str(r.info)
        _update_history_by_task(task_id, {}, success=False, error_msg=message)
        return {"state": "failure", "progress": 0, "message": message}

    return {"state": state.lower(), "progress": 0}
|
|
|
|
|
|
# ════════════════════════════════════════════════════════════════
|
|
# 이력
|
|
# ════════════════════════════════════════════════════════════════
|
|
@app.get("/api/history")
def get_history(page: int=1, per_page: int=15, type_: str="", user: dict=Depends(require_auth)):
    """Return one page of history records visible to the caller.

    Non-admin users only see their own records. ``type_`` optionally narrows
    the list to "stt" or "ocr"; any other value means no type filter.

    ``page`` is clamped to >= 1 and ``per_page`` to 1..HISTORY_MAX, so zero
    or negative values can no longer produce negative slice offsets. The
    response echoes the effective (clamped) values.
    """
    page = max(1, page)
    per_page = max(1, min(per_page, HISTORY_MAX))

    history = _load_history()
    if user.get("role") != "admin":
        history = [h for h in history if h.get("username") == user["username"]]
    if type_ in ("stt", "ocr"):
        history = [h for h in history if h.get("type") == type_]

    total = len(history)
    start = (page - 1) * per_page
    return {"total": total, "page": page, "per_page": per_page,
            "items": history[start:start + per_page]}
|
|
|
|
@app.delete("/api/history/{history_id}")
def delete_history(history_id: str, user: dict=Depends(require_auth)):
    """Delete one history record.

    Admins may delete any record. Other users may only delete records whose
    ``username`` is their own, mirroring the visibility rule of the history
    listing; for anything else the endpoint answers 404 so that it does not
    reveal whether a foreign record exists.

    Raises:
        HTTPException: 404 when the record is missing or not the caller's.
    """
    if user.get("role") != "admin":
        owns_record = any(
            isinstance(h, dict)
            and h.get("id") == history_id
            and h.get("username") == user["username"]
            for h in _load_history()
        )
        if not owns_record:
            raise HTTPException(404, "이력을 찾을 수 없습니다")

    if not delete_history_item(history_id):
        raise HTTPException(404, "이력을 찾을 수 없습니다")
    return {"ok": True}
|
|
|
|
@app.delete("/api/history")
def clear_all_history(user: dict=Depends(require_admin)):
    """Admin-only: empty the whole history file."""
    clear_history()
    response = {"ok": True}
    return response
|
|
|
|
|
|
# ════════════════════════════════════════════════════════════════
|
|
# 다운로드 / Ollama / 설정 / 관리자
|
|
# ════════════════════════════════════════════════════════════════
|
|
@app.get("/api/download/{filename}")
def download(filename: str, user: dict=Depends(require_auth)):
    """Serve a result file from OUTPUT_DIR.

    Names containing "..", a forward or backward slash, or a NUL byte are
    rejected so that the joined path cannot leave OUTPUT_DIR, and only
    regular files are served (a directory of that name yields 404).

    Raises:
        HTTPException: 400 for an unsafe name, 404 when no such file exists.
    """
    if any(token in filename for token in ("..", "/", "\\", "\x00")):
        raise HTTPException(400, "잘못된 파일명")

    path = os.path.join(OUTPUT_DIR, filename)
    if not os.path.isfile(path):
        raise HTTPException(404, "파일을 찾을 수 없습니다")

    if filename.endswith(".xlsx"):
        media = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
    else:
        media = "text/plain"
    return FileResponse(path, media_type=media, filename=filename)
|
|
|
|
@app.get("/api/ollama/models")
def ollama_models(user: dict=Depends(require_auth)):
    """List the model names reported by the Ollama server at OLLAMA_URL.

    Any failure (connection, HTTP status, unexpected payload) is reported in
    the response body with ``connected: False`` rather than raised.
    """
    try:
        resp = httpx.get(f"{OLLAMA_URL}/api/tags", timeout=8.0)
        resp.raise_for_status()
        names = [entry["name"] for entry in resp.json().get("models", [])]
    except Exception as e:
        return {"models": [], "connected": False, "error": str(e)}
    return {"models": names, "connected": True}
|
|
|
|
@app.get("/api/settings")
def get_settings(user: dict=Depends(require_auth)):
    """Return the current settings (stored values with defaults filled in)."""
    current = _load_settings()
    return current
|
|
|
|
@app.post("/api/settings")
def save_settings_endpoint(
    stt_ollama_model: str = Form(""),
    ocr_ollama_model: str = Form(""),
    cpu_threads: str = Form("0"),
    stt_timeout: str = Form("0"),
    ollama_timeout: str = Form("600"),
    user: dict = Depends(require_auth),
):
    """Replace the persisted settings with the submitted form values.

    The numeric fields arrive as strings; each is converted to a non-negative
    int, falling back to its default when the text is not an integer.

    NOTE(review): any authenticated user can change these global settings --
    confirm whether this should be restricted to admins.

    Returns:
        ``{"ok": True, "settings": <saved dict>}``.
    """
    def _int(v, d):
        # Negative numbers are clamped to 0; non-numeric input yields the default.
        try:
            return max(0, int(v))
        except (TypeError, ValueError):
            return d

    data = {
        "stt_ollama_model": stt_ollama_model,
        "ocr_ollama_model": ocr_ollama_model,
        "cpu_threads": _int(cpu_threads, 0),
        "stt_timeout": _int(stt_timeout, 0),
        "ollama_timeout": _int(ollama_timeout, 600),
    }
    _save_settings(data)
    return {"ok": True, "settings": data}
|
|
|
|
@app.get("/api/admin/users")
def admin_list_users(user: dict=Depends(require_admin)):
    """Admin-only: return whatever list_users() reports, under the key "users"."""
    users = list_users()
    return {"users": users}
|
|
|
|
@app.post("/api/admin/users")
def admin_create_user(
    username: str = Form(...),
    password: str = Form(...),
    perm_stt: str = Form("false"),
    perm_ocr: str = Form("false"),
    allowed_stt_models: str = Form(""),  # comma-separated model names
    allowed_ocr_models: str = Form(""),
    user: dict = Depends(require_admin),
):
    """Admin-only: create a user with the given permissions.

    ``perm_stt`` / ``perm_ocr`` are true only for the text "true"
    (case-insensitive). The model lists are comma-separated; blank items are
    dropped.

    Raises:
        HTTPException: 400 with create_user()'s message when it reports failure.
    """
    def _parse_models(s):
        stripped = (part.strip() for part in s.split(","))
        return [name for name in stripped if name]

    perms = {}
    perms["stt"] = perm_stt.lower() == "true"
    perms["ocr"] = perm_ocr.lower() == "true"
    perms["allowed_stt_models"] = _parse_models(allowed_stt_models)
    perms["allowed_ocr_models"] = _parse_models(allowed_ocr_models)

    ok, msg = create_user(username, password, perms)
    if ok:
        return {"ok": True, "message": msg}
    raise HTTPException(400, msg)
|
|
|
|
@app.put("/api/admin/users/{username}")
def admin_update_user(
    username: str,
    perm_stt: str = Form("false"),
    perm_ocr: str = Form("false"),
    password: str = Form(""),
    allowed_stt_models: str = Form(""),
    allowed_ocr_models: str = Form(""),
    user: dict = Depends(require_admin),
):
    """Admin-only: update a user's permissions and, optionally, password.

    An empty ``password`` is passed to update_user() as None. The model lists
    are comma-separated; blank items are dropped.

    Raises:
        HTTPException: 400 with update_user()'s message when it reports failure.
    """
    def _parse_models(s):
        stripped = (part.strip() for part in s.split(","))
        return [name for name in stripped if name]

    perms = {}
    perms["stt"] = perm_stt.lower() == "true"
    perms["ocr"] = perm_ocr.lower() == "true"
    perms["allowed_stt_models"] = _parse_models(allowed_stt_models)
    perms["allowed_ocr_models"] = _parse_models(allowed_ocr_models)

    new_password = password or None
    ok, msg = update_user(username, perms, new_password)
    if ok:
        return {"ok": True, "message": msg}
    raise HTTPException(400, msg)
|
|
|
|
@app.delete("/api/admin/users/{username}")
def admin_delete_user(username: str, user: dict=Depends(require_admin)):
    """Admin-only: delete a user.

    Raises:
        HTTPException: 400 with delete_user()'s message when it reports failure.
    """
    ok, msg = delete_user(username)
    if ok:
        return {"ok": True, "message": msg}
    raise HTTPException(400, msg)
|
|
|
|
@app.post("/api/cleanup")
def cleanup(user: dict=Depends(require_auth)):
    """Purge expired output files now and report how many were removed."""
    removed_count = _cleanup_outputs()
    return {"removed": removed_count}
|
|
|
|
|
|
# ════════════════════════════════════════════════════════════════
|
|
# 유틸
|
|
# ════════════════════════════════════════════════════════════════
|
|
def _check_size(request: Request):
    """Reject a request early when its declared Content-Length is too large.

    This is only a pre-check based on the header; requests without the
    header pass through here.

    Raises:
        HTTPException: 400 when the header is not an integer, 413 when the
            declared size exceeds MAX_UPLOAD_BYTES.
    """
    cl = request.headers.get("content-length")
    if not cl:
        return
    try:
        declared = int(cl)
    except ValueError:
        # A malformed header is a client error, not a server failure.
        raise HTTPException(400, "잘못된 Content-Length 헤더") from None
    if declared > MAX_UPLOAD_BYTES:
        raise HTTPException(413, f"파일이 너무 큽니다. 최대 {MAX_UPLOAD_BYTES//1024//1024}MB")
|
|
|
|
def _cleanup_outputs() -> int:
    """Delete entries in OUTPUT_DIR older than OUTPUT_KEEP_SECS.

    Age is judged by modification time. A retention of 0 disables cleanup.
    Entries that cannot be inspected or removed (already gone, a directory,
    permission problems) are skipped; only OSError is tolerated, so
    programming errors and interrupts are no longer swallowed.

    Returns:
        Number of entries removed.
    """
    if OUTPUT_KEEP_SECS == 0:
        return 0

    cutoff = time.time() - OUTPUT_KEEP_SECS
    removed = 0
    for path in glob.glob(os.path.join(OUTPUT_DIR, "*")):
        try:
            if os.path.getmtime(path) < cutoff:
                os.remove(path)
                removed += 1
        except OSError:
            continue
    return removed
|
|
|
|
def _ext(fn): return fn.rsplit(".", 1)[-1].lower() if "." in fn else ""
|
|
|
|
async def _save(file, path):
    """Stream an upload to *path* in 1 MiB chunks, enforcing MAX_UPLOAD_BYTES.

    If anything goes wrong -- the size limit is exceeded, the read or write
    fails, or the request is cancelled -- the partially written file is
    removed before the error propagates, so aborted uploads do not pile up
    on disk.

    Raises:
        HTTPException: 413 when more than MAX_UPLOAD_BYTES have been received.
    """
    written = 0
    try:
        async with aiofiles.open(path, "wb") as f:
            while chunk := await file.read(1024 * 1024):
                written += len(chunk)
                if written > MAX_UPLOAD_BYTES:
                    raise HTTPException(413, f"파일이 너무 큽니다. 최대 {MAX_UPLOAD_BYTES//1024//1024}MB")
                await f.write(chunk)
    except BaseException:
        # BaseException so that task cancellation also triggers the cleanup.
        # The file is already closed here (the async-with block has exited).
        try:
            os.remove(path)
        except OSError:
            pass
        raise
|
|
|
|
# Serve the front-end from ./static at the site root (html=True).
# NOTE(review): this is registered after all /api routes above; a mount at "/"
# presumably matches every remaining path, so keep it last -- confirm.
app.mount("/", StaticFiles(directory="static", html=True), name="static")
|