# Files
# whisper-stt/app/main.py
#
# 385 lines
# 20 KiB
# Python

import glob
import json
import logging
import os
import threading
import time
import uuid
from datetime import datetime
from pathlib import Path

import aiofiles
import httpx
import psutil
from fastapi import FastAPI, UploadFile, File, HTTPException, Depends, Form, Request
from fastapi.responses import FileResponse
from fastapi.staticfiles import StaticFiles

from auth import (authenticate, create_access_token, init_users,
                  require_auth, require_admin, require_stt, require_ocr,
                  list_users, create_user, update_user, delete_user)
from tasks import celery_app, transcribe_task
from ocr_tasks import ocr_task
# FastAPI application object; every route in this module is registered on it.
app = FastAPI(title="VoiceScript API")

# Deployment configuration, each value overridable through the environment.
UPLOAD_DIR = os.getenv("UPLOAD_DIR", "/data/uploads")
OUTPUT_DIR = os.getenv("OUTPUT_DIR", "/data/outputs")
OLLAMA_URL = os.getenv("OLLAMA_URL", "http://192.168.0.126:11434")
MAX_UPLOAD_BYTES = int(os.getenv("MAX_UPLOAD_MB", "500")) * 1024 * 1024
OUTPUT_KEEP_SECS = int(os.getenv("OUTPUT_KEEP_HOURS", "48")) * 3600

# Settings and history are stored in the parent of the upload directory.
DATA_DIR = Path(UPLOAD_DIR).parent
SETTINGS_FILE = DATA_DIR / "settings.json"
HISTORY_FILE = DATA_DIR / "history.json"
HISTORY_MAX = 300  # history is truncated to this many records on append

os.makedirs(UPLOAD_DIR, exist_ok=True)
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Accepted upload extensions (lower-case, without the dot).
AUDIO_EXT = {
    "mp3", "mp4", "wav", "m4a", "ogg", "flac",
    "aac", "wma", "webm", "mkv", "avi", "mov",
}
IMAGE_EXT = {
    "jpg", "jpeg", "png", "bmp",
    "tiff", "tif", "webp", "gif",
}

# Values used for any key missing from the settings file.
_DEFAULT_SETTINGS = {
    "stt_ollama_model": "",
    "ocr_ollama_model": "granite3.2-vision:latest",
    "cpu_threads": 0,
    "stt_timeout": 0,  # 0 = unlimited
    "ollama_timeout": 600,  # seconds
}

# Serializes access to HISTORY_FILE between threads of this process.
_hist_lock = threading.Lock()
# ── 설정 I/O ─────────────────────────────────────────────────
def _load_settings() -> dict:
    """Return the persisted settings merged with the defaults.

    Keys absent from the settings file are filled in from
    ``_DEFAULT_SETTINGS``. A missing file yields a copy of the defaults.
    An unreadable, malformed, or non-object file is logged and also yields
    the defaults, so a damaged settings file cannot take down every
    endpoint that reads settings.

    Returns:
        A new dict; mutating it does not affect ``_DEFAULT_SETTINGS``.
    """
    try:
        with open(SETTINGS_FILE, "r", encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        return dict(_DEFAULT_SETTINGS)
    except (OSError, ValueError):
        # json.JSONDecodeError and UnicodeDecodeError are ValueError subclasses.
        logging.getLogger(__name__).exception(
            "Could not read settings file %s; using defaults", SETTINGS_FILE
        )
        return dict(_DEFAULT_SETTINGS)

    if not isinstance(data, dict):
        logging.getLogger(__name__).warning(
            "Settings file %s does not contain a JSON object; using defaults",
            SETTINGS_FILE,
        )
        return dict(_DEFAULT_SETTINGS)

    for key, default in _DEFAULT_SETTINGS.items():
        data.setdefault(key, default)
    return data
def _save_settings(data: dict):
    """Persist *data* to SETTINGS_FILE as indented UTF-8 JSON.

    The parent directory is created first if it does not exist yet.
    """
    settings_dir = SETTINGS_FILE.parent
    settings_dir.mkdir(parents=True, exist_ok=True)
    with open(SETTINGS_FILE, "w", encoding="utf-8") as out:
        json.dump(data, out, ensure_ascii=False, indent=2)
# ── 이력 I/O ─────────────────────────────────────────────────
def _load_history() -> list:
    """Return all history records stored in HISTORY_FILE.

    Best-effort: a missing file yields ``[]``; an unreadable or malformed
    file, or one whose top-level value is not a list, is logged and also
    yields ``[]`` so callers can always iterate the result.
    """
    with _hist_lock:
        try:
            with open(HISTORY_FILE, "r", encoding="utf-8") as f:
                history = json.load(f)
        except FileNotFoundError:
            return []
        except Exception:
            logging.getLogger(__name__).exception(
                "Could not read history file %s", HISTORY_FILE
            )
            return []

    if not isinstance(history, list):
        logging.getLogger(__name__).warning(
            "History file %s does not contain a JSON list", HISTORY_FILE
        )
        return []
    return history
def append_history(record: dict):
    """Insert *record* at the front of the history file.

    The stored list is truncated to ``HISTORY_MAX`` entries. The operation
    stays best-effort (it never raises for I/O or JSON problems), but
    failures are logged instead of being silently discarded.

    If the existing file cannot be read or parsed, the history is restarted
    with just *record*; otherwise one damaged file would cause every later
    record to be dropped.
    """
    log = logging.getLogger(__name__)
    with _hist_lock:
        history = []
        try:
            with open(HISTORY_FILE, "r", encoding="utf-8") as f:
                loaded = json.load(f)
            if isinstance(loaded, list):
                history = loaded
            else:
                log.warning(
                    "History file %s is not a JSON list; starting a new history",
                    HISTORY_FILE,
                )
        except FileNotFoundError:
            pass  # first record: nothing to load yet
        except (OSError, ValueError):
            log.exception(
                "History file %s is unreadable; starting a new history",
                HISTORY_FILE,
            )

        try:
            history.insert(0, record)
            history = history[:HISTORY_MAX]
            HISTORY_FILE.parent.mkdir(parents=True, exist_ok=True)
            with open(HISTORY_FILE, "w", encoding="utf-8") as f:
                json.dump(history, f, ensure_ascii=False, indent=2)
        except Exception:
            log.exception("Could not write history file %s", HISTORY_FILE)
def _text_preview(text, limit: int = 200) -> str:
    """Return at most *limit* leading characters of *text* ('' for None)."""
    # NOTE(review): the original appended an empty string when truncating;
    # an ellipsis suffix may have been intended. Behavior is kept as-is.
    return (text or "")[:limit]


def _update_history(file_id: str, result: dict):
    """Mark the "processing" history record *file_id* as successful.

    The first record whose ``id`` equals *file_id* and whose ``status`` is
    "processing" gets ``status = "success"`` and an ``output`` summary built
    from *result* (STT fields when the record type is "stt", OCR fields
    otherwise).

    The file is rewritten only when a record was actually changed, so
    repeated calls for an already-finished id do no disk writes.
    Best-effort: errors are logged, never raised.
    """
    with _hist_lock:
        if not HISTORY_FILE.exists():
            return
        try:
            with open(HISTORY_FILE, "r", encoding="utf-8") as f:
                history = json.load(f)

            entry = next(
                (
                    h for h in history
                    if h.get("id") == file_id and h.get("status") == "processing"
                ),
                None,
            )
            if entry is None:
                return  # nothing pending under this id; leave the file untouched

            entry["status"] = "success"
            if entry.get("type") == "stt":
                entry["output"] = {
                    "filename": result.get("output_file", ""),
                    "language": result.get("language", ""),
                    "duration_s": result.get("duration", 0),
                    "segments": len(result.get("segments", [])),
                    "text_preview": _text_preview(result.get("text", "")),
                    "ollama_used": result.get("ollama_used", False),
                    "ollama_model": result.get("ollama_model", ""),
                }
            else:
                entry["output"] = {
                    "txt_file": result.get("txt_file", ""),
                    "xlsx_file": result.get("xlsx_file", ""),
                    "line_count": result.get("line_count", 0),
                    "table_count": len(result.get("tables", [])),
                    "backend": result.get("backend", ""),
                    "ollama_model": result.get("ollama_model", ""),
                    "text_preview": _text_preview(result.get("full_text", "")),
                }

            with open(HISTORY_FILE, "w", encoding="utf-8") as f:
                json.dump(history, f, ensure_ascii=False, indent=2)
        except Exception:
            logging.getLogger(__name__).exception(
                "Could not record success for history id %s", file_id
            )
def _update_history_fail(file_id: str, error_msg: str):
    """Mark the "processing" history record *file_id* as failed.

    The first matching record gets ``status = "failed"`` and
    ``output = {"error": <first 300 characters of error_msg>}``.

    The file is rewritten only when a record was actually changed.
    Best-effort: errors are logged, never raised.
    """
    with _hist_lock:
        if not HISTORY_FILE.exists():
            return
        try:
            with open(HISTORY_FILE, "r", encoding="utf-8") as f:
                history = json.load(f)

            entry = next(
                (
                    h for h in history
                    if h.get("id") == file_id and h.get("status") == "processing"
                ),
                None,
            )
            if entry is None:
                return  # nothing pending under this id; skip the rewrite

            entry["status"] = "failed"
            entry["output"] = {"error": error_msg[:300]}

            with open(HISTORY_FILE, "w", encoding="utf-8") as f:
                json.dump(history, f, ensure_ascii=False, indent=2)
        except Exception:
            logging.getLogger(__name__).exception(
                "Could not record failure for history id %s", file_id
            )
def delete_history_item(history_id: str) -> bool:
    """Remove every history record whose ``id`` equals *history_id*.

    Returns:
        True when at least one record was removed and the file was
        rewritten; False when the file is missing, no record matched, or
        an error occurred (the error is logged).
    """
    with _hist_lock:
        if not HISTORY_FILE.exists():
            return False
        try:
            with open(HISTORY_FILE, "r", encoding="utf-8") as f:
                history = json.load(f)

            remaining = [h for h in history if h.get("id") != history_id]
            if len(remaining) == len(history):
                return False

            with open(HISTORY_FILE, "w", encoding="utf-8") as f:
                json.dump(remaining, f, ensure_ascii=False, indent=2)
            return True
        except Exception:
            logging.getLogger(__name__).exception(
                "Could not delete history id %s", history_id
            )
            return False
def clear_history():
    """Reset HISTORY_FILE to an empty JSON list; do nothing if it is absent."""
    with _hist_lock:
        if not HISTORY_FILE.exists():
            return
        HISTORY_FILE.write_text("[]", encoding="utf-8")
# ════════════════════════════════════════════════════════════════
# 시작 이벤트
# ════════════════════════════════════════════════════════════════
@app.on_event("startup")
async def on_startup():
    """Startup hook: run user-store initialization, then one output cleanup."""
    init_users()
    _cleanup_outputs()
# ════════════════════════════════════════════════════════════════
# 인증
# ════════════════════════════════════════════════════════════════
@app.post("/api/login")
def login(username: str = Form(...), password: str = Form(...)):
    """Exchange form credentials for a bearer token.

    Raises:
        HTTPException: 401 when ``authenticate`` returns a falsy value.
    """
    account = authenticate(username, password)
    if account:
        token = create_access_token(username)
        return {"access_token": token, "token_type": "bearer"}
    raise HTTPException(401, "아이디 또는 비밀번호가 올바르지 않습니다")
@app.get("/api/me")
def me(user: dict = Depends(require_auth)):
    """Return the caller's username, role, and permission flags.

    ``role`` defaults to "user" and ``permissions`` to both flags False when
    the user record lacks them.
    """
    no_permissions = {"stt": False, "ocr": False}
    return {
        "username": user["username"],
        "role": user.get("role", "user"),
        "permissions": user.get("permissions", no_permissions),
    }
# ════════════════════════════════════════════════════════════════
# 시스템 정보
# ════════════════════════════════════════════════════════════════
@app.get("/api/system")
def system_info(user: dict = Depends(require_auth)):
    """Report RAM, swap, and CPU statistics plus the runtime-related settings.

    Sizes are reported in GiB rounded to one decimal. ``cpu_percent`` is
    sampled over a 0.3 second interval, so the call blocks for that long.
    """
    gib = 1024**3
    memory = psutil.virtual_memory()
    swap_mem = psutil.swap_memory()
    settings = _load_settings()
    return {
        "ram_total_gb": round(memory.total / gib, 1),
        "ram_used_gb": round(memory.used / gib, 1),
        "ram_avail_gb": round(memory.available / gib, 1),
        "ram_percent": memory.percent,
        "swap_total_gb": round(swap_mem.total / gib, 1),
        "swap_used_gb": round(swap_mem.used / gib, 1),
        "cpu_logical": psutil.cpu_count(logical=True),
        "cpu_physical": psutil.cpu_count(logical=False),
        "cpu_percent": psutil.cpu_percent(interval=0.3),
        "cpu_threads_setting": settings.get("cpu_threads", 0),
        "stt_timeout": settings.get("stt_timeout", 0),
        "ollama_timeout": settings.get("ollama_timeout", 600),
    }
# ════════════════════════════════════════════════════════════════
# STT
# ════════════════════════════════════════════════════════════════
@app.post("/api/transcribe")
async def transcribe(
    request: Request,
    file: UploadFile = File(...),
    use_ollama: str = Form("false"),
    ollama_model: str = Form(""),
    user: dict = Depends(require_stt),
):
    """Accept an audio/video upload and queue it for transcription.

    The upload is validated (declared size, extension), stored under
    ``UPLOAD_DIR`` as ``<file_id>.<ext>``, recorded in the history as
    "processing", and handed to ``transcribe_task``.

    The Celery task is submitted with ``task_id=file_id``. ``get_status``
    passes the polled task id to ``_update_history``, which matches on the
    history record id, so the two ids must be equal for a record to ever
    leave the "processing" state.

    Args:
        use_ollama: "true" (case-insensitive) enables the Ollama step.
        ollama_model: Model name; when blank and Ollama is enabled, the
            ``stt_ollama_model`` setting is used.

    Returns:
        ``task_id`` (equal to ``file_id``), ``file_id``, and the original
        filename.

    Raises:
        HTTPException: 400 for an unsupported extension; 413 when the
            upload exceeds the size limit.
    """
    _check_size(request)
    # UploadFile.filename may be None; treat that as "no extension".
    ext = _ext(file.filename or "")
    if ext not in AUDIO_EXT:
        raise HTTPException(400, f"지원하지 않는 형식: {', '.join(sorted(AUDIO_EXT))}")

    file_id = str(uuid.uuid4())
    save_path = os.path.join(UPLOAD_DIR, f"{file_id}.{ext}")
    await _save(file, save_path)
    file_size = os.path.getsize(save_path)

    _use_ollama = use_ollama.lower() == "true"
    s = _load_settings()
    if _use_ollama and not ollama_model.strip():
        ollama_model = s.get("stt_ollama_model", "")

    append_history({
        "id": file_id,
        "type": "stt",
        "status": "processing",
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "username": user["username"],
        "input": {
            "filename": file.filename,
            "size_bytes": file_size,
            "format": ext.upper(),
        },
        "settings": {
            "model": os.getenv("WHISPER_MODEL", "medium"),
            "language": os.getenv("WHISPER_LANGUAGE", "ko"),
            "compute_type": os.getenv("WHISPER_COMPUTE_TYPE", "int8"),
            "cpu_threads": s.get("cpu_threads", 0),
            "stt_timeout": s.get("stt_timeout", 0),
            "use_ollama": _use_ollama,
            "ollama_model": ollama_model if _use_ollama else "",
        },
        "output": None,
    })

    task = transcribe_task.apply_async(
        args=(file_id, save_path, _use_ollama, ollama_model),
        task_id=file_id,
    )
    return {"task_id": task.id, "file_id": file_id, "filename": file.filename}
# ════════════════════════════════════════════════════════════════
# OCR
# ════════════════════════════════════════════════════════════════
@app.post("/api/ocr")
async def ocr(
    request: Request,
    file: UploadFile = File(...),
    mode: str = Form("text"),
    backend: str = Form("paddle"),
    ollama_model: str = Form(""),
    custom_prompt: str = Form(""),
    user: dict = Depends(require_ocr),
):
    """Accept an image upload and queue it for OCR.

    The upload is validated (declared size, extension), stored under
    ``UPLOAD_DIR`` as ``<file_id>.<ext>``, recorded in the history as
    "processing", and handed to ``ocr_task``.

    The Celery task is submitted with ``task_id=file_id``. ``get_status``
    passes the polled task id to ``_update_history``, which matches on the
    history record id, so the two ids must be equal for a record to ever
    leave the "processing" state.

    Args:
        mode: "text" or "structure"; any other value falls back to "text".
        backend: "paddle" or "ollama"; any other value falls back to
            "paddle".
        ollama_model: Model name; when blank and the backend is "ollama",
            the ``ocr_ollama_model`` setting is used.
        custom_prompt: Passed to the task unchanged; only its first 200
            characters are stored in the history.

    Raises:
        HTTPException: 400 for an unsupported extension; 413 when the
            upload exceeds the size limit.
    """
    _check_size(request)
    # UploadFile.filename may be None; treat that as "no extension".
    ext = _ext(file.filename or "")
    if ext not in IMAGE_EXT:
        raise HTTPException(400, f"지원하지 않는 형식: {', '.join(sorted(IMAGE_EXT))}")
    if mode not in ("text", "structure"):
        mode = "text"
    if backend not in ("paddle", "ollama"):
        backend = "paddle"

    s = _load_settings()
    if backend == "ollama" and not ollama_model.strip():
        ollama_model = s.get("ocr_ollama_model", "granite3.2-vision:latest")

    file_id = str(uuid.uuid4())
    save_path = os.path.join(UPLOAD_DIR, f"{file_id}.{ext}")
    await _save(file, save_path)
    file_size = os.path.getsize(save_path)

    append_history({
        "id": file_id,
        "type": "ocr",
        "status": "processing",
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "username": user["username"],
        "input": {
            "filename": file.filename,
            "size_bytes": file_size,
            "format": ext.upper(),
        },
        "settings": {
            "backend": backend,
            "mode": mode,
            "ocr_lang": os.getenv("OCR_LANG", "korean"),
            "ollama_model": ollama_model if backend == "ollama" else "",
            "ollama_timeout": s.get("ollama_timeout", 600),
            "custom_prompt": custom_prompt[:200] if custom_prompt else "",
        },
        "output": None,
    })

    task = ocr_task.apply_async(
        args=(file_id, save_path, mode, backend, ollama_model, custom_prompt),
        task_id=file_id,
    )
    return {
        "task_id": task.id,
        "file_id": file_id,
        "filename": file.filename,
        "mode": mode,
        "backend": backend,
    }
# ════════════════════════════════════════════════════════════════
# 상태
# ════════════════════════════════════════════════════════════════
@app.get("/api/status/{task_id}")
def get_status(task_id: str, user: dict = Depends(require_auth)):
    """Report the state of a Celery task and sync the history on completion.

    The task state is read once so all branches see the same value and the
    result backend is not queried again for each comparison.

    On SUCCESS / FAILURE the history is updated through
    ``_update_history`` / ``_update_history_fail`` using *task_id*; those
    helpers match on the history record id, so the update only takes effect
    when the task id equals that record id.

    Returns:
        A dict with ``state`` and ``progress``; PENDING, PROGRESS and
        FAILURE also carry ``message``, and SUCCESS merges in the task's
        result dict.
    """
    r = celery_app.AsyncResult(task_id)
    state = r.state

    if state == "PENDING":
        return {"state": "pending", "progress": 0, "message": "대기 중..."}

    if state == "PROGRESS":
        # Guard against non-dict progress metadata.
        meta = r.info if isinstance(r.info, dict) else {}
        return {
            "state": "progress",
            "progress": meta.get("progress", 0),
            "message": meta.get("message", "처리 중..."),
        }

    if state == "SUCCESS":
        # A task that returned something other than a dict contributes nothing.
        result = r.result if isinstance(r.result, dict) else {}
        _update_history(task_id, result)
        return {"state": "success", "progress": 100, **result}

    if state == "FAILURE":
        message = str(r.info)
        _update_history_fail(task_id, message)
        return {"state": "failure", "progress": 0, "message": message}

    return {"state": state.lower(), "progress": 0}
# ════════════════════════════════════════════════════════════════
# 이력
# ════════════════════════════════════════════════════════════════
@app.get("/api/history")
def get_history(page: int = 1, per_page: int = 15, type_: str = "", user: dict = Depends(require_auth)):
    """Return one page of history records visible to the caller.

    Non-admin callers only see records whose ``username`` equals their own.
    ``type_`` restricts the result to "stt" or "ocr" records; any other
    value applies no type filter.

    ``page`` and ``per_page`` are clamped to a minimum of 1. Zero or
    negative values would otherwise turn into negative slice indices and
    return an arbitrary window of records.

    Returns:
        ``total`` (after filtering), the effective ``page`` and
        ``per_page``, and the ``items`` on that page.
    """
    page = max(1, page)
    per_page = max(1, per_page)

    history = _load_history()
    if user.get("role") != "admin":
        history = [h for h in history if h.get("username") == user["username"]]
    if type_ in ("stt", "ocr"):
        history = [h for h in history if h.get("type") == type_]

    start = (page - 1) * per_page
    return {
        "total": len(history),
        "page": page,
        "per_page": per_page,
        "items": history[start:start + per_page],
    }
@app.delete("/api/history/{history_id}")
def delete_history(history_id: str, user: dict = Depends(require_auth)):
    """Delete one history record.

    Admins may delete any record. Other users may only delete records whose
    ``username`` equals their own, mirroring the visibility rule applied by
    ``get_history``. A record owned by someone else is reported as
    not found so its existence is not revealed.

    Raises:
        HTTPException: 404 when the record does not exist, is not visible
            to the caller, or could not be deleted.
    """
    if user.get("role") != "admin":
        owns_record = any(
            h.get("id") == history_id and h.get("username") == user["username"]
            for h in _load_history()
        )
        if not owns_record:
            raise HTTPException(404, "이력을 찾을 수 없습니다")

    if not delete_history_item(history_id):
        raise HTTPException(404, "이력을 찾을 수 없습니다")
    return {"ok": True}
@app.delete("/api/history")
def clear_all_history(user: dict = Depends(require_admin)):
    """Empty the whole history file (route guarded by ``require_admin``)."""
    clear_history()
    return {"ok": True}
# ════════════════════════════════════════════════════════════════
# 다운로드 / Ollama / 설정 / 관리자
# ════════════════════════════════════════════════════════════════
@app.get("/api/download/{filename}")
def download(filename: str, user: dict = Depends(require_auth)):
    """Send a generated output file from ``OUTPUT_DIR``.

    The name must be a plain file name: anything containing "..", "/" or
    "\\" is rejected. Only regular files are served, so a name that
    resolves to a directory (for example ".") yields 404 instead of an
    error while building the response.

    ``.xlsx`` files get the spreadsheet media type; everything else is sent
    as ``text/plain``.

    NOTE(review): any authenticated user can fetch any file in OUTPUT_DIR
    if they know its name; confirm whether per-user ownership is required.

    Raises:
        HTTPException: 400 for an unsafe name, 404 when no such file exists.
    """
    if ".." in filename or "/" in filename or "\\" in filename:
        raise HTTPException(400, "잘못된 파일명")

    path = os.path.join(OUTPUT_DIR, filename)
    if not os.path.isfile(path):
        raise HTTPException(404, "파일을 찾을 수 없습니다")

    if filename.endswith(".xlsx"):
        media = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
    else:
        media = "text/plain"
    return FileResponse(path, media_type=media, filename=filename)
@app.get("/api/ollama/models")
def ollama_models(user: dict = Depends(require_auth)):
    """List model names reported by the Ollama server's ``/api/tags``.

    Any failure (connection, HTTP status, unexpected payload) is reported
    in-band as ``connected: False`` with the error text instead of raising.
    """
    try:
        resp = httpx.get(f"{OLLAMA_URL}/api/tags", timeout=8.0)
        resp.raise_for_status()
        names = [entry["name"] for entry in resp.json().get("models", [])]
    except Exception as exc:
        return {"models": [], "connected": False, "error": str(exc)}
    return {"models": names, "connected": True}
@app.get("/api/settings")
def get_settings(user: dict = Depends(require_auth)):
    """Return the current settings as produced by ``_load_settings``."""
    current = _load_settings()
    return current
@app.post("/api/settings")
def save_settings_endpoint(
    stt_ollama_model: str = Form(""),
    ocr_ollama_model: str = Form(""),
    cpu_threads: str = Form("0"),
    stt_timeout: str = Form("0"),
    ollama_timeout: str = Form("600"),
    user: dict = Depends(require_auth),
):
    """Replace the stored settings with the submitted form values.

    Numeric fields arrive as strings. Each is parsed as an integer and
    clamped to be non-negative; a value that cannot be parsed falls back to
    that field's default (0, 0 and 600 respectively).

    NOTE(review): this route only requires ``require_auth`` although the
    settings are global; confirm whether it should be admin-only.

    Returns:
        ``{"ok": True, "settings": <the dict that was saved>}``.
    """
    def _int(v, default):
        # Only conversion failures are expected; nothing else is swallowed.
        try:
            return max(0, int(v))
        except (TypeError, ValueError):
            return default

    data = {
        "stt_ollama_model": stt_ollama_model,
        "ocr_ollama_model": ocr_ollama_model,
        "cpu_threads": _int(cpu_threads, 0),
        "stt_timeout": _int(stt_timeout, 0),
        "ollama_timeout": _int(ollama_timeout, 600),
    }
    _save_settings(data)
    return {"ok": True, "settings": data}
@app.get("/api/admin/users")
def admin_list_users(user: dict = Depends(require_admin)):
    """Return whatever ``list_users`` yields under the ``users`` key."""
    accounts = list_users()
    return {"users": accounts}
@app.post("/api/admin/users")
def admin_create_user(
    username: str = Form(...),
    password: str = Form(...),
    perm_stt: str = Form("false"),
    perm_ocr: str = Form("false"),
    user: dict = Depends(require_admin),
):
    """Create a user with the given STT/OCR permission flags.

    A permission form field grants the permission only when it equals
    "true" (case-insensitive).

    Raises:
        HTTPException: 400 carrying the message from ``create_user`` when
            it reports failure.
    """
    permissions = {
        "stt": perm_stt.lower() == "true",
        "ocr": perm_ocr.lower() == "true",
    }
    created, message = create_user(username, password, permissions)
    if created:
        return {"ok": True, "message": message}
    raise HTTPException(400, message)
@app.put("/api/admin/users/{username}")
def admin_update_user(
    username: str,
    perm_stt: str = Form("false"),
    perm_ocr: str = Form("false"),
    password: str = Form(""),
    user: dict = Depends(require_admin),
):
    """Update a user's permission flags and, optionally, the password.

    An empty ``password`` is forwarded to ``update_user`` as ``None``.

    Raises:
        HTTPException: 400 carrying the message from ``update_user`` when
            it reports failure.
    """
    permissions = {
        "stt": perm_stt.lower() == "true",
        "ocr": perm_ocr.lower() == "true",
    }
    new_password = password or None
    updated, message = update_user(username, permissions, new_password)
    if updated:
        return {"ok": True, "message": message}
    raise HTTPException(400, message)
@app.delete("/api/admin/users/{username}")
def admin_delete_user(username: str, user: dict = Depends(require_admin)):
    """Delete a user account.

    Raises:
        HTTPException: 400 carrying the message from ``delete_user`` when
            it reports failure.
    """
    deleted, message = delete_user(username)
    if deleted:
        return {"ok": True, "message": message}
    raise HTTPException(400, message)
@app.post("/api/cleanup")
def cleanup(user: dict = Depends(require_auth)):
    """Run an output cleanup now and report how many files were removed."""
    removed_count = _cleanup_outputs()
    return {"removed": removed_count}
# ════════════════════════════════════════════════════════════════
# 유틸
# ════════════════════════════════════════════════════════════════
def _check_size(request):
cl = request.headers.get("content-length")
if cl and int(cl) > MAX_UPLOAD_BYTES: raise HTTPException(413, f"파일이 너무 큽니다. 최대 {MAX_UPLOAD_BYTES//1024//1024}MB")
def _cleanup_outputs():
    """Delete entries in ``OUTPUT_DIR`` older than the retention period.

    Cleanup is disabled when ``OUTPUT_KEEP_SECS`` is zero or negative. A
    negative retention would otherwise put the cutoff in the future and
    delete every output, including ones just produced.

    Entries that cannot be inspected or removed (for example directories,
    or files deleted concurrently) are logged and skipped.

    Returns:
        The number of entries removed.
    """
    if OUTPUT_KEEP_SECS <= 0:
        return 0

    cutoff = time.time() - OUTPUT_KEEP_SECS
    removed = 0
    for path in glob.glob(os.path.join(OUTPUT_DIR, "*")):
        try:
            if os.path.getmtime(path) < cutoff:
                os.remove(path)
                removed += 1
        except OSError:
            logging.getLogger(__name__).warning(
                "Could not clean up %s", path, exc_info=True
            )
    return removed
def _ext(fn): return fn.rsplit(".",1)[-1].lower() if "." in fn else ""
async def _save(file, path):
    """Stream an upload to *path* in 1 MiB chunks, enforcing the size limit.

    If anything goes wrong while writing (size limit exceeded, read/write
    error, or the request being cancelled), the partially written file is
    removed before the exception propagates, so aborted uploads leave
    nothing behind in the upload directory.

    Raises:
        HTTPException: 413 once more than ``MAX_UPLOAD_BYTES`` were read.
    """
    written = 0
    try:
        async with aiofiles.open(path, "wb") as f:
            while chunk := await file.read(1024 * 1024):
                written += len(chunk)
                if written > MAX_UPLOAD_BYTES:
                    raise HTTPException(413, f"파일이 너무 큽니다. 최대 {MAX_UPLOAD_BYTES//1024//1024}MB")
                await f.write(chunk)
    except BaseException:
        # BaseException so that task cancellation (client disconnect) also
        # cleans up; the file is already closed by the context manager here.
        try:
            os.remove(path)
        except OSError:
            pass  # the file was never created or is already gone
        raise
# Serve the files in ./static at the site root.
# NOTE(review): this catch-all mount is registered after every /api route,
# presumably so those routes are matched first — confirm before reordering.
app.mount("/", StaticFiles(directory="static", html=True), name="static")