feat: mobile-responsive layout + conversion timeout settings

root
2026-04-23 02:33:57 +09:00
parent 248ac1deea
commit 4af1279a08
9 changed files with 967 additions and 1022 deletions


@@ -9,8 +9,6 @@ RUN apt-get update && apt-get install -y \
libxext6 \
libxrender1 \
libgl1 \
libgles2 \
libegl1 \
wget \
curl \
&& rm -rf /var/lib/apt/lists/*
@@ -19,8 +17,9 @@ WORKDIR /app
COPY requirements.txt .
# PaddlePaddle CPU — PyPI 공식 서버
RUN pip install --no-cache-dir paddlepaddle==3.0.0
# PaddlePaddle CPU (AMD64) — paddleocr 3.x 호환
RUN pip install --no-cache-dir paddlepaddle==3.0.0 \
-i https://pypi.tuna.tsinghua.edu.cn/simple
# 나머지 패키지
RUN pip install --no-cache-dir -r requirements.txt


@@ -1,11 +1,10 @@
import os, uuid, time, glob, json
import httpx
import aiofiles
import os, uuid, time, glob, json, threading
import psutil, httpx, aiofiles
from pathlib import Path
from datetime import datetime
from fastapi import FastAPI, UploadFile, File, HTTPException, Depends, Form, Request
from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse
from pydantic import BaseModel
from auth import (authenticate, create_access_token, init_users,
require_auth, require_admin, require_stt, require_ocr,
@@ -23,6 +22,8 @@ OUTPUT_KEEP_SECS = int(os.getenv("OUTPUT_KEEP_HOURS", "48")) * 3600
DATA_DIR = Path(UPLOAD_DIR).parent
SETTINGS_FILE = DATA_DIR / "settings.json"
HISTORY_FILE = DATA_DIR / "history.json"
HISTORY_MAX = 300
os.makedirs(UPLOAD_DIR, exist_ok=True)
os.makedirs(OUTPUT_DIR, exist_ok=True)
@@ -30,13 +31,26 @@ os.makedirs(OUTPUT_DIR, exist_ok=True)
AUDIO_EXT = {"mp3","mp4","wav","m4a","ogg","flac","aac","wma","webm","mkv","avi","mov"}
IMAGE_EXT = {"jpg","jpeg","png","bmp","tiff","tif","webp","gif"}
_DEFAULT_SETTINGS = {
"stt_ollama_model": "",
"ocr_ollama_model": "granite3.2-vision:latest",
"cpu_threads": 0,
"stt_timeout": 0, # 0 = 무제한
"ollama_timeout": 600, # 초
}
_hist_lock = threading.Lock()
# ── 설정 I/O ─────────────────────────────────────────────────
def _load_settings() -> dict:
if not SETTINGS_FILE.exists():
return {"stt_ollama_model": "", "ocr_ollama_model": "granite3.2-vision:latest"}
return dict(_DEFAULT_SETTINGS)
with open(SETTINGS_FILE, "r", encoding="utf-8") as f:
return json.load(f)
data = json.load(f)
for k, v in _DEFAULT_SETTINGS.items():
data.setdefault(k, v)
return data
def _save_settings(data: dict):
SETTINGS_FILE.parent.mkdir(parents=True, exist_ok=True)
@@ -44,6 +58,85 @@ def _save_settings(data: dict):
json.dump(data, f, ensure_ascii=False, indent=2)
# ── 이력 I/O ─────────────────────────────────────────────────
def _load_history() -> list:
with _hist_lock:
if not HISTORY_FILE.exists(): return []
try:
with open(HISTORY_FILE, "r", encoding="utf-8") as f: return json.load(f)
except: return []
def append_history(record: dict):
with _hist_lock:
try:
history = []
if HISTORY_FILE.exists():
with open(HISTORY_FILE, "r", encoding="utf-8") as f: history = json.load(f)
history.insert(0, record)
history = history[:HISTORY_MAX]
HISTORY_FILE.parent.mkdir(parents=True, exist_ok=True)
with open(HISTORY_FILE, "w", encoding="utf-8") as f: json.dump(history, f, ensure_ascii=False, indent=2)
except: pass
def _update_history(file_id: str, result: dict):
with _hist_lock:
if not HISTORY_FILE.exists(): return
try:
with open(HISTORY_FILE, "r", encoding="utf-8") as f: history = json.load(f)
for h in history:
if h.get("id") == file_id and h.get("status") == "processing":
h["status"] = "success"
if h["type"] == "stt":
h["output"] = {
"filename": result.get("output_file",""),
"language": result.get("language",""),
"duration_s": result.get("duration", 0),
"segments": len(result.get("segments",[])),
"text_preview": (result.get("text","")[:200]+"" if len(result.get("text",""))>200 else result.get("text","")),
"ollama_used": result.get("ollama_used", False),
"ollama_model": result.get("ollama_model",""),
}
else:
h["output"] = {
"txt_file": result.get("txt_file",""),
"xlsx_file": result.get("xlsx_file",""),
"line_count": result.get("line_count", 0),
"table_count": len(result.get("tables",[])),
"backend": result.get("backend",""),
"ollama_model": result.get("ollama_model",""),
"text_preview": (result.get("full_text","")[:200]+"…" if len(result.get("full_text",""))>200 else result.get("full_text","")),
}
break
with open(HISTORY_FILE, "w", encoding="utf-8") as f: json.dump(history, f, ensure_ascii=False, indent=2)
except: pass
def _update_history_fail(file_id: str, error_msg: str):
with _hist_lock:
if not HISTORY_FILE.exists(): return
try:
with open(HISTORY_FILE, "r", encoding="utf-8") as f: history = json.load(f)
for h in history:
if h.get("id") == file_id and h.get("status") == "processing":
h["status"] = "failed"; h["output"] = {"error": error_msg[:300]}; break
with open(HISTORY_FILE, "w", encoding="utf-8") as f: json.dump(history, f, ensure_ascii=False, indent=2)
except: pass
def delete_history_item(history_id: str) -> bool:
with _hist_lock:
if not HISTORY_FILE.exists(): return False
try:
with open(HISTORY_FILE, "r", encoding="utf-8") as f: history = json.load(f)
new = [h for h in history if h.get("id") != history_id]
if len(new) == len(history): return False
with open(HISTORY_FILE, "w", encoding="utf-8") as f: json.dump(new, f, ensure_ascii=False, indent=2)
return True
except: return False
def clear_history():
with _hist_lock:
if HISTORY_FILE.exists(): HISTORY_FILE.write_text("[]", encoding="utf-8")
# ════════════════════════════════════════════════════════════════
# 시작 이벤트
# ════════════════════════════════════════════════════════════════
@@ -59,16 +152,36 @@ async def on_startup():
@app.post("/api/login")
def login(username: str = Form(...), password: str = Form(...)):
user = authenticate(username, password)
if not user:
raise HTTPException(401, "아이디 또는 비밀번호가 올바르지 않습니다")
if not user: raise HTTPException(401, "아이디 또는 비밀번호가 올바르지 않습니다")
return {"access_token": create_access_token(username), "token_type": "bearer"}
@app.get("/api/me")
def me(user: dict = Depends(require_auth)):
return {"username": user["username"], "role": user.get("role","user"),
"permissions": user.get("permissions", {"stt":False,"ocr":False})}
# ════════════════════════════════════════════════════════════════
# 시스템 정보
# ════════════════════════════════════════════════════════════════
@app.get("/api/system")
def system_info(user: dict = Depends(require_auth)):
mem = psutil.virtual_memory()
swap = psutil.swap_memory()
s = _load_settings()
return {
"username": user["username"],
"role": user.get("role", "user"),
"permissions": user.get("permissions", {"stt": False, "ocr": False}),
"ram_total_gb": round(mem.total / 1024**3, 1),
"ram_used_gb": round(mem.used / 1024**3, 1),
"ram_avail_gb": round(mem.available / 1024**3, 1),
"ram_percent": mem.percent,
"swap_total_gb": round(swap.total / 1024**3, 1),
"swap_used_gb": round(swap.used / 1024**3, 1),
"cpu_logical": psutil.cpu_count(logical=True),
"cpu_physical": psutil.cpu_count(logical=False),
"cpu_percent": psutil.cpu_percent(interval=0.3),
"cpu_threads_setting": s.get("cpu_threads", 0),
"stt_timeout": s.get("stt_timeout", 0),
"ollama_timeout":s.get("ollama_timeout", 600),
}
@@ -77,25 +190,28 @@ def me(user: dict = Depends(require_auth)):
# ════════════════════════════════════════════════════════════════
@app.post("/api/transcribe")
async def transcribe(
request: Request,
file: UploadFile = File(...),
use_ollama: str = Form("false"),
ollama_model: str = Form(""),
request: Request, file: UploadFile = File(...),
use_ollama: str = Form("false"), ollama_model: str = Form(""),
user: dict = Depends(require_stt),
):
_check_size(request)
ext = _ext(file.filename)
if ext not in AUDIO_EXT:
raise HTTPException(400, f"지원하지 않는 형식: {', '.join(sorted(AUDIO_EXT))}")
file_id = str(uuid.uuid4())
if ext not in AUDIO_EXT: raise HTTPException(400, f"지원하지 않는 형식: {', '.join(sorted(AUDIO_EXT))}")
file_id = str(uuid.uuid4())
save_path = os.path.join(UPLOAD_DIR, f"{file_id}.{ext}")
await _save(file, save_path)
file_size = os.path.getsize(save_path)
_use_ollama = use_ollama.lower() == "true"
# 모델 미지정 시 설정에서 가져옴
if _use_ollama and not ollama_model.strip():
ollama_model = _load_settings().get("stt_ollama_model", "")
s = _load_settings()
if _use_ollama and not ollama_model.strip(): ollama_model = s.get("stt_ollama_model","")
append_history({"id": file_id, "type": "stt", "status": "processing",
"timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "username": user["username"],
"input": {"filename": file.filename, "size_bytes": file_size, "format": ext.upper()},
"settings": {"model": os.getenv("WHISPER_MODEL","medium"), "language": os.getenv("WHISPER_LANGUAGE","ko"),
"compute_type": os.getenv("WHISPER_COMPUTE_TYPE","int8"), "cpu_threads": s.get("cpu_threads",0),
"stt_timeout": s.get("stt_timeout",0), "use_ollama": _use_ollama,
"ollama_model": ollama_model if _use_ollama else ""},
"output": None})
task = transcribe_task.delay(file_id, save_path, _use_ollama, ollama_model)
return {"task_id": task.id, "file_id": file_id, "filename": file.filename}
@@ -105,167 +221,160 @@ async def transcribe(
# ════════════════════════════════════════════════════════════════
@app.post("/api/ocr")
async def ocr(
request: Request,
file: UploadFile = File(...),
mode: str = Form("text"),
backend: str = Form("paddle"),
ollama_model: str = Form(""),
custom_prompt: str = Form(""),
request: Request, file: UploadFile = File(...),
mode: str = Form("text"), backend: str = Form("paddle"),
ollama_model: str = Form(""), custom_prompt: str = Form(""),
user: dict = Depends(require_ocr),
):
_check_size(request)
ext = _ext(file.filename)
if ext not in IMAGE_EXT:
raise HTTPException(400, f"지원하지 않는 형식: {', '.join(sorted(IMAGE_EXT))}")
if mode not in ("text", "structure"): mode = "text"
if backend not in ("paddle", "ollama"): backend = "paddle"
# 모델 미지정 시 설정에서 가져옴
if backend == "ollama" and not ollama_model.strip():
ollama_model = _load_settings().get("ocr_ollama_model", "granite3.2-vision:latest")
file_id = str(uuid.uuid4())
if ext not in IMAGE_EXT: raise HTTPException(400, f"지원하지 않는 형식: {', '.join(sorted(IMAGE_EXT))}")
if mode not in ("text","structure"): mode = "text"
if backend not in ("paddle","ollama"): backend = "paddle"
s = _load_settings()
if backend == "ollama" and not ollama_model.strip(): ollama_model = s.get("ocr_ollama_model","granite3.2-vision:latest")
file_id = str(uuid.uuid4())
save_path = os.path.join(UPLOAD_DIR, f"{file_id}.{ext}")
await _save(file, save_path)
file_size = os.path.getsize(save_path)
append_history({"id": file_id, "type": "ocr", "status": "processing",
"timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "username": user["username"],
"input": {"filename": file.filename, "size_bytes": file_size, "format": ext.upper()},
"settings": {"backend": backend, "mode": mode, "ocr_lang": os.getenv("OCR_LANG","korean"),
"ollama_model": ollama_model if backend=="ollama" else "",
"ollama_timeout": s.get("ollama_timeout",600),
"custom_prompt": custom_prompt[:200] if custom_prompt else ""},
"output": None})
task = ocr_task.delay(file_id, save_path, mode, backend, ollama_model, custom_prompt)
return {"task_id": task.id, "file_id": file_id,
"filename": file.filename, "mode": mode, "backend": backend}
return {"task_id": task.id, "file_id": file_id, "filename": file.filename, "mode": mode, "backend": backend}
# ════════════════════════════════════════════════════════════════
# 작업 상태 / 다운로드
# 상태
# ════════════════════════════════════════════════════════════════
@app.get("/api/status/{task_id}")
def get_status(task_id: str, user: dict = Depends(require_auth)):
r = celery_app.AsyncResult(task_id)
if r.state == "PENDING": return {"state": "pending", "progress": 0, "message": "대기 중..."}
if r.state == "PROGRESS": m = r.info or {}; return {"state": "progress","progress": m.get("progress",0),"message": m.get("message","처리 중...")}
if r.state == "SUCCESS": return {"state": "success", "progress": 100, **r.result}
if r.state == "FAILURE": return {"state": "failure", "progress": 0, "message": str(r.info)}
return {"state": r.state.lower(), "progress": 0}
if r.state == "PENDING": return {"state":"pending", "progress":0, "message":"대기 중..."}
if r.state == "PROGRESS": m=r.info or {}; return {"state":"progress","progress":m.get("progress",0),"message":m.get("message","처리 중...")}
if r.state == "SUCCESS": _update_history(task_id, r.result or {}); return {"state":"success","progress":100,**(r.result or {})}
if r.state == "FAILURE": _update_history_fail(task_id, str(r.info)); return {"state":"failure","progress":0,"message":str(r.info)}
return {"state":r.state.lower(),"progress":0}
# ════════════════════════════════════════════════════════════════
# 이력
# ════════════════════════════════════════════════════════════════
@app.get("/api/history")
def get_history(page: int=1, per_page: int=15, type_: str="", user: dict=Depends(require_auth)):
history = _load_history()
if user.get("role") != "admin": history = [h for h in history if h.get("username")==user["username"]]
if type_ in ("stt","ocr"): history = [h for h in history if h.get("type")==type_]
total = len(history); start = (page-1)*per_page
return {"total":total,"page":page,"per_page":per_page,"items":history[start:start+per_page]}
@app.delete("/api/history/{history_id}")
def delete_history(history_id: str, user: dict=Depends(require_auth)):
if not delete_history_item(history_id): raise HTTPException(404,"이력을 찾을 수 없습니다")
return {"ok":True}
@app.delete("/api/history")
def clear_all_history(user: dict=Depends(require_admin)):
clear_history(); return {"ok":True}
# ════════════════════════════════════════════════════════════════
# 다운로드 / Ollama / 설정 / 관리자
# ════════════════════════════════════════════════════════════════
@app.get("/api/download/{filename}")
def download(filename: str, user: dict = Depends(require_auth)):
if ".." in filename or "/" in filename:
raise HTTPException(400, "잘못된 파일명")
def download(filename: str, user: dict=Depends(require_auth)):
if ".." in filename or "/" in filename: raise HTTPException(400,"잘못된 파일명")
path = os.path.join(OUTPUT_DIR, filename)
if not os.path.exists(path):
raise HTTPException(404, "파일을 찾을 수 없습니다")
media = ("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
if filename.endswith(".xlsx") else "text/plain")
if not os.path.exists(path): raise HTTPException(404,"파일을 찾을 수 없습니다")
media = ("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" if filename.endswith(".xlsx") else "text/plain")
return FileResponse(path, media_type=media, filename=filename)
# ════════════════════════════════════════════════════════════════
# Ollama 모델 목록
# ════════════════════════════════════════════════════════════════
@app.get("/api/ollama/models")
def ollama_models(user: dict = Depends(require_auth)):
def ollama_models(user: dict=Depends(require_auth)):
try:
resp = httpx.get(f"{OLLAMA_URL}/api/tags", timeout=8.0)
resp.raise_for_status()
models = [m["name"] for m in resp.json().get("models", [])]
return {"models": models, "connected": True}
except Exception as e:
return {"models": [], "connected": False, "error": str(e)}
resp = httpx.get(f"{OLLAMA_URL}/api/tags", timeout=8.0); resp.raise_for_status()
return {"models":[m["name"] for m in resp.json().get("models",[])], "connected":True}
except Exception as e: return {"models":[], "connected":False, "error":str(e)}
# ════════════════════════════════════════════════════════════════
# 설정
# ════════════════════════════════════════════════════════════════
@app.get("/api/settings")
def get_settings(user: dict = Depends(require_auth)):
return _load_settings()
def get_settings(user: dict=Depends(require_auth)): return _load_settings()
@app.post("/api/settings")
def save_settings_endpoint(
stt_ollama_model: str = Form(""),
ocr_ollama_model: str = Form(""),
cpu_threads: str = Form("0"),
stt_timeout: str = Form("0"),
ollama_timeout: str = Form("600"),
user: dict = Depends(require_auth),
):
data = {"stt_ollama_model": stt_ollama_model,
"ocr_ollama_model": ocr_ollama_model}
def _int(v, default):
try: return max(0, int(v))
except: return default
data = {
"stt_ollama_model": stt_ollama_model,
"ocr_ollama_model": ocr_ollama_model,
"cpu_threads": _int(cpu_threads, 0),
"stt_timeout": _int(stt_timeout, 0),
"ollama_timeout": _int(ollama_timeout, 600),
}
_save_settings(data)
return {"ok": True, "settings": data}
return {"ok":True, "settings":data}
# ════════════════════════════════════════════════════════════════
# 관리자 — 사용자 관리
# ════════════════════════════════════════════════════════════════
@app.get("/api/admin/users")
def admin_list_users(user: dict = Depends(require_admin)):
return {"users": list_users()}
def admin_list_users(user: dict=Depends(require_admin)): return {"users":list_users()}
@app.post("/api/admin/users")
def admin_create_user(
username: str = Form(...),
password: str = Form(...),
perm_stt: str = Form("false"),
perm_ocr: str = Form("false"),
user: dict = Depends(require_admin),
):
perms = {"stt": perm_stt.lower()=="true", "ocr": perm_ocr.lower()=="true"}
ok, msg = create_user(username, password, perms)
if not ok:
raise HTTPException(400, msg)
return {"ok": True, "message": msg}
def admin_create_user(username:str=Form(...),password:str=Form(...),perm_stt:str=Form("false"),perm_ocr:str=Form("false"),user:dict=Depends(require_admin)):
perms={"stt":perm_stt.lower()=="true","ocr":perm_ocr.lower()=="true"}
ok,msg=create_user(username,password,perms)
if not ok: raise HTTPException(400,msg)
return {"ok":True,"message":msg}
@app.put("/api/admin/users/{username}")
def admin_update_user(
username: str,
perm_stt: str = Form("false"),
perm_ocr: str = Form("false"),
password: str = Form(""),
user: dict = Depends(require_admin),
):
perms = {"stt": perm_stt.lower()=="true", "ocr": perm_ocr.lower()=="true"}
ok, msg = update_user(username, perms, password or None)
if not ok:
raise HTTPException(400, msg)
return {"ok": True, "message": msg}
def admin_update_user(username:str,perm_stt:str=Form("false"),perm_ocr:str=Form("false"),password:str=Form(""),user:dict=Depends(require_admin)):
perms={"stt":perm_stt.lower()=="true","ocr":perm_ocr.lower()=="true"}
ok,msg=update_user(username,perms,password or None)
if not ok: raise HTTPException(400,msg)
return {"ok":True,"message":msg}
@app.delete("/api/admin/users/{username}")
def admin_delete_user(username: str, user: dict = Depends(require_admin)):
ok, msg = delete_user(username)
if not ok:
raise HTTPException(400, msg)
return {"ok": True, "message": msg}
def admin_delete_user(username:str,user:dict=Depends(require_admin)):
ok,msg=delete_user(username)
if not ok: raise HTTPException(400,msg)
return {"ok":True,"message":msg}
# ════════════════════════════════════════════════════════════════
# 정리
# ════════════════════════════════════════════════════════════════
@app.post("/api/cleanup")
def cleanup(user: dict = Depends(require_auth)):
return {"removed": _cleanup_outputs()}
def cleanup(user:dict=Depends(require_auth)): return {"removed":_cleanup_outputs()}
# ════════════════════════════════════════════════════════════════
# 유틸
# ════════════════════════════════════════════════════════════════
def _check_size(request: Request):
def _check_size(request):
cl = request.headers.get("content-length")
if cl and int(cl) > MAX_UPLOAD_BYTES:
raise HTTPException(413, f"파일이 너무 큽니다. 최대 {MAX_UPLOAD_BYTES//1024//1024}MB")
if cl and int(cl) > MAX_UPLOAD_BYTES: raise HTTPException(413, f"파일이 너무 큽니다. 최대 {MAX_UPLOAD_BYTES//1024//1024}MB")
def _cleanup_outputs() -> int:
if OUTPUT_KEEP_SECS == 0:
return 0
cutoff = time.time() - OUTPUT_KEEP_SECS
removed = 0
for f in glob.glob(os.path.join(OUTPUT_DIR, "*")):
def _cleanup_outputs():
if OUTPUT_KEEP_SECS == 0: return 0
cutoff = time.time() - OUTPUT_KEEP_SECS; removed = 0
for f in glob.glob(os.path.join(OUTPUT_DIR,"*")):
try:
if os.path.getmtime(f) < cutoff:
os.remove(f); removed += 1
if os.path.getmtime(f) < cutoff: os.remove(f); removed += 1
except: pass
return removed
def _ext(fn): return fn.rsplit(".", 1)[-1].lower() if "." in fn else ""
def _ext(fn): return fn.rsplit(".",1)[-1].lower() if "." in fn else ""
async def _save(file: UploadFile, path: str):
async def _save(file, path):
written = 0
async with aiofiles.open(path, "wb") as f:
while chunk := await file.read(1024 * 1024):
async with aiofiles.open(path,"wb") as f:
while chunk := await file.read(1024*1024):
written += len(chunk)
if written > MAX_UPLOAD_BYTES:
await f.close(); os.remove(path)
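
Note: the endpoints above implement an async upload → poll → download flow. A minimal client sketch of that flow follows; the base URL (host port from docker-compose), credentials, sample file name, and Bearer-auth header are placeholder assumptions, while the endpoint paths and response fields follow the handlers above.

# Client sketch: upload audio → poll Celery task status → download transcript.
# Base URL, credentials, and file name are placeholders; "output_file" is the
# field name used by _update_history above (accessed defensively with .get).
import time
import httpx

BASE = "http://localhost:8800"  # assumed host port from docker-compose

# 1) Login and obtain a bearer token
tok = httpx.post(f"{BASE}/api/login",
                 data={"username": "admin", "password": "changeme1234"}).json()["access_token"]
headers = {"Authorization": f"Bearer {tok}"}  # assumes standard Bearer auth in auth.py

# 2) Upload an audio file for transcription
with open("sample.mp3", "rb") as f:
    r = httpx.post(f"{BASE}/api/transcribe", headers=headers,
                   files={"file": ("sample.mp3", f)},
                   data={"use_ollama": "false"}).json()

# 3) Poll task status until the worker finishes
while True:
    s = httpx.get(f"{BASE}/api/status/{r['task_id']}", headers=headers).json()
    if s["state"] in ("success", "failure"):
        break
    time.sleep(2)

# 4) Download the resulting transcript file, if present
if s["state"] == "success" and s.get("output_file"):
    txt = httpx.get(f"{BASE}/api/download/{s['output_file']}", headers=headers)
    print(txt.text[:200])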


@@ -1,8 +1,5 @@
"""
OCR Celery Tasks
- PaddleOCR 3.x 호환 (use_gpu/show_log/cls 파라미터 제거, 결과구조 변경 반영)
- backend="paddle" → PaddleOCR 로컬 실행
- backend="ollama" → Ollama Vision API 호출
OCR Celery Tasks — PaddleOCR 3.x + Ollama Vision
"""
import os
import base64
@@ -16,7 +13,7 @@ REDIS_URL = os.getenv("REDIS_URL", "redis://redis:6379/0")
OUTPUT_DIR = os.getenv("OUTPUT_DIR", "/data/outputs")
OCR_LANG = os.getenv("OCR_LANG", "korean")
OLLAMA_URL = os.getenv("OLLAMA_URL", "http://192.168.0.126:11434")
OLLAMA_TIMEOUT = int(os.getenv("OLLAMA_TIMEOUT", "180"))
OLLAMA_TIMEOUT = int(os.getenv("OLLAMA_TIMEOUT", "600"))
celery_app = Celery("ocr_tasks", broker=REDIS_URL, backend=REDIS_URL)
celery_app.conf.update(
@@ -27,7 +24,6 @@ celery_app.conf.update(
result_expires=3600,
)
# PaddleOCR 싱글톤
_ocr_engine = None
_struct_engine = None
@@ -36,7 +32,6 @@ def get_ocr():
if _ocr_engine is None:
from paddleocr import PaddleOCR
print(f"[PaddleOCR] 로딩 (lang={OCR_LANG})")
# PaddleOCR 3.x: use_gpu/show_log 파라미터 제거됨
_ocr_engine = PaddleOCR(use_angle_cls=True, lang=OCR_LANG)
print("[PaddleOCR] 완료")
return _ocr_engine
@@ -51,9 +46,6 @@ def get_structure():
return _struct_engine
# ════════════════════════════════════════════════════════════════
# 메인 Task
# ════════════════════════════════════════════════════════════════
@celery_app.task(bind=True, name="tasks.ocr_task", queue="ocr")
def ocr_task(self, file_id, image_path, mode="text",
backend="paddle", ollama_model="granite3.2-vision", custom_prompt=""):
@@ -72,9 +64,6 @@ def ocr_task(self, file_id, image_path, mode="text",
raise Exception(f"OCR 실패: {str(e)}")
# ════════════════════════════════════════════════════════════════
# Ollama 백엔드
# ════════════════════════════════════════════════════════════════
_OLLAMA_PROMPTS = {
"text": "이 이미지에서 모든 텍스트를 정확하게 추출해줘. 원본의 줄 구분과 단락 구조를 유지해줘.",
"structure": "이 이미지를 분석해서 표는 마크다운 표 형식으로, 나머지 텍스트는 원본 구조를 유지하며 추출해줘.",
@@ -91,8 +80,7 @@ def _run_ollama(task, file_id, image_path, mode, ollama_model, custom_prompt):
resp = httpx.post(f"{OLLAMA_URL}/api/chat", json={
"model": ollama_model,
"messages": [{"role": "user", "content": prompt, "images": [img_b64]}],
"stream": False,
"options": {"temperature": 0.1},
"stream": False, "options": {"temperature": 0.1},
}, timeout=float(OLLAMA_TIMEOUT))
resp.raise_for_status()
except httpx.ConnectError:
@@ -121,16 +109,12 @@ def _run_ollama(task, file_id, image_path, mode, ollama_model, custom_prompt):
"mode": mode, "backend": "ollama", "ollama_model": ollama_model,
"full_text": full_text, "lines": lines, "line_count": len(lines),
"txt_file": txt_file,
"tables": [{"html": h, "rows": len(t),
"cols": max(len(r) for r in t) if t else 0}
"tables": [{"html": h, "rows": len(t), "cols": max(len(r) for r in t) if t else 0}
for h, t in zip(tables_html, tables)],
"xlsx_file": xlsx_file,
}
# ════════════════════════════════════════════════════════════════
# PaddleOCR 백엔드
# ════════════════════════════════════════════════════════════════
def _run_paddle(task, file_id, image_path, mode):
import cv2
img = cv2.imread(image_path)
@@ -140,50 +124,38 @@ def _run_paddle(task, file_id, image_path, mode):
return _paddle_structure(task, file_id, img) if mode == "structure" \
else _paddle_text(task, file_id, img)
def _paddle_text(task, file_id, img):
task.update_state(state="PROGRESS", meta={"progress": 30, "message": "텍스트 인식 중..."})
# PaddleOCR 3.x: cls 파라미터 제거, 결과 구조 변경
result = get_ocr().ocr(img)
task.update_state(state="PROGRESS", meta={"progress": 80, "message": "결과 정리 중..."})
lines = []
if result and len(result) > 0:
r = result[0]
# PaddleOCR 3.x 결과 구조: dict with rec_texts, rec_scores
if isinstance(r, dict):
texts = r.get("rec_texts", [])
scores = r.get("rec_scores", [])
for text, conf in zip(texts, scores):
if text.strip():
lines.append({"text": text,
"confidence": round(float(conf), 3),
"bbox": []})
# 구버전 호환 (list of [bbox, (text, conf)])
lines.append({"text": text, "confidence": round(float(conf), 3), "bbox": []})
elif isinstance(r, list):
for item in r:
if item and len(item) == 2:
_, (text, conf) = item
if text.strip():
lines.append({"text": text,
"confidence": round(float(conf), 3),
"bbox": []})
lines.append({"text": text, "confidence": round(float(conf), 3), "bbox": []})
full_text = "\n".join(l["text"] for l in lines)
txt_file = f"{file_id}_ocr.txt"
with open(os.path.join(OUTPUT_DIR, txt_file), "w", encoding="utf-8") as f:
f.write(full_text)
return {"mode": "text", "backend": "paddle",
return {"mode": "text", "backend": "paddle", "ollama_model": "",
"full_text": full_text, "lines": lines,
"line_count": len(lines), "txt_file": txt_file,
"tables": [], "xlsx_file": None}
def _paddle_structure(task, file_id, img):
task.update_state(state="PROGRESS", meta={"progress": 20, "message": "레이아웃 분석 중..."})
result = get_structure()(img)
task.update_state(state="PROGRESS", meta={"progress": 60, "message": "표 구조 추출 중..."})
text_blocks, tables_html, tables_data = [], [], []
for region in result:
rtype = region.get("type", "").lower()
@@ -197,32 +169,24 @@ def _paddle_structure(task, file_id, img):
if isinstance(line, (list, tuple)) and len(line) == 2:
_, (text, _conf) = line
text_blocks.append(text)
full_text = "\n".join(text_blocks)
task.update_state(state="PROGRESS", meta={"progress": 80, "message": "Excel 생성 중..."})
xlsx_file = None
if tables_data:
xlsx_file = f"{file_id}_tables.xlsx"
_save_excel(tables_data, os.path.join(OUTPUT_DIR, xlsx_file))
txt_file = f"{file_id}_ocr.txt"
with open(os.path.join(OUTPUT_DIR, txt_file), "w", encoding="utf-8") as f:
f.write("# 텍스트\n\n" + full_text)
lines = [{"text": t, "confidence": 1.0, "bbox": []} for t in text_blocks]
tables_meta = [{"html": h, "rows": len(d),
"cols": max(len(r) for r in d) if d else 0}
lines = [{"text": t, "confidence": 1.0, "bbox": []} for t in text_blocks]
tables_meta = [{"html": h, "rows": len(d), "cols": max(len(r) for r in d) if d else 0}
for h, d in zip(tables_html, tables_data)]
return {"mode": "structure", "backend": "paddle",
return {"mode": "structure", "backend": "paddle", "ollama_model": "",
"full_text": full_text, "lines": lines,
"line_count": len(lines), "txt_file": txt_file,
"tables": tables_meta, "xlsx_file": xlsx_file}
# ════════════════════════════════════════════════════════════════
# 공통 유틸
# ════════════════════════════════════════════════════════════════
def _parse_md_tables(text):
tables, current = [], []
for line in text.splitlines():
@@ -241,8 +205,7 @@ def _md_table_to_html(table):
rows = ""
for i, row in enumerate(table):
tag = "th" if i == 0 else "td"
cells = "".join(f"<{tag}>{c}</{tag}>" for c in row)
rows += f"<tr>{cells}</tr>"
rows += "<tr>" + "".join(f"<{tag}>{c}</{tag}>" for c in row) + "</tr>"
return f"<table>{rows}</table>"
def _html_table_to_list(html):
@@ -252,11 +215,10 @@ def _html_table_to_list(html):
super().__init__()
self.rows, self._row, self._cell, self._in = [], [], [], False
def handle_starttag(self, tag, attrs):
if tag == "tr": self._row = []
if tag == "tr": self._row = []
elif tag in ("td","th"): self._cell = []; self._in = True
def handle_endtag(self, tag):
if tag in ("td","th"):
self._row.append("".join(self._cell).strip()); self._in = False
if tag in ("td","th"): self._row.append("".join(self._cell).strip()); self._in = False
elif tag == "tr":
if self._row: self.rows.append(self._row)
def handle_data(self, data):
@@ -264,18 +226,16 @@ def _html_table_to_list(html):
p = P(); p.feed(html); return p.rows
def _save_excel(tables, path):
wb = openpyxl.Workbook()
wb.remove(wb.active)
wb = openpyxl.Workbook(); wb.remove(wb.active)
for i, table in enumerate(tables, 1):
ws = wb.create_sheet(f"{i}")
ws = wb.create_sheet(f"{i}")
thin = Side(style="thin", color="2A2A33")
bdr = Border(left=thin, right=thin, top=thin, bottom=thin)
for r_idx, row in enumerate(table, 1):
for c_idx, val in enumerate(row, 1):
cell = ws.cell(row=r_idx, column=c_idx, value=val)
cell.border = bdr
cell.alignment = Alignment(horizontal="center",
vertical="center", wrap_text=True)
cell.border = bdr
cell.alignment = Alignment(horizontal="center", vertical="center", wrap_text=True)
if r_idx == 1:
cell.fill = PatternFill("solid", fgColor="1A1A2E")
cell.font = Font(color="00E5A0", bold=True, size=10)
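
The table helpers referenced above (_parse_md_tables, _md_table_to_html, _html_table_to_list) are only partially visible in this hunk. Below is a rough, simplified stand-in showing the round trip from Ollama's markdown output to HTML rows; the function names and logic are illustrative, not the exact implementations.

# Simplified stand-ins for the table helpers above: markdown table → row lists → HTML.
md = """| 항목 | 값 |
| --- | --- |
| 이름 | 홍길동 |
| 나이 | 42 |"""

def parse_md_table(text):
    """Collect pipe-delimited rows, skipping the |---|---| separator row."""
    rows = []
    for line in text.splitlines():
        line = line.strip()
        if line.startswith("|"):
            cells = [c.strip() for c in line.strip("|").split("|")]
            if not all(set(c) <= set("-: ") for c in cells):
                rows.append(cells)
    return rows

def table_to_html(table):
    """Render rows as a plain HTML table, first row as headers."""
    out = ""
    for i, row in enumerate(table):
        tag = "th" if i == 0 else "td"
        out += "<tr>" + "".join(f"<{tag}>{c}</{tag}>" for c in row) + "</tr>"
    return f"<table>{out}</table>"

rows = parse_md_table(md)
print(table_to_html(rows))
# In the task above, the same row lists are then written per-sheet by _save_excel().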

View File

@@ -6,7 +6,7 @@ redis==5.0.8
faster-whisper==1.0.3
aiofiles==23.2.1
# 인증 (bcrypt 제거 — 직접 비교 방식 사용)
# 인증
python-jose[cryptography]==3.3.0
# PaddleOCR 3.x
@@ -19,3 +19,6 @@ httpx>=0.27.0
# Excel 출력
openpyxl==3.1.2
Pillow>=10.0.0
# 시스템 모니터링
psutil>=5.9.0

(File diff suppressed because it is too large)


@@ -1,7 +1,7 @@
import os
import httpx
from celery import Celery
from ocr_tasks import ocr_task # noqa: F401 — worker에 등록
from ocr_tasks import ocr_task # noqa: F401
REDIS_URL = os.getenv("REDIS_URL", "redis://redis:6379/0")
MODEL_SIZE = os.getenv("WHISPER_MODEL", "medium")
@@ -12,7 +12,10 @@ BEAM_SIZE = int(os.getenv("WHISPER_BEAM_SIZE", "5"))
INITIAL_PROMPT = os.getenv("WHISPER_INITIAL_PROMPT", "") or None
OUTPUT_DIR = os.getenv("OUTPUT_DIR", "/data/outputs")
OLLAMA_URL = os.getenv("OLLAMA_URL", "http://192.168.0.126:11434")
OLLAMA_TIMEOUT = int(os.getenv("OLLAMA_TIMEOUT", "180"))
OLLAMA_TIMEOUT = int(os.getenv("OLLAMA_TIMEOUT", "600"))
_cpu_threads_env = int(os.getenv("CPU_THREADS", "0"))
CPU_THREADS = _cpu_threads_env if _cpu_threads_env > 0 else None # None = auto
celery_app = Celery("whisper_tasks", broker=REDIS_URL, backend=REDIS_URL)
celery_app.conf.update(
@@ -29,15 +32,16 @@ def get_model():
global _model
if _model is None:
from faster_whisper import WhisperModel
print(f"[Whisper] 로딩: {MODEL_SIZE} / {DEVICE} / {COMPUTE_TYPE}")
_model = WhisperModel(MODEL_SIZE, device=DEVICE, compute_type=COMPUTE_TYPE)
kwargs = dict(device=DEVICE, compute_type=COMPUTE_TYPE)
if CPU_THREADS is not None:
kwargs["cpu_threads"] = CPU_THREADS
print(f"[Whisper] 로딩: {MODEL_SIZE} / {DEVICE} / {COMPUTE_TYPE} / threads={CPU_THREADS or 'auto'}")
_model = WhisperModel(MODEL_SIZE, **kwargs)
print("[Whisper] 로드 완료")
return _model
# ── Ollama 후처리 ─────────────────────────────────────────────
def _ollama_postprocess(text: str, model: str) -> str:
"""Whisper 결과를 Ollama로 후처리 (문장부호·맞춤법·자연스러운 문장)"""
if not model or not text.strip():
return text
prompt = (
@@ -49,33 +53,22 @@ def _ollama_postprocess(text: str, model: str) -> str:
try:
resp = httpx.post(
f"{OLLAMA_URL}/api/chat",
json={
"model": model,
"messages": [{"role": "user", "content": prompt}],
"stream": False,
"options": {"temperature": 0.1},
},
json={"model": model,
"messages": [{"role": "user", "content": prompt}],
"stream": False, "options": {"temperature": 0.1}},
timeout=float(OLLAMA_TIMEOUT),
)
resp.raise_for_status()
result = resp.json().get("message", {}).get("content", "").strip()
return result if result else text
except Exception as e:
print(f"[Ollama 후처리 실패] {e} — 원본 텍스트 사용")
print(f"[Ollama 후처리 실패] {e}")
return text
# ════════════════════════════════════════════════════════════════
# STT Celery Task
# ════════════════════════════════════════════════════════════════
@celery_app.task(bind=True, name="tasks.transcribe_task", queue="stt")
def transcribe_task(
self,
file_id: str,
audio_path: str,
use_ollama: bool = False,
ollama_model: str = "",
):
def transcribe_task(self, file_id: str, audio_path: str,
use_ollama: bool = False, ollama_model: str = ""):
self.update_state(state="PROGRESS", meta={"progress": 5, "message": "모델 준비 중..."})
try:
model = get_model()
@@ -97,8 +90,8 @@ def transcribe_task(
duration = info.duration
for seg in segments_gen:
segments.append({"start": round(seg.start,2),
"end": round(seg.end,2),
segments.append({"start": round(seg.start, 2),
"end": round(seg.end, 2),
"text": seg.text.strip()})
parts.append(seg.text.strip())
if duration > 0:
@@ -112,7 +105,6 @@ def transcribe_task(
raw_text = "\n".join(parts)
full_text = raw_text
# Ollama 후처리
if use_ollama and ollama_model:
self.update_state(state="PROGRESS",
meta={"progress": 85,


@@ -3,7 +3,6 @@ services:
image: redis:7-alpine
container_name: whisper_redis
restart: unless-stopped
# RDB 스냅샷 저장 실패 시에도 쓰기 허용 (Celery 브로커 용도)
command: redis-server --stop-writes-on-bgsave-error no
environment:
- TZ=Asia/Seoul
@@ -19,13 +18,13 @@ services:
container_name: whisper_app
restart: unless-stopped
ports:
- "8800:8000" # 호스트 Nginx가 리버스 프록시
- "8800:8000"
environment:
- TZ=Asia/Seoul
# ── 인증 (반드시 변경) ──────────────────────────────
- AUTH_USERNAME=byun
- AUTH_PASSWORD=admin
- AUTH_USERNAME=admin
- AUTH_PASSWORD=changeme1234
- JWT_SECRET=your-very-secret-key-change-this
- JWT_EXPIRE_HOURS=12
@@ -33,24 +32,31 @@ services:
- REDIS_URL=redis://redis:6379/0
- UPLOAD_DIR=/data/uploads
- OUTPUT_DIR=/data/outputs
- WHISPER_MODEL=medium # tiny/base/small/medium/large-v3
- WHISPER_MODEL=medium
- WHISPER_DEVICE=cpu
- WHISPER_COMPUTE_TYPE=int8
- WHISPER_LANGUAGE=ko
- WHISPER_BEAM_SIZE=5
- WHISPER_INITIAL_PROMPT= # 예: "고객 상담 녹취록입니다."
- WHISPER_INITIAL_PROMPT=
# ── 타임아웃 ─────────────────────────────────────────
# STT: Celery 태스크 소프트 타임아웃 (초) — 0=무제한
- STT_TIMEOUT=0
# Ollama: Vision/후처리 API 응답 대기 (초)
- OLLAMA_TIMEOUT=600
# ── 파일 관리 ────────────────────────────────────────
- MAX_UPLOAD_MB=500
- OUTPUT_KEEP_HOURS=48
# ── PaddleOCR ────────────────────────────────────────
- OCR_LANG=korean # korean/en/japan/chinese_cht/ch
- OCR_LANG=korean
# ── Ollama OCR ───────────────────────────────────────
# 호스트 실제 LAN IP 사용 (host.docker.internal은 Linux에서 불안정)
# ── Ollama ───────────────────────────────────────────
- OLLAMA_URL=http://192.168.0.126:11434
- OLLAMA_TIMEOUT=600 # 11b 이상 모델은 300 이상 권장
# ── CPU 스레드 ───────────────────────────────────────
- CPU_THREADS=0
volumes:
- stt_data:/data
@@ -67,9 +73,6 @@ services:
dockerfile: Dockerfile
container_name: whisper_worker
restart: unless-stopped
# --pool=solo : CTranslate2(faster-whisper)가 prefork 방식과 충돌(SIGSEGV) 발생
# solo 모드로 포크 없이 실행하여 해결
# --max-tasks-per-child=50 : Whisper/Paddle 모델 메모리 누수 방지
command: >
celery -A tasks worker
--loglevel=info
@@ -87,11 +90,13 @@ services:
- WHISPER_LANGUAGE=ko
- WHISPER_BEAM_SIZE=5
- WHISPER_INITIAL_PROMPT=
- STT_TIMEOUT=0
- OLLAMA_TIMEOUT=600
- MAX_UPLOAD_MB=500
- OUTPUT_KEEP_HOURS=48
- OCR_LANG=korean
- OLLAMA_URL=http://192.168.0.126:11434
- OLLAMA_TIMEOUT=600
- CPU_THREADS=0
- JWT_SECRET=your-very-secret-key-change-this
volumes:
- stt_data:/data
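
The compose file only declares STT_TIMEOUT and OLLAMA_TIMEOUT; how the worker enforces them is not visible in this diff. A minimal sketch under that assumption follows — the function name and loop-based check are illustrative only (Celery's own time limits are generally not enforced under --pool=solo, which is why an in-task check is one workable approach).

# Sketch only: reading the two timeout env vars and enforcing STT_TIMEOUT
# inside the segment loop; the actual wiring lives in parts of tasks.py not shown here.
import os, time

STT_TIMEOUT = int(os.getenv("STT_TIMEOUT", "0"))            # 0 = unlimited
OLLAMA_TIMEOUT = float(os.getenv("OLLAMA_TIMEOUT", "600"))  # seconds; passed as the httpx timeout above

def collect_segments(segments_gen):
    """Collect Whisper segments, aborting once STT_TIMEOUT is exceeded."""
    start, parts = time.time(), []
    for seg in segments_gen:
        if STT_TIMEOUT and time.time() - start > STT_TIMEOUT:
            raise TimeoutError(f"STT aborted after {STT_TIMEOUT}s")
        parts.append(seg.text.strip())
    return "\n".join(parts)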

docker-compose.yml.bak1 (new file, 114 lines)

@@ -0,0 +1,114 @@
services:
redis:
image: redis:7-alpine
container_name: whisper_redis
restart: unless-stopped
command: redis-server --stop-writes-on-bgsave-error no
environment:
- TZ=Asia/Seoul
volumes:
- redis_data:/data
networks:
- whisper_net
app:
build:
context: ./app
dockerfile: Dockerfile
container_name: whisper_app
restart: unless-stopped
ports:
- "8800:8000"
environment:
- TZ=Asia/Seoul
# ── 인증 (반드시 변경) ──────────────────────────────
- AUTH_USERNAME=admin
- AUTH_PASSWORD=changeme1234
- JWT_SECRET=your-very-secret-key-change-this
- JWT_EXPIRE_HOURS=12
# ── Whisper STT ─────────────────────────────────────
- REDIS_URL=redis://redis:6379/0
- UPLOAD_DIR=/data/uploads
- OUTPUT_DIR=/data/outputs
- WHISPER_MODEL=medium
- WHISPER_DEVICE=cpu
- WHISPER_COMPUTE_TYPE=int8
- WHISPER_LANGUAGE=ko
- WHISPER_BEAM_SIZE=5
- WHISPER_INITIAL_PROMPT=
# ── CPU 스레드 설정 ──────────────────────────────────
# 5825u: 8코어 16스레드 → 8~12 권장
# 0 = 자동(시스템 전체 코어 사용)
- CPU_THREADS=0
# ── 파일 관리 ────────────────────────────────────────
- MAX_UPLOAD_MB=500
- OUTPUT_KEEP_HOURS=48
# ── PaddleOCR ────────────────────────────────────────
- OCR_LANG=korean
# ── Ollama ───────────────────────────────────────────
- OLLAMA_URL=http://192.168.0.126:11434
- OLLAMA_TIMEOUT=600
volumes:
- stt_data:/data
- whisper_models:/root/.cache/huggingface
- paddle_models:/root/.paddlex
depends_on:
- redis
networks:
- whisper_net
worker:
build:
context: ./app
dockerfile: Dockerfile
container_name: whisper_worker
restart: unless-stopped
command: >
celery -A tasks worker
--loglevel=info
--pool=solo
--max-tasks-per-child=50
-Q stt,ocr
environment:
- TZ=Asia/Seoul
- REDIS_URL=redis://redis:6379/0
- UPLOAD_DIR=/data/uploads
- OUTPUT_DIR=/data/outputs
- WHISPER_MODEL=medium
- WHISPER_DEVICE=cpu
- WHISPER_COMPUTE_TYPE=int8
- WHISPER_LANGUAGE=ko
- WHISPER_BEAM_SIZE=5
- WHISPER_INITIAL_PROMPT=
- CPU_THREADS=0
- MAX_UPLOAD_MB=500
- OUTPUT_KEEP_HOURS=48
- OCR_LANG=korean
- OLLAMA_URL=http://192.168.0.126:11434
- OLLAMA_TIMEOUT=600
- JWT_SECRET=your-very-secret-key-change-this
volumes:
- stt_data:/data
- whisper_models:/root/.cache/huggingface
- paddle_models:/root/.paddlex
depends_on:
- redis
networks:
- whisper_net
volumes:
redis_data:
stt_data:
whisper_models:
paddle_models:
networks:
whisper_net:
driver: bridge

whisper-stt.tar.gz (binary executable file, diff not shown)