feat: 모바일 반응형 + 변환 타임아웃 설정
This commit is contained in:
@@ -9,8 +9,6 @@ RUN apt-get update && apt-get install -y \
|
|||||||
libxext6 \
|
libxext6 \
|
||||||
libxrender1 \
|
libxrender1 \
|
||||||
libgl1 \
|
libgl1 \
|
||||||
libgles2 \
|
|
||||||
libegl1 \
|
|
||||||
wget \
|
wget \
|
||||||
curl \
|
curl \
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
@@ -19,8 +17,9 @@ WORKDIR /app
|
|||||||
|
|
||||||
COPY requirements.txt .
|
COPY requirements.txt .
|
||||||
|
|
||||||
# PaddlePaddle CPU — PyPI 공식 서버
|
# PaddlePaddle CPU (AMD64) — paddleocr 3.x 호환
|
||||||
RUN pip install --no-cache-dir paddlepaddle==3.0.0
|
RUN pip install --no-cache-dir paddlepaddle==3.0.0 \
|
||||||
|
-i https://pypi.tuna.tsinghua.edu.cn/simple
|
||||||
|
|
||||||
# 나머지 패키지
|
# 나머지 패키지
|
||||||
RUN pip install --no-cache-dir -r requirements.txt
|
RUN pip install --no-cache-dir -r requirements.txt
|
||||||
|
|||||||
365
app/main.py
365
app/main.py
@@ -1,11 +1,10 @@
|
|||||||
import os, uuid, time, glob, json
|
import os, uuid, time, glob, json, threading
|
||||||
import httpx
|
import psutil, httpx, aiofiles
|
||||||
import aiofiles
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from datetime import datetime
|
||||||
from fastapi import FastAPI, UploadFile, File, HTTPException, Depends, Form, Request
|
from fastapi import FastAPI, UploadFile, File, HTTPException, Depends, Form, Request
|
||||||
from fastapi.staticfiles import StaticFiles
|
from fastapi.staticfiles import StaticFiles
|
||||||
from fastapi.responses import FileResponse
|
from fastapi.responses import FileResponse
|
||||||
from pydantic import BaseModel
|
|
||||||
|
|
||||||
from auth import (authenticate, create_access_token, init_users,
|
from auth import (authenticate, create_access_token, init_users,
|
||||||
require_auth, require_admin, require_stt, require_ocr,
|
require_auth, require_admin, require_stt, require_ocr,
|
||||||
@@ -23,6 +22,8 @@ OUTPUT_KEEP_SECS = int(os.getenv("OUTPUT_KEEP_HOURS", "48")) * 3600
|
|||||||
|
|
||||||
DATA_DIR = Path(UPLOAD_DIR).parent
|
DATA_DIR = Path(UPLOAD_DIR).parent
|
||||||
SETTINGS_FILE = DATA_DIR / "settings.json"
|
SETTINGS_FILE = DATA_DIR / "settings.json"
|
||||||
|
HISTORY_FILE = DATA_DIR / "history.json"
|
||||||
|
HISTORY_MAX = 300
|
||||||
|
|
||||||
os.makedirs(UPLOAD_DIR, exist_ok=True)
|
os.makedirs(UPLOAD_DIR, exist_ok=True)
|
||||||
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
||||||
@@ -30,13 +31,26 @@ os.makedirs(OUTPUT_DIR, exist_ok=True)
|
|||||||
AUDIO_EXT = {"mp3","mp4","wav","m4a","ogg","flac","aac","wma","webm","mkv","avi","mov"}
|
AUDIO_EXT = {"mp3","mp4","wav","m4a","ogg","flac","aac","wma","webm","mkv","avi","mov"}
|
||||||
IMAGE_EXT = {"jpg","jpeg","png","bmp","tiff","tif","webp","gif"}
|
IMAGE_EXT = {"jpg","jpeg","png","bmp","tiff","tif","webp","gif"}
|
||||||
|
|
||||||
|
_DEFAULT_SETTINGS = {
|
||||||
|
"stt_ollama_model": "",
|
||||||
|
"ocr_ollama_model": "granite3.2-vision:latest",
|
||||||
|
"cpu_threads": 0,
|
||||||
|
"stt_timeout": 0, # 0 = 무제한
|
||||||
|
"ollama_timeout": 600, # 초
|
||||||
|
}
|
||||||
|
|
||||||
|
_hist_lock = threading.Lock()
|
||||||
|
|
||||||
|
|
||||||
# ── 설정 I/O ─────────────────────────────────────────────────
|
# ── 설정 I/O ─────────────────────────────────────────────────
|
||||||
def _load_settings() -> dict:
|
def _load_settings() -> dict:
|
||||||
if not SETTINGS_FILE.exists():
|
if not SETTINGS_FILE.exists():
|
||||||
return {"stt_ollama_model": "", "ocr_ollama_model": "granite3.2-vision:latest"}
|
return dict(_DEFAULT_SETTINGS)
|
||||||
with open(SETTINGS_FILE, "r", encoding="utf-8") as f:
|
with open(SETTINGS_FILE, "r", encoding="utf-8") as f:
|
||||||
return json.load(f)
|
data = json.load(f)
|
||||||
|
for k, v in _DEFAULT_SETTINGS.items():
|
||||||
|
data.setdefault(k, v)
|
||||||
|
return data
|
||||||
|
|
||||||
def _save_settings(data: dict):
|
def _save_settings(data: dict):
|
||||||
SETTINGS_FILE.parent.mkdir(parents=True, exist_ok=True)
|
SETTINGS_FILE.parent.mkdir(parents=True, exist_ok=True)
|
||||||
@@ -44,6 +58,85 @@ def _save_settings(data: dict):
|
|||||||
json.dump(data, f, ensure_ascii=False, indent=2)
|
json.dump(data, f, ensure_ascii=False, indent=2)
|
||||||
|
|
||||||
|
|
||||||
|
# ── 이력 I/O ─────────────────────────────────────────────────
|
||||||
|
def _load_history() -> list:
    """Return the history records stored in ``HISTORY_FILE``.

    The file is read while holding ``_hist_lock``.

    Returns:
        The decoded JSON list. An empty list is returned when the file is
        missing, cannot be read, does not contain valid JSON, or holds a
        top-level value that is not a list.
    """
    with _hist_lock:
        try:
            with open(HISTORY_FILE, "r", encoding="utf-8") as f:
                data = json.load(f)
        except (OSError, ValueError):
            # OSError covers a missing/unreadable file; ValueError covers
            # json.JSONDecodeError and UnicodeDecodeError.
            return []
    # Callers iterate the result as a list of dict records, so never hand
    # back a different JSON type.
    return data if isinstance(data, list) else []
|
||||||
|
|
||||||
|
def append_history(record: dict):
    """Insert ``record`` at the front of the history file.

    The stored list is truncated to ``HISTORY_MAX`` entries after the
    insert. The whole read-modify-write runs under ``_hist_lock``.

    This is best-effort: a failure to write is logged and otherwise
    ignored, so the caller never sees an exception from here.

    Args:
        record: JSON-serialisable history entry.
    """
    import logging

    with _hist_lock:
        try:
            with open(HISTORY_FILE, "r", encoding="utf-8") as f:
                history = json.load(f)
        except (OSError, ValueError):
            # Missing or corrupt file: start over instead of dropping every
            # future record.
            history = []
        if not isinstance(history, list):
            history = []

        history = [record, *history][:HISTORY_MAX]

        try:
            HISTORY_FILE.parent.mkdir(parents=True, exist_ok=True)
            with open(HISTORY_FILE, "w", encoding="utf-8") as f:
                json.dump(history, f, ensure_ascii=False, indent=2)
        except (OSError, TypeError, ValueError) as exc:
            # TypeError/ValueError: record is not JSON-serialisable.
            logging.getLogger(__name__).warning("history append failed: %s", exc)
|
||||||
|
|
||||||
|
def _history_preview(text, limit: int = 200) -> str:
    """Return ``text`` cut to ``limit`` characters, adding "…" when it was cut."""
    text = text or ""
    return text[:limit] + "…" if len(text) > limit else text


def _update_history(file_id: str, result: dict):
    """Mark the first "processing" record with id ``file_id`` as successful.

    The record's ``status`` becomes ``"success"`` and its ``output`` is
    filled with a summary taken from ``result``: the STT summary when the
    record's ``type`` is ``"stt"``, the OCR summary otherwise.

    Nothing is written when the history file is missing or unreadable, or
    when no matching record exists. A write failure is logged and ignored.

    Args:
        file_id: Value compared against each record's ``"id"``.
        result: Task result dict; missing or ``None`` fields fall back to
            empty values.
    """
    import logging

    with _hist_lock:
        try:
            with open(HISTORY_FILE, "r", encoding="utf-8") as f:
                history = json.load(f)
        except (OSError, ValueError):
            return
        if not isinstance(history, list):
            return

        entry = next(
            (
                h for h in history
                if isinstance(h, dict)
                and h.get("id") == file_id
                and h.get("status") == "processing"
            ),
            None,
        )
        if entry is None:
            # Nothing to change, so skip the pointless rewrite.
            return

        entry["status"] = "success"
        if entry.get("type") == "stt":
            entry["output"] = {
                "filename": result.get("output_file", ""),
                "language": result.get("language", ""),
                "duration_s": result.get("duration", 0),
                "segments": len(result.get("segments") or []),
                "text_preview": _history_preview(result.get("text", "")),
                "ollama_used": result.get("ollama_used", False),
                "ollama_model": result.get("ollama_model", ""),
            }
        else:
            entry["output"] = {
                "txt_file": result.get("txt_file", ""),
                "xlsx_file": result.get("xlsx_file", ""),
                "line_count": result.get("line_count", 0),
                "table_count": len(result.get("tables") or []),
                "backend": result.get("backend", ""),
                "ollama_model": result.get("ollama_model", ""),
                "text_preview": _history_preview(result.get("full_text", "")),
            }

        try:
            with open(HISTORY_FILE, "w", encoding="utf-8") as f:
                json.dump(history, f, ensure_ascii=False, indent=2)
        except (OSError, TypeError, ValueError) as exc:
            logging.getLogger(__name__).warning("history update failed: %s", exc)
|
||||||
|
|
||||||
|
def _update_history_fail(file_id: str, error_msg: str):
    """Mark the first "processing" record with id ``file_id`` as failed.

    The record's ``status`` becomes ``"failed"`` and its ``output`` becomes
    ``{"error": ...}`` holding at most the first 300 characters of
    ``error_msg``.

    Nothing is written when the history file is missing or unreadable, or
    when no matching record exists. A write failure is logged and ignored.

    Args:
        file_id: Value compared against each record's ``"id"``.
        error_msg: Failure description; converted with ``str()`` before
            truncation so a non-string value cannot break the update.
    """
    import logging

    with _hist_lock:
        try:
            with open(HISTORY_FILE, "r", encoding="utf-8") as f:
                history = json.load(f)
        except (OSError, ValueError):
            return
        if not isinstance(history, list):
            return

        for entry in history:
            if (
                isinstance(entry, dict)
                and entry.get("id") == file_id
                and entry.get("status") == "processing"
            ):
                entry["status"] = "failed"
                entry["output"] = {"error": str(error_msg)[:300]}
                break
        else:
            # No matching record: leave the file untouched.
            return

        try:
            with open(HISTORY_FILE, "w", encoding="utf-8") as f:
                json.dump(history, f, ensure_ascii=False, indent=2)
        except (OSError, TypeError, ValueError) as exc:
            logging.getLogger(__name__).warning("history update failed: %s", exc)
|
||||||
|
|
||||||
|
def delete_history_item(history_id: str) -> bool:
    """Remove every history record whose ``"id"`` equals ``history_id``.

    The read-modify-write runs under ``_hist_lock``.

    Args:
        history_id: Id of the record(s) to remove.

    Returns:
        ``True`` when at least one record was removed and the file was
        rewritten. ``False`` when the file is missing, unreadable or
        malformed, when no record matched, or when the rewrite failed.
    """
    with _hist_lock:
        try:
            with open(HISTORY_FILE, "r", encoding="utf-8") as f:
                history = json.load(f)
        except (OSError, ValueError):
            return False
        if not isinstance(history, list):
            return False

        # Entries that are not dicts are kept as-is so one malformed entry
        # cannot block deletion of the others.
        remaining = [
            h for h in history
            if not (isinstance(h, dict) and h.get("id") == history_id)
        ]
        if len(remaining) == len(history):
            return False

        try:
            with open(HISTORY_FILE, "w", encoding="utf-8") as f:
                json.dump(remaining, f, ensure_ascii=False, indent=2)
        except (OSError, TypeError, ValueError):
            return False
        return True
|
||||||
|
|
||||||
|
def clear_history():
    """Reset the history file to an empty JSON list.

    Runs under ``_hist_lock``. When ``HISTORY_FILE`` does not exist the
    function does nothing; it never creates the file.
    """
    with _hist_lock:
        if not HISTORY_FILE.exists():
            return
        HISTORY_FILE.write_text("[]", encoding="utf-8")
|
||||||
|
|
||||||
|
|
||||||
# ════════════════════════════════════════════════════════════════
|
# ════════════════════════════════════════════════════════════════
|
||||||
# 시작 이벤트
|
# 시작 이벤트
|
||||||
# ════════════════════════════════════════════════════════════════
|
# ════════════════════════════════════════════════════════════════
|
||||||
@@ -59,16 +152,36 @@ async def on_startup():
|
|||||||
@app.post("/api/login")
|
@app.post("/api/login")
|
||||||
def login(username: str = Form(...), password: str = Form(...)):
|
def login(username: str = Form(...), password: str = Form(...)):
|
||||||
user = authenticate(username, password)
|
user = authenticate(username, password)
|
||||||
if not user:
|
if not user: raise HTTPException(401, "아이디 또는 비밀번호가 올바르지 않습니다")
|
||||||
raise HTTPException(401, "아이디 또는 비밀번호가 올바르지 않습니다")
|
|
||||||
return {"access_token": create_access_token(username), "token_type": "bearer"}
|
return {"access_token": create_access_token(username), "token_type": "bearer"}
|
||||||
|
|
||||||
@app.get("/api/me")
|
@app.get("/api/me")
|
||||||
def me(user: dict = Depends(require_auth)):
|
def me(user: dict = Depends(require_auth)):
|
||||||
|
return {"username": user["username"], "role": user.get("role","user"),
|
||||||
|
"permissions": user.get("permissions", {"stt":False,"ocr":False})}
|
||||||
|
|
||||||
|
|
||||||
|
# ════════════════════════════════════════════════════════════════
|
||||||
|
# 시스템 정보
|
||||||
|
# ════════════════════════════════════════════════════════════════
|
||||||
|
@app.get("/api/system")
|
||||||
|
def system_info(user: dict = Depends(require_auth)):
|
||||||
|
mem = psutil.virtual_memory()
|
||||||
|
swap = psutil.swap_memory()
|
||||||
|
s = _load_settings()
|
||||||
return {
|
return {
|
||||||
"username": user["username"],
|
"ram_total_gb": round(mem.total / 1024**3, 1),
|
||||||
"role": user.get("role", "user"),
|
"ram_used_gb": round(mem.used / 1024**3, 1),
|
||||||
"permissions": user.get("permissions", {"stt": False, "ocr": False}),
|
"ram_avail_gb": round(mem.available / 1024**3, 1),
|
||||||
|
"ram_percent": mem.percent,
|
||||||
|
"swap_total_gb": round(swap.total / 1024**3, 1),
|
||||||
|
"swap_used_gb": round(swap.used / 1024**3, 1),
|
||||||
|
"cpu_logical": psutil.cpu_count(logical=True),
|
||||||
|
"cpu_physical": psutil.cpu_count(logical=False),
|
||||||
|
"cpu_percent": psutil.cpu_percent(interval=0.3),
|
||||||
|
"cpu_threads_setting": s.get("cpu_threads", 0),
|
||||||
|
"stt_timeout": s.get("stt_timeout", 0),
|
||||||
|
"ollama_timeout":s.get("ollama_timeout", 600),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -77,25 +190,28 @@ def me(user: dict = Depends(require_auth)):
|
|||||||
# ════════════════════════════════════════════════════════════════
|
# ════════════════════════════════════════════════════════════════
|
||||||
@app.post("/api/transcribe")
|
@app.post("/api/transcribe")
|
||||||
async def transcribe(
|
async def transcribe(
|
||||||
request: Request,
|
request: Request, file: UploadFile = File(...),
|
||||||
file: UploadFile = File(...),
|
use_ollama: str = Form("false"), ollama_model: str = Form(""),
|
||||||
use_ollama: str = Form("false"),
|
|
||||||
ollama_model: str = Form(""),
|
|
||||||
user: dict = Depends(require_stt),
|
user: dict = Depends(require_stt),
|
||||||
):
|
):
|
||||||
_check_size(request)
|
_check_size(request)
|
||||||
ext = _ext(file.filename)
|
ext = _ext(file.filename)
|
||||||
if ext not in AUDIO_EXT:
|
if ext not in AUDIO_EXT: raise HTTPException(400, f"지원하지 않는 형식: {', '.join(sorted(AUDIO_EXT))}")
|
||||||
raise HTTPException(400, f"지원하지 않는 형식: {', '.join(sorted(AUDIO_EXT))}")
|
|
||||||
file_id = str(uuid.uuid4())
|
file_id = str(uuid.uuid4())
|
||||||
save_path = os.path.join(UPLOAD_DIR, f"{file_id}.{ext}")
|
save_path = os.path.join(UPLOAD_DIR, f"{file_id}.{ext}")
|
||||||
await _save(file, save_path)
|
await _save(file, save_path)
|
||||||
|
file_size = os.path.getsize(save_path)
|
||||||
_use_ollama = use_ollama.lower() == "true"
|
_use_ollama = use_ollama.lower() == "true"
|
||||||
# 모델 미지정 시 설정에서 가져옴
|
s = _load_settings()
|
||||||
if _use_ollama and not ollama_model.strip():
|
if _use_ollama and not ollama_model.strip(): ollama_model = s.get("stt_ollama_model","")
|
||||||
ollama_model = _load_settings().get("stt_ollama_model", "")
|
append_history({"id": file_id, "type": "stt", "status": "processing",
|
||||||
|
"timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "username": user["username"],
|
||||||
|
"input": {"filename": file.filename, "size_bytes": file_size, "format": ext.upper()},
|
||||||
|
"settings": {"model": os.getenv("WHISPER_MODEL","medium"), "language": os.getenv("WHISPER_LANGUAGE","ko"),
|
||||||
|
"compute_type": os.getenv("WHISPER_COMPUTE_TYPE","int8"), "cpu_threads": s.get("cpu_threads",0),
|
||||||
|
"stt_timeout": s.get("stt_timeout",0), "use_ollama": _use_ollama,
|
||||||
|
"ollama_model": ollama_model if _use_ollama else ""},
|
||||||
|
"output": None})
|
||||||
task = transcribe_task.delay(file_id, save_path, _use_ollama, ollama_model)
|
task = transcribe_task.delay(file_id, save_path, _use_ollama, ollama_model)
|
||||||
return {"task_id": task.id, "file_id": file_id, "filename": file.filename}
|
return {"task_id": task.id, "file_id": file_id, "filename": file.filename}
|
||||||
|
|
||||||
@@ -105,167 +221,160 @@ async def transcribe(
|
|||||||
# ════════════════════════════════════════════════════════════════
|
# ════════════════════════════════════════════════════════════════
|
||||||
@app.post("/api/ocr")
|
@app.post("/api/ocr")
|
||||||
async def ocr(
|
async def ocr(
|
||||||
request: Request,
|
request: Request, file: UploadFile = File(...),
|
||||||
file: UploadFile = File(...),
|
mode: str = Form("text"), backend: str = Form("paddle"),
|
||||||
mode: str = Form("text"),
|
ollama_model: str = Form(""), custom_prompt: str = Form(""),
|
||||||
backend: str = Form("paddle"),
|
|
||||||
ollama_model: str = Form(""),
|
|
||||||
custom_prompt: str = Form(""),
|
|
||||||
user: dict = Depends(require_ocr),
|
user: dict = Depends(require_ocr),
|
||||||
):
|
):
|
||||||
_check_size(request)
|
_check_size(request)
|
||||||
ext = _ext(file.filename)
|
ext = _ext(file.filename)
|
||||||
if ext not in IMAGE_EXT:
|
if ext not in IMAGE_EXT: raise HTTPException(400, f"지원하지 않는 형식: {', '.join(sorted(IMAGE_EXT))}")
|
||||||
raise HTTPException(400, f"지원하지 않는 형식: {', '.join(sorted(IMAGE_EXT))}")
|
if mode not in ("text","structure"): mode = "text"
|
||||||
if mode not in ("text", "structure"): mode = "text"
|
if backend not in ("paddle","ollama"): backend = "paddle"
|
||||||
if backend not in ("paddle", "ollama"): backend = "paddle"
|
s = _load_settings()
|
||||||
|
if backend == "ollama" and not ollama_model.strip(): ollama_model = s.get("ocr_ollama_model","granite3.2-vision:latest")
|
||||||
# 모델 미지정 시 설정에서 가져옴
|
|
||||||
if backend == "ollama" and not ollama_model.strip():
|
|
||||||
ollama_model = _load_settings().get("ocr_ollama_model", "granite3.2-vision:latest")
|
|
||||||
|
|
||||||
file_id = str(uuid.uuid4())
|
file_id = str(uuid.uuid4())
|
||||||
save_path = os.path.join(UPLOAD_DIR, f"{file_id}.{ext}")
|
save_path = os.path.join(UPLOAD_DIR, f"{file_id}.{ext}")
|
||||||
await _save(file, save_path)
|
await _save(file, save_path)
|
||||||
|
file_size = os.path.getsize(save_path)
|
||||||
|
append_history({"id": file_id, "type": "ocr", "status": "processing",
|
||||||
|
"timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "username": user["username"],
|
||||||
|
"input": {"filename": file.filename, "size_bytes": file_size, "format": ext.upper()},
|
||||||
|
"settings": {"backend": backend, "mode": mode, "ocr_lang": os.getenv("OCR_LANG","korean"),
|
||||||
|
"ollama_model": ollama_model if backend=="ollama" else "",
|
||||||
|
"ollama_timeout": s.get("ollama_timeout",600),
|
||||||
|
"custom_prompt": custom_prompt[:200] if custom_prompt else ""},
|
||||||
|
"output": None})
|
||||||
task = ocr_task.delay(file_id, save_path, mode, backend, ollama_model, custom_prompt)
|
task = ocr_task.delay(file_id, save_path, mode, backend, ollama_model, custom_prompt)
|
||||||
return {"task_id": task.id, "file_id": file_id,
|
return {"task_id": task.id, "file_id": file_id, "filename": file.filename, "mode": mode, "backend": backend}
|
||||||
"filename": file.filename, "mode": mode, "backend": backend}
|
|
||||||
|
|
||||||
|
|
||||||
# ════════════════════════════════════════════════════════════════
|
# ════════════════════════════════════════════════════════════════
|
||||||
# 작업 상태 / 다운로드
|
# 상태
|
||||||
# ════════════════════════════════════════════════════════════════
|
# ════════════════════════════════════════════════════════════════
|
||||||
@app.get("/api/status/{task_id}")
|
@app.get("/api/status/{task_id}")
|
||||||
def get_status(task_id: str, user: dict = Depends(require_auth)):
|
def get_status(task_id: str, user: dict = Depends(require_auth)):
|
||||||
r = celery_app.AsyncResult(task_id)
|
r = celery_app.AsyncResult(task_id)
|
||||||
if r.state == "PENDING": return {"state": "pending", "progress": 0, "message": "대기 중..."}
|
if r.state == "PENDING": return {"state":"pending", "progress":0, "message":"대기 중..."}
|
||||||
if r.state == "PROGRESS": m = r.info or {}; return {"state": "progress","progress": m.get("progress",0),"message": m.get("message","처리 중...")}
|
if r.state == "PROGRESS": m=r.info or {}; return {"state":"progress","progress":m.get("progress",0),"message":m.get("message","처리 중...")}
|
||||||
if r.state == "SUCCESS": return {"state": "success", "progress": 100, **r.result}
|
if r.state == "SUCCESS": _update_history(task_id, r.result or {}); return {"state":"success","progress":100,**(r.result or {})}
|
||||||
if r.state == "FAILURE": return {"state": "failure", "progress": 0, "message": str(r.info)}
|
if r.state == "FAILURE": _update_history_fail(task_id, str(r.info)); return {"state":"failure","progress":0,"message":str(r.info)}
|
||||||
return {"state": r.state.lower(), "progress": 0}
|
return {"state":r.state.lower(),"progress":0}
|
||||||
|
|
||||||
|
|
||||||
|
# ════════════════════════════════════════════════════════════════
|
||||||
|
# 이력
|
||||||
|
# ════════════════════════════════════════════════════════════════
|
||||||
|
@app.get("/api/history")
def get_history(page: int=1, per_page: int=15, type_: str="", user: dict=Depends(require_auth)):
    """Return one page of history records visible to the caller.

    Users whose role is not ``"admin"`` only see records whose
    ``username`` matches their own. When ``type_`` is ``"stt"`` or
    ``"ocr"`` the list is further restricted to that record type; any
    other value applies no type filter.

    ``page`` and ``per_page`` are clamped to a minimum of 1. Without the
    clamp, zero or negative values produce negative slice bounds and
    return the wrong records.

    Returns:
        Dict with ``total`` (records after filtering), the effective
        ``page`` and ``per_page``, and ``items`` (the requested slice).
    """
    page = max(1, page)
    per_page = max(1, per_page)

    history = _load_history()
    if user.get("role") != "admin":
        history = [h for h in history if h.get("username") == user["username"]]
    if type_ in ("stt", "ocr"):
        history = [h for h in history if h.get("type") == type_]

    start = (page - 1) * per_page
    return {
        "total": len(history),
        "page": page,
        "per_page": per_page,
        "items": history[start:start + per_page],
    }
|
||||||
|
|
||||||
|
@app.delete("/api/history/{history_id}")
def delete_history(history_id: str, user: dict=Depends(require_auth)):
    """Delete one history record by id.

    Admins may delete any record. Other users may only delete records
    whose ``username`` matches their own, which is the same visibility
    rule the history listing applies. A record the caller may not touch is
    reported as not found, so its existence is not disclosed.

    Raises:
        HTTPException: 404 when the record does not exist, is not owned by
            the caller, or could not be removed.
    """
    if user.get("role") != "admin":
        owned = any(
            isinstance(h, dict)
            and h.get("id") == history_id
            and h.get("username") == user["username"]
            for h in _load_history()
        )
        if not owned:
            raise HTTPException(404, "이력을 찾을 수 없습니다")

    if not delete_history_item(history_id):
        raise HTTPException(404, "이력을 찾을 수 없습니다")
    return {"ok": True}
|
||||||
|
|
||||||
|
@app.delete("/api/history")
def clear_all_history(user: dict=Depends(require_admin)):
    """Wipe the whole history (guarded by ``require_admin``) and acknowledge."""
    clear_history()
    response = {"ok": True}
    return response
|
||||||
|
|
||||||
|
|
||||||
|
# ════════════════════════════════════════════════════════════════
|
||||||
|
# 다운로드 / Ollama / 설정 / 관리자
|
||||||
|
# ════════════════════════════════════════════════════════════════
|
||||||
@app.get("/api/download/{filename}")
|
@app.get("/api/download/{filename}")
|
||||||
def download(filename: str, user: dict = Depends(require_auth)):
|
def download(filename: str, user: dict=Depends(require_auth)):
|
||||||
if ".." in filename or "/" in filename:
|
if ".." in filename or "/" in filename: raise HTTPException(400,"잘못된 파일명")
|
||||||
raise HTTPException(400, "잘못된 파일명")
|
|
||||||
path = os.path.join(OUTPUT_DIR, filename)
|
path = os.path.join(OUTPUT_DIR, filename)
|
||||||
if not os.path.exists(path):
|
if not os.path.exists(path): raise HTTPException(404,"파일을 찾을 수 없습니다")
|
||||||
raise HTTPException(404, "파일을 찾을 수 없습니다")
|
media = ("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" if filename.endswith(".xlsx") else "text/plain")
|
||||||
media = ("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
|
||||||
if filename.endswith(".xlsx") else "text/plain")
|
|
||||||
return FileResponse(path, media_type=media, filename=filename)
|
return FileResponse(path, media_type=media, filename=filename)
|
||||||
|
|
||||||
|
|
||||||
# ════════════════════════════════════════════════════════════════
|
|
||||||
# Ollama 모델 목록
|
|
||||||
# ════════════════════════════════════════════════════════════════
|
|
||||||
@app.get("/api/ollama/models")
|
@app.get("/api/ollama/models")
|
||||||
def ollama_models(user: dict = Depends(require_auth)):
|
def ollama_models(user: dict=Depends(require_auth)):
|
||||||
try:
|
try:
|
||||||
resp = httpx.get(f"{OLLAMA_URL}/api/tags", timeout=8.0)
|
resp = httpx.get(f"{OLLAMA_URL}/api/tags", timeout=8.0); resp.raise_for_status()
|
||||||
resp.raise_for_status()
|
return {"models":[m["name"] for m in resp.json().get("models",[])], "connected":True}
|
||||||
models = [m["name"] for m in resp.json().get("models", [])]
|
except Exception as e: return {"models":[], "connected":False, "error":str(e)}
|
||||||
return {"models": models, "connected": True}
|
|
||||||
except Exception as e:
|
|
||||||
return {"models": [], "connected": False, "error": str(e)}
|
|
||||||
|
|
||||||
|
|
||||||
# ════════════════════════════════════════════════════════════════
|
|
||||||
# 설정
|
|
||||||
# ════════════════════════════════════════════════════════════════
|
|
||||||
@app.get("/api/settings")
|
@app.get("/api/settings")
|
||||||
def get_settings(user: dict = Depends(require_auth)):
|
def get_settings(user: dict=Depends(require_auth)): return _load_settings()
|
||||||
return _load_settings()
|
|
||||||
|
|
||||||
@app.post("/api/settings")
|
@app.post("/api/settings")
|
||||||
def save_settings_endpoint(
|
def save_settings_endpoint(
|
||||||
stt_ollama_model: str = Form(""),
|
stt_ollama_model: str = Form(""),
|
||||||
ocr_ollama_model: str = Form(""),
|
ocr_ollama_model: str = Form(""),
|
||||||
|
cpu_threads: str = Form("0"),
|
||||||
|
stt_timeout: str = Form("0"),
|
||||||
|
ollama_timeout: str = Form("600"),
|
||||||
user: dict = Depends(require_auth),
|
user: dict = Depends(require_auth),
|
||||||
):
|
):
|
||||||
data = {"stt_ollama_model": stt_ollama_model,
|
def _int(v, default):
|
||||||
"ocr_ollama_model": ocr_ollama_model}
|
try: return max(0, int(v))
|
||||||
|
except: return default
|
||||||
|
data = {
|
||||||
|
"stt_ollama_model": stt_ollama_model,
|
||||||
|
"ocr_ollama_model": ocr_ollama_model,
|
||||||
|
"cpu_threads": _int(cpu_threads, 0),
|
||||||
|
"stt_timeout": _int(stt_timeout, 0),
|
||||||
|
"ollama_timeout": _int(ollama_timeout, 600),
|
||||||
|
}
|
||||||
_save_settings(data)
|
_save_settings(data)
|
||||||
return {"ok": True, "settings": data}
|
return {"ok":True, "settings":data}
|
||||||
|
|
||||||
|
|
||||||
# ════════════════════════════════════════════════════════════════
|
|
||||||
# 관리자 — 사용자 관리
|
|
||||||
# ════════════════════════════════════════════════════════════════
|
|
||||||
@app.get("/api/admin/users")
|
@app.get("/api/admin/users")
|
||||||
def admin_list_users(user: dict = Depends(require_admin)):
|
def admin_list_users(user: dict=Depends(require_admin)): return {"users":list_users()}
|
||||||
return {"users": list_users()}
|
|
||||||
|
|
||||||
@app.post("/api/admin/users")
|
@app.post("/api/admin/users")
|
||||||
def admin_create_user(
|
def admin_create_user(username:str=Form(...),password:str=Form(...),perm_stt:str=Form("false"),perm_ocr:str=Form("false"),user:dict=Depends(require_admin)):
|
||||||
username: str = Form(...),
|
perms={"stt":perm_stt.lower()=="true","ocr":perm_ocr.lower()=="true"}
|
||||||
password: str = Form(...),
|
ok,msg=create_user(username,password,perms)
|
||||||
perm_stt: str = Form("false"),
|
if not ok: raise HTTPException(400,msg)
|
||||||
perm_ocr: str = Form("false"),
|
return {"ok":True,"message":msg}
|
||||||
user: dict = Depends(require_admin),
|
|
||||||
):
|
|
||||||
perms = {"stt": perm_stt.lower()=="true", "ocr": perm_ocr.lower()=="true"}
|
|
||||||
ok, msg = create_user(username, password, perms)
|
|
||||||
if not ok:
|
|
||||||
raise HTTPException(400, msg)
|
|
||||||
return {"ok": True, "message": msg}
|
|
||||||
|
|
||||||
@app.put("/api/admin/users/{username}")
|
@app.put("/api/admin/users/{username}")
|
||||||
def admin_update_user(
|
def admin_update_user(username:str,perm_stt:str=Form("false"),perm_ocr:str=Form("false"),password:str=Form(""),user:dict=Depends(require_admin)):
|
||||||
username: str,
|
perms={"stt":perm_stt.lower()=="true","ocr":perm_ocr.lower()=="true"}
|
||||||
perm_stt: str = Form("false"),
|
ok,msg=update_user(username,perms,password or None)
|
||||||
perm_ocr: str = Form("false"),
|
if not ok: raise HTTPException(400,msg)
|
||||||
password: str = Form(""),
|
return {"ok":True,"message":msg}
|
||||||
user: dict = Depends(require_admin),
|
|
||||||
):
|
|
||||||
perms = {"stt": perm_stt.lower()=="true", "ocr": perm_ocr.lower()=="true"}
|
|
||||||
ok, msg = update_user(username, perms, password or None)
|
|
||||||
if not ok:
|
|
||||||
raise HTTPException(400, msg)
|
|
||||||
return {"ok": True, "message": msg}
|
|
||||||
|
|
||||||
@app.delete("/api/admin/users/{username}")
|
@app.delete("/api/admin/users/{username}")
|
||||||
def admin_delete_user(username: str, user: dict = Depends(require_admin)):
|
def admin_delete_user(username:str,user:dict=Depends(require_admin)):
|
||||||
ok, msg = delete_user(username)
|
ok,msg=delete_user(username)
|
||||||
if not ok:
|
if not ok: raise HTTPException(400,msg)
|
||||||
raise HTTPException(400, msg)
|
return {"ok":True,"message":msg}
|
||||||
return {"ok": True, "message": msg}
|
|
||||||
|
|
||||||
|
|
||||||
# ════════════════════════════════════════════════════════════════
|
|
||||||
# 정리
|
|
||||||
# ════════════════════════════════════════════════════════════════
|
|
||||||
@app.post("/api/cleanup")
|
@app.post("/api/cleanup")
|
||||||
def cleanup(user: dict = Depends(require_auth)):
|
def cleanup(user:dict=Depends(require_auth)): return {"removed":_cleanup_outputs()}
|
||||||
return {"removed": _cleanup_outputs()}
|
|
||||||
|
|
||||||
|
|
||||||
# ════════════════════════════════════════════════════════════════
|
# ════════════════════════════════════════════════════════════════
|
||||||
# 유틸
|
# 유틸
|
||||||
# ════════════════════════════════════════════════════════════════
|
# ════════════════════════════════════════════════════════════════
|
||||||
def _check_size(request: Request):
|
def _check_size(request):
|
||||||
cl = request.headers.get("content-length")
|
cl = request.headers.get("content-length")
|
||||||
if cl and int(cl) > MAX_UPLOAD_BYTES:
|
if cl and int(cl) > MAX_UPLOAD_BYTES: raise HTTPException(413, f"파일이 너무 큽니다. 최대 {MAX_UPLOAD_BYTES//1024//1024}MB")
|
||||||
raise HTTPException(413, f"파일이 너무 큽니다. 최대 {MAX_UPLOAD_BYTES//1024//1024}MB")
|
|
||||||
|
|
||||||
def _cleanup_outputs() -> int:
|
def _cleanup_outputs():
|
||||||
if OUTPUT_KEEP_SECS == 0:
|
if OUTPUT_KEEP_SECS == 0: return 0
|
||||||
return 0
|
cutoff = time.time() - OUTPUT_KEEP_SECS; removed = 0
|
||||||
cutoff = time.time() - OUTPUT_KEEP_SECS
|
for f in glob.glob(os.path.join(OUTPUT_DIR,"*")):
|
||||||
removed = 0
|
|
||||||
for f in glob.glob(os.path.join(OUTPUT_DIR, "*")):
|
|
||||||
try:
|
try:
|
||||||
if os.path.getmtime(f) < cutoff:
|
if os.path.getmtime(f) < cutoff: os.remove(f); removed += 1
|
||||||
os.remove(f); removed += 1
|
|
||||||
except: pass
|
except: pass
|
||||||
return removed
|
return removed
|
||||||
|
|
||||||
def _ext(fn): return fn.rsplit(".", 1)[-1].lower() if "." in fn else ""
|
def _ext(fn): return fn.rsplit(".",1)[-1].lower() if "." in fn else ""
|
||||||
|
|
||||||
async def _save(file: UploadFile, path: str):
|
async def _save(file, path):
|
||||||
written = 0
|
written = 0
|
||||||
async with aiofiles.open(path, "wb") as f:
|
async with aiofiles.open(path,"wb") as f:
|
||||||
while chunk := await file.read(1024 * 1024):
|
while chunk := await file.read(1024*1024):
|
||||||
written += len(chunk)
|
written += len(chunk)
|
||||||
if written > MAX_UPLOAD_BYTES:
|
if written > MAX_UPLOAD_BYTES:
|
||||||
await f.close(); os.remove(path)
|
await f.close(); os.remove(path)
|
||||||
|
|||||||
@@ -1,8 +1,5 @@
|
|||||||
"""
|
"""
|
||||||
OCR Celery Tasks
|
OCR Celery Tasks — PaddleOCR 3.x + Ollama Vision
|
||||||
- PaddleOCR 3.x 호환 (use_gpu/show_log/cls 파라미터 제거, 결과구조 변경 반영)
|
|
||||||
- backend="paddle" → PaddleOCR 로컬 실행
|
|
||||||
- backend="ollama" → Ollama Vision API 호출
|
|
||||||
"""
|
"""
|
||||||
import os
|
import os
|
||||||
import base64
|
import base64
|
||||||
@@ -16,7 +13,7 @@ REDIS_URL = os.getenv("REDIS_URL", "redis://redis:6379/0")
|
|||||||
OUTPUT_DIR = os.getenv("OUTPUT_DIR", "/data/outputs")
|
OUTPUT_DIR = os.getenv("OUTPUT_DIR", "/data/outputs")
|
||||||
OCR_LANG = os.getenv("OCR_LANG", "korean")
|
OCR_LANG = os.getenv("OCR_LANG", "korean")
|
||||||
OLLAMA_URL = os.getenv("OLLAMA_URL", "http://192.168.0.126:11434")
|
OLLAMA_URL = os.getenv("OLLAMA_URL", "http://192.168.0.126:11434")
|
||||||
OLLAMA_TIMEOUT = int(os.getenv("OLLAMA_TIMEOUT", "180"))
|
OLLAMA_TIMEOUT = int(os.getenv("OLLAMA_TIMEOUT", "600"))
|
||||||
|
|
||||||
celery_app = Celery("ocr_tasks", broker=REDIS_URL, backend=REDIS_URL)
|
celery_app = Celery("ocr_tasks", broker=REDIS_URL, backend=REDIS_URL)
|
||||||
celery_app.conf.update(
|
celery_app.conf.update(
|
||||||
@@ -27,7 +24,6 @@ celery_app.conf.update(
|
|||||||
result_expires=3600,
|
result_expires=3600,
|
||||||
)
|
)
|
||||||
|
|
||||||
# PaddleOCR 싱글톤
|
|
||||||
_ocr_engine = None
|
_ocr_engine = None
|
||||||
_struct_engine = None
|
_struct_engine = None
|
||||||
|
|
||||||
@@ -36,7 +32,6 @@ def get_ocr():
|
|||||||
if _ocr_engine is None:
|
if _ocr_engine is None:
|
||||||
from paddleocr import PaddleOCR
|
from paddleocr import PaddleOCR
|
||||||
print(f"[PaddleOCR] 로딩 (lang={OCR_LANG})")
|
print(f"[PaddleOCR] 로딩 (lang={OCR_LANG})")
|
||||||
# PaddleOCR 3.x: use_gpu/show_log 파라미터 제거됨
|
|
||||||
_ocr_engine = PaddleOCR(use_angle_cls=True, lang=OCR_LANG)
|
_ocr_engine = PaddleOCR(use_angle_cls=True, lang=OCR_LANG)
|
||||||
print("[PaddleOCR] 완료")
|
print("[PaddleOCR] 완료")
|
||||||
return _ocr_engine
|
return _ocr_engine
|
||||||
@@ -51,9 +46,6 @@ def get_structure():
|
|||||||
return _struct_engine
|
return _struct_engine
|
||||||
|
|
||||||
|
|
||||||
# ════════════════════════════════════════════════════════════════
|
|
||||||
# 메인 Task
|
|
||||||
# ════════════════════════════════════════════════════════════════
|
|
||||||
@celery_app.task(bind=True, name="tasks.ocr_task", queue="ocr")
|
@celery_app.task(bind=True, name="tasks.ocr_task", queue="ocr")
|
||||||
def ocr_task(self, file_id, image_path, mode="text",
|
def ocr_task(self, file_id, image_path, mode="text",
|
||||||
backend="paddle", ollama_model="granite3.2-vision", custom_prompt=""):
|
backend="paddle", ollama_model="granite3.2-vision", custom_prompt=""):
|
||||||
@@ -72,9 +64,6 @@ def ocr_task(self, file_id, image_path, mode="text",
|
|||||||
raise Exception(f"OCR 실패: {str(e)}")
|
raise Exception(f"OCR 실패: {str(e)}")
|
||||||
|
|
||||||
|
|
||||||
# ════════════════════════════════════════════════════════════════
|
|
||||||
# Ollama 백엔드
|
|
||||||
# ════════════════════════════════════════════════════════════════
|
|
||||||
_OLLAMA_PROMPTS = {
|
_OLLAMA_PROMPTS = {
|
||||||
"text": "이 이미지에서 모든 텍스트를 정확하게 추출해줘. 원본의 줄 구분과 단락 구조를 유지해줘.",
|
"text": "이 이미지에서 모든 텍스트를 정확하게 추출해줘. 원본의 줄 구분과 단락 구조를 유지해줘.",
|
||||||
"structure": "이 이미지를 분석해서 표는 마크다운 표 형식으로, 나머지 텍스트는 원본 구조를 유지하며 추출해줘.",
|
"structure": "이 이미지를 분석해서 표는 마크다운 표 형식으로, 나머지 텍스트는 원본 구조를 유지하며 추출해줘.",
|
||||||
@@ -91,8 +80,7 @@ def _run_ollama(task, file_id, image_path, mode, ollama_model, custom_prompt):
|
|||||||
resp = httpx.post(f"{OLLAMA_URL}/api/chat", json={
|
resp = httpx.post(f"{OLLAMA_URL}/api/chat", json={
|
||||||
"model": ollama_model,
|
"model": ollama_model,
|
||||||
"messages": [{"role": "user", "content": prompt, "images": [img_b64]}],
|
"messages": [{"role": "user", "content": prompt, "images": [img_b64]}],
|
||||||
"stream": False,
|
"stream": False, "options": {"temperature": 0.1},
|
||||||
"options": {"temperature": 0.1},
|
|
||||||
}, timeout=float(OLLAMA_TIMEOUT))
|
}, timeout=float(OLLAMA_TIMEOUT))
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
except httpx.ConnectError:
|
except httpx.ConnectError:
|
||||||
@@ -121,16 +109,12 @@ def _run_ollama(task, file_id, image_path, mode, ollama_model, custom_prompt):
|
|||||||
"mode": mode, "backend": "ollama", "ollama_model": ollama_model,
|
"mode": mode, "backend": "ollama", "ollama_model": ollama_model,
|
||||||
"full_text": full_text, "lines": lines, "line_count": len(lines),
|
"full_text": full_text, "lines": lines, "line_count": len(lines),
|
||||||
"txt_file": txt_file,
|
"txt_file": txt_file,
|
||||||
"tables": [{"html": h, "rows": len(t),
|
"tables": [{"html": h, "rows": len(t), "cols": max(len(r) for r in t) if t else 0}
|
||||||
"cols": max(len(r) for r in t) if t else 0}
|
|
||||||
for h, t in zip(tables_html, tables)],
|
for h, t in zip(tables_html, tables)],
|
||||||
"xlsx_file": xlsx_file,
|
"xlsx_file": xlsx_file,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
# ════════════════════════════════════════════════════════════════
|
|
||||||
# PaddleOCR 백엔드
|
|
||||||
# ════════════════════════════════════════════════════════════════
|
|
||||||
def _run_paddle(task, file_id, image_path, mode):
|
def _run_paddle(task, file_id, image_path, mode):
|
||||||
import cv2
|
import cv2
|
||||||
img = cv2.imread(image_path)
|
img = cv2.imread(image_path)
|
||||||
@@ -140,50 +124,38 @@ def _run_paddle(task, file_id, image_path, mode):
|
|||||||
return _paddle_structure(task, file_id, img) if mode == "structure" \
|
return _paddle_structure(task, file_id, img) if mode == "structure" \
|
||||||
else _paddle_text(task, file_id, img)
|
else _paddle_text(task, file_id, img)
|
||||||
|
|
||||||
|
|
||||||
def _paddle_text(task, file_id, img):
|
def _paddle_text(task, file_id, img):
|
||||||
task.update_state(state="PROGRESS", meta={"progress": 30, "message": "텍스트 인식 중..."})
|
task.update_state(state="PROGRESS", meta={"progress": 30, "message": "텍스트 인식 중..."})
|
||||||
# PaddleOCR 3.x: cls 파라미터 제거, 결과 구조 변경
|
|
||||||
result = get_ocr().ocr(img)
|
result = get_ocr().ocr(img)
|
||||||
task.update_state(state="PROGRESS", meta={"progress": 80, "message": "결과 정리 중..."})
|
task.update_state(state="PROGRESS", meta={"progress": 80, "message": "결과 정리 중..."})
|
||||||
|
|
||||||
lines = []
|
lines = []
|
||||||
if result and len(result) > 0:
|
if result and len(result) > 0:
|
||||||
r = result[0]
|
r = result[0]
|
||||||
# PaddleOCR 3.x 결과 구조: dict with rec_texts, rec_scores
|
|
||||||
if isinstance(r, dict):
|
if isinstance(r, dict):
|
||||||
texts = r.get("rec_texts", [])
|
texts = r.get("rec_texts", [])
|
||||||
scores = r.get("rec_scores", [])
|
scores = r.get("rec_scores", [])
|
||||||
for text, conf in zip(texts, scores):
|
for text, conf in zip(texts, scores):
|
||||||
if text.strip():
|
if text.strip():
|
||||||
lines.append({"text": text,
|
lines.append({"text": text, "confidence": round(float(conf), 3), "bbox": []})
|
||||||
"confidence": round(float(conf), 3),
|
|
||||||
"bbox": []})
|
|
||||||
# 구버전 호환 (list of [bbox, (text, conf)])
|
|
||||||
elif isinstance(r, list):
|
elif isinstance(r, list):
|
||||||
for item in r:
|
for item in r:
|
||||||
if item and len(item) == 2:
|
if item and len(item) == 2:
|
||||||
_, (text, conf) = item
|
_, (text, conf) = item
|
||||||
if text.strip():
|
if text.strip():
|
||||||
lines.append({"text": text,
|
lines.append({"text": text, "confidence": round(float(conf), 3), "bbox": []})
|
||||||
"confidence": round(float(conf), 3),
|
|
||||||
"bbox": []})
|
|
||||||
|
|
||||||
full_text = "\n".join(l["text"] for l in lines)
|
full_text = "\n".join(l["text"] for l in lines)
|
||||||
txt_file = f"{file_id}_ocr.txt"
|
txt_file = f"{file_id}_ocr.txt"
|
||||||
with open(os.path.join(OUTPUT_DIR, txt_file), "w", encoding="utf-8") as f:
|
with open(os.path.join(OUTPUT_DIR, txt_file), "w", encoding="utf-8") as f:
|
||||||
f.write(full_text)
|
f.write(full_text)
|
||||||
return {"mode": "text", "backend": "paddle",
|
return {"mode": "text", "backend": "paddle", "ollama_model": "",
|
||||||
"full_text": full_text, "lines": lines,
|
"full_text": full_text, "lines": lines,
|
||||||
"line_count": len(lines), "txt_file": txt_file,
|
"line_count": len(lines), "txt_file": txt_file,
|
||||||
"tables": [], "xlsx_file": None}
|
"tables": [], "xlsx_file": None}
|
||||||
|
|
||||||
|
|
||||||
def _paddle_structure(task, file_id, img):
|
def _paddle_structure(task, file_id, img):
|
||||||
task.update_state(state="PROGRESS", meta={"progress": 20, "message": "레이아웃 분석 중..."})
|
task.update_state(state="PROGRESS", meta={"progress": 20, "message": "레이아웃 분석 중..."})
|
||||||
result = get_structure()(img)
|
result = get_structure()(img)
|
||||||
task.update_state(state="PROGRESS", meta={"progress": 60, "message": "표 구조 추출 중..."})
|
task.update_state(state="PROGRESS", meta={"progress": 60, "message": "표 구조 추출 중..."})
|
||||||
|
|
||||||
text_blocks, tables_html, tables_data = [], [], []
|
text_blocks, tables_html, tables_data = [], [], []
|
||||||
for region in result:
|
for region in result:
|
||||||
rtype = region.get("type", "").lower()
|
rtype = region.get("type", "").lower()
|
||||||
@@ -197,32 +169,24 @@ def _paddle_structure(task, file_id, img):
|
|||||||
if isinstance(line, (list, tuple)) and len(line) == 2:
|
if isinstance(line, (list, tuple)) and len(line) == 2:
|
||||||
_, (text, _conf) = line
|
_, (text, _conf) = line
|
||||||
text_blocks.append(text)
|
text_blocks.append(text)
|
||||||
|
|
||||||
full_text = "\n".join(text_blocks)
|
full_text = "\n".join(text_blocks)
|
||||||
task.update_state(state="PROGRESS", meta={"progress": 80, "message": "Excel 생성 중..."})
|
task.update_state(state="PROGRESS", meta={"progress": 80, "message": "Excel 생성 중..."})
|
||||||
|
|
||||||
xlsx_file = None
|
xlsx_file = None
|
||||||
if tables_data:
|
if tables_data:
|
||||||
xlsx_file = f"{file_id}_tables.xlsx"
|
xlsx_file = f"{file_id}_tables.xlsx"
|
||||||
_save_excel(tables_data, os.path.join(OUTPUT_DIR, xlsx_file))
|
_save_excel(tables_data, os.path.join(OUTPUT_DIR, xlsx_file))
|
||||||
|
|
||||||
txt_file = f"{file_id}_ocr.txt"
|
txt_file = f"{file_id}_ocr.txt"
|
||||||
with open(os.path.join(OUTPUT_DIR, txt_file), "w", encoding="utf-8") as f:
|
with open(os.path.join(OUTPUT_DIR, txt_file), "w", encoding="utf-8") as f:
|
||||||
f.write("# 텍스트\n\n" + full_text)
|
f.write("# 텍스트\n\n" + full_text)
|
||||||
|
|
||||||
lines = [{"text": t, "confidence": 1.0, "bbox": []} for t in text_blocks]
|
lines = [{"text": t, "confidence": 1.0, "bbox": []} for t in text_blocks]
|
||||||
tables_meta = [{"html": h, "rows": len(d),
|
tables_meta = [{"html": h, "rows": len(d), "cols": max(len(r) for r in d) if d else 0}
|
||||||
"cols": max(len(r) for r in d) if d else 0}
|
|
||||||
for h, d in zip(tables_html, tables_data)]
|
for h, d in zip(tables_html, tables_data)]
|
||||||
return {"mode": "structure", "backend": "paddle",
|
return {"mode": "structure", "backend": "paddle", "ollama_model": "",
|
||||||
"full_text": full_text, "lines": lines,
|
"full_text": full_text, "lines": lines,
|
||||||
"line_count": len(lines), "txt_file": txt_file,
|
"line_count": len(lines), "txt_file": txt_file,
|
||||||
"tables": tables_meta, "xlsx_file": xlsx_file}
|
"tables": tables_meta, "xlsx_file": xlsx_file}
|
||||||
|
|
||||||
|
|
||||||
# ════════════════════════════════════════════════════════════════
|
|
||||||
# 공통 유틸
|
|
||||||
# ════════════════════════════════════════════════════════════════
|
|
||||||
def _parse_md_tables(text):
|
def _parse_md_tables(text):
|
||||||
tables, current = [], []
|
tables, current = [], []
|
||||||
for line in text.splitlines():
|
for line in text.splitlines():
|
||||||
@@ -241,8 +205,7 @@ def _md_table_to_html(table):
|
|||||||
rows = ""
|
rows = ""
|
||||||
for i, row in enumerate(table):
|
for i, row in enumerate(table):
|
||||||
tag = "th" if i == 0 else "td"
|
tag = "th" if i == 0 else "td"
|
||||||
cells = "".join(f"<{tag}>{c}</{tag}>" for c in row)
|
rows += "<tr>" + "".join(f"<{tag}>{c}</{tag}>" for c in row) + "</tr>"
|
||||||
rows += f"<tr>{cells}</tr>"
|
|
||||||
return f"<table>{rows}</table>"
|
return f"<table>{rows}</table>"
|
||||||
|
|
||||||
def _html_table_to_list(html):
|
def _html_table_to_list(html):
|
||||||
@@ -255,8 +218,7 @@ def _html_table_to_list(html):
|
|||||||
if tag == "tr": self._row = []
|
if tag == "tr": self._row = []
|
||||||
elif tag in ("td","th"): self._cell = []; self._in = True
|
elif tag in ("td","th"): self._cell = []; self._in = True
|
||||||
def handle_endtag(self, tag):
|
def handle_endtag(self, tag):
|
||||||
if tag in ("td","th"):
|
if tag in ("td","th"): self._row.append("".join(self._cell).strip()); self._in = False
|
||||||
self._row.append("".join(self._cell).strip()); self._in = False
|
|
||||||
elif tag == "tr":
|
elif tag == "tr":
|
||||||
if self._row: self.rows.append(self._row)
|
if self._row: self.rows.append(self._row)
|
||||||
def handle_data(self, data):
|
def handle_data(self, data):
|
||||||
@@ -264,8 +226,7 @@ def _html_table_to_list(html):
|
|||||||
p = P(); p.feed(html); return p.rows
|
p = P(); p.feed(html); return p.rows
|
||||||
|
|
||||||
def _save_excel(tables, path):
|
def _save_excel(tables, path):
|
||||||
wb = openpyxl.Workbook()
|
wb = openpyxl.Workbook(); wb.remove(wb.active)
|
||||||
wb.remove(wb.active)
|
|
||||||
for i, table in enumerate(tables, 1):
|
for i, table in enumerate(tables, 1):
|
||||||
ws = wb.create_sheet(f"표 {i}")
|
ws = wb.create_sheet(f"표 {i}")
|
||||||
thin = Side(style="thin", color="2A2A33")
|
thin = Side(style="thin", color="2A2A33")
|
||||||
@@ -274,8 +235,7 @@ def _save_excel(tables, path):
|
|||||||
for c_idx, val in enumerate(row, 1):
|
for c_idx, val in enumerate(row, 1):
|
||||||
cell = ws.cell(row=r_idx, column=c_idx, value=val)
|
cell = ws.cell(row=r_idx, column=c_idx, value=val)
|
||||||
cell.border = bdr
|
cell.border = bdr
|
||||||
cell.alignment = Alignment(horizontal="center",
|
cell.alignment = Alignment(horizontal="center", vertical="center", wrap_text=True)
|
||||||
vertical="center", wrap_text=True)
|
|
||||||
if r_idx == 1:
|
if r_idx == 1:
|
||||||
cell.fill = PatternFill("solid", fgColor="1A1A2E")
|
cell.fill = PatternFill("solid", fgColor="1A1A2E")
|
||||||
cell.font = Font(color="00E5A0", bold=True, size=10)
|
cell.font = Font(color="00E5A0", bold=True, size=10)
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ redis==5.0.8
|
|||||||
faster-whisper==1.0.3
|
faster-whisper==1.0.3
|
||||||
aiofiles==23.2.1
|
aiofiles==23.2.1
|
||||||
|
|
||||||
# 인증 (bcrypt 제거 — 직접 비교 방식 사용)
|
# 인증
|
||||||
python-jose[cryptography]==3.3.0
|
python-jose[cryptography]==3.3.0
|
||||||
|
|
||||||
# PaddleOCR 3.x
|
# PaddleOCR 3.x
|
||||||
@@ -19,3 +19,6 @@ httpx>=0.27.0
|
|||||||
# Excel 출력
|
# Excel 출력
|
||||||
openpyxl==3.1.2
|
openpyxl==3.1.2
|
||||||
Pillow>=10.0.0
|
Pillow>=10.0.0
|
||||||
|
|
||||||
|
# 시스템 모니터링
|
||||||
|
psutil>=5.9.0
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
42
app/tasks.py
42
app/tasks.py
@@ -1,7 +1,7 @@
|
|||||||
import os
|
import os
|
||||||
import httpx
|
import httpx
|
||||||
from celery import Celery
|
from celery import Celery
|
||||||
from ocr_tasks import ocr_task # noqa: F401 — worker에 등록
|
from ocr_tasks import ocr_task # noqa: F401
|
||||||
|
|
||||||
REDIS_URL = os.getenv("REDIS_URL", "redis://redis:6379/0")
|
REDIS_URL = os.getenv("REDIS_URL", "redis://redis:6379/0")
|
||||||
MODEL_SIZE = os.getenv("WHISPER_MODEL", "medium")
|
MODEL_SIZE = os.getenv("WHISPER_MODEL", "medium")
|
||||||
@@ -12,7 +12,10 @@ BEAM_SIZE = int(os.getenv("WHISPER_BEAM_SIZE", "5"))
|
|||||||
INITIAL_PROMPT = os.getenv("WHISPER_INITIAL_PROMPT", "") or None
|
INITIAL_PROMPT = os.getenv("WHISPER_INITIAL_PROMPT", "") or None
|
||||||
OUTPUT_DIR = os.getenv("OUTPUT_DIR", "/data/outputs")
|
OUTPUT_DIR = os.getenv("OUTPUT_DIR", "/data/outputs")
|
||||||
OLLAMA_URL = os.getenv("OLLAMA_URL", "http://192.168.0.126:11434")
|
OLLAMA_URL = os.getenv("OLLAMA_URL", "http://192.168.0.126:11434")
|
||||||
OLLAMA_TIMEOUT = int(os.getenv("OLLAMA_TIMEOUT", "180"))
|
OLLAMA_TIMEOUT = int(os.getenv("OLLAMA_TIMEOUT", "600"))
|
||||||
|
|
||||||
|
_cpu_threads_env = int(os.getenv("CPU_THREADS", "0"))
|
||||||
|
CPU_THREADS = _cpu_threads_env if _cpu_threads_env > 0 else None # None = auto
|
||||||
|
|
||||||
celery_app = Celery("whisper_tasks", broker=REDIS_URL, backend=REDIS_URL)
|
celery_app = Celery("whisper_tasks", broker=REDIS_URL, backend=REDIS_URL)
|
||||||
celery_app.conf.update(
|
celery_app.conf.update(
|
||||||
@@ -29,15 +32,16 @@ def get_model():
|
|||||||
global _model
|
global _model
|
||||||
if _model is None:
|
if _model is None:
|
||||||
from faster_whisper import WhisperModel
|
from faster_whisper import WhisperModel
|
||||||
print(f"[Whisper] 로딩: {MODEL_SIZE} / {DEVICE} / {COMPUTE_TYPE}")
|
kwargs = dict(device=DEVICE, compute_type=COMPUTE_TYPE)
|
||||||
_model = WhisperModel(MODEL_SIZE, device=DEVICE, compute_type=COMPUTE_TYPE)
|
if CPU_THREADS is not None:
|
||||||
|
kwargs["cpu_threads"] = CPU_THREADS
|
||||||
|
print(f"[Whisper] 로딩: {MODEL_SIZE} / {DEVICE} / {COMPUTE_TYPE} / threads={CPU_THREADS or 'auto'}")
|
||||||
|
_model = WhisperModel(MODEL_SIZE, **kwargs)
|
||||||
print("[Whisper] 로드 완료")
|
print("[Whisper] 로드 완료")
|
||||||
return _model
|
return _model
|
||||||
|
|
||||||
|
|
||||||
# ── Ollama 후처리 ─────────────────────────────────────────────
|
|
||||||
def _ollama_postprocess(text: str, model: str) -> str:
|
def _ollama_postprocess(text: str, model: str) -> str:
|
||||||
"""Whisper 결과를 Ollama로 후처리 (문장부호·맞춤법·자연스러운 문장)"""
|
|
||||||
if not model or not text.strip():
|
if not model or not text.strip():
|
||||||
return text
|
return text
|
||||||
prompt = (
|
prompt = (
|
||||||
@@ -49,33 +53,22 @@ def _ollama_postprocess(text: str, model: str) -> str:
|
|||||||
try:
|
try:
|
||||||
resp = httpx.post(
|
resp = httpx.post(
|
||||||
f"{OLLAMA_URL}/api/chat",
|
f"{OLLAMA_URL}/api/chat",
|
||||||
json={
|
json={"model": model,
|
||||||
"model": model,
|
|
||||||
"messages": [{"role": "user", "content": prompt}],
|
"messages": [{"role": "user", "content": prompt}],
|
||||||
"stream": False,
|
"stream": False, "options": {"temperature": 0.1}},
|
||||||
"options": {"temperature": 0.1},
|
|
||||||
},
|
|
||||||
timeout=float(OLLAMA_TIMEOUT),
|
timeout=float(OLLAMA_TIMEOUT),
|
||||||
)
|
)
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
result = resp.json().get("message", {}).get("content", "").strip()
|
result = resp.json().get("message", {}).get("content", "").strip()
|
||||||
return result if result else text
|
return result if result else text
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"[Ollama 후처리 실패] {e} — 원본 텍스트 사용")
|
print(f"[Ollama 후처리 실패] {e}")
|
||||||
return text
|
return text
|
||||||
|
|
||||||
|
|
||||||
# ════════════════════════════════════════════════════════════════
|
|
||||||
# STT Celery Task
|
|
||||||
# ════════════════════════════════════════════════════════════════
|
|
||||||
@celery_app.task(bind=True, name="tasks.transcribe_task", queue="stt")
|
@celery_app.task(bind=True, name="tasks.transcribe_task", queue="stt")
|
||||||
def transcribe_task(
|
def transcribe_task(self, file_id: str, audio_path: str,
|
||||||
self,
|
use_ollama: bool = False, ollama_model: str = ""):
|
||||||
file_id: str,
|
|
||||||
audio_path: str,
|
|
||||||
use_ollama: bool = False,
|
|
||||||
ollama_model: str = "",
|
|
||||||
):
|
|
||||||
self.update_state(state="PROGRESS", meta={"progress": 5, "message": "모델 준비 중..."})
|
self.update_state(state="PROGRESS", meta={"progress": 5, "message": "모델 준비 중..."})
|
||||||
try:
|
try:
|
||||||
model = get_model()
|
model = get_model()
|
||||||
@@ -97,8 +90,8 @@ def transcribe_task(
|
|||||||
duration = info.duration
|
duration = info.duration
|
||||||
|
|
||||||
for seg in segments_gen:
|
for seg in segments_gen:
|
||||||
segments.append({"start": round(seg.start,2),
|
segments.append({"start": round(seg.start, 2),
|
||||||
"end": round(seg.end,2),
|
"end": round(seg.end, 2),
|
||||||
"text": seg.text.strip()})
|
"text": seg.text.strip()})
|
||||||
parts.append(seg.text.strip())
|
parts.append(seg.text.strip())
|
||||||
if duration > 0:
|
if duration > 0:
|
||||||
@@ -112,7 +105,6 @@ def transcribe_task(
|
|||||||
raw_text = "\n".join(parts)
|
raw_text = "\n".join(parts)
|
||||||
full_text = raw_text
|
full_text = raw_text
|
||||||
|
|
||||||
# Ollama 후처리
|
|
||||||
if use_ollama and ollama_model:
|
if use_ollama and ollama_model:
|
||||||
self.update_state(state="PROGRESS",
|
self.update_state(state="PROGRESS",
|
||||||
meta={"progress": 85,
|
meta={"progress": 85,
|
||||||
|
|||||||
@@ -3,7 +3,6 @@ services:
|
|||||||
image: redis:7-alpine
|
image: redis:7-alpine
|
||||||
container_name: whisper_redis
|
container_name: whisper_redis
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
# RDB 스냅샷 저장 실패 시에도 쓰기 허용 (Celery 브로커 용도)
|
|
||||||
command: redis-server --stop-writes-on-bgsave-error no
|
command: redis-server --stop-writes-on-bgsave-error no
|
||||||
environment:
|
environment:
|
||||||
- TZ=Asia/Seoul
|
- TZ=Asia/Seoul
|
||||||
@@ -19,13 +18,13 @@ services:
|
|||||||
container_name: whisper_app
|
container_name: whisper_app
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
ports:
|
ports:
|
||||||
- "8800:8000" # 호스트 Nginx가 리버스 프록시
|
- "8800:8000"
|
||||||
environment:
|
environment:
|
||||||
- TZ=Asia/Seoul
|
- TZ=Asia/Seoul
|
||||||
|
|
||||||
# ── 인증 (반드시 변경) ──────────────────────────────
|
# ── 인증 (반드시 변경) ──────────────────────────────
|
||||||
- AUTH_USERNAME=byun
|
- AUTH_USERNAME=admin
|
||||||
- AUTH_PASSWORD=admin
|
- AUTH_PASSWORD=changeme1234
|
||||||
- JWT_SECRET=your-very-secret-key-change-this
|
- JWT_SECRET=your-very-secret-key-change-this
|
||||||
- JWT_EXPIRE_HOURS=12
|
- JWT_EXPIRE_HOURS=12
|
||||||
|
|
||||||
@@ -33,24 +32,31 @@ services:
|
|||||||
- REDIS_URL=redis://redis:6379/0
|
- REDIS_URL=redis://redis:6379/0
|
||||||
- UPLOAD_DIR=/data/uploads
|
- UPLOAD_DIR=/data/uploads
|
||||||
- OUTPUT_DIR=/data/outputs
|
- OUTPUT_DIR=/data/outputs
|
||||||
- WHISPER_MODEL=medium # tiny/base/small/medium/large-v3
|
- WHISPER_MODEL=medium
|
||||||
- WHISPER_DEVICE=cpu
|
- WHISPER_DEVICE=cpu
|
||||||
- WHISPER_COMPUTE_TYPE=int8
|
- WHISPER_COMPUTE_TYPE=int8
|
||||||
- WHISPER_LANGUAGE=ko
|
- WHISPER_LANGUAGE=ko
|
||||||
- WHISPER_BEAM_SIZE=5
|
- WHISPER_BEAM_SIZE=5
|
||||||
- WHISPER_INITIAL_PROMPT= # 예: "고객 상담 녹취록입니다."
|
- WHISPER_INITIAL_PROMPT=
|
||||||
|
|
||||||
|
# ── 타임아웃 ─────────────────────────────────────────
|
||||||
|
# STT: Celery 태스크 소프트 타임아웃 (초) — 0=무제한
|
||||||
|
- STT_TIMEOUT=0
|
||||||
|
# Ollama: Vision/후처리 API 응답 대기 (초)
|
||||||
|
- OLLAMA_TIMEOUT=600
|
||||||
|
|
||||||
# ── 파일 관리 ────────────────────────────────────────
|
# ── 파일 관리 ────────────────────────────────────────
|
||||||
- MAX_UPLOAD_MB=500
|
- MAX_UPLOAD_MB=500
|
||||||
- OUTPUT_KEEP_HOURS=48
|
- OUTPUT_KEEP_HOURS=48
|
||||||
|
|
||||||
# ── PaddleOCR ────────────────────────────────────────
|
# ── PaddleOCR ────────────────────────────────────────
|
||||||
- OCR_LANG=korean # korean/en/japan/chinese_cht/ch
|
- OCR_LANG=korean
|
||||||
|
|
||||||
# ── Ollama OCR ───────────────────────────────────────
|
# ── Ollama ───────────────────────────────────────────
|
||||||
# 호스트 실제 LAN IP 사용 (host.docker.internal은 Linux에서 불안정)
|
|
||||||
- OLLAMA_URL=http://192.168.0.126:11434
|
- OLLAMA_URL=http://192.168.0.126:11434
|
||||||
- OLLAMA_TIMEOUT=600 # 11b 이상 모델은 300 이상 권장
|
|
||||||
|
# ── CPU 스레드 ───────────────────────────────────────
|
||||||
|
- CPU_THREADS=0
|
||||||
|
|
||||||
volumes:
|
volumes:
|
||||||
- stt_data:/data
|
- stt_data:/data
|
||||||
@@ -67,9 +73,6 @@ services:
|
|||||||
dockerfile: Dockerfile
|
dockerfile: Dockerfile
|
||||||
container_name: whisper_worker
|
container_name: whisper_worker
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
# --pool=solo : CTranslate2(faster-whisper)가 prefork 방식과 충돌(SIGSEGV) 발생
|
|
||||||
# solo 모드로 포크 없이 실행하여 해결
|
|
||||||
# --max-tasks-per-child=50 : Whisper/Paddle 모델 메모리 누수 방지
|
|
||||||
command: >
|
command: >
|
||||||
celery -A tasks worker
|
celery -A tasks worker
|
||||||
--loglevel=info
|
--loglevel=info
|
||||||
@@ -87,11 +90,13 @@ services:
|
|||||||
- WHISPER_LANGUAGE=ko
|
- WHISPER_LANGUAGE=ko
|
||||||
- WHISPER_BEAM_SIZE=5
|
- WHISPER_BEAM_SIZE=5
|
||||||
- WHISPER_INITIAL_PROMPT=
|
- WHISPER_INITIAL_PROMPT=
|
||||||
|
- STT_TIMEOUT=0
|
||||||
|
- OLLAMA_TIMEOUT=600
|
||||||
- MAX_UPLOAD_MB=500
|
- MAX_UPLOAD_MB=500
|
||||||
- OUTPUT_KEEP_HOURS=48
|
- OUTPUT_KEEP_HOURS=48
|
||||||
- OCR_LANG=korean
|
- OCR_LANG=korean
|
||||||
- OLLAMA_URL=http://192.168.0.126:11434
|
- OLLAMA_URL=http://192.168.0.126:11434
|
||||||
- OLLAMA_TIMEOUT=600
|
- CPU_THREADS=0
|
||||||
- JWT_SECRET=your-very-secret-key-change-this
|
- JWT_SECRET=your-very-secret-key-change-this
|
||||||
volumes:
|
volumes:
|
||||||
- stt_data:/data
|
- stt_data:/data
|
||||||
|
|||||||
114
docker-compose.yml.bak1
Normal file
114
docker-compose.yml.bak1
Normal file
@@ -0,0 +1,114 @@
|
|||||||
|
services:
|
||||||
|
redis:
|
||||||
|
image: redis:7-alpine
|
||||||
|
container_name: whisper_redis
|
||||||
|
restart: unless-stopped
|
||||||
|
command: redis-server --stop-writes-on-bgsave-error no
|
||||||
|
environment:
|
||||||
|
- TZ=Asia/Seoul
|
||||||
|
volumes:
|
||||||
|
- redis_data:/data
|
||||||
|
networks:
|
||||||
|
- whisper_net
|
||||||
|
|
||||||
|
app:
|
||||||
|
build:
|
||||||
|
context: ./app
|
||||||
|
dockerfile: Dockerfile
|
||||||
|
container_name: whisper_app
|
||||||
|
restart: unless-stopped
|
||||||
|
ports:
|
||||||
|
- "8800:8000"
|
||||||
|
environment:
|
||||||
|
- TZ=Asia/Seoul
|
||||||
|
|
||||||
|
# ── 인증 (반드시 변경) ──────────────────────────────
|
||||||
|
- AUTH_USERNAME=admin
|
||||||
|
- AUTH_PASSWORD=changeme1234
|
||||||
|
- JWT_SECRET=your-very-secret-key-change-this
|
||||||
|
- JWT_EXPIRE_HOURS=12
|
||||||
|
|
||||||
|
# ── Whisper STT ─────────────────────────────────────
|
||||||
|
- REDIS_URL=redis://redis:6379/0
|
||||||
|
- UPLOAD_DIR=/data/uploads
|
||||||
|
- OUTPUT_DIR=/data/outputs
|
||||||
|
- WHISPER_MODEL=medium
|
||||||
|
- WHISPER_DEVICE=cpu
|
||||||
|
- WHISPER_COMPUTE_TYPE=int8
|
||||||
|
- WHISPER_LANGUAGE=ko
|
||||||
|
- WHISPER_BEAM_SIZE=5
|
||||||
|
- WHISPER_INITIAL_PROMPT=
|
||||||
|
|
||||||
|
# ── CPU 스레드 설정 ──────────────────────────────────
|
||||||
|
# 5825u: 8코어 16스레드 → 8~12 권장
|
||||||
|
# 0 = 자동(시스템 전체 코어 사용)
|
||||||
|
- CPU_THREADS=0
|
||||||
|
|
||||||
|
# ── 파일 관리 ────────────────────────────────────────
|
||||||
|
- MAX_UPLOAD_MB=500
|
||||||
|
- OUTPUT_KEEP_HOURS=48
|
||||||
|
|
||||||
|
# ── PaddleOCR ────────────────────────────────────────
|
||||||
|
- OCR_LANG=korean
|
||||||
|
|
||||||
|
# ── Ollama ───────────────────────────────────────────
|
||||||
|
- OLLAMA_URL=http://192.168.0.126:11434
|
||||||
|
- OLLAMA_TIMEOUT=600
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
- stt_data:/data
|
||||||
|
- whisper_models:/root/.cache/huggingface
|
||||||
|
- paddle_models:/root/.paddlex
|
||||||
|
depends_on:
|
||||||
|
- redis
|
||||||
|
networks:
|
||||||
|
- whisper_net
|
||||||
|
|
||||||
|
worker:
|
||||||
|
build:
|
||||||
|
context: ./app
|
||||||
|
dockerfile: Dockerfile
|
||||||
|
container_name: whisper_worker
|
||||||
|
restart: unless-stopped
|
||||||
|
command: >
|
||||||
|
celery -A tasks worker
|
||||||
|
--loglevel=info
|
||||||
|
--pool=solo
|
||||||
|
--max-tasks-per-child=50
|
||||||
|
-Q stt,ocr
|
||||||
|
environment:
|
||||||
|
- TZ=Asia/Seoul
|
||||||
|
- REDIS_URL=redis://redis:6379/0
|
||||||
|
- UPLOAD_DIR=/data/uploads
|
||||||
|
- OUTPUT_DIR=/data/outputs
|
||||||
|
- WHISPER_MODEL=medium
|
||||||
|
- WHISPER_DEVICE=cpu
|
||||||
|
- WHISPER_COMPUTE_TYPE=int8
|
||||||
|
- WHISPER_LANGUAGE=ko
|
||||||
|
- WHISPER_BEAM_SIZE=5
|
||||||
|
- WHISPER_INITIAL_PROMPT=
|
||||||
|
- CPU_THREADS=0
|
||||||
|
- MAX_UPLOAD_MB=500
|
||||||
|
- OUTPUT_KEEP_HOURS=48
|
||||||
|
- OCR_LANG=korean
|
||||||
|
- OLLAMA_URL=http://192.168.0.126:11434
|
||||||
|
- OLLAMA_TIMEOUT=600
|
||||||
|
- JWT_SECRET=your-very-secret-key-change-this
|
||||||
|
volumes:
|
||||||
|
- stt_data:/data
|
||||||
|
- whisper_models:/root/.cache/huggingface
|
||||||
|
- paddle_models:/root/.paddlex
|
||||||
|
depends_on:
|
||||||
|
- redis
|
||||||
|
networks:
|
||||||
|
- whisper_net
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
redis_data:
|
||||||
|
stt_data:
|
||||||
|
whisper_models:
|
||||||
|
paddle_models:
|
||||||
|
|
||||||
|
networks:
|
||||||
|
whisper_net:
|
||||||
|
driver: bridge
|
||||||
BIN
whisper-stt.tar.gz
Executable file
BIN
whisper-stt.tar.gz
Executable file
Binary file not shown.
Reference in New Issue
Block a user