fix: --pool=solo SIGSEGV 해결 및 전체 설정 정리

This commit is contained in:
root
2026-04-20 20:39:24 +09:00
commit 248ac1deea
13 changed files with 2979 additions and 0 deletions

34
app/Dockerfile Normal file
View File

@@ -0,0 +1,34 @@
# Image for the API process: Python 3.11 slim base, started with uvicorn on port 8000.
FROM python:3.11-slim
# System packages; the apt package lists are deleted in the same layer.
RUN apt-get update && apt-get install -y \
ffmpeg \
libsndfile1 \
libgomp1 \
libglib2.0-0 \
libsm6 \
libxext6 \
libxrender1 \
libgl1 \
libgles2 \
libegl1 \
wget \
curl \
&& rm -rf /var/lib/apt/lists/*
WORKDIR /app
COPY requirements.txt .
# PaddlePaddle CPU — installed from the official PyPI index
RUN pip install --no-cache-dir paddlepaddle==3.0.0
# Remaining packages
RUN pip install --no-cache-dir -r requirements.txt
COPY . .
# Default upload/output directories used by the application.
RUN mkdir -p /data/uploads /data/outputs
EXPOSE 8000
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]

146
app/auth.py Normal file
View File

@@ -0,0 +1,146 @@
"""
인증 모듈 — 다중 사용자 JSON 파일 기반
/data/users.json 에 사용자 정보 저장
관리자(admin)는 환경변수 AUTH_USERNAME/AUTH_PASSWORD 기준으로 초기화
"""
import os, json, threading
from pathlib import Path
from datetime import datetime, timedelta
from fastapi import Depends, HTTPException, status
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
from jose import JWTError, jwt
# JWT signing key, read from JWT_SECRET.
# NOTE(review): when JWT_SECRET is unset the hard-coded fallback below is used to
# sign tokens — confirm the variable is always set in real deployments.
SECRET_KEY = os.getenv("JWT_SECRET", "fallback-secret-change-this")
ALGORITHM = "HS256"
# Token lifetime in hours (JWT_EXPIRE_HOURS, default 12).
EXPIRE_HOURS = int(os.getenv("JWT_EXPIRE_HOURS", "12"))
# Built-in admin credentials; init_users() writes them into the users file.
# NOTE(review): the password has a hard-coded default as well.
ADMIN_USERNAME = os.getenv("AUTH_USERNAME", "admin")
ADMIN_PASSWORD = os.getenv("AUTH_PASSWORD", "changeme1234")
# users.json is stored in the parent directory of UPLOAD_DIR.
DATA_DIR = Path(os.getenv("UPLOAD_DIR", "/data/uploads")).parent
USERS_FILE = DATA_DIR / "users.json"
# Held around reads and writes of USERS_FILE by the functions in this module.
_lock = threading.Lock()
# auto_error=False: require_auth checks for missing credentials (None) itself.
bearer = HTTPBearer(auto_error=False)
# ── 파일 I/O ───────────────────────────────────────────────────
def _load() -> dict:
    """Return the parsed contents of USERS_FILE, or an empty dict if it is absent."""
    if USERS_FILE.exists():
        with open(USERS_FILE, "r", encoding="utf-8") as fh:
            return json.load(fh)
    return {}
def _save(users: dict):
    """Persist the users mapping to USERS_FILE as UTF-8 JSON.

    The data is written to a sibling temporary file first and then moved over
    USERS_FILE with os.replace, so an interrupted write cannot leave a
    truncated users file behind.
    """
    USERS_FILE.parent.mkdir(parents=True, exist_ok=True)
    tmp_path = USERS_FILE.with_name(USERS_FILE.name + ".tmp")
    with open(tmp_path, "w", encoding="utf-8") as f:
        json.dump(users, f, ensure_ascii=False, indent=2)
        # Push the bytes to disk before the rename makes them visible.
        f.flush()
        os.fsync(f.fileno())
    os.replace(tmp_path, USERS_FILE)
# ── 초기화 (앱 시작 시 1회) ────────────────────────────────────
def init_users():
    """Write the admin account into the users file, overwriting any existing entry.

    The admin record is always rebuilt from ADMIN_USERNAME / ADMIN_PASSWORD.
    """
    admin_record = {
        "password": ADMIN_PASSWORD,
        "role": "admin",
        "permissions": {"stt": True, "ocr": True},
    }
    with _lock:
        all_users = _load()
        all_users[ADMIN_USERNAME] = admin_record
        _save(all_users)
# ── CRUD ──────────────────────────────────────────────────────
def authenticate(username: str, password: str):
    """Check a username/password pair against the users file.

    Returns a dict with "username" plus the stored record on success,
    or None when the user is unknown or the password does not match.
    """
    import hmac  # local import: keeps this block self-contained

    with _lock:
        users = _load()
    record = users.get(username)
    if not record:
        return None
    stored = record.get("password")
    if not isinstance(stored, str) or not isinstance(password, str):
        return None
    # Constant-time comparison; bytes are used because compare_digest rejects
    # non-ASCII str arguments.
    if not hmac.compare_digest(stored.encode("utf-8"), password.encode("utf-8")):
        return None
    return {"username": username, **record}
def get_user(username: str):
    """Return the stored record for username, or None if there is none."""
    with _lock:
        all_users = _load()
        return all_users.get(username)
def list_users() -> dict:
    """Return every user record keyed by username, with the "password" field removed."""
    with _lock:
        all_users = _load()
    masked = {}
    for name, record in all_users.items():
        masked[name] = {
            field: value for field, value in record.items() if field != "password"
        }
    return masked
def create_user(username: str, password: str, permissions: dict) -> tuple:
    """Add a new user with role "user".

    Returns (ok, message); ok is False when the username already exists.
    """
    new_record = {"password": password, "role": "user",
                  "permissions": permissions}
    with _lock:
        all_users = _load()
        if username in all_users:
            return False, "이미 존재하는 사용자입니다"
        all_users[username] = new_record
        _save(all_users)
    return True, "사용자가 생성되었습니다"
def update_user(username: str, permissions: dict, password: str = None) -> tuple:
    """Replace a user's permissions and, when a non-empty password is given, the password.

    Returns (ok, message). The built-in admin account and unknown users are rejected.
    """
    if username == ADMIN_USERNAME:
        return False, "기본 관리자 계정은 수정할 수 없습니다"
    with _lock:
        all_users = _load()
        record = all_users.get(username) if username in all_users else None
        if username not in all_users:
            return False, "사용자를 찾을 수 없습니다"
        record["permissions"] = permissions
        if password:
            record["password"] = password
        _save(all_users)
    return True, "업데이트되었습니다"
def delete_user(username: str) -> tuple:
    """Remove a user from the users file.

    Returns (ok, message). The built-in admin account and unknown users are rejected.
    """
    if username == ADMIN_USERNAME:
        return False, "기본 관리자 계정은 삭제할 수 없습니다"
    with _lock:
        all_users = _load()
        if username not in all_users:
            return False, "사용자를 찾을 수 없습니다"
        all_users.pop(username)
        _save(all_users)
    return True, "삭제되었습니다"
# ── JWT ───────────────────────────────────────────────────────
def create_access_token(username: str) -> str:
    """Return a signed JWT whose "sub" is username and which expires after EXPIRE_HOURS."""
    expires_at = datetime.utcnow() + timedelta(hours=EXPIRE_HOURS)
    claims = {"sub": username, "exp": expires_at}
    return jwt.encode(claims, SECRET_KEY, algorithm=ALGORITHM)
# ── FastAPI 의존성 ────────────────────────────────────────────
def require_auth(credentials: HTTPAuthorizationCredentials = Depends(bearer)) -> dict:
    """FastAPI dependency: resolve the bearer token to a user dict.

    Raises HTTPException(401) when no credentials were supplied, when the token
    fails to decode, when it carries no "sub", or when that user is not stored.
    """
    if credentials is None:
        raise HTTPException(401, "인증이 필요합니다",
                            headers={"WWW-Authenticate": "Bearer"})
    try:
        claims = jwt.decode(credentials.credentials, SECRET_KEY, algorithms=[ALGORITHM])
        username = claims.get("sub")
        # The user lookup is skipped entirely when the token has no subject.
        record = get_user(username) if username else None
        if not record:
            raise JWTError()
        return {"username": username, **record}
    except JWTError:
        raise HTTPException(401, "토큰이 유효하지 않거나 만료되었습니다",
                            headers={"WWW-Authenticate": "Bearer"})
def require_admin(user: dict = Depends(require_auth)) -> dict:
    """FastAPI dependency: pass the user through only if its role is "admin" (403 otherwise)."""
    if user.get("role") == "admin":
        return user
    raise HTTPException(403, "관리자 권한이 필요합니다")
def require_stt(user: dict = Depends(require_auth)) -> dict:
    """FastAPI dependency: pass the user through only if permissions["stt"] is truthy (403 otherwise)."""
    granted = user.get("permissions", {})
    if granted.get("stt", False):
        return user
    raise HTTPException(403, "STT 사용 권한이 없습니다")
def require_ocr(user: dict = Depends(require_auth)) -> dict:
    """FastAPI dependency: pass the user through only if permissions["ocr"] is truthy (403 otherwise)."""
    granted = user.get("permissions", {})
    if granted.get("ocr", False):
        return user
    raise HTTPException(403, "OCR 사용 권한이 없습니다")

275
app/main.py Normal file
View File

@@ -0,0 +1,275 @@
import os, uuid, time, glob, json
import httpx
import aiofiles
from pathlib import Path
from fastapi import FastAPI, UploadFile, File, HTTPException, Depends, Form, Request
from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse
from pydantic import BaseModel
from auth import (authenticate, create_access_token, init_users,
require_auth, require_admin, require_stt, require_ocr,
list_users, create_user, update_user, delete_user)
from tasks import celery_app, transcribe_task
from ocr_tasks import ocr_task
app = FastAPI(title="VoiceScript API")
# Directories for incoming uploads and generated result files.
UPLOAD_DIR = os.getenv("UPLOAD_DIR", "/data/uploads")
OUTPUT_DIR = os.getenv("OUTPUT_DIR", "/data/outputs")
# Base URL of the Ollama server; the default is a hard-coded private address.
OLLAMA_URL = os.getenv("OLLAMA_URL", "http://192.168.0.126:11434")
# Upload limit in bytes (MAX_UPLOAD_MB, default 500).
MAX_UPLOAD_BYTES = int(os.getenv("MAX_UPLOAD_MB", "500")) * 1024 * 1024
# Age in seconds after which output files are removed; 0 disables cleanup.
OUTPUT_KEEP_SECS = int(os.getenv("OUTPUT_KEEP_HOURS", "48")) * 3600
# settings.json is stored in the parent directory of UPLOAD_DIR.
DATA_DIR = Path(UPLOAD_DIR).parent
SETTINGS_FILE = DATA_DIR / "settings.json"
os.makedirs(UPLOAD_DIR, exist_ok=True)
os.makedirs(OUTPUT_DIR, exist_ok=True)
# Lower-case file extensions accepted by /api/transcribe and /api/ocr.
AUDIO_EXT = {"mp3","mp4","wav","m4a","ogg","flac","aac","wma","webm","mkv","avi","mov"}
IMAGE_EXT = {"jpg","jpeg","png","bmp","tiff","tif","webp","gif"}
# ── 설정 I/O ─────────────────────────────────────────────────
def _load_settings() -> dict:
    """Return the saved settings, or the built-in defaults when SETTINGS_FILE is absent."""
    if SETTINGS_FILE.exists():
        with open(SETTINGS_FILE, "r", encoding="utf-8") as fh:
            return json.load(fh)
    return {"stt_ollama_model": "", "ocr_ollama_model": "granite3.2-vision:latest"}
def _save_settings(data: dict):
    """Write data to SETTINGS_FILE as indented UTF-8 JSON, creating the directory if needed."""
    target_dir = SETTINGS_FILE.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    with open(SETTINGS_FILE, "w", encoding="utf-8") as fh:
        json.dump(data, fh, ensure_ascii=False, indent=2)
# ════════════════════════════════════════════════════════════════
# 시작 이벤트
# ════════════════════════════════════════════════════════════════
@app.on_event("startup")
async def on_startup():
    """Startup hook: sync the admin account, then remove expired output files."""
    init_users()
    _cleanup_outputs()
# ════════════════════════════════════════════════════════════════
# 인증
# ════════════════════════════════════════════════════════════════
@app.post("/api/login")
def login(username: str = Form(...), password: str = Form(...)):
    """Exchange form credentials for a bearer token; 401 when authentication fails."""
    if authenticate(username, password):
        token = create_access_token(username)
        return {"access_token": token, "token_type": "bearer"}
    raise HTTPException(401, "아이디 또는 비밀번호가 올바르지 않습니다")
@app.get("/api/me")
def me(user: dict = Depends(require_auth)):
    """Return the caller's username, role and permissions (with defaults for missing fields)."""
    profile = {"username": user["username"]}
    profile["role"] = user.get("role", "user")
    profile["permissions"] = user.get("permissions", {"stt": False, "ocr": False})
    return profile
# ════════════════════════════════════════════════════════════════
# STT
# ════════════════════════════════════════════════════════════════
@app.post("/api/transcribe")
async def transcribe(
    request: Request,
    file: UploadFile = File(...),
    use_ollama: str = Form("false"),
    ollama_model: str = Form(""),
    user: dict = Depends(require_stt),
):
    """Store an uploaded audio/video file and queue a transcription task.

    Responds with the Celery task id, the generated file id and the original
    filename. Raises 400 for an unsupported extension.
    """
    _check_size(request)
    ext = _ext(file.filename)
    if ext not in AUDIO_EXT:
        supported = ", ".join(sorted(AUDIO_EXT))
        raise HTTPException(400, f"지원하지 않는 형식: {supported}")

    file_id = str(uuid.uuid4())
    save_path = os.path.join(UPLOAD_DIR, f"{file_id}.{ext}")
    await _save(file, save_path)

    postprocess = use_ollama.lower() == "true"
    model_name = ollama_model
    # No model given: fall back to the one stored in the settings.
    if postprocess and not model_name.strip():
        model_name = _load_settings().get("stt_ollama_model", "")

    task = transcribe_task.delay(file_id, save_path, postprocess, model_name)
    return {"task_id": task.id, "file_id": file_id, "filename": file.filename}
# ════════════════════════════════════════════════════════════════
# OCR
# ════════════════════════════════════════════════════════════════
@app.post("/api/ocr")
async def ocr(
    request: Request,
    file: UploadFile = File(...),
    mode: str = Form("text"),
    backend: str = Form("paddle"),
    ollama_model: str = Form(""),
    custom_prompt: str = Form(""),
    user: dict = Depends(require_ocr),
):
    """Store an uploaded image and queue an OCR task.

    Unknown mode/backend values silently fall back to "text"/"paddle".
    Raises 400 for an unsupported extension.
    """
    _check_size(request)
    ext = _ext(file.filename)
    if ext not in IMAGE_EXT:
        supported = ", ".join(sorted(IMAGE_EXT))
        raise HTTPException(400, f"지원하지 않는 형식: {supported}")

    chosen_mode = mode if mode in ("text", "structure") else "text"
    chosen_backend = backend if backend in ("paddle", "ollama") else "paddle"

    model_name = ollama_model
    # No model given: fall back to the one stored in the settings.
    if chosen_backend == "ollama" and not model_name.strip():
        model_name = _load_settings().get("ocr_ollama_model", "granite3.2-vision:latest")

    file_id = str(uuid.uuid4())
    save_path = os.path.join(UPLOAD_DIR, f"{file_id}.{ext}")
    await _save(file, save_path)

    task = ocr_task.delay(file_id, save_path, chosen_mode, chosen_backend,
                          model_name, custom_prompt)
    return {"task_id": task.id, "file_id": file_id,
            "filename": file.filename, "mode": chosen_mode, "backend": chosen_backend}
# ════════════════════════════════════════════════════════════════
# 작업 상태 / 다운로드
# ════════════════════════════════════════════════════════════════
@app.get("/api/status/{task_id}")
def get_status(task_id: str, user: dict = Depends(require_auth)):
    """Report the state of a Celery task as a JSON-friendly dict.

    The state is read once so that every comparison below sees the same
    snapshot. Progress metadata and results that are not dicts are tolerated
    instead of raising.
    """
    r = celery_app.AsyncResult(task_id)
    state = r.state
    if state == "PENDING":
        return {"state": "pending", "progress": 0, "message": "대기 중..."}
    if state == "PROGRESS":
        info = r.info
        meta = info if isinstance(info, dict) else {}
        return {"state": "progress",
                "progress": meta.get("progress", 0),
                "message": meta.get("message", "처리 중...")}
    if state == "SUCCESS":
        result = r.result
        payload = result if isinstance(result, dict) else {"result": result}
        return {"state": "success", "progress": 100, **payload}
    if state == "FAILURE":
        return {"state": "failure", "progress": 0, "message": str(r.info)}
    return {"state": state.lower(), "progress": 0}
@app.get("/api/download/{filename}")
def download(filename: str, user: dict = Depends(require_auth)):
    """Serve a result file from OUTPUT_DIR.

    Raises 400 for names containing ".." or "/", and 404 when the name does
    not refer to a regular file (directories such as "." are rejected too).
    """
    if ".." in filename or "/" in filename:
        raise HTTPException(400, "잘못된 파일명")
    path = os.path.join(OUTPUT_DIR, filename)
    # isfile rather than exists: a directory cannot be served as a file response.
    if not os.path.isfile(path):
        raise HTTPException(404, "파일을 찾을 수 없습니다")
    if filename.endswith(".xlsx"):
        media = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
    else:
        media = "text/plain"
    return FileResponse(path, media_type=media, filename=filename)
# ════════════════════════════════════════════════════════════════
# Ollama 모델 목록
# ════════════════════════════════════════════════════════════════
@app.get("/api/ollama/models")
def ollama_models(user: dict = Depends(require_auth)):
    """List model names reported by the Ollama server's /api/tags endpoint.

    Any failure is reported in the response body (connected=False) rather
    than raised.
    """
    try:
        resp = httpx.get(f"{OLLAMA_URL}/api/tags", timeout=8.0)
        resp.raise_for_status()
        names = []
        for entry in resp.json().get("models", []):
            names.append(entry["name"])
        return {"models": names, "connected": True}
    except Exception as e:
        return {"models": [], "connected": False, "error": str(e)}
# ════════════════════════════════════════════════════════════════
# 설정
# ════════════════════════════════════════════════════════════════
@app.get("/api/settings")
def get_settings(user: dict = Depends(require_auth)):
    """Return the current settings dict."""
    current = _load_settings()
    return current
@app.post("/api/settings")
def save_settings_endpoint(
    stt_ollama_model: str = Form(""),
    ocr_ollama_model: str = Form(""),
    user: dict = Depends(require_auth),
):
    """Overwrite the stored settings with the submitted model names and echo them back."""
    data = dict(stt_ollama_model=stt_ollama_model,
                ocr_ollama_model=ocr_ollama_model)
    _save_settings(data)
    return {"ok": True, "settings": data}
# ════════════════════════════════════════════════════════════════
# 관리자 — 사용자 관리
# ════════════════════════════════════════════════════════════════
@app.get("/api/admin/users")
def admin_list_users(user: dict = Depends(require_admin)):
    """Admin only: return all users with passwords stripped."""
    users = list_users()
    return {"users": users}
@app.post("/api/admin/users")
def admin_create_user(
    username: str = Form(...),
    password: str = Form(...),
    perm_stt: str = Form("false"),
    perm_ocr: str = Form("false"),
    user: dict = Depends(require_admin),
):
    """Admin only: create a user; the permission flags are the strings "true"/"false"."""
    perms = {
        key: flag.lower() == "true"
        for key, flag in (("stt", perm_stt), ("ocr", perm_ocr))
    }
    ok, msg = create_user(username, password, perms)
    if ok:
        return {"ok": True, "message": msg}
    raise HTTPException(400, msg)
@app.put("/api/admin/users/{username}")
def admin_update_user(
    username: str,
    perm_stt: str = Form("false"),
    perm_ocr: str = Form("false"),
    password: str = Form(""),
    user: dict = Depends(require_admin),
):
    """Admin only: update a user's permissions; an empty password leaves it unchanged."""
    perms = {
        key: flag.lower() == "true"
        for key, flag in (("stt", perm_stt), ("ocr", perm_ocr))
    }
    new_password = password if password else None
    ok, msg = update_user(username, perms, new_password)
    if ok:
        return {"ok": True, "message": msg}
    raise HTTPException(400, msg)
@app.delete("/api/admin/users/{username}")
def admin_delete_user(username: str, user: dict = Depends(require_admin)):
    """Admin only: delete a user; 400 with the reason when deletion is refused."""
    ok, msg = delete_user(username)
    if ok:
        return {"ok": True, "message": msg}
    raise HTTPException(400, msg)
# ════════════════════════════════════════════════════════════════
# 정리
# ════════════════════════════════════════════════════════════════
@app.post("/api/cleanup")
def cleanup(user: dict = Depends(require_auth)):
    """Trigger output cleanup and report how many files were removed."""
    removed_count = _cleanup_outputs()
    return {"removed": removed_count}
# ════════════════════════════════════════════════════════════════
# 유틸
# ════════════════════════════════════════════════════════════════
def _check_size(request: Request):
    """Reject the request with 413 when its Content-Length header exceeds MAX_UPLOAD_BYTES.

    Requests without the header pass through unchecked.
    """
    declared = request.headers.get("content-length")
    if not declared:
        return
    if int(declared) > MAX_UPLOAD_BYTES:
        raise HTTPException(413, f"파일이 너무 큽니다. 최대 {MAX_UPLOAD_BYTES//1024//1024}MB")
def _cleanup_outputs() -> int:
    """Delete entries in OUTPUT_DIR whose mtime is older than OUTPUT_KEEP_SECS.

    Returns the number of files removed. Cleanup is disabled when
    OUTPUT_KEEP_SECS is 0. Entries that cannot be inspected or removed are
    skipped.
    """
    if OUTPUT_KEEP_SECS == 0:
        return 0
    cutoff = time.time() - OUTPUT_KEEP_SECS
    removed = 0
    for f in glob.glob(os.path.join(OUTPUT_DIR, "*")):
        try:
            if os.path.getmtime(f) < cutoff:
                os.remove(f)
                removed += 1
        except OSError:
            # Best effort: the entry vanished, is a directory, or is not removable.
            continue
    return removed
def _ext(fn):
    """Return the lower-cased extension of fn without the dot.

    Returns "" when fn is None/empty or contains no dot.
    """
    if not fn or "." not in fn:
        return ""
    return fn.rsplit(".", 1)[-1].lower()
async def _save(file: UploadFile, path: str):
    """Stream an upload to path in 1 MiB chunks, enforcing MAX_UPLOAD_BYTES.

    Raises HTTPException(413) once the streamed size exceeds the limit.
    On any failure (size limit, I/O error, cancelled request) the partially
    written file is removed before the exception propagates.
    """
    written = 0
    try:
        async with aiofiles.open(path, "wb") as f:
            while chunk := await file.read(1024 * 1024):
                written += len(chunk)
                if written > MAX_UPLOAD_BYTES:
                    raise HTTPException(413, f"파일이 너무 큽니다. 최대 {MAX_UPLOAD_BYTES//1024//1024}MB")
                await f.write(chunk)
    except BaseException:
        # BaseException so that task cancellation also cleans up the partial file.
        try:
            os.remove(path)
        except OSError:
            pass
        raise
# Serve the bundled front-end from ./static at the site root.
# NOTE(review): presumably kept after the API route definitions so they are matched first — confirm.
app.mount("/", StaticFiles(directory="static", html=True), name="static")

288
app/ocr_tasks.py Normal file
View File

@@ -0,0 +1,288 @@
"""
OCR Celery Tasks
- PaddleOCR 3.x 호환 (use_gpu/show_log/cls 파라미터 제거, 결과구조 변경 반영)
- backend="paddle" → PaddleOCR 로컬 실행
- backend="ollama" → Ollama Vision API 호출
"""
import os
import base64
import httpx
from celery import Celery
import openpyxl
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
# Runtime configuration, all overridable through environment variables.
REDIS_URL = os.getenv("REDIS_URL", "redis://redis:6379/0")
OUTPUT_DIR = os.getenv("OUTPUT_DIR", "/data/outputs")
OCR_LANG = os.getenv("OCR_LANG", "korean")
OLLAMA_URL = os.getenv("OLLAMA_URL", "http://192.168.0.126:11434")
# Timeout in seconds for the Ollama HTTP request.
OLLAMA_TIMEOUT = int(os.getenv("OLLAMA_TIMEOUT", "180"))
# Redis is used as both the broker and the result backend.
celery_app = Celery("ocr_tasks", broker=REDIS_URL, backend=REDIS_URL)
celery_app.conf.update(
task_serializer="json",
result_serializer="json",
accept_content=["json"],
task_track_started=True,
result_expires=3600,
)
# PaddleOCR singletons, created on first use by get_ocr() / get_structure()
_ocr_engine = None
_struct_engine = None
def get_ocr():
    """Return the shared PaddleOCR engine, creating it on the first call."""
    global _ocr_engine
    if _ocr_engine is not None:
        return _ocr_engine
    from paddleocr import PaddleOCR
    print(f"[PaddleOCR] 로딩 (lang={OCR_LANG})")
    # Original author's note: PaddleOCR 3.x removed the use_gpu/show_log parameters.
    _ocr_engine = PaddleOCR(use_angle_cls=True, lang=OCR_LANG)
    print("[PaddleOCR] 완료")
    return _ocr_engine
def get_structure():
    """Return the shared PPStructure engine, creating it on the first call."""
    global _struct_engine
    if _struct_engine is not None:
        return _struct_engine
    # NOTE(review): requirements pin paddleocr>=3.0.0 — verify that PPStructure
    # is still exported under that name by the installed version.
    from paddleocr import PPStructure
    print("[PPStructure] 로딩")
    _struct_engine = PPStructure(table=True, ocr=True, lang=OCR_LANG)
    print("[PPStructure] 완료")
    return _struct_engine
# ════════════════════════════════════════════════════════════════
# 메인 Task
# ════════════════════════════════════════════════════════════════
@celery_app.task(bind=True, name="tasks.ocr_task", queue="ocr")
def ocr_task(self, file_id, image_path, mode="text",
             backend="paddle", ollama_model="granite3.2-vision", custom_prompt=""):
    """Run OCR on an uploaded image with the selected backend.

    backend "ollama" uses the Ollama vision path; anything else uses PaddleOCR.
    The uploaded image is removed afterwards whether or not OCR succeeded.
    Raises Exception("OCR 실패: ...") chained to the underlying error.
    """
    self.update_state(state="PROGRESS", meta={"progress": 8, "message": "엔진 준비 중..."})
    try:
        if backend == "ollama":
            return _run_ollama(self, file_id, image_path, mode, ollama_model, custom_prompt)
        return _run_paddle(self, file_id, image_path, mode)
    except Exception as e:
        raise Exception(f"OCR 실패: {str(e)}") from e
    finally:
        # Best-effort removal of the upload on both the success and failure paths.
        try:
            os.remove(image_path)
        except OSError:
            pass
# ════════════════════════════════════════════════════════════════
# Ollama 백엔드
# ════════════════════════════════════════════════════════════════
# Default prompts per OCR mode; _run_ollama uses them when no custom prompt is given.
_OLLAMA_PROMPTS = {
"text": "이 이미지에서 모든 텍스트를 정확하게 추출해줘. 원본의 줄 구분과 단락 구조를 유지해줘.",
"structure": "이 이미지를 분석해서 표는 마크다운 표 형식으로, 나머지 텍스트는 원본 구조를 유지하며 추출해줘.",
}
def _run_ollama(task, file_id, image_path, mode, ollama_model, custom_prompt):
    """OCR an image by sending it to an Ollama model via /api/chat.

    Writes the extracted text to OUTPUT_DIR (and an .xlsx when markdown tables
    are found in "structure" mode) and returns the result dict for the task.
    Raises Exception on connection failure, timeout, or an empty model reply.
    """
    def report(pct, msg):
        task.update_state(state="PROGRESS", meta={"progress": pct, "message": msg})

    report(15, f"Ollama ({ollama_model}) 연결 중...")
    with open(image_path, "rb") as img_fh:
        raw_bytes = img_fh.read()
    img_b64 = base64.b64encode(raw_bytes).decode()

    prompt = custom_prompt.strip()
    if not prompt:
        prompt = _OLLAMA_PROMPTS.get(mode, _OLLAMA_PROMPTS["text"])

    report(30, "모델 추론 중...")
    payload = {
        "model": ollama_model,
        "messages": [{"role": "user", "content": prompt, "images": [img_b64]}],
        "stream": False,
        "options": {"temperature": 0.1},
    }
    try:
        resp = httpx.post(f"{OLLAMA_URL}/api/chat", json=payload,
                          timeout=float(OLLAMA_TIMEOUT))
        resp.raise_for_status()
    except httpx.ConnectError:
        raise Exception(f"Ollama 서버 연결 실패 ({OLLAMA_URL})")
    except httpx.TimeoutException:
        raise Exception(f"Ollama 응답 시간 초과 ({OLLAMA_TIMEOUT}초). OLLAMA_TIMEOUT 값을 늘려주세요.")

    report(85, "결과 저장 중...")
    full_text = resp.json().get("message", {}).get("content", "").strip()
    if not full_text:
        raise Exception("Ollama 빈 응답. 모델이 설치되어 있는지 확인하세요.")

    # Tables are only extracted in structure mode.
    tables = []
    if mode == "structure":
        tables = _parse_md_tables(full_text)

    os.makedirs(OUTPUT_DIR, exist_ok=True)
    txt_file = f"{file_id}_ocr.txt"
    with open(os.path.join(OUTPUT_DIR, txt_file), "w", encoding="utf-8") as out:
        out.write(f"# OCR 결과 (Ollama / {ollama_model})\n\n{full_text}")

    xlsx_file = None
    if tables:
        xlsx_file = f"{file_id}_tables.xlsx"
        _save_excel(tables, os.path.join(OUTPUT_DIR, xlsx_file))

    tables_meta = []
    for table in tables:
        html = _md_table_to_html(table)
        width = max(len(row) for row in table) if table else 0
        tables_meta.append({"html": html, "rows": len(table), "cols": width})

    lines = []
    for text_line in full_text.splitlines():
        if text_line.strip():
            lines.append({"text": text_line, "confidence": 1.0, "bbox": []})

    return {
        "mode": mode, "backend": "ollama", "ollama_model": ollama_model,
        "full_text": full_text, "lines": lines, "line_count": len(lines),
        "txt_file": txt_file,
        "tables": tables_meta,
        "xlsx_file": xlsx_file,
    }
# ════════════════════════════════════════════════════════════════
# PaddleOCR 백엔드
# ════════════════════════════════════════════════════════════════
def _run_paddle(task, file_id, image_path, mode):
    """Load the image with OpenCV and dispatch to the text or structure pipeline.

    Raises ValueError when the image cannot be read.
    """
    import cv2
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError("이미지를 읽을 수 없습니다")
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    if mode == "structure":
        return _paddle_structure(task, file_id, image)
    return _paddle_text(task, file_id, image)
def _paddle_text(task, file_id, img):
    """Run plain text recognition and write the recognised lines to a .txt file.

    Accepts both result layouts handled by the original code: a dict with
    "rec_texts"/"rec_scores", or a list of [bbox, (text, conf)] items.
    """
    task.update_state(state="PROGRESS", meta={"progress": 30, "message": "텍스트 인식 중..."})
    # Original author's note: PaddleOCR 3.x dropped the cls parameter and changed the result layout.
    result = get_ocr().ocr(img)
    task.update_state(state="PROGRESS", meta={"progress": 80, "message": "결과 정리 중..."})

    first = result[0] if (result and len(result) > 0) else None
    if isinstance(first, dict):
        # Dict layout: parallel lists of texts and scores.
        pairs = list(zip(first.get("rec_texts", []), first.get("rec_scores", [])))
    elif isinstance(first, list):
        # Legacy layout: list of [bbox, (text, conf)].
        pairs = [item[1] for item in first if item and len(item) == 2]
    else:
        pairs = []

    lines = [
        {"text": text, "confidence": round(float(conf), 3), "bbox": []}
        for text, conf in pairs
        if text.strip()
    ]

    full_text = "\n".join(entry["text"] for entry in lines)
    txt_file = f"{file_id}_ocr.txt"
    with open(os.path.join(OUTPUT_DIR, txt_file), "w", encoding="utf-8") as out:
        out.write(full_text)

    return {"mode": "text", "backend": "paddle",
            "full_text": full_text, "lines": lines,
            "line_count": len(lines), "txt_file": txt_file,
            "tables": [], "xlsx_file": None}
def _paddle_structure(task, file_id, img):
    """Run layout analysis, collecting table HTML and text lines from the regions.

    Tables are exported to an .xlsx file when any are found; text goes to a
    .txt file. Returns the result dict for the task.
    """
    def report(pct, msg):
        task.update_state(state="PROGRESS", meta={"progress": pct, "message": msg})

    report(20, "레이아웃 분석 중...")
    result = get_structure()(img)
    report(60, "표 구조 추출 중...")

    text_blocks = []
    tables_html = []
    tables_data = []
    for region in result:
        kind = region.get("type", "").lower()
        if kind == "table":
            html = region.get("res", {}).get("html", "")
            if html:
                tables_html.append(html)
                tables_data.append(_html_table_to_list(html))
            continue
        if kind not in ("text", "title", "figure_caption"):
            continue
        for entry in region.get("res", []) or []:
            if not (isinstance(entry, (list, tuple)) and len(entry) == 2):
                continue
            _, (text, _conf) = entry
            text_blocks.append(text)

    full_text = "\n".join(text_blocks)
    report(80, "Excel 생성 중...")

    xlsx_file = None
    if tables_data:
        xlsx_file = f"{file_id}_tables.xlsx"
        _save_excel(tables_data, os.path.join(OUTPUT_DIR, xlsx_file))

    txt_file = f"{file_id}_ocr.txt"
    with open(os.path.join(OUTPUT_DIR, txt_file), "w", encoding="utf-8") as out:
        out.write("# 텍스트\n\n" + full_text)

    lines = []
    for block_text in text_blocks:
        lines.append({"text": block_text, "confidence": 1.0, "bbox": []})

    tables_meta = []
    for html, rows in zip(tables_html, tables_data):
        width = max(len(r) for r in rows) if rows else 0
        tables_meta.append({"html": html, "rows": len(rows), "cols": width})

    return {"mode": "structure", "backend": "paddle",
            "full_text": full_text, "lines": lines,
            "line_count": len(lines), "txt_file": txt_file,
            "tables": tables_meta, "xlsx_file": xlsx_file}
# ════════════════════════════════════════════════════════════════
# 공통 유틸
# ════════════════════════════════════════════════════════════════
def _parse_md_tables(text):
    """Extract markdown tables from text as lists of rows of stripped cell strings.

    A table is a run of lines that start and end with "|". Separator rows
    (only "|", "-", ":" and spaces) are dropped, and runs with fewer than two
    remaining rows are discarded.
    """
    found = []
    pending = []
    separator_chars = set("| -:")
    # The trailing empty line flushes a table that runs to the end of the text.
    for raw in [*text.splitlines(), ""]:
        row = raw.strip()
        if not (row.startswith("|") and row.endswith("|")):
            if len(pending) >= 2:
                found.append(pending)
            pending = []
            continue
        if set(row) <= separator_chars:
            continue
        pending.append([cell.strip() for cell in row.strip("|").split("|")])
    return found
def _md_table_to_html(table):
    """Render a parsed table (list of rows of cells) as an HTML <table> string.

    The first row becomes <th> cells, the remaining rows <td> cells. Cell text
    is HTML-escaped because it comes from model output and must not be able to
    inject markup. Returns "" for an empty table.
    """
    from html import escape  # local import: keeps this block self-contained

    if not table:
        return ""
    rendered_rows = []
    for i, row in enumerate(table):
        tag = "th" if i == 0 else "td"
        cells = "".join(f"<{tag}>{escape(str(c))}</{tag}>" for c in row)
        rendered_rows.append(f"<tr>{cells}</tr>")
    return f"<table>{''.join(rendered_rows)}</table>"
def _html_table_to_list(html):
    """Convert an HTML table string into a list of rows of stripped cell texts.

    Only text inside <td>/<th> elements is collected; rows without any cell
    are skipped.
    """
    from html.parser import HTMLParser

    class _CellCollector(HTMLParser):
        def __init__(self):
            super().__init__()
            self.rows = []
            self._row = []
            self._cell = []
            self._in = False

        def handle_starttag(self, tag, attrs):
            if tag == "tr":
                self._row = []
                return
            if tag in ("td", "th"):
                self._cell = []
                self._in = True

        def handle_endtag(self, tag):
            if tag in ("td", "th"):
                cell_text = "".join(self._cell).strip()
                self._row.append(cell_text)
                self._in = False
                return
            if tag == "tr" and self._row:
                self.rows.append(self._row)

        def handle_data(self, data):
            if self._in:
                self._cell.append(data)

    collector = _CellCollector()
    collector.feed(html)
    return collector.rows
def _save_excel(tables, path):
    """Write each table to its own worksheet (named "1", "2", ...) and save the workbook.

    Every cell is bordered and centred with wrapping; the first row gets the
    header fill and font. Column widths follow the longest cell text, capped
    at 40. A placeholder sheet is added when there are no tables.
    """
    wb = openpyxl.Workbook()
    wb.remove(wb.active)

    edge = Side(style="thin", color="2A2A33")
    box = Border(left=edge, right=edge, top=edge, bottom=edge)

    for sheet_no, table in enumerate(tables, 1):
        ws = wb.create_sheet(f"{sheet_no}")
        for r_idx, row in enumerate(table, 1):
            is_header = r_idx == 1
            for c_idx, val in enumerate(row, 1):
                cell = ws.cell(row=r_idx, column=c_idx, value=val)
                cell.border = box
                cell.alignment = Alignment(horizontal="center",
                                           vertical="center", wrap_text=True)
                if is_header:
                    cell.fill = PatternFill("solid", fgColor="1A1A2E")
                    cell.font = Font(color="00E5A0", bold=True, size=10)
                else:
                    cell.font = Font(size=10)
        for col in ws.columns:
            longest = max((len(str(c.value or "")) for c in col), default=8)
            letter = col[0].column_letter
            ws.column_dimensions[letter].width = min(longest + 4, 40)

    # Guarantee at least one sheet before saving.
    if not wb.sheetnames:
        wb.create_sheet("Sheet1")
    wb.save(path)

21
app/requirements.txt Normal file
View File

@@ -0,0 +1,21 @@
fastapi==0.115.0
uvicorn[standard]==0.30.6
python-multipart==0.0.9
celery==5.4.0
redis==5.0.8
faster-whisper==1.0.3
aiofiles==23.2.1
# 인증 (bcrypt 제거 — 직접 비교 방식 사용)
python-jose[cryptography]==3.3.0
# PaddleOCR 3.x
paddleocr>=3.0.0
opencv-python-headless>=4.8.0
# Ollama API 호출
httpx>=0.27.0
# Excel 출력
openpyxl==3.1.2
Pillow>=10.0.0

1131
app/static/index.html Normal file

File diff suppressed because it is too large Load Diff

155
app/tasks.py Normal file
View File

@@ -0,0 +1,155 @@
import os
import httpx
from celery import Celery
from ocr_tasks import ocr_task # noqa: F401 — worker에 등록
# Runtime configuration, all overridable through environment variables.
REDIS_URL = os.getenv("REDIS_URL", "redis://redis:6379/0")
MODEL_SIZE = os.getenv("WHISPER_MODEL", "medium")
DEVICE = os.getenv("WHISPER_DEVICE", "cpu")
COMPUTE_TYPE = os.getenv("WHISPER_COMPUTE_TYPE", "int8")
# An empty WHISPER_LANGUAGE / WHISPER_INITIAL_PROMPT becomes None.
LANGUAGE = os.getenv("WHISPER_LANGUAGE", "ko") or None
BEAM_SIZE = int(os.getenv("WHISPER_BEAM_SIZE", "5"))
INITIAL_PROMPT = os.getenv("WHISPER_INITIAL_PROMPT", "") or None
OUTPUT_DIR = os.getenv("OUTPUT_DIR", "/data/outputs")
OLLAMA_URL = os.getenv("OLLAMA_URL", "http://192.168.0.126:11434")
# Timeout in seconds for the Ollama HTTP request.
OLLAMA_TIMEOUT = int(os.getenv("OLLAMA_TIMEOUT", "180"))
# Redis is used as both the broker and the result backend.
celery_app = Celery("whisper_tasks", broker=REDIS_URL, backend=REDIS_URL)
celery_app.conf.update(
task_serializer="json",
result_serializer="json",
accept_content=["json"],
task_track_started=True,
result_expires=3600,
)
# Whisper model singleton, created on first use by get_model().
_model = None
def get_model():
    """Return the shared WhisperModel, creating it on the first call."""
    global _model
    if _model is not None:
        return _model
    from faster_whisper import WhisperModel
    print(f"[Whisper] 로딩: {MODEL_SIZE} / {DEVICE} / {COMPUTE_TYPE}")
    _model = WhisperModel(MODEL_SIZE, device=DEVICE, compute_type=COMPUTE_TYPE)
    print("[Whisper] 로드 완료")
    return _model
# ── Ollama 후처리 ─────────────────────────────────────────────
def _ollama_postprocess(text: str, model: str) -> str:
    """Ask an Ollama model to punctuate and polish a transcript.

    Returns the input unchanged when no model is given, the text is blank,
    the request fails, or the model replies with nothing.
    """
    if not model or not text.strip():
        return text

    prompt = (
        "다음은 음성 인식으로 추출된 텍스트입니다. "
        "내용은 절대 변경하지 말고, 문장 부호를 추가하고 자연스럽게 다듬어줘. "
        "결과 텍스트만 출력하고 설명은 하지 마.\n\n"
    ) + f"{text}"
    request_body = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "stream": False,
        "options": {"temperature": 0.1},
    }
    try:
        resp = httpx.post(f"{OLLAMA_URL}/api/chat", json=request_body,
                          timeout=float(OLLAMA_TIMEOUT))
        resp.raise_for_status()
        polished = resp.json().get("message", {}).get("content", "").strip()
    except Exception as e:
        # Best effort: any failure falls back to the raw transcript.
        print(f"[Ollama 후처리 실패] {e} — 원본 텍스트 사용")
        return text
    return polished if polished else text
# ════════════════════════════════════════════════════════════════
# STT Celery Task
# ════════════════════════════════════════════════════════════════
@celery_app.task(bind=True, name="tasks.transcribe_task", queue="stt")
def transcribe_task(
    self,
    file_id: str,
    audio_path: str,
    use_ollama: bool = False,
    ollama_model: str = "",
):
    """Transcribe an uploaded audio file with Whisper, optionally polishing via Ollama.

    Writes a report to OUTPUT_DIR/<file_id>.txt and returns the result dict.
    The uploaded audio file is removed afterwards whether or not transcription
    succeeded. Raises Exception("변환 실패: ...") chained to the underlying error.
    """
    def report(pct, msg):
        self.update_state(state="PROGRESS", meta={"progress": pct, "message": msg})

    report(5, "모델 준비 중...")
    try:
        model = get_model()
        report(15, "오디오 분석 중...")
        segments_gen, info = model.transcribe(
            audio_path,
            language=LANGUAGE,
            beam_size=BEAM_SIZE,
            initial_prompt=INITIAL_PROMPT,
            vad_filter=True,
            vad_parameters=dict(min_silence_duration_ms=500),
            word_timestamps=False,
        )
        report(30, "텍스트 변환 중...")

        segments, parts = [], []
        duration = info.duration
        for seg in segments_gen:
            seg_text = seg.text.strip()
            segments.append({"start": round(seg.start, 2),
                             "end": round(seg.end, 2),
                             "text": seg_text})
            parts.append(seg_text)
            if duration > 0:
                # Map the position in the audio onto the 30–80% progress range.
                pct = 30 + int((seg.end / duration) * 50)
                report(min(pct, 80), f"변환 중... {seg.end:.0f}s / {duration:.0f}s")

        raw_text = "\n".join(parts)
        full_text = raw_text
        ollama_active = bool(use_ollama and ollama_model)

        # Optional Ollama post-processing
        if ollama_active:
            report(85, f"Ollama({ollama_model}) 후처리 중...")
            full_text = _ollama_postprocess(raw_text, ollama_model)

        report(95, "파일 저장 중...")
        os.makedirs(OUTPUT_DIR, exist_ok=True)
        output_filename = f"{file_id}.txt"
        with open(os.path.join(OUTPUT_DIR, output_filename), "w", encoding="utf-8") as f:
            f.write(f"# 변환 결과\n# 언어: {info.language} | 재생 시간: {duration:.1f}")
            if ollama_active:
                f.write(f" | Ollama 후처리: {ollama_model}")
            f.write("\n\n## 전체 텍스트\n\n" + full_text + "\n\n")
            f.write("## 타임스탬프별 세그먼트\n\n")
            for seg in segments:
                # Separator between start and end keeps the two timestamps readable.
                f.write(f"[{_fmt(seg['start'])} - {_fmt(seg['end'])}] {seg['text']}\n")

        return {
            "text": full_text,
            "raw_text": raw_text,
            "segments": segments,
            "language": info.language,
            "duration": round(duration, 1),
            "output_file": output_filename,
            "ollama_used": ollama_active,
            "ollama_model": ollama_model if ollama_active else "",
        }
    except Exception as e:
        raise Exception(f"변환 실패: {str(e)}") from e
    finally:
        # Best-effort removal of the upload on both the success and failure paths.
        try:
            os.remove(audio_path)
        except OSError:
            pass
def _fmt(s):
    """Format a number of seconds as zero-padded MM:SS (fractions are truncated)."""
    whole = int(s)
    minutes = whole // 60
    seconds = whole % 60
    return f"{minutes:02d}:{seconds:02d}"