feat: OpenRouter 외부 AI 연동 (STT 교정 + OCR Vision)

This commit is contained in:
root
2026-04-28 15:38:06 +09:00
parent f9075ae3f6
commit f35fe1143a
5 changed files with 667 additions and 299 deletions

View File

@@ -15,15 +15,12 @@ OLLAMA_URL = os.getenv("OLLAMA_URL", "http://192.168.0.126:11434")
# ── Runtime configuration (environment-driven) ───────────────────
# Shared HTTP timeout (seconds) for both Ollama and OpenRouter post-processing calls.
OLLAMA_TIMEOUT = int(os.getenv("OLLAMA_TIMEOUT", "600"))
_cpu_threads_env = int(os.getenv("CPU_THREADS", "0"))
# 0 / unset -> None, which lets faster-whisper auto-detect the CPU thread count.
CPU_THREADS = _cpu_threads_env if _cpu_threads_env > 0 else None

# Celery worker app: Redis serves as both broker and result backend.
celery_app = Celery("whisper_tasks", broker=REDIS_URL, backend=REDIS_URL)
celery_app.conf.update(
    task_serializer="json",
    result_serializer="json",
    accept_content=["json"],
    task_track_started=True,
    result_expires=3600,  # keep task results in Redis for one hour
)

# Lazily-loaded Whisper model singleton; populated by get_model().
_model = None
def get_model():
    """Return the process-wide faster-whisper model, loading it on first call.

    The import is deferred so the worker starts quickly and only pays the
    model-load cost when the first STT task arrives.
    """
    # NOTE(review): `global` reconstructed from context — the singleton
    # assignment below requires it; confirm against the original file.
    global _model
    if _model is None:
        from faster_whisper import WhisperModel
        kwargs = dict(device=DEVICE, compute_type=COMPUTE_TYPE)
        # Only pass cpu_threads when explicitly configured; None means auto.
        if CPU_THREADS is not None:
            kwargs["cpu_threads"] = CPU_THREADS
        print(f"[Whisper] 로딩: {MODEL_SIZE} / {DEVICE} / {COMPUTE_TYPE} / threads={CPU_THREADS or 'auto'}")
        _model = WhisperModel(MODEL_SIZE, **kwargs)
        print("[Whisper] 로드 완료")
    return _model
# ── 후처리: Ollama ────────────────────────────────────────────
def _ollama_postprocess(text: str, model: str) -> str:
if not model or not text.strip():
return text
if not model or not text.strip(): return text
prompt = (
"다음은 음성 인식으로 추출된 텍스트입니다. "
"내용은 절대 변경하지 말고, 문장 부호를 추가하고 자연스럽게 다듬어줘. "
"결과 텍스트만 출력하고 설명은 하지 마.\n\n"
f"{text}"
"결과 텍스트만 출력하고 설명은 하지 마.\n\n" + text
)
try:
resp = httpx.post(f"{OLLAMA_URL}/api/chat", json={
"model": model,
"messages": [{"role":"user","content":prompt}],
"stream": False, "options": {"temperature": 0.1},
}, timeout=float(OLLAMA_TIMEOUT))
resp.raise_for_status()
result = resp.json().get("message",{}).get("content","").strip()
return result if result else text
except Exception as e:
print(f"[Ollama 후처리 실패] {e}"); return text
# ── 후처리: OpenRouter (OpenAI 호환) ─────────────────────────
def _openrouter_postprocess(text: str, model: str, base_url: str, api_key: str) -> str:
if not model or not api_key or not text.strip(): return text
prompt = (
"다음은 음성 인식으로 추출된 텍스트입니다. "
"내용은 절대 변경하지 말고, 문장 부호를 추가하고 자연스럽게 다듬어줘. "
"결과 텍스트만 출력하고 설명은 하지 마.\n\n" + text
)
try:
resp = httpx.post(
f"{OLLAMA_URL}/api/chat",
json={"model": model,
"messages": [{"role": "user", "content": prompt}],
"stream": False, "options": {"temperature": 0.1}},
f"{base_url.rstrip('/')}/chat/completions",
headers={
"Authorization": f"Bearer {api_key}",
"HTTP-Referer": "https://voicescript.local",
"X-Title": "VoiceScript",
"Content-Type": "application/json",
},
json={
"model": model,
"messages": [{"role":"user","content":prompt}],
"temperature": 0.1,
},
timeout=float(OLLAMA_TIMEOUT),
)
resp.raise_for_status()
result = resp.json().get("message", {}).get("content", "").strip()
result = resp.json()["choices"][0]["message"]["content"].strip()
return result if result else text
except Exception as e:
print(f"[Ollama 후처리 실패] {e}")
return text
print(f"[OpenRouter 후처리 실패] {e}"); return text
# ════════════════════════════════════════════════════════════════
# STT Task
# ════════════════════════════════════════════════════════════════
@celery_app.task(bind=True, name="tasks.transcribe_task", queue="stt")
def transcribe_task(self, file_id: str, audio_path: str,
use_ollama: bool = False, ollama_model: str = ""):
self.update_state(state="PROGRESS", meta={"progress": 5, "message": "모델 준비 중..."})
def transcribe_task(
self,
file_id: str,
audio_path: str,
use_ollama: bool = False,
ollama_model: str = "",
use_openrouter: bool = False,
openrouter_model: str = "",
openrouter_url: str = "",
openrouter_key: str = "",
):
self.update_state(state="PROGRESS", meta={"progress":5,"message":"모델 준비 중..."})
try:
model = get_model()
self.update_state(state="PROGRESS", meta={"progress": 15, "message": "오디오 분석 중..."})
self.update_state(state="PROGRESS", meta={"progress":15,"message":"오디오 분석 중..."})
segments_gen, info = model.transcribe(
audio_path,
language=LANGUAGE,
beam_size=BEAM_SIZE,
initial_prompt=INITIAL_PROMPT,
vad_filter=True,
vad_parameters=dict(min_silence_duration_ms=500),
word_timestamps=False,
audio_path, language=LANGUAGE, beam_size=BEAM_SIZE,
initial_prompt=INITIAL_PROMPT, vad_filter=True,
vad_parameters=dict(min_silence_duration_ms=500), word_timestamps=False,
)
self.update_state(state="PROGRESS", meta={"progress": 30, "message": "텍스트 변환 중..."})
self.update_state(state="PROGRESS", meta={"progress":30,"message":"텍스트 변환 중..."})
segments, parts = [], []
duration = info.duration
for seg in segments_gen:
segments.append({"start": round(seg.start, 2),
"end": round(seg.end, 2),
"text": seg.text.strip()})
segments.append({"start":round(seg.start,2),"end":round(seg.end,2),"text":seg.text.strip()})
parts.append(seg.text.strip())
if duration > 0:
pct = 30 + int((seg.end / duration) * 50)
self.update_state(
state="PROGRESS",
meta={"progress": min(pct, 80),
"message": f"변환 중... {seg.end:.0f}s / {duration:.0f}s"},
)
pct = 30 + int((seg.end/duration)*50)
self.update_state(state="PROGRESS",
meta={"progress":min(pct,80),"message":f"변환 중... {seg.end:.0f}s / {duration:.0f}s"})
raw_text = "\n".join(parts)
full_text = raw_text
# Ollama 후처리
if use_ollama and ollama_model:
self.update_state(state="PROGRESS",
meta={"progress": 85,
"message": f"Ollama({ollama_model}) 후처리 중..."})
meta={"progress":85,"message":f"Ollama({ollama_model}) 후처리 중..."})
full_text = _ollama_postprocess(raw_text, ollama_model)
self.update_state(state="PROGRESS", meta={"progress": 95, "message": "파일 저장 중..."})
# OpenRouter 후처리
elif use_openrouter and openrouter_model and openrouter_key:
self.update_state(state="PROGRESS",
meta={"progress":85,"message":f"OpenRouter({openrouter_model}) 후처리 중..."})
full_text = _openrouter_postprocess(raw_text, openrouter_model, openrouter_url, openrouter_key)
self.update_state(state="PROGRESS", meta={"progress":95,"message":"파일 저장 중..."})
os.makedirs(OUTPUT_DIR, exist_ok=True)
output_filename = f"{file_id}.txt"
with open(os.path.join(OUTPUT_DIR, output_filename), "w", encoding="utf-8") as f:
f.write(f"# 변환 결과\n# 언어: {info.language} | 재생 시간: {duration:.1f}")
if use_ollama and ollama_model:
f.write(f" | Ollama 후처리: {ollama_model}")
f.write(f" | Ollama: {ollama_model}")
elif use_openrouter and openrouter_model:
f.write(f" | OpenRouter: {openrouter_model}")
f.write("\n\n## 전체 텍스트\n\n" + full_text + "\n\n")
f.write("## 타임스탬프별 세그먼트\n\n")
for seg in segments:
@@ -128,14 +161,16 @@ def transcribe_task(self, file_id: str, audio_path: str,
except: pass
return {
"text": full_text,
"raw_text": raw_text,
"segments": segments,
"language": info.language,
"duration": round(duration, 1),
"output_file": output_filename,
"ollama_used": use_ollama and bool(ollama_model),
"ollama_model": ollama_model if (use_ollama and ollama_model) else "",
"text": full_text,
"raw_text": raw_text,
"segments": segments,
"language": info.language,
"duration": round(duration, 1),
"output_file": output_filename,
"ollama_used": use_ollama and bool(ollama_model),
"ollama_model": ollama_model if (use_ollama and ollama_model) else "",
"openrouter_used": use_openrouter and bool(openrouter_model) and bool(openrouter_key),
"openrouter_model": openrouter_model if (use_openrouter and openrouter_model) else "",
}
except Exception as e: