feat: 복수 파일 배치 변환 (STT/OCR)
This commit is contained in:
338
app/tasks.py
338
app/tasks.py
@@ -1,4 +1,10 @@
|
||||
import os
|
||||
"""
|
||||
STT Celery Tasks
|
||||
- faster-whisper 변환
|
||||
- Ollama / OpenRouter 후처리 (교정 또는 번역)
|
||||
- SRT / VTT / TXT 자막 파일 생성
|
||||
"""
|
||||
import os, json
|
||||
import httpx
|
||||
from celery import Celery
|
||||
from ocr_tasks import ocr_task # noqa: F401
|
||||
@@ -31,13 +37,144 @@ def get_model():
|
||||
from faster_whisper import WhisperModel
|
||||
kwargs = dict(device=DEVICE, compute_type=COMPUTE_TYPE)
|
||||
if CPU_THREADS is not None: kwargs["cpu_threads"] = CPU_THREADS
|
||||
print(f"[Whisper] 로딩: {MODEL_SIZE} / {DEVICE} / {COMPUTE_TYPE} / threads={CPU_THREADS or 'auto'}")
|
||||
print(f"[Whisper] 로딩: {MODEL_SIZE}/{DEVICE}/{COMPUTE_TYPE}/threads={CPU_THREADS or 'auto'}")
|
||||
_model = WhisperModel(MODEL_SIZE, **kwargs)
|
||||
print("[Whisper] 로드 완료")
|
||||
return _model
|
||||
|
||||
|
||||
# ── 후처리: Ollama ────────────────────────────────────────────
|
||||
# ══════════════════════════════════════════════════════════════
|
||||
# 언어 코드 매핑
|
||||
# ══════════════════════════════════════════════════════════════
|
||||
LANG_NAMES = {
|
||||
"ko":"한국어","en":"English","ja":"日本語","zh":"中文","fr":"Français",
|
||||
"de":"Deutsch","es":"Español","it":"Italiano","pt":"Português","ru":"Русский",
|
||||
"ar":"العربية","vi":"Tiếng Việt","th":"ไทย","id":"Bahasa Indonesia",
|
||||
"nl":"Nederlands","pl":"Polski","tr":"Türkçe","sv":"Svenska","uk":"Українська",
|
||||
}
|
||||
|
||||
def _lang_name(code: str) -> str:
|
||||
return LANG_NAMES.get(code, code)
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════
|
||||
# 자막 포맷 생성
|
||||
# ══════════════════════════════════════════════════════════════
|
||||
def _fmt_srt_time(s: float) -> str:
|
||||
"""초 → SRT 시간 포맷 00:00:00,000"""
|
||||
ms = int(round(s * 1000))
|
||||
h, rem = divmod(ms, 3600000)
|
||||
m, rem = divmod(rem, 60000)
|
||||
sec, ms = divmod(rem, 1000)
|
||||
return f"{h:02d}:{m:02d}:{sec:02d},{ms:03d}"
|
||||
|
||||
def _fmt_vtt_time(s: float) -> str:
    """Convert seconds to a WebVTT timestamp of the form ``HH:MM:SS.mmm``."""
    # Same layout as the SRT stamp; only the millisecond separator differs.
    srt_stamp = _fmt_srt_time(s)
    return ".".join(srt_stamp.split(","))
|
||||
|
||||
def _make_srt(segments: list) -> str:
|
||||
lines = []
|
||||
for i, seg in enumerate(segments, 1):
|
||||
lines.append(str(i))
|
||||
lines.append(f"{_fmt_srt_time(seg['start'])} --> {_fmt_srt_time(seg['end'])}")
|
||||
lines.append(seg["text"].strip())
|
||||
lines.append("")
|
||||
return "\n".join(lines)
|
||||
|
||||
def _make_vtt(segments: list) -> str:
|
||||
lines = ["WEBVTT", ""]
|
||||
for i, seg in enumerate(segments, 1):
|
||||
lines.append(f"{i}")
|
||||
lines.append(f"{_fmt_vtt_time(seg['start'])} --> {_fmt_vtt_time(seg['end'])}")
|
||||
lines.append(seg["text"].strip())
|
||||
lines.append("")
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════
|
||||
# 번역 (Ollama / OpenRouter)
|
||||
# ══════════════════════════════════════════════════════════════
|
||||
def _translate_segments(segments: list, target_lang: str,
|
||||
use_openrouter: bool, model: str,
|
||||
openrouter_url: str, openrouter_key: str,
|
||||
task_self=None) -> list:
|
||||
"""세그먼트 텍스트를 target_lang으로 번역해서 새 세그먼트 리스트 반환"""
|
||||
if not model or not target_lang:
|
||||
return segments
|
||||
|
||||
lang_name = _lang_name(target_lang)
|
||||
translated = []
|
||||
|
||||
# 세그먼트를 청크로 묶어서 번역 (API 호출 최소화)
|
||||
# 최대 20개씩 묶음
|
||||
CHUNK = 20
|
||||
chunks = [segments[i:i+CHUNK] for i in range(0, len(segments), CHUNK)]
|
||||
|
||||
for ci, chunk in enumerate(chunks):
|
||||
if task_self:
|
||||
pct = 85 + int((ci / len(chunks)) * 10)
|
||||
task_self.update_state(state="PROGRESS",
|
||||
meta={"progress": pct,
|
||||
"message": f"번역 중... ({ci*CHUNK+1}/{len(segments)})"})
|
||||
|
||||
# JSON 배열로 텍스트만 전달
|
||||
texts = [seg["text"].strip() for seg in chunk]
|
||||
prompt = (
|
||||
f"다음 문장들을 {lang_name}로 번역해줘.\n"
|
||||
f"JSON 배열 형식으로만 답해줘. 설명 없이 번역된 문장 배열만 출력해.\n"
|
||||
f"입력 배열과 동일한 개수, 동일한 순서로 출력해.\n\n"
|
||||
f"입력: {json.dumps(texts, ensure_ascii=False)}"
|
||||
)
|
||||
|
||||
try:
|
||||
if use_openrouter and openrouter_key:
|
||||
resp = httpx.post(
|
||||
f"{openrouter_url.rstrip('/')}/chat/completions",
|
||||
headers={"Authorization": f"Bearer {openrouter_key}",
|
||||
"HTTP-Referer": "https://voicescript.local",
|
||||
"Content-Type": "application/json"},
|
||||
json={"model": model,
|
||||
"messages": [{"role":"user","content":prompt}],
|
||||
"temperature": 0.2},
|
||||
timeout=float(OLLAMA_TIMEOUT),
|
||||
)
|
||||
resp.raise_for_status()
|
||||
raw = resp.json()["choices"][0]["message"]["content"].strip()
|
||||
else:
|
||||
resp = httpx.post(f"{OLLAMA_URL}/api/chat",
|
||||
json={"model": model,
|
||||
"messages": [{"role":"user","content":prompt}],
|
||||
"stream": False, "options": {"temperature": 0.2}},
|
||||
timeout=float(OLLAMA_TIMEOUT))
|
||||
resp.raise_for_status()
|
||||
raw = resp.json().get("message",{}).get("content","").strip()
|
||||
|
||||
# JSON 파싱
|
||||
# 코드블록 제거
|
||||
if "```" in raw:
|
||||
raw = raw.split("```")[1].lstrip("json").strip()
|
||||
trans_texts = json.loads(raw)
|
||||
if not isinstance(trans_texts, list):
|
||||
trans_texts = texts # 파싱 실패 시 원본 유지
|
||||
|
||||
except Exception as e:
|
||||
print(f"[번역 실패 chunk {ci}] {e}")
|
||||
trans_texts = texts # 실패 시 원본 유지
|
||||
|
||||
# 번역된 텍스트를 세그먼트에 결합
|
||||
for seg, t_text in zip(chunk, trans_texts):
|
||||
translated.append({**seg, "text": t_text})
|
||||
# 남은 세그먼트 (번역 누락)
|
||||
if len(trans_texts) < len(chunk):
|
||||
for seg in chunk[len(trans_texts):]:
|
||||
translated.append(seg)
|
||||
|
||||
return translated
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════
|
||||
# Ollama 텍스트 후처리 (교정)
|
||||
# ══════════════════════════════════════════════════════════════
|
||||
def _ollama_postprocess(text: str, model: str) -> str:
|
||||
if not model or not text.strip(): return text
|
||||
prompt = (
|
||||
@@ -46,19 +183,16 @@ def _ollama_postprocess(text: str, model: str) -> str:
|
||||
"결과 텍스트만 출력하고 설명은 하지 마.\n\n" + text
|
||||
)
|
||||
try:
|
||||
resp = httpx.post(f"{OLLAMA_URL}/api/chat", json={
|
||||
"model": model,
|
||||
"messages": [{"role":"user","content":prompt}],
|
||||
"stream": False, "options": {"temperature": 0.1},
|
||||
}, timeout=float(OLLAMA_TIMEOUT))
|
||||
resp = httpx.post(f"{OLLAMA_URL}/api/chat",
|
||||
json={"model":model,"messages":[{"role":"user","content":prompt}],
|
||||
"stream":False,"options":{"temperature":0.1}},
|
||||
timeout=float(OLLAMA_TIMEOUT))
|
||||
resp.raise_for_status()
|
||||
result = resp.json().get("message",{}).get("content","").strip()
|
||||
return result if result else text
|
||||
except Exception as e:
|
||||
print(f"[Ollama 후처리 실패] {e}"); return text
|
||||
|
||||
|
||||
# ── 후처리: OpenRouter (OpenAI 호환) ─────────────────────────
|
||||
def _openrouter_postprocess(text: str, model: str, base_url: str, api_key: str) -> str:
|
||||
if not model or not api_key or not text.strip(): return text
|
||||
prompt = (
|
||||
@@ -67,21 +201,10 @@ def _openrouter_postprocess(text: str, model: str, base_url: str, api_key: str)
|
||||
"결과 텍스트만 출력하고 설명은 하지 마.\n\n" + text
|
||||
)
|
||||
try:
|
||||
resp = httpx.post(
|
||||
f"{base_url.rstrip('/')}/chat/completions",
|
||||
headers={
|
||||
"Authorization": f"Bearer {api_key}",
|
||||
"HTTP-Referer": "https://voicescript.local",
|
||||
"X-Title": "VoiceScript",
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
json={
|
||||
"model": model,
|
||||
"messages": [{"role":"user","content":prompt}],
|
||||
"temperature": 0.1,
|
||||
},
|
||||
timeout=float(OLLAMA_TIMEOUT),
|
||||
)
|
||||
resp = httpx.post(f"{base_url.rstrip('/')}/chat/completions",
|
||||
headers={"Authorization":f"Bearer {api_key}","HTTP-Referer":"https://voicescript.local","Content-Type":"application/json"},
|
||||
json={"model":model,"messages":[{"role":"user","content":prompt}],"temperature":0.1},
|
||||
timeout=float(OLLAMA_TIMEOUT))
|
||||
resp.raise_for_status()
|
||||
result = resp.json()["choices"][0]["message"]["content"].strip()
|
||||
return result if result else text
|
||||
@@ -89,30 +212,44 @@ def _openrouter_postprocess(text: str, model: str, base_url: str, api_key: str)
|
||||
print(f"[OpenRouter 후처리 실패] {e}"); return text
|
||||
|
||||
|
||||
# ════════════════════════════════════════════════════════════════
|
||||
# STT Task
|
||||
# ════════════════════════════════════════════════════════════════
|
||||
# ══════════════════════════════════════════════════════════════
|
||||
# 메인 STT Task
|
||||
# ══════════════════════════════════════════════════════════════
|
||||
@celery_app.task(bind=True, name="tasks.transcribe_task", queue="stt")
|
||||
def transcribe_task(
|
||||
self,
|
||||
file_id: str,
|
||||
audio_path: str,
|
||||
use_ollama: bool = False,
|
||||
ollama_model: str = "",
|
||||
use_openrouter: bool = False,
|
||||
openrouter_model: str = "",
|
||||
openrouter_url: str = "",
|
||||
openrouter_key: str = "",
|
||||
file_id: str,
|
||||
audio_path: str,
|
||||
# 후처리
|
||||
use_ollama: bool = False,
|
||||
ollama_model: str = "",
|
||||
use_openrouter: bool = False,
|
||||
openrouter_model: str = "",
|
||||
openrouter_url: str = "",
|
||||
openrouter_key: str = "",
|
||||
# 자막
|
||||
subtitle_mode: bool = False, # True → 자막 파일 생성
|
||||
subtitle_format: str = "srt", # srt | vtt | both
|
||||
translate_to: str = "", # 번역 대상 언어 코드 (없으면 원어 자막)
|
||||
translate_model: str = "", # 번역에 쓸 모델
|
||||
translate_via: str = "ollama",# ollama | openrouter
|
||||
# 원본 언어 강제 지정 (없으면 auto)
|
||||
force_language: str = "",
|
||||
):
|
||||
self.update_state(state="PROGRESS", meta={"progress":5,"message":"모델 준비 중..."})
|
||||
try:
|
||||
model = get_model()
|
||||
self.update_state(state="PROGRESS", meta={"progress":15,"message":"오디오 분석 중..."})
|
||||
|
||||
lang = force_language.strip() or LANGUAGE
|
||||
segments_gen, info = model.transcribe(
|
||||
audio_path, language=LANGUAGE, beam_size=BEAM_SIZE,
|
||||
initial_prompt=INITIAL_PROMPT, vad_filter=True,
|
||||
vad_parameters=dict(min_silence_duration_ms=500), word_timestamps=False,
|
||||
audio_path,
|
||||
language=lang,
|
||||
beam_size=BEAM_SIZE,
|
||||
initial_prompt=INITIAL_PROMPT,
|
||||
vad_filter=True,
|
||||
vad_parameters=dict(min_silence_duration_ms=500),
|
||||
word_timestamps=False,
|
||||
)
|
||||
|
||||
self.update_state(state="PROGRESS", meta={"progress":30,"message":"텍스트 변환 중..."})
|
||||
@@ -120,63 +257,126 @@ def transcribe_task(
|
||||
duration = info.duration
|
||||
|
||||
for seg in segments_gen:
|
||||
segments.append({"start":round(seg.start,2),"end":round(seg.end,2),"text":seg.text.strip()})
|
||||
segments.append({"start":round(seg.start,3),"end":round(seg.end,3),"text":seg.text.strip()})
|
||||
parts.append(seg.text.strip())
|
||||
if duration > 0:
|
||||
pct = 30 + int((seg.end/duration)*50)
|
||||
pct = 30 + int((seg.end/duration)*45)
|
||||
self.update_state(state="PROGRESS",
|
||||
meta={"progress":min(pct,80),"message":f"변환 중... {seg.end:.0f}s / {duration:.0f}s"})
|
||||
meta={"progress":min(pct,75),
|
||||
"message":f"변환 중... {seg.end:.0f}s / {duration:.0f}s"})
|
||||
|
||||
raw_text = "\n".join(parts)
|
||||
full_text = raw_text
|
||||
detected_lang = info.language
|
||||
|
||||
# Ollama 후처리
|
||||
if use_ollama and ollama_model:
|
||||
# ── 텍스트 후처리 (교정) ──────────────────────────────
|
||||
if use_ollama and ollama_model and not subtitle_mode:
|
||||
self.update_state(state="PROGRESS",
|
||||
meta={"progress":85,"message":f"Ollama({ollama_model}) 후처리 중..."})
|
||||
meta={"progress":80,"message":f"Ollama({ollama_model}) 교정 중..."})
|
||||
full_text = _ollama_postprocess(raw_text, ollama_model)
|
||||
|
||||
# OpenRouter 후처리
|
||||
elif use_openrouter and openrouter_model and openrouter_key:
|
||||
elif use_openrouter and openrouter_model and openrouter_key and not subtitle_mode:
|
||||
self.update_state(state="PROGRESS",
|
||||
meta={"progress":85,"message":f"OpenRouter({openrouter_model}) 후처리 중..."})
|
||||
meta={"progress":80,"message":f"OpenRouter({openrouter_model}) 교정 중..."})
|
||||
full_text = _openrouter_postprocess(raw_text, openrouter_model, openrouter_url, openrouter_key)
|
||||
|
||||
self.update_state(state="PROGRESS", meta={"progress":95,"message":"파일 저장 중..."})
|
||||
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
||||
output_filename = f"{file_id}.txt"
|
||||
# ── 자막 모드: 번역 ──────────────────────────────────
|
||||
translated_segments = segments
|
||||
is_translated = False
|
||||
|
||||
with open(os.path.join(OUTPUT_DIR, output_filename), "w", encoding="utf-8") as f:
|
||||
f.write(f"# 변환 결과\n# 언어: {info.language} | 재생 시간: {duration:.1f}초")
|
||||
if use_ollama and ollama_model:
|
||||
f.write(f" | Ollama: {ollama_model}")
|
||||
elif use_openrouter and openrouter_model:
|
||||
f.write(f" | OpenRouter: {openrouter_model}")
|
||||
f.write("\n\n## 전체 텍스트\n\n" + full_text + "\n\n")
|
||||
f.write("## 타임스탬프별 세그먼트\n\n")
|
||||
for seg in segments:
|
||||
f.write(f"[{_fmt(seg['start'])} → {_fmt(seg['end'])}] {seg['text']}\n")
|
||||
if subtitle_mode and translate_to and translate_to != detected_lang:
|
||||
t_model = translate_model or (ollama_model if translate_via=="ollama" else openrouter_model)
|
||||
t_via_or = (translate_via == "openrouter" and bool(openrouter_key))
|
||||
self.update_state(state="PROGRESS",
|
||||
meta={"progress":82,
|
||||
"message":f"{_lang_name(translate_to)}로 번역 중..."})
|
||||
translated_segments = _translate_segments(
|
||||
segments, translate_to,
|
||||
use_openrouter=t_via_or,
|
||||
model=t_model,
|
||||
openrouter_url=openrouter_url,
|
||||
openrouter_key=openrouter_key,
|
||||
task_self=self,
|
||||
)
|
||||
is_translated = True
|
||||
# 번역된 전체 텍스트
|
||||
full_text = "\n".join(s["text"] for s in translated_segments)
|
||||
|
||||
self.update_state(state="PROGRESS", meta={"progress":93,"message":"파일 저장 중..."})
|
||||
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
||||
|
||||
result_files = {}
|
||||
|
||||
# ── TXT 저장 ─────────────────────────────────────────
|
||||
txt_filename = f"{file_id}.txt"
|
||||
with open(os.path.join(OUTPUT_DIR, txt_filename), "w", encoding="utf-8") as f:
|
||||
f.write(f"# 변환 결과\n")
|
||||
f.write(f"# 언어: {detected_lang} | 재생 시간: {duration:.1f}초\n")
|
||||
if is_translated:
|
||||
f.write(f"# 번역: {_lang_name(translate_to)}\n")
|
||||
f.write(f"\n## 전체 텍스트\n\n{full_text}\n\n")
|
||||
f.write(f"## 타임스탬프별 세그먼트\n\n")
|
||||
for seg in (translated_segments if is_translated else segments):
|
||||
f.write(f"[{_fmt_ts(seg['start'])} → {_fmt_ts(seg['end'])}] {seg['text']}\n")
|
||||
result_files["txt"] = txt_filename
|
||||
|
||||
# ── 자막 파일 저장 ────────────────────────────────────
|
||||
if subtitle_mode:
|
||||
sub_segs = translated_segments if is_translated else segments
|
||||
lang_suffix = f".{translate_to}" if is_translated else f".{detected_lang}"
|
||||
|
||||
if subtitle_format in ("srt", "both"):
|
||||
srt_fn = f"{file_id}{lang_suffix}.srt"
|
||||
with open(os.path.join(OUTPUT_DIR, srt_fn), "w", encoding="utf-8") as f:
|
||||
f.write(_make_srt(sub_segs))
|
||||
result_files["srt"] = srt_fn
|
||||
|
||||
if subtitle_format in ("vtt", "both"):
|
||||
vtt_fn = f"{file_id}{lang_suffix}.vtt"
|
||||
with open(os.path.join(OUTPUT_DIR, vtt_fn), "w", encoding="utf-8") as f:
|
||||
f.write(_make_vtt(sub_segs))
|
||||
result_files["vtt"] = vtt_fn
|
||||
|
||||
# 원본 언어 SRT도 함께 (번역 시)
|
||||
if is_translated and subtitle_format in ("srt","both"):
|
||||
orig_fn = f"{file_id}.{detected_lang}.srt"
|
||||
with open(os.path.join(OUTPUT_DIR, orig_fn), "w", encoding="utf-8") as f:
|
||||
f.write(_make_srt(segments))
|
||||
result_files["srt_original"] = orig_fn
|
||||
|
||||
try: os.remove(audio_path)
|
||||
except: pass
|
||||
|
||||
return {
|
||||
# 기본 STT 결과
|
||||
"text": full_text,
|
||||
"raw_text": raw_text,
|
||||
"segments": segments,
|
||||
"language": info.language,
|
||||
"segments": translated_segments if is_translated else segments,
|
||||
"orig_segments": segments,
|
||||
"language": detected_lang,
|
||||
"duration": round(duration, 1),
|
||||
"output_file": output_filename,
|
||||
"ollama_used": use_ollama and bool(ollama_model),
|
||||
"ollama_model": ollama_model if (use_ollama and ollama_model) else "",
|
||||
"openrouter_used": use_openrouter and bool(openrouter_model) and bool(openrouter_key),
|
||||
"openrouter_model": openrouter_model if (use_openrouter and openrouter_model) else "",
|
||||
# 후처리
|
||||
"ollama_used": use_ollama and bool(ollama_model) and not subtitle_mode,
|
||||
"ollama_model": ollama_model if (use_ollama and not subtitle_mode) else "",
|
||||
"openrouter_used": use_openrouter and bool(openrouter_model) and not subtitle_mode,
|
||||
"openrouter_model": openrouter_model if (use_openrouter and not subtitle_mode) else "",
|
||||
# 자막
|
||||
"subtitle_mode": subtitle_mode,
|
||||
"subtitle_format": subtitle_format,
|
||||
"translated": is_translated,
|
||||
"translate_to": translate_to if is_translated else "",
|
||||
"translate_model": translate_model if is_translated else "",
|
||||
# 파일
|
||||
"output_file": result_files.get("txt",""),
|
||||
"srt_file": result_files.get("srt",""),
|
||||
"vtt_file": result_files.get("vtt",""),
|
||||
"srt_original_file": result_files.get("srt_original",""),
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
raise Exception(f"변환 실패: {str(e)}")
|
||||
|
||||
|
||||
def _fmt(s):
|
||||
def _fmt_ts(s: float) -> str:
|
||||
m, sec = divmod(int(s), 60)
|
||||
return f"{m:02d}:{sec:02d}"
|
||||
|
||||
Reference in New Issue
Block a user