98 lines
3.6 KiB
Python
98 lines
3.6 KiB
Python
import os
|
|
from celery import Celery
|
|
|
|
# ocr_task를 직접 import해서 worker에 등록
|
|
from ocr_tasks import ocr_task # noqa: F401
|
|
|
|
# --- Runtime configuration, read once from the environment at import time ---

# Redis connection string; used for both the Celery broker and result backend.
REDIS_URL = os.environ.get("REDIS_URL", "redis://redis:6379/0")

# Arguments forwarded to the WhisperModel constructor in get_model().
MODEL_SIZE = os.environ.get("WHISPER_MODEL", "medium")
DEVICE = os.environ.get("WHISPER_DEVICE", "cpu")
COMPUTE_TYPE = os.environ.get("WHISPER_COMPUTE_TYPE", "int8")

# Arguments forwarded to model.transcribe(). An empty string for the language
# or the initial prompt is normalised to None.
LANGUAGE = os.environ.get("WHISPER_LANGUAGE", "ko") or None
BEAM_SIZE = int(os.environ.get("WHISPER_BEAM_SIZE", "5"))
INITIAL_PROMPT = os.environ.get("WHISPER_INITIAL_PROMPT", "") or None

# Directory where the generated transcript text files are written.
OUTPUT_DIR = os.environ.get("OUTPUT_DIR", "/data/outputs")
|
|
|
|
# Celery application for the Whisper worker; Redis serves as both the message
# broker and the result backend.
celery_app = Celery("whisper_tasks", broker=REDIS_URL, backend=REDIS_URL)

_CELERY_SETTINGS = {
    # Exchange task payloads and results as JSON only.
    "task_serializer": "json",
    "result_serializer": "json",
    "accept_content": ["json"],
    # Report a STARTED state when a worker begins executing a task.
    "task_track_started": True,
    # Stored results expire after this many seconds (one hour).
    "result_expires": 3600,
}
celery_app.conf.update(**_CELERY_SETTINGS)
|
|
|
|
# Process-wide cache for the Whisper model; populated on first use.
_model = None


def get_model():
    """Return the shared Whisper model, loading it on the first call.

    The model is built from the module-level ``MODEL_SIZE``, ``DEVICE`` and
    ``COMPUTE_TYPE`` settings and cached in the ``_model`` global, so later
    calls return the same instance without reloading.

    ``faster_whisper`` is imported inside the function, so importing this
    module does not itself import that package.
    """
    global _model

    # Fast path: the model has already been loaded.
    if _model is not None:
        return _model

    from faster_whisper import WhisperModel

    print(f"[Whisper] 로딩: {MODEL_SIZE} / {DEVICE} / {COMPUTE_TYPE}")
    _model = WhisperModel(MODEL_SIZE, device=DEVICE, compute_type=COMPUTE_TYPE)
    print("[Whisper] 로드 완료")
    return _model
|
|
|
|
|
|
def _write_transcript(output_path, language, duration, full_text, segments):
    """Write the transcript report (header, full text, timestamped segments)."""
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(f"# 변환 결과\n# 언어: {language} | 재생 시간: {duration:.1f}초\n\n")
        f.write("## 전체 텍스트\n\n" + full_text + "\n\n")
        f.write("## 타임스탬프별 세그먼트\n\n")
        for seg in segments:
            f.write(f"[{_fmt(seg['start'])} → {_fmt(seg['end'])}] {seg['text']}\n")


@celery_app.task(bind=True, name="tasks.transcribe_task", queue="stt")
def transcribe_task(self, file_id: str, audio_path: str):
    """Transcribe an audio file and store the result as a text file.

    Progress is published through ``self.update_state`` with the custom
    ``PROGRESS`` state; ``meta`` carries a ``progress`` percentage and a
    human-readable ``message``.

    Args:
        file_id: Identifier used to name the output file
            (``<OUTPUT_DIR>/<file_id>.txt``).
        audio_path: Path of the audio file to transcribe. The file is deleted
            after a successful run (best effort); it is left in place when
            the task fails.

    Returns:
        A dict with the keys ``text`` (all segments joined by newlines),
        ``segments`` (list of ``{"start", "end", "text"}`` dicts with times
        rounded to 2 decimals), ``language``, ``duration`` (rounded to
        1 decimal) and ``output_file`` (file name only, without directory).

    Raises:
        Exception: Any failure is re-raised as a plain ``Exception`` whose
            message is prefixed with ``변환 실패:``; the original error is
            attached as ``__cause__``.
    """
    self.update_state(state="PROGRESS", meta={"progress": 5, "message": "모델 준비 중..."})
    try:
        model = get_model()
        self.update_state(state="PROGRESS", meta={"progress": 15, "message": "오디오 분석 중..."})

        segments_gen, info = model.transcribe(
            audio_path,
            language=LANGUAGE,
            beam_size=BEAM_SIZE,
            initial_prompt=INITIAL_PROMPT,
            vad_filter=True,
            vad_parameters=dict(min_silence_duration_ms=500),
            word_timestamps=False,
        )

        self.update_state(state="PROGRESS", meta={"progress": 30, "message": "텍스트 변환 중..."})

        segments, full_text_parts = [], []
        duration = info.duration

        # NOTE(review): segments_gen is presumably a lazy generator (as in
        # faster-whisper), so the decoding work happens during this loop.
        for seg in segments_gen:
            text = seg.text.strip()
            segments.append({"start": round(seg.start, 2),
                             "end": round(seg.end, 2),
                             "text": text})
            full_text_parts.append(text)
            if duration > 0:
                # Map the position in the audio onto the 30..90 % band.
                pct = 30 + int((seg.end / duration) * 60)
                self.update_state(
                    state="PROGRESS",
                    meta={"progress": min(pct, 90),
                          "message": f"변환 중... {seg.end:.0f}s / {duration:.0f}s"},
                )

        full_text = "\n".join(full_text_parts)

        self.update_state(state="PROGRESS", meta={"progress": 95, "message": "파일 저장 중..."})
        os.makedirs(OUTPUT_DIR, exist_ok=True)
        output_filename = f"{file_id}.txt"
        _write_transcript(
            os.path.join(OUTPUT_DIR, output_filename),
            info.language,
            duration,
            full_text,
            segments,
        )

        # Best-effort cleanup of the source audio: a failure here must not
        # fail the task, but only filesystem errors are tolerated (a bare
        # ``except`` would also swallow KeyboardInterrupt/SystemExit).
        try:
            os.remove(audio_path)
        except OSError as cleanup_error:
            print(f"[Whisper] 오디오 파일 삭제 실패: {audio_path} ({cleanup_error})")

        return {"text": full_text, "segments": segments,
                "language": info.language, "duration": round(duration, 1),
                "output_file": output_filename}

    except Exception as e:
        # Keep the plain Exception type and message prefix callers already
        # see, but chain the original error so its traceback is preserved.
        raise Exception(f"변환 실패: {e}") from e
|
|
|
|
|
|
def _fmt(s):
    """Format a duration in seconds as a zero-padded ``MM:SS`` string.

    Fractional seconds are truncated. Minutes are not wrapped into hours, so
    3725 seconds is rendered as ``62:05``.
    """
    whole_seconds = int(s)
    minutes = whole_seconds // 60
    remainder = whole_seconds % 60
    return "{:02d}:{:02d}".format(minutes, remainder)
|