""" OCR Celery Tasks — PaddleOCR 3.x + Ollama Vision + OpenRouter Vision backend: paddle → PaddleOCR 3.x 로컬 (PPStructure 제거됨, 표는 마크다운 파싱) ollama → Ollama Vision API openrouter → OpenRouter Vision API (OpenAI 호환) """ import os, base64, json import httpx from celery import Celery import openpyxl from openpyxl.styles import Font, PatternFill, Alignment, Border, Side REDIS_URL = os.getenv("REDIS_URL", "redis://redis:6379/0") OUTPUT_DIR = os.getenv("OUTPUT_DIR", "/data/outputs") OCR_LANG = os.getenv("OCR_LANG", "korean") OLLAMA_URL = os.getenv("OLLAMA_URL", "http://192.168.0.126:11434") OLLAMA_TIMEOUT = int(os.getenv("OLLAMA_TIMEOUT", "600")) celery_app = Celery("ocr_tasks", broker=REDIS_URL, backend=REDIS_URL) celery_app.conf.update( task_serializer="json", result_serializer="json", accept_content=["json"], task_track_started=True, result_expires=3600, ) # PaddleOCR 싱글톤 _ocr_engine = None def get_ocr(): global _ocr_engine if _ocr_engine is None: from paddleocr import PaddleOCR print(f"[PaddleOCR] 로딩 (lang={OCR_LANG})") _ocr_engine = PaddleOCR(use_angle_cls=True, lang=OCR_LANG) print("[PaddleOCR] 완료") return _ocr_engine # ════════════════════════════════════════════════════════════════ # 메인 Celery Task # 인자: file_id, image_path, mode, backend, # ollama_model, openrouter_model, # openrouter_url, openrouter_key, # custom_prompt # ════════════════════════════════════════════════════════════════ @celery_app.task(bind=True, name="tasks.ocr_task", queue="ocr") def ocr_task( self, file_id: str, image_path: str, mode: str = "text", backend: str = "paddle", ollama_model: str = "granite3.2-vision", openrouter_model: str = "", openrouter_url: str = "", openrouter_key: str = "", custom_prompt: str = "", ): self.update_state(state="PROGRESS", meta={"progress": 8, "message": "엔진 준비 중..."}) try: if backend == "openrouter": result = _run_openrouter( self, file_id, image_path, mode, openrouter_model, openrouter_url, openrouter_key, custom_prompt ) elif backend == "ollama": result = _run_ollama( self, file_id, image_path, mode, ollama_model, custom_prompt ) else: result = _run_paddle(self, file_id, image_path, mode) try: os.remove(image_path) except: pass return result except Exception as e: try: os.remove(image_path) except: pass raise Exception(f"OCR 실패: {str(e)}") # ════════════════════════════════════════════════════════════════ # 공통 프롬프트 # ════════════════════════════════════════════════════════════════ _PROMPT_TEXT = ( "이 이미지에서 모든 텍스트를 정확하게 추출해줘. " "원본의 줄 구분과 단락 구조를 최대한 유지해줘. " "이미지에 없는 내용은 절대 추가하지 마." ) _PROMPT_STRUCTURE = ( "이 이미지를 분석해서 다음을 수행해줘:\n" "1. 표(table)가 있으면 반드시 마크다운 표 형식(| col | col |)으로 변환\n" "2. 나머지 텍스트는 원본 구조를 유지하며 추출\n" "3. 표와 텍스트를 구분해서 순서대로 출력\n" "이미지에 없는 내용은 추가하지 마." ) def _get_prompt(mode, custom_prompt): if custom_prompt and custom_prompt.strip(): return custom_prompt.strip() return _PROMPT_STRUCTURE if mode == "structure" else _PROMPT_TEXT # ════════════════════════════════════════════════════════════════ # OpenRouter Vision 백엔드 # ════════════════════════════════════════════════════════════════ def _run_openrouter(task, file_id, image_path, mode, model, base_url, api_key, custom_prompt): if not api_key: raise Exception("OpenRouter API 키가 설정되지 않았습니다. 설정 → OpenRouter에서 저장하세요.") if not model: raise Exception("OpenRouter 모델이 선택되지 않았습니다.") task.update_state(state="PROGRESS", meta={"progress": 15, "message": f"OpenRouter ({model}) 연결 중..."}) # 이미지 → base64 data URL with open(image_path, "rb") as f: raw = f.read() ext = image_path.rsplit(".", 1)[-1].lower() mime = {"jpg":"image/jpeg","jpeg":"image/jpeg","png":"image/png", "bmp":"image/bmp","gif":"image/gif","webp":"image/webp"}.get(ext, "image/jpeg") data_url = f"data:{mime};base64,{base64.b64encode(raw).decode()}" prompt = _get_prompt(mode, custom_prompt) task.update_state(state="PROGRESS", meta={"progress": 30, "message": "모델 추론 중..."}) try: resp = httpx.post( f"{base_url.rstrip('/')}/chat/completions", headers={ "Authorization": f"Bearer {api_key}", "HTTP-Referer": "https://voicescript.local", "X-Title": "VoiceScript", "Content-Type": "application/json", }, json={ "model": model, "messages": [{ "role": "user", "content": [ {"type": "image_url", "image_url": {"url": data_url}}, {"type": "text", "text": prompt}, ], }], "temperature": 0.1, }, timeout=float(OLLAMA_TIMEOUT), ) resp.raise_for_status() except httpx.HTTPStatusError as e: body = "" try: body = e.response.json().get("error", {}).get("message", "") except: pass if e.response.status_code == 400: raise Exception( f"이 모델은 이미지를 지원하지 않습니다.\n" f"Vision 기능을 지원하는 모델을 선택하세요 (Claude-3, GPT-4o, Gemini 등)\n" f"모델: {model}" ) raise Exception(f"OpenRouter 오류 ({e.response.status_code}): {body or str(e)}") except httpx.TimeoutException: raise Exception(f"OpenRouter 응답 시간 초과 ({OLLAMA_TIMEOUT}초). OLLAMA_TIMEOUT 값을 늘려주세요.") task.update_state(state="PROGRESS", meta={"progress": 85, "message": "결과 저장 중..."}) choices = resp.json().get("choices", []) if not choices: raise Exception("OpenRouter 빈 응답") full_text = choices[0]["message"]["content"].strip() if not full_text: raise Exception("OpenRouter 빈 응답") return _build_result( task, file_id, full_text, mode, backend="openrouter", ollama_model="", openrouter_model=model ) # ════════════════════════════════════════════════════════════════ # Ollama Vision 백엔드 # ════════════════════════════════════════════════════════════════ def _run_ollama(task, file_id, image_path, mode, ollama_model, custom_prompt): task.update_state(state="PROGRESS", meta={"progress": 15, "message": f"Ollama ({ollama_model}) 연결 중..."}) with open(image_path, "rb") as f: img_b64 = base64.b64encode(f.read()).decode() prompt = _get_prompt(mode, custom_prompt) task.update_state(state="PROGRESS", meta={"progress": 30, "message": "모델 추론 중..."}) try: resp = httpx.post( f"{OLLAMA_URL}/api/chat", json={ "model": ollama_model, "messages": [{"role": "user", "content": prompt, "images": [img_b64]}], "stream": False, "options": {"temperature": 0.1}, }, timeout=float(OLLAMA_TIMEOUT), ) resp.raise_for_status() except httpx.ConnectError: raise Exception(f"Ollama 서버 연결 실패 ({OLLAMA_URL})") except httpx.TimeoutException: raise Exception(f"Ollama 응답 시간 초과 ({OLLAMA_TIMEOUT}초). 설정에서 타임아웃을 늘려주세요.") task.update_state(state="PROGRESS", meta={"progress": 85, "message": "결과 저장 중..."}) full_text = resp.json().get("message", {}).get("content", "").strip() if not full_text: raise Exception( f"Ollama 빈 응답.\n" f"이 모델이 Vision(이미지)을 지원하는지 확인하세요: {ollama_model}\n" f"Vision 지원 모델: granite3.2-vision, llava 등" ) return _build_result( task, file_id, full_text, mode, backend="ollama", ollama_model=ollama_model, openrouter_model="" ) # ════════════════════════════════════════════════════════════════ # PaddleOCR 백엔드 (3.x — PPStructure 미사용) # ════════════════════════════════════════════════════════════════ def _run_paddle(task, file_id, image_path, mode): import cv2 img = cv2.imread(image_path) if img is None: raise ValueError("이미지를 읽을 수 없습니다. 지원 형식: jpg, png, bmp, tiff, webp") task.update_state(state="PROGRESS", meta={"progress": 30, "message": "텍스트 인식 중..."}) os.makedirs(OUTPUT_DIR, exist_ok=True) result = get_ocr().ocr(img) task.update_state(state="PROGRESS", meta={"progress": 80, "message": "결과 정리 중..."}) lines = [] if result and len(result) > 0: r = result[0] if isinstance(r, dict): # PaddleOCR 3.x 딕셔너리 형태 texts = r.get("rec_texts", []) scores = r.get("rec_scores", []) polys = r.get("rec_polys", [None] * len(texts)) for text, conf, poly in zip(texts, scores, polys): if text.strip(): lines.append({ "text": text, "confidence": round(float(conf), 3), "bbox": poly.tolist() if poly is not None and hasattr(poly, 'tolist') else [], }) elif isinstance(r, list): # 구버전 호환 [[bbox, (text, conf)], ...] for item in r: if item and len(item) == 2: bbox, (text, conf) = item if text.strip(): lines.append({"text": text, "confidence": round(float(conf), 3), "bbox": []}) full_text = "\n".join(l["text"] for l in lines) # structure 모드: 텍스트에서 마크다운 표 파싱 시도 tables = [] xlsx_file = None if mode == "structure": tables = _parse_md_tables(full_text) if tables: xlsx_file = f"{file_id}_tables.xlsx" _save_excel(tables, os.path.join(OUTPUT_DIR, xlsx_file)) txt_file = f"{file_id}_ocr.txt" with open(os.path.join(OUTPUT_DIR, txt_file), "w", encoding="utf-8") as f: f.write(full_text) tables_html = [_md_table_to_html(t) for t in tables] tables_meta = [{"html": h, "rows": len(t), "cols": max(len(r) for r in t) if t else 0} for h, t in zip(tables_html, tables)] return { "mode": mode, "backend": "paddle", "ollama_model": "", "openrouter_model": "", "full_text": full_text, "lines": lines, "line_count": len(lines), "txt_file": txt_file, "tables": tables_meta, "xlsx_file": xlsx_file, } # ════════════════════════════════════════════════════════════════ # 공통 결과 빌더 (Ollama / OpenRouter 공용) # ════════════════════════════════════════════════════════════════ def _build_result(task, file_id, full_text, mode, backend, ollama_model, openrouter_model): """마크다운 표 파싱 → Excel 생성 → 결과 딕셔너리 반환""" os.makedirs(OUTPUT_DIR, exist_ok=True) tables = _parse_md_tables(full_text) if mode == "structure" else [] txt_file = f"{file_id}_ocr.txt" label = ollama_model if backend == "ollama" else openrouter_model with open(os.path.join(OUTPUT_DIR, txt_file), "w", encoding="utf-8") as f: f.write(f"# OCR 결과 ({backend} / {label})\n\n{full_text}") xlsx_file = None if tables: task.update_state(state="PROGRESS", meta={"progress": 92, "message": "Excel 생성 중..."}) xlsx_file = f"{file_id}_tables.xlsx" _save_excel(tables, os.path.join(OUTPUT_DIR, xlsx_file)) tables_html = [_md_table_to_html(t) for t in tables] tables_meta = [{"html": h, "rows": len(t), "cols": max(len(r) for r in t) if t else 0} for h, t in zip(tables_html, tables)] lines = [{"text": l, "confidence": 1.0, "bbox": []} for l in full_text.splitlines() if l.strip()] return { "mode": mode, "backend": backend, "ollama_model": ollama_model, "openrouter_model": openrouter_model, "full_text": full_text, "lines": lines, "line_count": len(lines), "txt_file": txt_file, "tables": tables_meta, "xlsx_file": xlsx_file, } # ════════════════════════════════════════════════════════════════ # 마크다운 표 파싱 # ════════════════════════════════════════════════════════════════ def _parse_md_tables(text: str) -> list: """텍스트에서 마크다운 표 추출 → [[row, row, ...], ...]""" tables, current = [], [] for line in text.splitlines(): s = line.strip() if s.startswith("|") and s.endswith("|"): # 구분선 (|---|---) 건너뜀 if all(c in "| -:" for c in s): continue cells = [c.strip() for c in s.strip("|").split("|")] current.append(cells) else: if len(current) >= 2: tables.append(current) current = [] if len(current) >= 2: tables.append(current) return tables def _md_table_to_html(table: list) -> str: if not table: return "" rows = "" for i, row in enumerate(table): tag = "th" if i == 0 else "td" rows += "