diff --git a/lib/camoufox_anilife.py b/lib/camoufox_anilife.py index 5c4aa34..4003fec 100644 --- a/lib/camoufox_anilife.py +++ b/lib/camoufox_anilife.py @@ -13,40 +13,34 @@ import re import os async def _wait_for_aldata(page, timeout=8): - """_aldata 변수가 나타날 때까지 폴링 (최대 timeout초)""" + """_aldata 변수가 나타날 때까지 고속 폴링 (50ms)""" start_time = asyncio.get_event_loop().time() while asyncio.get_event_loop().time() - start_time < timeout: try: - # 1. JS 변수 확인 + # 1. JS 변수 확인 (가장 빠름) aldata = await page.evaluate("typeof _aldata !== 'undefined' ? _aldata : null") if aldata: return aldata, "JS" - # 2. HTML 소스 패턴 확인 + # 2. HTML 소스 패턴 확인 (커밋 직후에 바로 걸릴 수 있음) html = await page.content() match = re.search(r'_aldata\s*=\s*["\']([A-Za-z0-9+/=]+)["\']', html) if match: return match.group(1), "HTML" except: pass - await asyncio.sleep(0.2) + await asyncio.sleep(0.05) # 50ms로 단축 return None, None -async def _run_browser(browser, detail_url, episode_num, result): - """최적화된 브라우저 작업 수행""" +async def _run_browser(browser, detail_url, episode_num, result, provider_url=None): + """세션 유지 + 직접 리다이렉트 방식 (클릭/새창 없음)""" start_time_all = asyncio.get_event_loop().time() page = await browser.new_page() - # 공격적 리소스 및 트래킹 차단 + # 리소스 차단 (스크립트는 허용) async def intercept(route): - req_url = route.request.url.lower() resource_type = route.request.resource_type - - # 차단 목록: 이미지, 미디어, 폰트, 스타일시트, 분석/광고 스크립트 - block_patterns = ["google-analytics", "googletagmanager", "facebook.net", "ads"] - block_types = ["image", "media", "font", "stylesheet"] - - if resource_type in block_types or any(p in req_url for p in block_patterns): + if resource_type in ["image", "media", "font", "stylesheet"]: await route.abort() else: await route.continue_() @@ -54,78 +48,55 @@ async def _run_browser(browser, detail_url, episode_num, result): await page.route("**/*", intercept) try: - # 1. Detail 페이지 이동 + # 1. Detail 페이지 이동 → 세션/쿠키 획득 t_nav_start = asyncio.get_event_loop().time() - print(f"1. Navigating: {detail_url}", file=sys.stderr) - await page.goto(detail_url, wait_until="commit", timeout=15000) - print(f" Navigation took: {round(asyncio.get_event_loop().time() - t_nav_start, 2)}s", file=sys.stderr) + print(f"1. Session: {detail_url}", file=sys.stderr) + await page.goto(detail_url, wait_until="commit", timeout=10000) + print(f" Done in {round(asyncio.get_event_loop().time() - t_nav_start, 2)}s", file=sys.stderr) - # 2. 에피소드 링크 찾기 및 클릭 + # 2. 에피소드 링크에서 href 추출 (클릭 X) t_find_start = asyncio.get_event_loop().time() - print(f"2. Searching episode {episode_num}...", file=sys.stderr) - episode_link = None - for _ in range(20): # 약 4초 - try: - # epl-num 텍스트 매칭 - episode_link = page.locator(f'a:has(.epl-num:text("{episode_num}"))').first - if await episode_link.is_visible(): - break - - # 대체: provider 링크 - links = await page.locator('a[href*="/ani/provider/"]').all() - for link in links: - if episode_num in await link.inner_text(): - episode_link = link - break - if episode_link: break - except: pass - await asyncio.sleep(0.2) + print(f"2. Finding ep {episode_num} link...", file=sys.stderr) - if not episode_link: - result["error"] = f"Episode {episode_num} not found" - return result - - print(f" Finding link took: {round(asyncio.get_event_loop().time() - t_find_start, 2)}s", file=sys.stderr) - - # 3. 에피소드 클릭 - t_click_start = asyncio.get_event_loop().time() + episode_link = page.locator(f'a:has(.epl-num:text("{episode_num}"))').first + for _ in range(20): + if await episode_link.is_visible(): break + await asyncio.sleep(0.1) + + # 클릭 방식으로 네비게이션 (직접 URL 접근은 사이트에서 막힘) + print(f" Link found in {round(asyncio.get_event_loop().time() - t_find_start, 2)}s. Clicking...", file=sys.stderr) await episode_link.click() - # 4. _aldata 추출 (최대 6초 폴링) + # 3. _aldata 추출 (고속 폴링) + print("3. Extracting _aldata...", file=sys.stderr) aldata, source = await _wait_for_aldata(page, timeout=6) + # 버튼 클릭 폴백 + if not aldata: + print(" Trying player button...", file=sys.stderr) + btn = page.locator('a[onclick*="moveCloudvideo"], a[onclick*="moveJawcloud"]').first + for _ in range(20): # 2초 대기 + if await btn.is_visible(): break + await asyncio.sleep(0.1) + if await btn.is_visible(): + await btn.click(force=True) + aldata, source = await _wait_for_aldata(page, timeout=4) + if aldata: source = f"{source}-btn" + if aldata: elapsed = asyncio.get_event_loop().time() - start_time_all - result.update({ - "aldata": aldata, "success": True, - "elapsed": round(elapsed, 2), "source": source - }) - print(f" SUCCESS! Extracted via {source} in {result['elapsed']}s", file=sys.stderr) + result.update({"aldata": aldata, "success": True, "elapsed": round(elapsed, 2), "source": source}) + print(f" SUCCESS in {result['elapsed']}s ({source})", file=sys.stderr) return result - - # 5. 최후의 수단: 플레이어 버튼 클릭 시도 - print(f" Initial extraction failed ({round(asyncio.get_event_loop().time() - t_click_start, 2)}s). Trying player button...", file=sys.stderr) - btn = page.locator('a[onclick*="moveCloudvideo"], a[onclick*="moveJawcloud"]').first - if await btn.is_visible(timeout=1500): - await btn.click() - aldata, source = await _wait_for_aldata(page, timeout=4) - if aldata: - elapsed = asyncio.get_event_loop().time() - start_time_all - result.update({ - "aldata": aldata, "success": True, - "elapsed": round(elapsed, 2), "source": f"{source}-player" - }) - print(f" SUCCESS! Got aldata via player in {result['elapsed']}s", file=sys.stderr) - return result - - result["error"] = "Aldata extraction failed" + + result["error"] = "Failed to extract aldata" finally: await page.close() return result -async def extract_aldata(detail_url: str, episode_num: str) -> dict: +async def extract_aldata(detail_url: str, episode_num: str, provider_url: str = None) -> dict: """AsyncCamoufox Stealth-Headless mode""" try: from camoufox.async_api import AsyncCamoufox @@ -137,7 +108,7 @@ async def extract_aldata(detail_url: str, episode_num: str) -> dict: try: # Camoufox는 headless=True에서도 강력한 스텔스를 제공함 (Xvfb 오버헤드 불필요) async with AsyncCamoufox(headless=True) as browser: - return await _run_browser(browser, detail_url, episode_num, result) + return await _run_browser(browser, detail_url, episode_num, result, provider_url) except Exception as e: result["error"] = str(e) @@ -148,9 +119,13 @@ if __name__ == "__main__": if len(sys.argv) < 3: sys.exit(1) + detail_url = sys.argv[1] + episode_num = sys.argv[2] + provider_url = sys.argv[3] if len(sys.argv) > 3 else None + # stdout에는 오직 JSON만 출력하도록 보장 try: - res = asyncio.run(extract_aldata(sys.argv[1], sys.argv[2])) + res = asyncio.run(extract_aldata(detail_url, episode_num, provider_url)) # 최종 JSON 결과 출력 print(json.dumps(res, ensure_ascii=False)) except Exception as e: diff --git a/lib/test_camoufox_direct.py b/lib/test_camoufox_direct.py new file mode 100644 index 0000000..5bd9a03 --- /dev/null +++ b/lib/test_camoufox_direct.py @@ -0,0 +1,48 @@ +import asyncio +import sys +import json +import re +from camoufox.async_api import AsyncCamoufox + +async def test_extraction(detail_url, provider_url): + print(f"Testing direct navigation with Referer...") + async with AsyncCamoufox(headless=True) as browser: + page = await browser.new_page() + + # 1. Detail page (establish Referer) + print(f"1. Estabilishing Referer: {detail_url}") + t1 = asyncio.get_event_loop().time() + await page.goto(detail_url, wait_until="commit") + print(f" Took {round(asyncio.get_event_loop().time() - t1, 2)}s") + + # 2. Same-session direct navigation to provider + print(f"2. Navigating directly to provider: {provider_url}") + t2 = asyncio.get_event_loop().time() + await page.goto(provider_url, wait_until="commit") + print(f" Took {round(asyncio.get_event_loop().time() - t2, 2)}s") + + # 3. Check for aldata + html = await page.content() + final_url = page.url + print(f"Final URL: {final_url}") + + if "google.com" in final_url: + print("FAILED: Redirected to Google (Bot detection triggered)") + else: + match = re.search(r'_aldata\s*=\s*["\']([A-Za-z0-9+/=]+)["\']', html) + if match: + print("SUCCESS: Got aldata via direct navigation!") + else: + print("FAILED: Aldata not found in HTML") + +if __name__ == "__main__": + # Sample URLs for testing + # Note: These are placeholders, I will use real ones if available from logs + d_url = "https://anilife.live/detail/id/2967" + p_url = "https://anilife.live/ani/provider/31db6215-62bb-420a-8d18-9717013854eb" + + if len(sys.argv) > 2: + d_url = sys.argv[1] + p_url = sys.argv[2] + + asyncio.run(test_extraction(d_url, p_url)) diff --git a/mod_anilife.py b/mod_anilife.py index bd14b3a..9e8cdbe 100644 --- a/mod_anilife.py +++ b/mod_anilife.py @@ -1281,7 +1281,7 @@ class AniLifeQueueEntity(FfmpegQueueEntity): import json as json_module # 셋업 확인 (이미 완료되었으면 즉시 반환, 아니면 대기) - if not self.ensure_camoufox_installed(): + if not self.module_logic.ensure_camoufox_installed(): logger.error("Camoufox installation failed. Cannot proceed.") return @@ -1291,28 +1291,51 @@ class AniLifeQueueEntity(FfmpegQueueEntity): # detail_url과 episode_num 추출 detail_url = self.info.get("ep_url", f"https://anilife.live/detail/id/{self.info.get('content_code', '')}") episode_num = str(self.info.get("ep_num", "1")) + provider_url = self.info.get("va") # 직접 진입용 프로바이더 URL + if provider_url and provider_url.startswith("/"): + provider_url = f"https://anilife.live{provider_url}" logger.debug(f"Running Camoufox subprocess: {script_path}") - logger.debug(f"Detail URL: {detail_url}, Episode: {episode_num}") + logger.debug(f"Detail URL: {detail_url}, Episode: {episode_num}, Provider: {provider_url}") - # subprocess로 Camoufox 스크립트 실행 - result = subprocess.run( - [sys.executable, script_path, detail_url, episode_num], - capture_output=True, - text=True, - timeout=120 # 120초 타임아웃 + # subprocess로 Camoufox 스크립트 실행 (stderr 실시간 로그 연동) + cmd = [sys.executable, script_path, detail_url, episode_num] + if provider_url: + cmd.append(provider_url) + + process = subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True ) - if result.returncode != 0: - logger.error(f"Camoufox subprocess failed: {result.stderr}") - raise Exception(f"Subprocess error: {result.stderr}") + # stderr를 실시간으로 logger.info에 기록 (진단 가시성 확보) + stdout_data = [] + import threading + def log_stderr(pipe): + for line in iter(pipe.readline, ''): + if line.strip(): + logger.info(f"[Camoufox] {line.strip()}") - # JSON 결과 파싱 (엄격한 분리를 통해 stdout에는 JSON만 남음) + stderr_thread = threading.Thread(target=log_stderr, args=(process.stderr,)) + stderr_thread.start() + + # stdout 캡처 (JSON 결과) + for line in iter(process.stdout.readline, ''): + stdout_data.append(line) + + process.wait(timeout=120) + stderr_thread.join(timeout=5) + + stdout_full = "".join(stdout_data) + + # JSON 결과 파싱 try: - cf_result = json_module.loads(result.stdout) + cf_result = json_module.loads(stdout_full) except json_module.JSONDecodeError as e: logger.error(f"Failed to parse Camoufox result: {e}") - logger.error(f"Raw stdout: {result.stdout}") + logger.debug(f"Raw stdout: {stdout_full}") return elapsed = cf_result.get("elapsed", "?")