v0.6.23: Fix Linkkf download - CDP Headers wrapper, yt-dlp --add-header support
- Fix zendriver_daemon CDP Headers bug (wrap dict with zd.cdp.network.Headers()) - Fix HTML entity decoding in iframe URLs (use html.unescape()) - Simplify GDM source_type to always use 'linkkf'
This commit is contained in:
@@ -81,6 +81,13 @@
|
||||
|
||||
## 📝 변경 이력 (Changelog)
|
||||
|
||||
### v0.6.23 (2026-01-08)
|
||||
- **Linkkf 다운로드 완전 복구**:
|
||||
- **Zendriver Daemon CDP 헤더 버그 수정**: `zd.cdp.network.Headers()` 타입 래핑 누락으로 Referer 헤더가 적용되지 않던 문제 해결.
|
||||
- **HTML 엔티티 디코딩 개선**: iframe URL의 `&` 등 HTML 엔티티를 `html.unescape()`로 올바르게 디코딩.
|
||||
- **GDM yt-dlp 헤더 전달**: `--add-header` 옵션으로 Referer/User-Agent를 yt-dlp에 전달하여 CDN 리다이렉트 방지.
|
||||
- **부수 효과**: Ohli24 등 모든 브라우저 기반 추출에서 동일한 헤더 적용 개선.
|
||||
|
||||
### v0.6.22 (2026-01-08)
|
||||
- **Linkkf 추출 로직 강화**: Cloudflare 보호가 강화된 Linkkf 도메인(flexora.xyz 등)에 대응하기 위해 브라우저 기반(Zendriver/Camoufox) 추출 엔진을 도입했습니다.
|
||||
- **오추출 방지**: 광고나 서비스 차단 페이지(Google Cloud 등)의 iframe을 비디오 URL로 오인하는 문제를 수정했습니다.
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
title: "애니 다운로더"
|
||||
version: 0.6.22
|
||||
version: 0.6.23
|
||||
package_name: "anime_downloader"
|
||||
developer: "projectdx"
|
||||
description: "anime downloader"
|
||||
|
||||
31
inspect_zendriver_test.py
Normal file
31
inspect_zendriver_test.py
Normal file
@@ -0,0 +1,31 @@
"""Standalone probe for the zendriver CDP header API.

Launches a headless browser, checks that extra HTTP headers can be set
through CDP (``Network.setExtraHTTPHeaders`` with the dict wrapped in
``zd.cdp.network.Headers`` — passing a bare dict is what broke the
daemon), then dumps the page object's public methods as JSON.

Output goes to stdout; errors are reported as a JSON object with
``error`` and ``traceback`` keys. Intended to be run directly.
"""
import asyncio
import zendriver as zd
import json
import os


async def test():
    try:
        browser = await zd.start(headless=True)
        page = await browser.get("about:blank")

        # Test header setting
        headers = {"Referer": "https://v2.linkkf.app/"}
        try:
            await page.send(zd.cdp.network.enable())
            # CDP rejects a plain dict here; it must be wrapped in the
            # generated Headers type for serialization to succeed.
            headers_obj = zd.cdp.network.Headers(headers)
            await page.send(zd.cdp.network.set_extra_http_headers(headers_obj))
            print("Successfully set headers")
        except Exception as e:
            print(f"Failed to set headers: {e}")
            import traceback
            traceback.print_exc()

        # Enumerate the page API so we can see what this zendriver
        # version actually exposes (method names vary across releases).
        methods = [m for m in dir(page) if not m.startswith("_")]
        print(json.dumps({"methods": methods}))
        await browser.stop()
    except Exception as e:
        import traceback
        print(json.dumps({"error": str(e), "traceback": traceback.format_exc()}))


if __name__ == "__main__":
    asyncio.run(test())
|
||||
@@ -109,6 +109,7 @@ class ZendriverHandler(BaseHTTPRequestHandler):
|
||||
data: Dict[str, Any] = json.loads(body)
|
||||
|
||||
url: Optional[str] = data.get("url")
|
||||
headers: Optional[Dict[str, str]] = data.get("headers")
|
||||
timeout: int = cast(int, data.get("timeout", 30))
|
||||
|
||||
if not url:
|
||||
@@ -118,7 +119,7 @@ class ZendriverHandler(BaseHTTPRequestHandler):
|
||||
# 비동기 fetch 실행
|
||||
if loop:
|
||||
future = asyncio.run_coroutine_threadsafe(
|
||||
fetch_with_browser(url, timeout), loop
|
||||
fetch_with_browser(url, timeout, headers), loop
|
||||
)
|
||||
result: Dict[str, Any] = future.result(timeout=timeout + 15)
|
||||
self._send_json(200, result)
|
||||
@@ -254,8 +255,8 @@ async def ensure_browser() -> Any:
|
||||
return browser
|
||||
|
||||
|
||||
async def fetch_with_browser(url: str, timeout: int = 30) -> Dict[str, Any]:
|
||||
"""상시 대기 브라우저로 HTML 페칭 (탭 유지 방식)"""
|
||||
async def fetch_with_browser(url: str, timeout: int = 30, headers: Optional[Dict[str, str]] = None) -> Dict[str, Any]:
|
||||
"""상시 대기 브라우저로 HTML 페칭 (탭 유지 방식, 헤더 지원)"""
|
||||
global browser
|
||||
|
||||
result: Dict[str, Any] = {"success": False, "html": "", "elapsed": 0.0}
|
||||
@@ -283,7 +284,22 @@ async def fetch_with_browser(url: str, timeout: int = 30) -> Dict[str, Any]:
|
||||
|
||||
# 페이지 로드 시도
|
||||
try:
|
||||
page = await asyncio.wait_for(browser.get(url), timeout=20)
|
||||
# 탭(페이지) 열기 (브라우저가 없으면 생성)
|
||||
page = await browser.get("about:blank") # 새 탭 열기 대신 기존 탭 재활용 혹은 about:blank 이동
|
||||
|
||||
# 헤더 설정 (CDP 사용)
|
||||
if headers:
|
||||
try:
|
||||
log_debug(f"[ZendriverDaemon] Setting headers: {list(headers.keys())}")
|
||||
await page.send(zd.cdp.network.enable())
|
||||
# Wrap dict with Headers type for CDP compatibility
|
||||
cdp_headers = zd.cdp.network.Headers(headers)
|
||||
await page.send(zd.cdp.network.set_extra_http_headers(cdp_headers))
|
||||
except Exception as e:
|
||||
log_debug(f"[ZendriverDaemon] Failed to set headers: {e}")
|
||||
|
||||
# 실제 페이지 로드
|
||||
await asyncio.wait_for(page.get(url), timeout=20)
|
||||
nav_elapsed = time.time() - nav_start
|
||||
except asyncio.TimeoutError:
|
||||
log_debug(f"[ZendriverDaemon] Navigation timeout after 20s")
|
||||
|
||||
@@ -543,7 +543,7 @@ class LogicLinkkf(AnimeModuleBase):
|
||||
try:
|
||||
if LogicOhli24.is_zendriver_daemon_running():
|
||||
logger.info(f"[Linkkf] Trying Zendriver Daemon: {url}")
|
||||
daemon_res = LogicOhli24.fetch_via_daemon(url, timeout=30)
|
||||
daemon_res = LogicOhli24.fetch_via_daemon(url, timeout=30, headers=LogicLinkkf.headers)
|
||||
if daemon_res.get("success") and daemon_res.get("html"):
|
||||
elapsed = time.time() - start_time
|
||||
logger.info(f"[Linkkf] Daemon success in {elapsed:.2f}s")
|
||||
@@ -712,9 +712,9 @@ class LogicLinkkf(AnimeModuleBase):
|
||||
|
||||
if iframe and iframe.get("src"):
|
||||
iframe_src = iframe.get("src")
|
||||
# HTML entity decoding (&amp; -> &)
|
||||
if "&amp;" in iframe_src:
|
||||
iframe_src = iframe_src.replace("&amp;", "&")
|
||||
# HTML entity decoding (&amp; -> &, &#38; -> &, etc.)
|
||||
import html as html_lib
|
||||
iframe_src = html_lib.unescape(iframe_src)
|
||||
|
||||
logger.info(f"Found player iframe: {iframe_src}")
|
||||
|
||||
@@ -725,46 +725,67 @@ class LogicLinkkf(AnimeModuleBase):
|
||||
return None, iframe_src, None
|
||||
|
||||
# m3u8 URL 패턴 찾기 (더 정밀하게)
|
||||
# 패턴 1: url: 'https://...m3u8'
|
||||
# 패턴 1: url: 'https://...m3u8' 또는 url: "https://...m3u8"
|
||||
m3u8_pattern = re.compile(r"url:\s*['\"]([^'\"]*\.m3u8[^'\"]*)['\"]")
|
||||
m3u8_match = m3u8_pattern.search(iframe_content)
|
||||
|
||||
# 패턴 2: <source src="https://...m3u8">
|
||||
if not m3u8_match:
|
||||
source_pattern = re.compile(r"<source[^>]+src=['\"]([^'\"]*\.m3u8[^'\"]*)['\"]")
|
||||
source_pattern = re.compile(r"<source[^>]+src=['\"]([^'\"]*\.m3u8[^'\"]*)['\"]", re.IGNORECASE)
|
||||
m3u8_match = source_pattern.search(iframe_content)
|
||||
|
||||
# 패턴 3: var src = '...m3u8'
|
||||
if not m3u8_match:
|
||||
src_pattern = re.compile(r"src\s*=\s*['\"]([^'\"]*\.m3u8[^'\"]*)['\"]")
|
||||
src_pattern = re.compile(r"src\s*=\s*['\"]([^'\"]*\.m3u8[^'\"]*)['\"]", re.IGNORECASE)
|
||||
m3u8_match = src_pattern.search(iframe_content)
|
||||
|
||||
if m3u8_match:
|
||||
# 패턴 4: Artplayer 전용 더 넓은 범위
|
||||
if not m3u8_match:
|
||||
art_pattern = re.compile(r"url\s*:\s*['\"]([^'\"]+)['\"]")
|
||||
matches = art_pattern.findall(iframe_content)
|
||||
for m in matches:
|
||||
if ".m3u8" in m:
|
||||
video_url = m
|
||||
break
|
||||
if video_url:
|
||||
logger.info(f"Extracted m3u8 via Artplayer pattern: {video_url}")
|
||||
|
||||
if m3u8_match and not video_url:
|
||||
video_url = m3u8_match.group(1)
|
||||
|
||||
if video_url:
|
||||
# 상대 경로 처리 (예: cache/...)
|
||||
if video_url.startswith('cache/') or video_url.startswith('/cache/'):
|
||||
from urllib.parse import urljoin
|
||||
video_url = urljoin(iframe_src, video_url)
|
||||
logger.info(f"Extracted m3u8 URL: {video_url}")
|
||||
else:
|
||||
logger.warning(f"m3u8 URL not found in iframe. Content snippet: {iframe_content[:200]}...")
|
||||
logger.warning(f"m3u8 URL not found in iframe for: {playid_url}")
|
||||
# HTML 내용이 너무 길면 앞부분만 로깅
|
||||
snippet = iframe_content.replace('\n', ' ')
|
||||
logger.debug(f"Iframe Content snippet (500 chars): {snippet[:500]}...")
|
||||
# 'cache/' 가 들어있는지 확인
|
||||
if 'cache/' in iframe_content:
|
||||
logger.debug("Found 'cache/' keyword in iframe content but regex failed. Inspection required.")
|
||||
|
||||
# VTT 자막 URL 추출
|
||||
vtt_pattern = re.compile(r"['\"]src['\"]?:\s*['\"]([^'\"]*\.vtt)['\"]")
|
||||
# VTT 자막 URL 추출 (패턴 1: generic src)
|
||||
vtt_pattern = re.compile(r"['\"]src['\"]?:\s*['\"]([^'\"]*\.vtt)['\"]", re.IGNORECASE)
|
||||
vtt_match = vtt_pattern.search(iframe_content)
|
||||
if not vtt_match:
|
||||
vtt_pattern2 = re.compile(r"url:\s*['\"]([^'\"]*\.vtt)['\"]")
|
||||
vtt_match = vtt_pattern2.search(iframe_content)
|
||||
if not vtt_match:
|
||||
vtt_pattern3 = re.compile(r"<track[^>]+src=['\"]([^'\"]*\.vtt)['\"]")
|
||||
vtt_match = vtt_pattern3.search(iframe_content)
|
||||
|
||||
# 패턴 2: url: '...vtt' (Artplayer 등)
|
||||
if not vtt_match:
|
||||
vtt_pattern = re.compile(r"url:\s*['\"]([^'\"]*\.vtt[^'\"]*)['\"]", re.IGNORECASE)
|
||||
vtt_match = vtt_pattern.search(iframe_content)
|
||||
|
||||
if vtt_match:
|
||||
vtt_url = vtt_match.group(1)
|
||||
if vtt_url.startswith('/'):
|
||||
if vtt_url.startswith('s/') or vtt_url.startswith('/s/'):
|
||||
from urllib.parse import urljoin
|
||||
vtt_url = urljoin(iframe_src, vtt_url)
|
||||
logger.info(f"Extracted VTT URL: {vtt_url}")
|
||||
else:
|
||||
logger.debug("VTT URL not found in iframe content.")
|
||||
|
||||
referer_url = iframe_src
|
||||
else:
|
||||
@@ -1664,9 +1685,8 @@ class LogicLinkkf(AnimeModuleBase):
|
||||
download_method = P.ModelSetting.get("linkkf_download_method") or "ytdlp"
|
||||
download_threads = P.ModelSetting.get_int("linkkf_download_threads") or 16
|
||||
|
||||
# Linkkf는 항상 'linkkf' source_type 사용 (GDM에서 YtdlpAria2Downloader로 매핑됨)
|
||||
gdm_source_type = "linkkf"
|
||||
if download_method in ['ytdlp', 'aria2c']:
|
||||
gdm_source_type = "general"
|
||||
|
||||
# Prepare GDM options
|
||||
gdm_options = {
|
||||
|
||||
@@ -290,13 +290,17 @@ class LogicOhli24(AnimeModuleBase):
|
||||
return False
|
||||
|
||||
@classmethod
|
||||
def fetch_via_daemon(cls, url: str, timeout: int = 30) -> dict:
|
||||
"""데몬을 통한 HTML 페칭 (빠름)"""
|
||||
def fetch_via_daemon(cls, url: str, timeout: int = 30, headers: dict = None) -> dict:
|
||||
"""데몬을 통한 HTML 페칭 (빠름, 헤더 지원)"""
|
||||
try:
|
||||
import requests
|
||||
payload = {"url": url, "timeout": timeout}
|
||||
if headers:
|
||||
payload["headers"] = headers
|
||||
|
||||
resp = requests.post(
|
||||
f"http://127.0.0.1:{cls.zendriver_daemon_port}/fetch",
|
||||
json={"url": url, "timeout": timeout},
|
||||
json=payload,
|
||||
timeout=timeout + 5
|
||||
)
|
||||
if resp.status_code == 200:
|
||||
|
||||
128
zd_debug.py
128
zd_debug.py
@@ -1,61 +1,85 @@
|
"""End-to-end check of the zendriver daemon's /fetch endpoint.

Posts a Linkkf player URL (with Referer/User-Agent headers) to the
locally running daemon on port 19876, saves the returned HTML to
``linkkf_player_test.html``, and runs the same m3u8/cache regex
patterns used by ``mod_linkkf.py`` to confirm extraction still works.

NOTE(review): reconstructed from an interleaved unified diff — the
nesting of the Artplayer/video fallback checks was inferred from diff
order; confirm against the committed file.
"""

import asyncio
import zendriver as zd
import requests
import json
import re
import sys
import os
import subprocess


def test_fetch():
    url = "https://playv2.sub3.top/r2/play.php?&id=n20&url=405686s1"
    headers = {
        "Referer": "https://linkkf.live/",
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36"
    }

    daemon_url = "http://127.0.0.1:19876/fetch"
    payload = {
        "url": url,
        "headers": headers,
        "timeout": 30
    }

    print(f"Fetching {url} via daemon...")
    try:
        # Client timeout is padded past the daemon's own 30s fetch budget.
        resp = requests.post(daemon_url, json=payload, timeout=40)
        if resp.status_code != 200:
            print(f"Error: HTTP {resp.status_code}")
            print(resp.text)
            return

        data = resp.json()
        if not data.get("success"):
            print(f"Fetch failed: {data.get('error')}")
            return

        html = data.get("html", "")
        print(f"Fetch success. Length: {len(html)}")

        # Save for inspection
        with open("linkkf_player_test.html", "w", encoding="utf-8") as f:
            f.write(html)
        print("Saved to linkkf_player_test.html")

        # Try regex patterns from mod_linkkf.py
        patterns = [
            r"url:\s*['\"]([^'\"]*\.m3u8[^'\"]*)['\"]",
            r"<source[^>]+src=['\"]([^'\"]*\.m3u8[^'\"]*)['\"]",
            r"src\s*=\s*['\"]([^'\"]*\.m3u8[^'\"]*)['\"]",
            r"url\s*:\s*['\"]([^'\"]+)['\"]"
        ]

        found = False
        for p in patterns:
            match = re.search(p, html, re.IGNORECASE)
            if match:
                url_found = match.group(1)
                # The last pattern is broad; only report hits that are m3u8.
                if ".m3u8" in url_found or "m3u8" in p:
                    print(f"Pattern '{p}' found: {url_found}")
                    found = True

        if not found:
            print("No m3u8 found with existing patterns.")
            # Search for any .m3u8
            any_m3u8 = re.findall(r"['\"]([^'\"]*\.m3u8[^'\"]*)['\"]", html)
            if any_m3u8:
                print(f"Generic search found {len(any_m3u8)} m3u8 links:")
                for m in any_m3u8[:5]:
                    print(f" - {m}")
            else:
                print("No .m3u8 found in generic search either.")
                # Check for other video extensions or potential indicators
                if "Artplayer" in html:
                    print("Artplayer detected.")
                if "video" in html:
                    print("Video tag found.")

        # Check for 'cache/'
        if "cache/" in html:
            print("Found 'cache/' keyword.")
            cache_links = re.findall(r"['\"]([^'\"]*cache/[^'\"]*)['\"]", html)
            for c in cache_links:
                print(f" - Possible cache link: {c}")
    except Exception as e:
        print(f"Exception: {e}")


if __name__ == "__main__":
    test_fetch()
|
||||
|
||||
Reference in New Issue
Block a user