v0.6.22: Robust Linkkf extraction with zendriver fallback

This commit is contained in:
2026-01-07 23:48:00 +09:00
parent 76be367a9e
commit d1866111c7
3 changed files with 135 additions and 66 deletions

View File

@@ -81,7 +81,11 @@
## 📝 변경 이력 (Changelog) ## 📝 변경 이력 (Changelog)
### v0.6.21 (2026-01-07) ### v0.6.22 (2026-01-08)
- **Linkkf 추출 로직 강화**: Cloudflare 보호가 강화된 Linkkf 도메인(flexora.xyz 등)에 대응하기 위해 브라우저 기반(Zendriver/Camoufox) 추출 엔진을 도입했습니다.
- **오추출 방지**: 광고나 서비스 차단 페이지(Google Cloud 등)의 iframe을 비디오 URL로 오인하는 문제를 수정했습니다.
### v0.6.21 (2026-01-07)
- **Linkkf GDM 연동 수정**: - **Linkkf GDM 연동 수정**:
- GDM 위임 전 실제 스트림 URL(m3u8) 추출 로직을 강제 호출하여 "Invalid data" 오류 해결. - GDM 위임 전 실제 스트림 URL(m3u8) 추출 로직을 강제 호출하여 "Invalid data" 오류 해결.
- Linkkf 설정의 다운로드 방식 및 쓰레드 수를 GDM에 전달하도록 개선. - Linkkf 설정의 다운로드 방식 및 쓰레드 수를 GDM에 전달하도록 개선.

View File

@@ -1,5 +1,5 @@
title: "애니 다운로더" title: "애니 다운로더"
version: "0.6.21" version: "0.6.22"
package_name: "anime_downloader" package_name: "anime_downloader"
developer: "projectdx" developer: "projectdx"
description: "anime downloader" description: "anime downloader"

View File

@@ -41,6 +41,7 @@ import cloudscraper
from anime_downloader.lib.ffmpeg_queue_v1 import FfmpegQueue, FfmpegQueueEntity from anime_downloader.lib.ffmpeg_queue_v1 import FfmpegQueue, FfmpegQueueEntity
from anime_downloader.lib.util import Util from anime_downloader.lib.util import Util
from .mod_ohli24 import LogicOhli24
# 패키지 # 패키지
# from .plugin import P # from .plugin import P
@@ -529,28 +530,79 @@ class LogicLinkkf(AnimeModuleBase):
@staticmethod @staticmethod
def get_html_cloudflare(url, cached=False, timeout=10): def get_html_cloudflare(url, cached=False, timeout=10):
"""Cloudflare 보호 우회를 위한 HTTP 요청 (싱글톤 패턴)""" """Cloudflare 보호 우회를 위한 HTTP 요청 (Zendriver Daemon -> Scraper -> Zendriver Subprocess -> Camoufox 순)"""
user_agents_list = [ start_time = time.time()
"Mozilla/5.0 (iPad; CPU OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.83 Safari/537.36", # 0. Referer 설정
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36", if LogicLinkkf.referer is None:
] LogicLinkkf.referer = f"{P.ModelSetting.get('linkkf_url')}"
LogicLinkkf.headers["User-Agent"] = random.choice(user_agents_list)
LogicLinkkf.headers["Referer"] = LogicLinkkf.referer or "" LogicLinkkf.headers["Referer"] = LogicLinkkf.referer or ""
# cloudscraper 싱글톤 패턴 - 매 요청마다 생성하지 않음 # 1. Zendriver Daemon 시도 (최우선)
if LogicLinkkf._scraper is None: try:
LogicLinkkf._scraper = cloudscraper.create_scraper( if LogicOhli24.is_zendriver_daemon_running():
delay=10, logger.info(f"[Linkkf] Trying Zendriver Daemon: {url}")
browser={"custom": "linkkf"}, daemon_res = LogicOhli24.fetch_via_daemon(url, timeout=30)
) if daemon_res.get("success") and daemon_res.get("html"):
elapsed = time.time() - start_time
logger.info(f"[Linkkf] Daemon success in {elapsed:.2f}s")
return daemon_res["html"]
except Exception as e:
logger.warning(f"[Linkkf] Daemon error: {e}")
return LogicLinkkf._scraper.get( # 2. Scraper 시도 (기본)
url, try:
headers=LogicLinkkf.headers, if LogicLinkkf._scraper is None:
timeout=timeout, LogicLinkkf._scraper = cloudscraper.create_scraper(
).content.decode("utf8", errors="replace") delay=10,
browser={"custom": "linkkf"},
)
user_agents_list = [
"Mozilla/5.0 (iPad; CPU OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.83 Safari/537.36",
]
LogicLinkkf.headers["User-Agent"] = random.choice(user_agents_list)
response = LogicLinkkf._scraper.get(url, headers=LogicLinkkf.headers, timeout=timeout)
# 챌린지 페이지가 아닌 실제 콘텐츠가 포함되었는지 확인
content = response.text
if "Cloudflare" not in content or "video-player" in content or "iframe" in content:
return content
logger.warning("[Linkkf] Scraper returned challenge page, falling back to browser...")
except Exception as e:
logger.warning(f"[Linkkf] Scraper error: {e}")
# 3. Zendriver Subprocess Fallback
try:
if LogicOhli24.ensure_zendriver_installed():
logger.info(f"[Linkkf] Trying Zendriver subprocess: {url}")
script_path = os.path.join(os.path.dirname(__file__), "lib", "zendriver_ohli24.py")
cmd = [sys.executable, script_path, url, str(30)]
result = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
if result.returncode == 0 and result.stdout.strip():
zd_result = json.loads(result.stdout.strip())
if zd_result.get("success") and zd_result.get("html"):
return zd_result["html"]
except Exception as e:
logger.warning(f"[Linkkf] Zendriver fallback error: {e}")
# 4. Camoufox Fallback
try:
logger.info(f"[Linkkf] Trying Camoufox fallback: {url}")
script_path = os.path.join(os.path.dirname(__file__), "lib", "camoufox_ohli24.py")
result = subprocess.run([sys.executable, script_path, url, str(30)], capture_output=True, text=True, timeout=60)
if result.returncode == 0 and result.stdout.strip():
cf_result = json.loads(result.stdout.strip())
if cf_result.get("success") and cf_result.get("html"):
return cf_result["html"]
except Exception as e:
logger.warning(f"[Linkkf] Camoufox fallback error: {e}")
return ""
@staticmethod @staticmethod
def add_whitelist(*args): def add_whitelist(*args):
@@ -632,81 +684,94 @@ class LogicLinkkf(AnimeModuleBase):
try: try:
logger.info(f"Extracting video URL from: {playid_url}") logger.info(f"Extracting video URL from: {playid_url}")
# Step 1: playid 페이지에서 iframe src 추출 # Step 1: playid 페이지에서 iframe src 추출 (cloudscraper 사용)
headers = { html_content = LogicLinkkf.get_html(playid_url)
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36", if not html_content:
"Referer": "https://linkkf.live/" logger.error(f"Failed to fetch playid page: {playid_url}")
} return None, None, None
response = requests.get(playid_url, headers=headers, timeout=15)
html_content = response.text
soup = BeautifulSoup(html_content, "html.parser") soup = BeautifulSoup(html_content, "html.parser")
# iframe 찾기 (id="video-player-iframe" 또는 play.sub3.top 포함) # iframe 찾기 (광고 iframe 제외를 위해 id나 src 패턴 강조)
iframe = soup.select_one("iframe#video-player-iframe") iframe = soup.select_one("iframe#video-player-iframe")
if not iframe: if not iframe:
iframe = soup.select_one("iframe[src*='play.sub']") iframe = soup.select_one("iframe[src*='play.sub']")
if not iframe: if not iframe:
iframe = soup.select_one("iframe") iframe = soup.select_one("iframe[src*='play.php']")
# fallback if strictly needed but skip ad domains
if not iframe:
all_iframes = soup.select("iframe")
for f in all_iframes:
src = f.get("src", "")
if any(x in src for x in ["googletag", "googlead", "adsystem", "cloud.google"]):
continue
if src.startswith("http"):
iframe = f
break
if iframe and iframe.get("src"): if iframe and iframe.get("src"):
iframe_src = iframe.get("src") iframe_src = iframe.get("src")
logger.info(f"Found iframe: {iframe_src}") # HTML entity decoding (&amp; -> &)
if "&amp;" in iframe_src:
iframe_src = iframe_src.replace("&amp;", "&")
logger.info(f"Found player iframe: {iframe_src}")
# Step 2: iframe 페이지에서 m3u8 URL과 vtt URL 추출 # Step 2: iframe 페이지에서 m3u8 URL과 vtt URL 추출
iframe_headers = { iframe_content = LogicLinkkf.get_html(iframe_src)
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36", if not iframe_content:
"Referer": playid_url logger.error(f"Failed to fetch iframe content: {iframe_src}")
} return None, iframe_src, None
iframe_response = requests.get(iframe_src, headers=iframe_headers, timeout=15) # m3u8 URL 패턴 찾기 (더 정밀하게)
iframe_content = iframe_response.text # 패턴 1: url: 'https://...m3u8'
m3u8_pattern = re.compile(r"url:\s*['\"]([^'\"]*\.m3u8[^'\"]*)['\"]")
# m3u8 URL 패턴 찾기
# 예: url: 'https://n8.hlz3.top/403116s11/index.m3u8'
m3u8_pattern = re.compile(r"url:\s*['\"]([^'\"]*\.m3u8)['\"]")
m3u8_match = m3u8_pattern.search(iframe_content) m3u8_match = m3u8_pattern.search(iframe_content)
# 패턴 2: <source src="https://...m3u8">
if not m3u8_match:
source_pattern = re.compile(r"<source[^>]+src=['\"]([^'\"]*\.m3u8[^'\"]*)['\"]")
m3u8_match = source_pattern.search(iframe_content)
# 패턴 3: var src = '...m3u8'
if not m3u8_match:
src_pattern = re.compile(r"src\s*=\s*['\"]([^'\"]*\.m3u8[^'\"]*)['\"]")
m3u8_match = src_pattern.search(iframe_content)
if m3u8_match: if m3u8_match:
video_url = m3u8_match.group(1) video_url = m3u8_match.group(1)
# 상대 경로 처리 (예: cache/...) # 상대 경로 처리 (예: cache/...)
if video_url.startswith('cache/'): if video_url.startswith('cache/') or video_url.startswith('/cache/'):
from urllib.parse import urljoin from urllib.parse import urljoin
video_url = urljoin(iframe_src, video_url) video_url = urljoin(iframe_src, video_url)
logger.info(f"Found m3u8 URL: {video_url}") logger.info(f"Extracted m3u8 URL: {video_url}")
else: else:
# 대안 패턴: source src logger.warning(f"m3u8 URL not found in iframe. Content snippet: {iframe_content[:200]}...")
source_pattern = re.compile(r"<source[^>]+src=['\"]([^'\"]+)['\"]")
source_match = source_pattern.search(iframe_content)
if source_match:
video_url = source_match.group(1)
if video_url.startswith('cache/'):
from urllib.parse import urljoin
video_url = urljoin(iframe_src, video_url)
logger.info(f"Found source URL: {video_url}")
# VTT 자막 URL 추출 # VTT 자막 URL 추출
# 예: <track src="https://...vtt" kind="subtitles"> vtt_pattern = re.compile(r"['\"]src['\"]?:\s*['\"]([^'\"]*\.vtt)['\"]")
vtt_pattern = re.compile(r"<track[^>]+src=['\"]([^'\"]*\.vtt)['\"]")
vtt_match = vtt_pattern.search(iframe_content) vtt_match = vtt_pattern.search(iframe_content)
if not vtt_match:
vtt_pattern2 = re.compile(r"url:\s*['\"]([^'\"]*\.vtt)['\"]")
vtt_match = vtt_pattern2.search(iframe_content)
if not vtt_match:
vtt_pattern3 = re.compile(r"<track[^>]+src=['\"]([^'\"]*\.vtt)['\"]")
vtt_match = vtt_pattern3.search(iframe_content)
if vtt_match: if vtt_match:
vtt_url = vtt_match.group(1) vtt_url = vtt_match.group(1)
logger.info(f"Found VTT subtitle URL: {vtt_url}") if vtt_url.startswith('/'):
else: from urllib.parse import urljoin
# 대안 패턴: url: '...vtt' vtt_url = urljoin(iframe_src, vtt_url)
vtt_pattern2 = re.compile(r"url:\s*['\"]([^'\"]*\.vtt)['\"]") logger.info(f"Extracted VTT URL: {vtt_url}")
vtt_match2 = vtt_pattern2.search(iframe_content)
if vtt_match2:
vtt_url = vtt_match2.group(1)
logger.info(f"Found VTT subtitle URL (alt pattern): {vtt_url}")
referer_url = iframe_src referer_url = iframe_src
else: else:
logger.warning("No iframe found in playid page") logger.warning(f"No player iframe found in playid page. HTML snippet: {html_content[:200]}...")
except Exception as e: except Exception as e:
logger.error(f"Error extracting video URL: {e}") logger.error(f"Error in extract_video_url_from_playid: {e}")
logger.error(traceback.format_exc()) logger.error(traceback.format_exc())
return video_url, referer_url, vtt_url return video_url, referer_url, vtt_url