v0.6.22: Robust Linkkf extraction with zendriver fallback

This commit is contained in:
2026-01-07 23:48:00 +09:00
parent 76be367a9e
commit d1866111c7
3 changed files with 135 additions and 66 deletions

View File

@@ -81,7 +81,11 @@
## 📝 변경 이력 (Changelog) ## 📝 변경 이력 (Changelog)
### v0.6.21 (2026-01-07) ### v0.6.22 (2026-01-08)
- **Linkkf 추출 로직 강화**: Cloudflare 보호가 강화된 Linkkf 도메인(flexora.xyz 등)에 대응하기 위해 브라우저 기반(Zendriver/Camoufox) 추출 엔진을 도입했습니다.
- **오추출 방지**: 광고나 서비스 차단 페이지(Google Cloud 등)의 iframe을 비디오 URL로 오인하는 문제를 수정했습니다.
### v0.6.21 (2026-01-07)
- **Linkkf GDM 연동 수정**: - **Linkkf GDM 연동 수정**:
- GDM 위임 전 실제 스트림 URL(m3u8) 추출 로직을 강제 호출하여 "Invalid data" 오류 해결. - GDM 위임 전 실제 스트림 URL(m3u8) 추출 로직을 강제 호출하여 "Invalid data" 오류 해결.
- Linkkf 설정의 다운로드 방식 및 쓰레드 수를 GDM에 전달하도록 개선. - Linkkf 설정의 다운로드 방식 및 쓰레드 수를 GDM에 전달하도록 개선.

View File

@@ -1,5 +1,5 @@
title: "애니 다운로더" title: "애니 다운로더"
version: "0.6.21" version: "0.6.22"
package_name: "anime_downloader" package_name: "anime_downloader"
developer: "projectdx" developer: "projectdx"
description: "anime downloader" description: "anime downloader"

View File

@@ -41,6 +41,7 @@ import cloudscraper
from anime_downloader.lib.ffmpeg_queue_v1 import FfmpegQueue, FfmpegQueueEntity from anime_downloader.lib.ffmpeg_queue_v1 import FfmpegQueue, FfmpegQueueEntity
from anime_downloader.lib.util import Util from anime_downloader.lib.util import Util
from .mod_ohli24 import LogicOhli24
# 패키지 # 패키지
# from .plugin import P # from .plugin import P
@@ -529,28 +530,79 @@ class LogicLinkkf(AnimeModuleBase):
@staticmethod @staticmethod
def get_html_cloudflare(url, cached=False, timeout=10): def get_html_cloudflare(url, cached=False, timeout=10):
"""Cloudflare 보호 우회를 위한 HTTP 요청 (싱글톤 패턴)""" """Cloudflare 보호 우회를 위한 HTTP 요청 (Zendriver Daemon -> Scraper -> Zendriver Subprocess -> Camoufox 순)"""
user_agents_list = [ start_time = time.time()
"Mozilla/5.0 (iPad; CPU OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.83 Safari/537.36", # 0. Referer 설정
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36", if LogicLinkkf.referer is None:
] LogicLinkkf.referer = f"{P.ModelSetting.get('linkkf_url')}"
LogicLinkkf.headers["User-Agent"] = random.choice(user_agents_list)
LogicLinkkf.headers["Referer"] = LogicLinkkf.referer or "" LogicLinkkf.headers["Referer"] = LogicLinkkf.referer or ""
# cloudscraper 싱글톤 패턴 - 매 요청마다 생성하지 않음 # 1. Zendriver Daemon 시도 (최우선)
if LogicLinkkf._scraper is None: try:
LogicLinkkf._scraper = cloudscraper.create_scraper( if LogicOhli24.is_zendriver_daemon_running():
delay=10, logger.info(f"[Linkkf] Trying Zendriver Daemon: {url}")
browser={"custom": "linkkf"}, daemon_res = LogicOhli24.fetch_via_daemon(url, timeout=30)
) if daemon_res.get("success") and daemon_res.get("html"):
elapsed = time.time() - start_time
logger.info(f"[Linkkf] Daemon success in {elapsed:.2f}s")
return daemon_res["html"]
except Exception as e:
logger.warning(f"[Linkkf] Daemon error: {e}")
return LogicLinkkf._scraper.get( # 2. Scraper 시도 (기본)
url, try:
headers=LogicLinkkf.headers, if LogicLinkkf._scraper is None:
timeout=timeout, LogicLinkkf._scraper = cloudscraper.create_scraper(
).content.decode("utf8", errors="replace") delay=10,
browser={"custom": "linkkf"},
)
user_agents_list = [
"Mozilla/5.0 (iPad; CPU OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.83 Safari/537.36",
]
LogicLinkkf.headers["User-Agent"] = random.choice(user_agents_list)
response = LogicLinkkf._scraper.get(url, headers=LogicLinkkf.headers, timeout=timeout)
# 챌린지 페이지가 아닌 실제 콘텐츠가 포함되었는지 확인
content = response.text
if "Cloudflare" not in content or "video-player" in content or "iframe" in content:
return content
logger.warning("[Linkkf] Scraper returned challenge page, falling back to browser...")
except Exception as e:
logger.warning(f"[Linkkf] Scraper error: {e}")
# 3. Zendriver Subprocess Fallback
try:
if LogicOhli24.ensure_zendriver_installed():
logger.info(f"[Linkkf] Trying Zendriver subprocess: {url}")
script_path = os.path.join(os.path.dirname(__file__), "lib", "zendriver_ohli24.py")
cmd = [sys.executable, script_path, url, str(30)]
result = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
if result.returncode == 0 and result.stdout.strip():
zd_result = json.loads(result.stdout.strip())
if zd_result.get("success") and zd_result.get("html"):
return zd_result["html"]
except Exception as e:
logger.warning(f"[Linkkf] Zendriver fallback error: {e}")
# 4. Camoufox Fallback
try:
logger.info(f"[Linkkf] Trying Camoufox fallback: {url}")
script_path = os.path.join(os.path.dirname(__file__), "lib", "camoufox_ohli24.py")
result = subprocess.run([sys.executable, script_path, url, str(30)], capture_output=True, text=True, timeout=60)
if result.returncode == 0 and result.stdout.strip():
cf_result = json.loads(result.stdout.strip())
if cf_result.get("success") and cf_result.get("html"):
return cf_result["html"]
except Exception as e:
logger.warning(f"[Linkkf] Camoufox fallback error: {e}")
return ""
@staticmethod @staticmethod
def add_whitelist(*args): def add_whitelist(*args):
@@ -632,81 +684,94 @@ class LogicLinkkf(AnimeModuleBase):
try: try:
logger.info(f"Extracting video URL from: {playid_url}") logger.info(f"Extracting video URL from: {playid_url}")
# Step 1: playid 페이지에서 iframe src 추출 # Step 1: playid 페이지에서 iframe src 추출 (cloudscraper 사용)
headers = { html_content = LogicLinkkf.get_html(playid_url)
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36", if not html_content:
"Referer": "https://linkkf.live/" logger.error(f"Failed to fetch playid page: {playid_url}")
} return None, None, None
response = requests.get(playid_url, headers=headers, timeout=15)
html_content = response.text
soup = BeautifulSoup(html_content, "html.parser") soup = BeautifulSoup(html_content, "html.parser")
# iframe 찾기 (id="video-player-iframe" 또는 play.sub3.top 포함) # iframe 찾기 (광고 iframe 제외를 위해 id나 src 패턴 강조)
iframe = soup.select_one("iframe#video-player-iframe") iframe = soup.select_one("iframe#video-player-iframe")
if not iframe: if not iframe:
iframe = soup.select_one("iframe[src*='play.sub']") iframe = soup.select_one("iframe[src*='play.sub']")
if not iframe: if not iframe:
iframe = soup.select_one("iframe") iframe = soup.select_one("iframe[src*='play.php']")
# fallback if strictly needed but skip ad domains
if not iframe:
all_iframes = soup.select("iframe")
for f in all_iframes:
src = f.get("src", "")
if any(x in src for x in ["googletag", "googlead", "adsystem", "cloud.google"]):
continue
if src.startswith("http"):
iframe = f
break
if iframe and iframe.get("src"): if iframe and iframe.get("src"):
iframe_src = iframe.get("src") iframe_src = iframe.get("src")
logger.info(f"Found iframe: {iframe_src}") # HTML entity decoding (&amp; -> &)
if "&amp;" in iframe_src:
iframe_src = iframe_src.replace("&amp;", "&")
logger.info(f"Found player iframe: {iframe_src}")
# Step 2: iframe 페이지에서 m3u8 URL과 vtt URL 추출 # Step 2: iframe 페이지에서 m3u8 URL과 vtt URL 추출
iframe_headers = { iframe_content = LogicLinkkf.get_html(iframe_src)
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36", if not iframe_content:
"Referer": playid_url logger.error(f"Failed to fetch iframe content: {iframe_src}")
} return None, iframe_src, None
iframe_response = requests.get(iframe_src, headers=iframe_headers, timeout=15) # m3u8 URL 패턴 찾기 (더 정밀하게)
iframe_content = iframe_response.text # 패턴 1: url: 'https://...m3u8'
m3u8_pattern = re.compile(r"url:\s*['\"]([^'\"]*\.m3u8[^'\"]*)['\"]")
# m3u8 URL 패턴 찾기
# 예: url: 'https://n8.hlz3.top/403116s11/index.m3u8'
m3u8_pattern = re.compile(r"url:\s*['\"]([^'\"]*\.m3u8)['\"]")
m3u8_match = m3u8_pattern.search(iframe_content) m3u8_match = m3u8_pattern.search(iframe_content)
# 패턴 2: <source src="https://...m3u8">
if not m3u8_match:
source_pattern = re.compile(r"<source[^>]+src=['\"]([^'\"]*\.m3u8[^'\"]*)['\"]")
m3u8_match = source_pattern.search(iframe_content)
# 패턴 3: var src = '...m3u8'
if not m3u8_match:
src_pattern = re.compile(r"src\s*=\s*['\"]([^'\"]*\.m3u8[^'\"]*)['\"]")
m3u8_match = src_pattern.search(iframe_content)
if m3u8_match: if m3u8_match:
video_url = m3u8_match.group(1) video_url = m3u8_match.group(1)
# 상대 경로 처리 (예: cache/...) # 상대 경로 처리 (예: cache/...)
if video_url.startswith('cache/'): if video_url.startswith('cache/') or video_url.startswith('/cache/'):
from urllib.parse import urljoin from urllib.parse import urljoin
video_url = urljoin(iframe_src, video_url) video_url = urljoin(iframe_src, video_url)
logger.info(f"Found m3u8 URL: {video_url}") logger.info(f"Extracted m3u8 URL: {video_url}")
else: else:
# 대안 패턴: source src logger.warning(f"m3u8 URL not found in iframe. Content snippet: {iframe_content[:200]}...")
source_pattern = re.compile(r"<source[^>]+src=['\"]([^'\"]+)['\"]")
source_match = source_pattern.search(iframe_content)
if source_match:
video_url = source_match.group(1)
if video_url.startswith('cache/'):
from urllib.parse import urljoin
video_url = urljoin(iframe_src, video_url)
logger.info(f"Found source URL: {video_url}")
# VTT 자막 URL 추출 # VTT 자막 URL 추출
# 예: <track src="https://...vtt" kind="subtitles"> vtt_pattern = re.compile(r"['\"]src['\"]?:\s*['\"]([^'\"]*\.vtt)['\"]")
vtt_pattern = re.compile(r"<track[^>]+src=['\"]([^'\"]*\.vtt)['\"]")
vtt_match = vtt_pattern.search(iframe_content) vtt_match = vtt_pattern.search(iframe_content)
if not vtt_match:
vtt_pattern2 = re.compile(r"url:\s*['\"]([^'\"]*\.vtt)['\"]")
vtt_match = vtt_pattern2.search(iframe_content)
if not vtt_match:
vtt_pattern3 = re.compile(r"<track[^>]+src=['\"]([^'\"]*\.vtt)['\"]")
vtt_match = vtt_pattern3.search(iframe_content)
if vtt_match: if vtt_match:
vtt_url = vtt_match.group(1) vtt_url = vtt_match.group(1)
logger.info(f"Found VTT subtitle URL: {vtt_url}") if vtt_url.startswith('/'):
else: from urllib.parse import urljoin
# 대안 패턴: url: '...vtt' vtt_url = urljoin(iframe_src, vtt_url)
vtt_pattern2 = re.compile(r"url:\s*['\"]([^'\"]*\.vtt)['\"]") logger.info(f"Extracted VTT URL: {vtt_url}")
vtt_match2 = vtt_pattern2.search(iframe_content)
if vtt_match2:
vtt_url = vtt_match2.group(1)
logger.info(f"Found VTT subtitle URL (alt pattern): {vtt_url}")
referer_url = iframe_src referer_url = iframe_src
else: else:
logger.warning("No iframe found in playid page") logger.warning(f"No player iframe found in playid page. HTML snippet: {html_content[:200]}...")
except Exception as e: except Exception as e:
logger.error(f"Error extracting video URL: {e}") logger.error(f"Error in extract_video_url_from_playid: {e}")
logger.error(traceback.format_exc()) logger.error(traceback.format_exc())
return video_url, referer_url, vtt_url return video_url, referer_url, vtt_url