v0.6.10: Fix Ohli24 GDM integration and update README

This commit is contained in:
2026-01-07 15:09:04 +09:00
parent 759f772ca8
commit c532ffaef8
7 changed files with 368 additions and 191 deletions

View File

@@ -7,22 +7,55 @@ Botasaurus 기반 Ohli24 HTML 페칭 스크립트
import sys
import json
import os
import time
import traceback
from typing import Dict, Any, Optional
def fetch_html(url, headers=None, proxy=None):
result = {"success": False, "html": "", "elapsed": 0}
start_time = time.time()
# 봇사우루스 디버깅 일시정지 방지 및 자동 종료 설정
os.environ["BOTASAURUS_ENV"] = "production"
def fetch_html(url: str, headers: Optional[Dict[str, str]] = None, proxy: Optional[str] = None) -> Dict[str, Any]:
result: Dict[str, Any] = {"success": False, "html": "", "elapsed": 0}
start_time: float = time.time()
try:
from botasaurus.request import request as b_request
@b_request(headers=headers, use_stealth=True, proxy=proxy)
def fetch_url(request, data):
return request.get(data)
# raise_exception=True는 에러 시 exception을 발생시키게 함
# close_on_crash=True는 에러 발생 시 대기하지 않고 즉시 종료 (배포 환경용)
@b_request(proxy=proxy, raise_exception=True, close_on_crash=True)
def fetch_url(request: Any, data: Dict[str, Any]) -> str:
target_url = data.get('url')
headers = data.get('headers') or {}
# 기본적인 헤더 보강 (Ohli24 대응 - Cloudflare 우회 시도)
default_headers = {
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
"Accept-Language": "ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7",
"Accept-Encoding": "gzip, deflate, br, zstd",
"Connection": "keep-alive",
"Upgrade-Insecure-Requests": "1",
"Sec-Fetch-Dest": "document",
"Sec-Fetch-Mode": "navigate",
"Sec-Fetch-Site": "none",
"Sec-Fetch-User": "?1",
"Cache-Control": "max-age=0",
"sec-ch-ua": '"Chromium";v="124", "Google Chrome";v="124", "Not-A.Brand";v="99"',
"sec-ch-ua-mobile": "?0",
"sec-ch-ua-platform": '"macOS"',
}
for k, v in default_headers.items():
if k not in headers and k.lower() not in [hk.lower() for hk in headers]:
headers[k] = v
return request.get(target_url, headers=headers, timeout=30)
b_resp = fetch_url(url)
elapsed = time.time() - start_time
# 봇사우루스는 실패 시 자동 재시도 등을 하기도 함.
# 여기서는 단발성 요청이므로 직접 호출.
b_resp: str = fetch_url({'url': url, 'headers': headers})
elapsed: float = time.time() - start_time
if b_resp and len(b_resp) > 10:
result.update({
@@ -36,7 +69,6 @@ def fetch_html(url, headers=None, proxy=None):
except Exception as e:
result["error"] = str(e)
result["traceback"] = traceback.format_exc()
result["elapsed"] = round(time.time() - start_time, 2)
return result
@@ -46,9 +78,9 @@ if __name__ == "__main__":
print(json.dumps({"success": False, "error": "Usage: python botasaurus_ohli24.py <url> [headers_json] [proxy]"}))
sys.exit(1)
target_url = sys.argv[1]
headers_arg = json.loads(sys.argv[2]) if len(sys.argv) > 2 and sys.argv[2] else None
proxy_arg = sys.argv[3] if len(sys.argv) > 3 and sys.argv[3] else None
target_url: str = sys.argv[1]
headers_arg: Optional[Dict[str, str]] = json.loads(sys.argv[2]) if len(sys.argv) > 2 and sys.argv[2] else None
proxy_arg: Optional[str] = sys.argv[3] if len(sys.argv) > 3 and sys.argv[3] else None
res = fetch_html(target_url, headers_arg, proxy_arg)
res: Dict[str, Any] = fetch_html(target_url, headers_arg, proxy_arg)
print(json.dumps(res, ensure_ascii=False))

View File

@@ -16,6 +16,7 @@ import traceback
from http.server import HTTPServer, BaseHTTPRequestHandler
from threading import Thread, Lock
from typing import Any, Optional, Dict, List, Type, cast
import zendriver as zd
# 터미널 및 파일로 로그 출력 설정
LOG_FILE: str = "/tmp/zendriver_daemon.log"
@@ -38,38 +39,51 @@ loop: Optional[asyncio.AbstractEventLoop] = None
manual_browser_path: Optional[str] = None
def find_browser_executable() -> List[str]:
    """Collect candidate browser executables for the current OS.

    A manually configured path (module-level ``manual_browser_path``) takes
    priority: when it is set and exists on disk it is the only candidate.
    Otherwise well-known install locations for the detected platform are
    checked, followed by a lookup of common browser command names on PATH.

    Returns:
        Existing executable paths in preference order. The list is empty
        when nothing was found; callers are expected to try each entry in
        turn until one starts successfully.
    """
    import platform
    import shutil

    # Manually configured path has the highest priority. It is wrapped in a
    # list so that every return path has the same type.
    if manual_browser_path and os.path.exists(manual_browser_path):
        return [manual_browser_path]

    system = platform.system()
    common_paths: List[str] = []

    if system == "Darwin":  # macOS
        app_dirs = ["/Applications", "/Volumes/WD/Users/Applications"]
        for base in app_dirs:
            common_paths.extend([
                f"{base}/Google Chrome.app/Contents/MacOS/Google Chrome",
                f"{base}/Chromium.app/Contents/MacOS/Chromium",
                f"{base}/Microsoft Edge.app/Contents/MacOS/Microsoft Edge",
            ])
    elif system == "Windows":
        common_paths = [
            os.path.expandvars(r"%ProgramFiles%\Google\Chrome\Application\chrome.exe"),
            os.path.expandvars(r"%ProgramFiles(x86)%\Google\Chrome\Application\chrome.exe"),
            os.path.expandvars(r"%LocalAppData%\Google\Chrome\Application\chrome.exe"),
        ]
    else:  # Linux / other
        common_paths = [
            "/usr/bin/google-chrome",
            "/usr/bin/google-chrome-stable",
            "/usr/bin/chromium-browser",
            "/usr/bin/chromium",
            "/usr/lib/chromium-browser/chromium-browser",
        ]

    # Keep every well-known location that actually exists.
    candidates: List[str] = [p for p in common_paths if os.path.exists(p)]

    # Add anything resolvable through PATH that is not already listed.
    for cmd in ["google-chrome", "google-chrome-stable", "chromium-browser", "chromium", "chrome", "microsoft-edge"]:
        found = shutil.which(cmd)
        if found and found not in candidates:
            candidates.append(found)

    return candidates
class ZendriverHandler(BaseHTTPRequestHandler):
@@ -154,30 +168,64 @@ async def ensure_browser() -> Any:
with browser_lock:
if browser is None:
try:
import zendriver as zd
log_debug("[ZendriverDaemon] Starting new browser instance...")
# 존재하는 후보군 가져오기
candidates = find_browser_executable()
if not candidates:
log_debug("[ZendriverDaemon] No browser candidates found!")
return None
# 실행 가능한 브라우저 찾기
exec_path = find_browser_executable()
log_debug(f"[ZendriverDaemon] Startup params: headless=True, no_sandbox=True, path={exec_path}")
# 사용자 데이터 디렉토리 설정 (Mac/Root 권한 이슈 대응)
import tempfile
uid = os.getuid() if hasattr(os, 'getuid') else 'win'
if exec_path:
log_debug(f"[ZendriverDaemon] Starting browser at: {exec_path}")
browser = await zd.start(
headless=True,
browser_executable_path=exec_path,
no_sandbox=True,
browser_args=["--no-sandbox", "--disable-setuid-sandbox", "--disable-dev-shm-usage", "--disable-gpu", "--no-first-run"]
)
else:
log_debug("[ZendriverDaemon] Starting browser with default path")
browser = await zd.start(
headless=True,
no_sandbox=True,
browser_args=["--no-sandbox", "--disable-setuid-sandbox", "--disable-dev-shm-usage", "--disable-gpu", "--no-first-run"]
)
browser_args = [
"--no-sandbox",
"--disable-setuid-sandbox",
"--disable-dev-shm-usage",
"--disable-gpu",
"--no-first-run",
"--no-service-autorun",
"--password-store=basic",
"--mute-audio",
"--disable-notifications",
"--disable-background-networking",
"--disable-background-timer-throttling",
"--disable-backgrounding-occluded-windows",
"--disable-breakpad",
"--disable-client-side-phishing-detection",
"--disable-default-apps",
"--disable-hang-monitor",
"--disable-popup-blocking",
"--disable-prompt-on-repost",
"--disable-sync",
"--disable-translate",
"--metrics-recording-only",
"--no-default-browser-check",
"--safebrowsing-disable-auto-update",
"--remote-allow-origins=*",
"--blink-settings=imagesEnabled=false",
]
for exec_path in candidates:
user_data_dir = os.path.join(tempfile.gettempdir(), f"zd_daemon_{uid}_{os.path.basename(exec_path).replace(' ', '_')}")
os.makedirs(user_data_dir, exist_ok=True)
log_debug("[ZendriverDaemon] Browser started successfully")
try:
log_debug(f"[ZendriverDaemon] Trying browser at: {exec_path}")
browser = await zd.start(
headless=True,
browser_executable_path=exec_path,
no_sandbox=True,
user_data_dir=user_data_dir,
browser_args=browser_args
)
log_debug(f"[ZendriverDaemon] Browser started successfully with: {exec_path}")
return browser
except Exception as e:
log_debug(f"[ZendriverDaemon] Failed to start {exec_path}: {e}")
browser = None
raise Exception("All browser candidates failed to start")
except Exception as e:
log_debug(f"[ZendriverDaemon] Failed to start browser: {e}")
browser = None
@@ -209,9 +257,10 @@ async def fetch_with_browser(url: str, timeout: int = 30) -> Dict[str, Any]:
# browser.get(url)은 새 탭을 열거나 기존 탭을 사용함
page: Any = await browser.get(url)
# 페이지 로드 대기 - cdndania iframe 로딩될 때까지 폴링 (최대 15초)
max_wait = 15
poll_interval = 1
# 페이지 로드 대기 - 지능형 폴링 (최대 10초)
# 1. 리스트 페이지는 바로 반환, 2. 에피소드 페이지는 플레이어 로딩 대기
max_wait = 10
poll_interval = 0.2 # 1.0s -> 0.2s로 단축하여 반응속도 향상
waited = 0
html_content = ""
@@ -220,9 +269,14 @@ async def fetch_with_browser(url: str, timeout: int = 30) -> Dict[str, Any]:
waited += poll_interval
html_content = await page.get_content()
# cdndania iframe이 로드되었는지 확인
# 리스트 페이지 마커 확인 (발견 즉시 탈출)
if "post-list" in html_content or "list-box" in html_content or "post-row" in html_content:
log_debug(f"[ZendriverDaemon] List page detected in {waited:.1f}s")
break
# cdndania/fireplayer iframe이 로드되었는지 확인 (에피소드 페이지)
if "cdndania" in html_content or "fireplayer" in html_content:
log_debug(f"[ZendriverDaemon] cdndania/fireplayer found after {waited}s")
log_debug(f"[ZendriverDaemon] Player detected in {waited:.1f}s")
break
elapsed: float = time.time() - start_time

View File

@@ -15,31 +15,49 @@ import shutil
def find_browser_executable(manual_path=None):
    """Collect candidate browser executables for the current OS.

    Args:
        manual_path: Optional explicit path to a browser binary. When it is
            given and exists on disk it is the only candidate returned.

    Returns:
        A list of existing executable paths in preference order: well-known
        install locations for the detected platform first, then matches for
        common browser command names on PATH. The list is empty when
        nothing was found.
    """
    import platform

    # An explicit path wins. It must be wrapped in a list: the caller loops
    # over the result, and a bare string would be iterated one character at
    # a time instead of being tried as a single executable path.
    if manual_path and os.path.exists(manual_path):
        return [manual_path]

    system = platform.system()
    common_paths = []

    if system == "Darwin":  # macOS
        app_dirs = ["/Applications", "/Volumes/WD/Users/Applications"]
        for base in app_dirs:
            common_paths.extend([
                f"{base}/Google Chrome.app/Contents/MacOS/Google Chrome",
                f"{base}/Chromium.app/Contents/MacOS/Chromium",
                f"{base}/Microsoft Edge.app/Contents/MacOS/Microsoft Edge",
            ])
    elif system == "Windows":
        common_paths = [
            os.path.expandvars(r"%ProgramFiles%\Google\Chrome\Application\chrome.exe"),
            os.path.expandvars(r"%ProgramFiles(x86)%\Google\Chrome\Application\chrome.exe"),
            os.path.expandvars(r"%LocalAppData%\Google\Chrome\Application\chrome.exe"),
        ]
    else:  # Linux / other
        common_paths = [
            "/usr/bin/google-chrome",
            "/usr/bin/google-chrome-stable",
            "/usr/bin/chromium-browser",
            "/usr/bin/chromium",
            "/usr/lib/chromium-browser/chromium-browser",
        ]

    # Keep every well-known location that actually exists.
    candidates = [p for p in common_paths if os.path.exists(p)]

    # Add anything resolvable through PATH that is not already listed.
    for cmd in ["google-chrome", "google-chrome-stable", "chromium-browser", "chromium", "chrome", "microsoft-edge"]:
        found = shutil.which(cmd)
        if found and found not in candidates:
            candidates.append(found)

    return candidates
async def fetch_html(url: str, timeout: int = 60, browser_path: str = None) -> dict:
@@ -53,63 +71,112 @@ async def fetch_html(url: str, timeout: int = 60, browser_path: str = None) -> d
start_time = asyncio.get_event_loop().time()
browser = None
try:
# 실행 가능한 브라우저 찾기
exec_path = find_browser_executable(browser_path)
# 실행 가능한 브라우저 후보들 찾기
candidates = find_browser_executable(browser_path)
if not candidates:
return {"success": False, "error": "No browser executable found", "html": ""}
# 브라우저 시작
if exec_path:
# 사용자 데이터 디렉토리 설정 (Mac/Root 권한 이슈 대응)
import tempfile
uid = os.getuid() if hasattr(os, 'getuid') else 'win'
# 공통 브라우저 인자
browser_args = [
"--no-sandbox",
"--disable-setuid-sandbox",
"--disable-dev-shm-usage",
"--disable-gpu",
"--no-first-run",
"--no-service-autorun",
"--password-store=basic",
"--mute-audio",
"--disable-notifications",
"--disable-background-networking",
"--disable-background-timer-throttling",
"--disable-backgrounding-occluded-windows",
"--disable-breakpad",
"--disable-client-side-phishing-detection",
"--disable-default-apps",
"--disable-hang-monitor",
"--disable-popup-blocking",
"--disable-prompt-on-repost",
"--disable-sync",
"--disable-translate",
"--metrics-recording-only",
"--no-default-browser-check",
"--safebrowsing-disable-auto-update",
"--remote-allow-origins=*",
"--blink-settings=imagesEnabled=false",
]
last_error = "All candidates failed"
# 여러 브라우저 후보들 시도 (크롬이 이미 실행 중일 때 등의 상황 대비)
for exec_path in candidates:
browser = None
user_data_dir = os.path.join(tempfile.gettempdir(), f"zd_ohli_{uid}_{os.path.basename(exec_path).replace(' ', '_')}")
os.makedirs(user_data_dir, exist_ok=True)
try:
# 브라우저 시작
browser = await zd.start(
headless=True,
browser_executable_path=exec_path,
no_sandbox=True,
browser_args=["--no-sandbox", "--disable-setuid-sandbox", "--disable-dev-shm-usage", "--disable-gpu", "--no-first-run"]
)
else:
browser = await zd.start(
headless=True,
no_sandbox=True,
browser_args=["--no-sandbox", "--disable-setuid-sandbox", "--disable-dev-shm-usage", "--disable-gpu", "--no-first-run"]
user_data_dir=user_data_dir,
browser_args=browser_args
)
page = await browser.get(url)
# 페이지 로드 대기 - cdndania iframe 로딩될 때까지 폴링 (최대 15초)
max_wait = 15
poll_interval = 1
waited = 0
html = ""
while waited < max_wait:
await asyncio.sleep(poll_interval)
waited += poll_interval
html = await page.get_content()
page = await browser.get(url)
# cdndania iframe이 로드되었는지 확인
if "cdndania" in html or "fireplayer" in html:
break
elapsed = asyncio.get_event_loop().time() - start_time
if html and len(html) > 100:
result.update({
"success": True,
"html": html,
"elapsed": round(elapsed, 2)
})
else:
result["error"] = f"Short response: {len(html) if html else 0} bytes"
result["elapsed"] = round(elapsed, 2)
# 페이지 로드 대기 - 지능형 폴링 (최대 10초)
# 1. 리스트 페이지는 바로 반환, 2. 에피소드 페이지는 플레이어 로딩 대기
max_wait = 10
poll_interval = 0.2 # 1.0s -> 0.2s로 단축하여 반응속도 향상
waited = 0
html = ""
while waited < max_wait:
await asyncio.sleep(poll_interval)
waited += poll_interval
html = await page.get_content()
except Exception as e:
result["error"] = str(e)
result["elapsed"] = round(asyncio.get_event_loop().time() - start_time, 2)
finally:
if browser:
try:
# 리스트 페이지 마커 확인 (발견 즉시 탈출)
if "post-list" in html or "list-box" in html or "post-row" in html:
# log_debug(f"[Zendriver] List page detected in {waited:.1f}s")
break
# cdndania/fireplayer iframe이 로드되었는지 확인 (에피소드 페이지)
if "cdndania" in html or "fireplayer" in html:
# log_debug(f"[Zendriver] Player detected in {waited:.1f}s")
break
elapsed = asyncio.get_event_loop().time() - start_time
if html and len(html) > 100:
result.update({
"success": True,
"html": html,
"elapsed": round(elapsed, 2)
})
# 성공했으므로 루프 종료
await browser.stop()
except:
pass
return result
else:
last_error = f"Short response from {exec_path}: {len(html) if html else 0} bytes"
except Exception as e:
last_error = f"Failed with {exec_path}: {str(e)}"
finally:
if browser:
try:
await browser.stop()
except:
pass
result["error"] = last_error
result["elapsed"] = round(asyncio.get_event_loop().time() - start_time, 2)
return result
return result