feat: Apply early skip check to all sites with glob pattern matching

This commit is contained in:
2026-01-02 17:06:41 +09:00
parent c3a83dfe85
commit 805464cb25
4 changed files with 174 additions and 99 deletions

View File

@@ -1,5 +1,5 @@
title: "애니 다운로더" title: "애니 다운로더"
version: "0.4.8" version: "0.4.9"
package_name: "anime_downloader" package_name: "anime_downloader"
developer: "projectdx" developer: "projectdx"
description: "anime downloader" description: "anime downloader"

View File

@@ -1276,8 +1276,11 @@ class LogicAniLife(AnimeModuleBase):
return "enqueue_db_exist" return "enqueue_db_exist"
def _predict_filepath(self, episode_info): def _predict_filepath(self, episode_info):
"""Predict the output filepath from episode info WITHOUT expensive site access.""" """Predict the output filepath from episode info WITHOUT expensive site access.
Uses glob pattern to match any quality variant (720p, 1080p, etc.)."""
try: try:
import glob
title = episode_info.get("title", "") title = episode_info.get("title", "")
if not title: if not title:
return None return None
@@ -1291,19 +1294,19 @@ class LogicAniLife(AnimeModuleBase):
content_title = match.group("title").strip() content_title = match.group("title").strip()
season = int(match.group("season")) if match.group("season") else 1 season = int(match.group("season")) if match.group("season") else 1
epi_no = int(match.group("epi_no")) epi_no = int(match.group("epi_no"))
quality = "1080P"
filename = "%s.S%sE%s.%s-AL.mp4" % ( # Use glob pattern for quality: *-AL.mp4 matches any quality
filename_pattern = "%s.S%sE%s.*-AL.mp4" % (
content_title, content_title,
"0%s" % season if season < 10 else season, "0%s" % season if season < 10 else season,
"0%s" % epi_no if epi_no < 10 else epi_no, "0%s" % epi_no if epi_no < 10 else epi_no,
quality,
) )
else: else:
filename = "%s.720p-AL.mp4" % title # Fallback pattern for non-standard titles
filename_pattern = "%s.*-AL.mp4" % title
# Sanitize filename # Sanitize pattern (but keep glob wildcards)
filename = AniUtil.change_text_for_use_filename(filename) filename_pattern = AniUtil.change_text_for_use_filename(filename_pattern)
# Get save path # Get save path
savepath = P.ModelSetting.get("anilife_download_path") savepath = P.ModelSetting.get("anilife_download_path")
@@ -1320,12 +1323,21 @@ class LogicAniLife(AnimeModuleBase):
folder_name = AniUtil.change_text_for_use_filename(folder_name) folder_name = AniUtil.change_text_for_use_filename(folder_name)
savepath = os.path.join(savepath, folder_name) savepath = os.path.join(savepath, folder_name)
return os.path.join(savepath, filename) # Use glob to find any matching file
full_pattern = os.path.join(savepath, filename_pattern)
matching_files = glob.glob(full_pattern)
if matching_files:
# Return first matching file
logger.debug(f"Found existing file: {matching_files[0]}")
return matching_files[0]
return None
except Exception as e: except Exception as e:
logger.debug(f"_predict_filepath error: {e}") logger.debug(f"_predict_filepath error: {e}")
return None return None
def is_exist(self, info): def is_exist(self, info):
for e in self.queue.entity_list: for e in self.queue.entity_list:
if e.info["_id"] == info["_id"]: if e.info["_id"] == info["_id"]:

View File

@@ -1507,6 +1507,7 @@ class LogicLinkkf(AnimeModuleBase):
logger.error(traceback.format_exc()) logger.error(traceback.format_exc())
def add(self, episode_info): def add(self, episode_info):
"""Add episode to download queue with early skip checks."""
# 큐가 초기화되지 않았으면 초기화 (클래스 레벨 큐 확인) # 큐가 초기화되지 않았으면 초기화 (클래스 레벨 큐 확인)
if LogicLinkkf.queue is None: if LogicLinkkf.queue is None:
logger.warning("Queue is None in add(), initializing...") logger.warning("Queue is None in add(), initializing...")
@@ -1522,50 +1523,47 @@ class LogicLinkkf(AnimeModuleBase):
# self.queue를 LogicLinkkf.queue로 바인딩 (프로세스 내부 공유 보장) # self.queue를 LogicLinkkf.queue로 바인딩 (프로세스 내부 공유 보장)
self.queue = LogicLinkkf.queue self.queue = LogicLinkkf.queue
# 큐 상태 로깅 # 1. Check if already in queue
queue_len = len(self.queue.entity_list) if self.queue else 0
logger.info(f"add() called - Queue length: {queue_len}, episode _id: {episode_info.get('_id')}")
if self.is_exist(episode_info): if self.is_exist(episode_info):
logger.info(f"is_exist returned True for _id: {episode_info.get('_id')}") logger.info(f"is_exist returned True for _id: {episode_info.get('_id')}")
return "queue_exist" return "queue_exist"
else:
# 2. Check DB for completion status FIRST (before expensive operations)
db_entity = ModelLinkkfItem.get_by_linkkf_id(episode_info["_id"]) db_entity = ModelLinkkfItem.get_by_linkkf_id(episode_info["_id"])
# logger.info(f"db_entity: {db_entity}")
# logger.debug("db_entity:::> %s", db_entity) if db_entity is not None and db_entity.status == "completed":
# logger.debug("db_entity.status ::: %s", db_entity.status) logger.info(f"[Skip] Already completed in DB: {episode_info.get('program_title')} {episode_info.get('title')}")
return "db_completed"
# 3. Early file existence check - filepath is already in episode_info from get_series_info
filepath = episode_info.get("filepath")
if filepath and os.path.exists(filepath):
logger.info(f"[Skip] File already exists: {filepath}")
# Update DB status to completed if not already
if db_entity is not None and db_entity.status != "completed":
db_entity.status = "completed"
db_entity.filepath = filepath
db_entity.save()
return "file_exists"
# 4. Proceed with queue addition
queue_len = len(self.queue.entity_list) if self.queue else 0
logger.info(f"add() - Queue length: {queue_len}, episode _id: {episode_info.get('_id')}")
if db_entity is None: if db_entity is None:
entity = LinkkfQueueEntity(P, self, episode_info) entity = LinkkfQueueEntity(P, self, episode_info)
logger.debug("entity:::> %s", entity.as_dict()) logger.debug("entity:::> %s", entity.as_dict())
ModelLinkkfItem.append(entity.as_dict()) ModelLinkkfItem.append(entity.as_dict())
# # logger.debug("entity:: type >> %s", type(entity))
#
self.queue.add_queue(entity) self.queue.add_queue(entity)
# self.download_queue.add_queue(entity)
# P.logger.debug(F.config['path_data'])
# P.logger.debug(self.headers)
# filename = os.path.basename(entity.filepath)
# ffmpeg = SupportFfmpeg(entity.url, entity.filename, callback_function=self.callback_function,
# max_pf_count=0,
# save_path=entity.savepath, timeout_minute=60, headers=self.headers)
# ret = {'ret': 'success'}
# ret['json'] = ffmpeg.start()
return "enqueue_db_append" return "enqueue_db_append"
elif db_entity.get("status") != "completed" if isinstance(db_entity, dict) else db_entity.status != "completed": else:
# DB에 있지만 완료되지 않은 경우도 큐에 추가 # db_entity exists but status is not completed
status = db_entity.get("status") if isinstance(db_entity, dict) else db_entity.status status = db_entity.get("status") if isinstance(db_entity, dict) else db_entity.status
logger.info(f"db_entity status: {status}, adding to queue") logger.info(f"db_entity status: {status}, adding to queue")
try: try:
logger.info("Creating LinkkfQueueEntity...")
entity = LinkkfQueueEntity(P, self, episode_info) entity = LinkkfQueueEntity(P, self, episode_info)
logger.info(f"LinkkfQueueEntity created, url: {entity.url}, filepath: {entity.filepath}") logger.info(f"LinkkfQueueEntity created, url: {entity.url}, filepath: {entity.filepath}")
logger.debug("entity:::> %s", entity.as_dict())
logger.info(f"Adding to queue, queue length before: {len(self.queue.entity_list)}")
result = self.queue.add_queue(entity) result = self.queue.add_queue(entity)
logger.info(f"add_queue result: {result}, queue length after: {len(self.queue.entity_list)}") logger.info(f"add_queue result: {result}, queue length after: {len(self.queue.entity_list)}")
except Exception as e: except Exception as e:
@@ -1574,8 +1572,7 @@ class LogicLinkkf(AnimeModuleBase):
return "entity_creation_error" return "entity_creation_error"
return "enqueue_db_exist" return "enqueue_db_exist"
else:
return "db_completed"
# def is_exist(self, info): # def is_exist(self, info):
# print(self.download_queue.entity_list) # print(self.download_queue.entity_list)

View File

@@ -1262,52 +1262,117 @@ class LogicOhli24(AnimeModuleBase):
######################################################### #########################################################
def add(self, episode_info: Dict[str, Any]) -> str:
    """Queue an episode for download unless an early check makes that unnecessary.

    The checks run in order and the first one that applies decides the result:

    * ``"queue_exist"`` - ``self.is_exist`` reports the episode is already queued.
    * ``"db_completed"`` - the stored item for this ``_id`` has status
      ``"completed"``.
    * ``"file_exists"`` - ``self._predict_filepath`` returned a path that exists
      on disk; a stored item, if there is one, is marked completed with that
      path and saved.
    * ``"enqueue_db_append"`` - no stored item existed; one is appended and the
      new queue entity is queued.
    * ``"enqueue_db_exist"`` - a stored, unfinished item existed; the new queue
      entity is queued and nothing is appended to the store.

    Args:
        episode_info: Episode metadata. Must contain ``"_id"``; ``"title"`` is
            only used for logging here.

    Returns:
        One of the status strings listed above.
    """
    # Guard 1: nothing to do when the episode is already sitting in the queue.
    if self.is_exist(episode_info):
        return "queue_exist"

    # Guard 2: consult the stored item before a queue entity is constructed.
    db_entity = ModelOhli24Item.get_by_ohli24_id(episode_info["_id"])
    logger.debug(f"db_entity:::> {db_entity}")
    has_record = db_entity is not None
    if has_record and db_entity.status == "completed":
        logger.info(f"[Skip] Already completed in DB: {episode_info.get('title')}")
        return "db_completed"

    # Guard 3: a file matching the predicted name is already on disk.
    existing_file = self._predict_filepath(episode_info)
    if existing_file and os.path.exists(existing_file):
        logger.info(f"[Skip] File already exists: {existing_file}")
        # Bring an unfinished stored item in line with what is on disk.
        if has_record and db_entity.status != "completed":
            db_entity.status = "completed"
            db_entity.filepath = existing_file
            db_entity.save()
        return "file_exists"

    # No guard applied: build the queue entity and enqueue it.
    logger.debug(f"episode_info:: {episode_info}")
    entity = Ohli24QueueEntity(P, self, episode_info)
    entity.proxy = LogicOhli24.get_proxy()
    logger.debug("entity:::> %s", entity.as_dict())
    if not has_record:
        # First time this episode is seen: persist it before queueing.
        ModelOhli24Item.append(entity.as_dict())
    self.queue.add_queue(entity)
    return "enqueue_db_exist" if has_record else "enqueue_db_append"
def _predict_filepath(self, episode_info: Dict[str, Any]) -> Optional[str]:
    """Find an already-downloaded file for an episode using only local information.

    The expected file name is derived from ``episode_info["title"]`` and the
    ``ohli24_*`` settings, with the quality segment (e.g. 720p / 1080p) left
    as a wildcard, and the download directory is searched with ``glob``.

    Only the quality segment is treated as a wildcard: the title and the
    directory are passed through ``glob.escape`` so that characters such as
    ``[``, ``]``, ``?`` or ``*`` in a title or folder name are matched
    literally instead of being interpreted as glob syntax.

    Args:
        episode_info: Episode metadata. ``"title"`` is required for a
            prediction; ``"day"`` is consulted when the auto-folder option
            is enabled.

    Returns:
        The lexicographically smallest matching path, or ``None`` when the
        title or download path is missing, nothing matches, or any error
        occurs (errors are logged at debug level and swallowed so that a
        failed prediction never blocks the caller).
    """
    try:
        import glob

        title = episode_info.get("title", "")
        if not title:
            return None

        # Title shapes handled: "<title> N기 M화" or "<title> M화"
        # (the regex reads the number before 기 as the season and the
        # number before 화 as the episode).
        match = re.search(
            r"(?P<title>.*?)\s*((?P<season>\d+)기)?\s*((?P<epi_no>\d+)화)",
            title,
        )

        # Stand-in for the quality segment. It is sanitized together with
        # the rest of the name and swapped for "*" afterwards, so the
        # wildcard itself never goes through the sanitizer.
        # NOTE(review): presumably change_text_for_use_filename removes or
        # replaces characters that are invalid in file names; if "*" is one
        # of them, sanitizing the pattern directly would lose the wildcard
        # - confirm against the helper.
        quality_token = "QUALITYTOKEN"

        if match:
            content_title = match.group("title").strip()
            season = int(match.group("season")) if match.group("season") else 1
            epi_no = int(match.group("epi_no"))
            filename = "%s.S%02dE%02d.%s-OHNI24.mp4" % (
                content_title,
                season,
                epi_no,
                quality_token,
            )
        else:
            # Fallback for titles that do not follow the episode pattern.
            content_title = title
            season = 1
            filename = "%s.%s-OHNI24.mp4" % (title, quality_token)

        filename = Util.change_text_for_use_filename(filename)

        # rpartition: the token we inserted is always the last occurrence,
        # even if the title happens to contain the same text.
        head, found, tail = filename.rpartition(quality_token)
        if not found:
            logger.debug("_predict_filepath: quality placeholder lost during sanitizing")
            return None
        filename_pattern = glob.escape(head) + "*" + glob.escape(tail)

        savepath = P.ModelSetting.get("ohli24_download_path")
        if not savepath:
            return None

        if P.ModelSetting.get_bool("ohli24_auto_make_folder"):
            # "day" may be absent or None; treat both as "no marker".
            day = episode_info.get("day") or ""
            if "완결" in day:
                folder_name = "%s %s" % (
                    P.ModelSetting.get("ohli24_finished_insert"),
                    content_title,
                )
            else:
                folder_name = content_title
            folder_name = Util.change_text_for_use_filename(folder_name.strip())
            savepath = os.path.join(savepath, folder_name)

            # NOTE(review): the season-folder option is applied only inside
            # the auto-folder branch; the nesting was inferred because the
            # original indentation was not recoverable - confirm.
            if P.ModelSetting.get_bool("ohli24_auto_make_season_folder"):
                savepath = os.path.join(savepath, "Season %s" % season)

        # Escape the directory too: folder names are built from the title.
        full_pattern = os.path.join(glob.escape(savepath), filename_pattern)

        # Sort so the "first" match is deterministic.
        matching_files = sorted(glob.glob(full_pattern))
        if matching_files:
            logger.debug(f"Found existing file: {matching_files[0]}")
            return matching_files[0]
        return None

    except Exception as e:
        # Deliberate best-effort: prediction problems must not stop the caller.
        logger.debug(f"_predict_filepath error: {e}")
        return None
def is_exist(self, info: Dict[str, Any]) -> bool: def is_exist(self, info: Dict[str, Any]) -> bool:
# print(self.queue) # print(self.queue)
@@ -1317,6 +1382,7 @@ class LogicOhli24(AnimeModuleBase):
return True return True
return False return False
def callback_function(self, **args: Any) -> None: def callback_function(self, **args: Any) -> None:
logger.debug(f"callback_function invoked with args: {args}") logger.debug(f"callback_function invoked with args: {args}")
if 'status' in args: if 'status' in args: