Files
youtube-dl/cleanup_duplicates.py

53 lines
2.0 KiB
Python

import sys
import os
import unicodedata
sys.path.append('/Volumes/WD/Users/Work/python/flaskfarm')
from framework import app, db
from system.logic import SystemLogic
# 플러그인 모듈 로드
from data.plugins.gds_dviewer.model_file_index import FileIndex
def cleanup_duplicates(parent_path):
    """Delete duplicate FileIndex rows under one folder, keeping one per name.

    Rows whose ``parent_path`` equals the given value are loaded and bucketed
    by the NFC-normalized form of their ``name``, so names that differ only
    in Unicode normalization fall into the same bucket. In every bucket that
    holds more than one row, exactly one row survives: rows with a truthy
    ``meta_id`` are preferred over rows without, and among those the row
    with the smallest ``id`` wins. Every other row in the bucket is deleted
    through ``db.session``; a single commit is issued at the end, and only
    when at least one row was deleted.

    Progress and the final outcome are reported with ``print``.

    Args:
        parent_path: Value matched against ``FileIndex.parent_path``.
    """
    with app.app_context():
        # Load every entry registered under the requested folder.
        entries = FileIndex.query.filter_by(parent_path=parent_path).all()
        print(f"Total items in {parent_path}: {len(entries)}")

        # Bucket the entries by the NFC form of their name.
        by_name = {}
        for entry in entries:
            key = unicodedata.normalize('NFC', entry.name)
            by_name.setdefault(key, []).append(entry)

        removed = 0
        for nfc_name, bucket in by_name.items():
            if len(bucket) <= 1:
                continue
            print(f"Found duplicate: {nfc_name} (Count: {len(bucket)})")

            # Best candidate first: rows with metadata sort ahead of rows
            # without (False < True on "not meta_id"), then lowest id first.
            bucket.sort(key=lambda row: (not row.meta_id, row.id))

            # Head of the ranking survives; everything after it is deleted.
            survivor, *extras = bucket
            print(f" Keep: ID={survivor.id}, Meta={survivor.meta_id}, Name={survivor.name}")
            for extra in extras:
                print(f" REMOVE: ID={extra.id}, Meta={extra.meta_id}, Name={extra.name}")
                db.session.delete(extra)
                removed += 1

        if removed == 0:
            print("No duplicates found to delete.")
        else:
            db.session.commit()
            print(f"Deleted {removed} duplicate items.")
if __name__ == "__main__":
    # Script entry point: de-duplicate the single folder hard-coded below.
    cleanup_duplicates(parent_path='VIDEO/방송중/라프텔 애니메이션')