import re
import traceback

from . import logger

# First syllable of each Korean index bucket, in code-point order.
_CATEGORY_HEADS = '가나다라마바사아자차카타파하'

# Hangul Compatibility Jamo (U+3130-U+318F) and Hangul Syllables
# (U+AC00-U+D7A3).
_HANGUL_CHAR_RE = re.compile('[\u3130-\u318F\uAC00-\uD7A3]')
_ASCII_LETTER_RE = re.compile('[a-zA-Z]')
_ASCII_DIGIT_RE = re.compile('[0-9]')

# Punctuation/symbols treated as "special". Written as a raw string so that
# no invalid string escapes are involved; the matched character set is the
# same as the historical pattern's. NOTE: a literal backslash has never been
# part of this set (in the old pattern it only escaped the following quote
# character), so it is intentionally not matched here either.
_SPECIAL_CHAR_RE = re.compile(r'''[-=+,#/?:^$.@*"※~&%ㆍ!』‘|()\[\]<>`'…》:]''')

# Characters stripped by remove_emoji(). The historical pattern listed many
# individual emoji blocks (emoticons, pictographs, transport symbols, flags,
# dingbats, ...), but all of them are subsets of the ranges below, so the
# matched set is unchanged.
_EMOJI_RE = re.compile(
    "["
    "\u200d"                 # zero width joiner (glues emoji sequences)
    "\u231a"                 # watch
    "\u23cf"                 # eject symbol
    "\u23e9"                 # fast-forward symbol
    "\u2500-\u2BEF"          # box drawing .. misc symbols and arrows
                             # (includes Miscellaneous Symbols and Dingbats)
    "\u3030"                 # wavy dash
    "\ufe0f"                 # variation selector-16 (emoji presentation)
    "\U00010000-\U0010FFFF"  # every supplementary-plane code point
    "]+",
    flags=re.UNICODE,
)


class SupportString(object):
    """String helpers focused on Korean (Hangul) text."""

    @classmethod
    def get_cate_char_by_first(cls, title):
        """Return the index bucket for the first character of *title*.

        Hangul syllables are grouped by their leading consonant row and the
        bucket is named after the first syllable of that row ('가', '나',
        ... '하'). Anything else, including an empty title, yields '0Z'.

        Args:
            title: The text whose first character selects the bucket.

        Returns:
            One of the characters in '가나다라마바사아자차카타파하', or '0Z'.
        """
        if not title:
            return '0Z'
        # Hangul has no case mapping, so the character is compared as-is;
        # calling upper() first could expand one character into two
        # (e.g. 'ß' -> 'SS') without ever producing a Hangul syllable.
        first = title[0]
        # Upper bound is inclusive so the last syllable '힣' lands in '하'.
        if not '가' <= first <= '힣':
            return '0Z'
        # Heads are in ascending order: the last head not greater than the
        # character is the bucket it belongs to.
        for head in reversed(_CATEGORY_HEADS):
            if first >= head:
                return head
        return '0Z'

    @classmethod
    def is_include_hangul(cls, text):
        """Return True if *text* contains at least one Hangul character.

        Both Hangul syllables and compatibility jamo count. Input that is
        not a string yields False instead of raising.
        """
        try:
            return _HANGUL_CHAR_RE.search(text) is not None
        except TypeError:
            # Non-string input: keep the historical "not Hangul" answer.
            return False

    @classmethod
    def language_info(cls, text):
        """Estimate how much of *text* is Hangul and how much is English.

        Spaces are dropped, then ASCII digits and special characters are
        excluded from the denominator; the remaining characters form the
        100% base.

        Args:
            text: The string to analyse.

        Returns:
            A tuple ``(han_percent, eng_percent)`` of truncated integer
            percentages, ``(0, 0)`` when nothing but digits and special
            characters remain (or the text is empty), or ``False`` when an
            error occurred (the error is logged).
        """
        try:
            text = text.strip().replace(' ', '')
            all_count = len(text)
            han_count = len(_HANGUL_CHAR_RE.findall(text))
            eng_count = len(_ASCII_LETTER_RE.findall(text))
            # NOTE: 'ㆍ' (U+318D) is in both the Hangul range and the
            # special-character set, so it is counted in both tallies;
            # this is kept as-is for backward compatibility.
            etc_count = len(_ASCII_DIGIT_RE.findall(text))
            etc_count += len(_SPECIAL_CHAR_RE.findall(text))
            countable = all_count - etc_count
            if countable == 0:
                return (0, 0)
            han_percent = han_count * 100 // countable
            eng_percent = eng_count * 100 // countable
            return (han_percent, eng_percent)
        except Exception as e:
            logger.error(f"Exception:{str(e)}")
            logger.error(traceback.format_exc())
            # Callers have always received False on failure; keep that.
            return False

    @classmethod
    def remove_special_char(cls, text):
        """Return *text* with every special character removed.

        The characters removed are the same ones that language_info()
        ignores when computing percentages.
        """
        return _SPECIAL_CHAR_RE.sub('', text)

    @classmethod
    def remove_emoji(cls, text, char=''):
        """Return *text* with emoji and symbol runs replaced by *char*.

        Each run of consecutive matching characters is replaced by a single
        *char*. Besides emoji, this strips box-drawing/geometric/misc symbol
        characters and every supplementary-plane character (code points
        above U+FFFF), which also covers rare CJK extension ideographs.

        Args:
            text: The string to clean.
            char: Replacement for each removed run; it is used as a regex
                replacement template, so backslashes in it are interpreted.

        Returns:
            The cleaned string.
        """
        return _EMOJI_RE.sub(char, text)