from pathlib import Path
import re
import sys

from breadtube_bot.unidecode import DISCORD_PATTERN


UNIDECODE_DICT: dict[int, list[str]] = {}


def generate_dict(data_path: Path) -> None:
    """Fill ``UNIDECODE_DICT`` from the Perl table files found in *data_path*.

    Reads perl files from https://metacpan.org/pod/Text::Unidecode project.

    Every ``*.pm`` file directly inside *data_path* is processed in sorted
    order, except files whose text contains ``make_placeholder_map``, which
    are skipped.  For each remaining file:

    * the section number is the file stem, prefixed with ``0`` and parsed as
      a hexadecimal integer (presumably stems look like ``x00`` -- verify
      against the data files);
    * empty lines and lines starting with ``#`` are dropped, a trailing
      ``, # comment`` is reduced to ``,`` and every ``~`` becomes ``-``;
    * Perl ``q{...}`` / ``qq{...}`` literals are rewritten as single-quoted
      strings, with any quote character inside them replaced by ``_``;
    * each quoted string becomes one replacement entry: ``[?]`` entries, and
      entries 0 through 0x80 of section 0, are stored as ``_``; every other
      entry has ``DISCORD_PATTERN`` matches replaced by ``_`` and is
      lower-cased.

    Args:
        data_path: Directory containing the ``*.pm`` table files.

    Raises:
        AssertionError: If a file does not yield exactly 256 entries.
    """
    quote_pattern = re.compile(r'[\'"]([^\'"]*)["\']')
    perl_quote = re.compile(r'q+\{([^\}]*)\}')
    begin_comment_pattern = re.compile(r'^ *#')
    end_comment_pattern = re.compile(r', *#(.*)$')

    def perl_quote_repl(match_object: re.Match[str]) -> str:
        # Quote characters inside the literal would confuse quote_pattern
        # afterwards, so they are neutralised before re-quoting.
        new_char = match_object.group(1).replace('"', '_').replace("'", '_')
        return f"'{new_char}'"

    for file_path in sorted(data_path.glob('*.pm')):
        # Explicit encoding: do not depend on the platform's locale default
        # (the generated module is also written as UTF-8).
        content = file_path.read_text(encoding='utf-8').strip()
        if 'make_placeholder_map' in content:
            continue

        section = int(f'0{file_path.stem}', 16)
        content = ''.join(
            end_comment_pattern.sub(',', line).replace('~', '-')
            for line in content.splitlines()
            if line and begin_comment_pattern.match(line) is None
        )
        content = perl_quote.sub(perl_quote_repl, content)

        replace_chars: list[str] = []
        for index, char in enumerate(quote_pattern.findall(content)):
            # NOTE(review): the bound includes index 0x80 itself, one past the
            # ASCII range -- confirm this is intended.
            if char == '[?]' or (section == 0 and index <= 0x80):  # noqa: PLR2004
                replace_chars.append('_')
            else:
                replace_chars.append(DISCORD_PATTERN.sub('_', char).lower())

        # Raised explicitly rather than via ``assert`` so the size check still
        # runs under ``python -O``; the exception type and message are the same.
        if len(replace_chars) != 256:  # noqa: PLR2004
            raise AssertionError(f'Wrong size for {file_path.name}: {len(replace_chars)}')
        UNIDECODE_DICT[section] = replace_chars


if __name__ == '__main__':
    data_path = Path('data/unidecode')
    if not data_path.exists():
        print(f'No data found at path: {data_path}')
        sys.exit(1)
    if not data_path.is_dir():
        print(f'Path "{data_path}" is not a folder')
        sys.exit(1)

    generate_dict(data_path)

    # Emit the collected tables as an importable Python module.
    with Path('breadtube_bot/unidecode_data.py').open(mode='w', encoding='utf-8') as unidecode_file:
        unidecode_file.write('UNIDECODE_DICT: dict[int, list[str]] = {\n')
        for key, value in UNIDECODE_DICT.items():  # noqa: FURB122
            unidecode_file.write(f' {key}: {value},\n')
        unidecode_file.write('}\n')