breadtube-bot/generated_unidecode_data.py
BreadTube d015927861 Special characters handling
* Fix README for python version
* Add discord-friendly unidecode function for channel names (avoiding
  special characters)
* Check if "items" is present before accessing in channel id request
  response
2026-02-03 01:30:41 +09:00

54 lines
2.2 KiB
Python

from pathlib import Path
import re
import sys
from breadtube_bot.unidecode import DISCORD_PATTERN
# Filled in by generate_dict(): maps a section number (parsed from the
# hexadecimal stem of each table file) to that file's 256 replacement strings.
UNIDECODE_DICT: dict[int, list[str]] = {}


def generate_dict(data_path: Path) -> None:
    """Populate ``UNIDECODE_DICT`` from the perl table files in *data_path*.

    Reads perl files from the https://metacpan.org/pod/Text::Unidecode project.
    Every ``*.pm`` file directly inside *data_path* is processed in sorted
    order, except files containing ``make_placeholder_map``, which are skipped.

    For each remaining file:

    * the section number is the file stem prefixed with ``0`` and parsed as
      base 16 (so ``x1f.pm`` becomes ``0x1f``);
    * empty lines and lines starting with ``#`` are dropped, trailing
      ``, # ...`` comments are reduced to ``,`` and ``~`` becomes ``-``;
    * perl ``q{...}`` / ``qq{...}`` literals are rewritten as single-quoted
      strings, with any quote character inside them replaced by ``_``;
    * every quoted string is then collected, in order, as one table entry.

    An entry becomes ``'_'`` when it is the ``[?]`` marker or when it sits at
    index ``0`` to ``0x80`` (inclusive) of section 0. Any other entry has each
    ``DISCORD_PATTERN`` match replaced by ``'_'`` and is lower-cased.

    Args:
        data_path: Folder holding the ``*.pm`` table files.

    Raises:
        AssertionError: If a file does not yield exactly 256 entries.
        ValueError: If a file stem cannot be parsed as a hexadecimal number.
    """
    quote_pattern = re.compile(r'[\'"]([^\'"]*)["\']')
    perl_quote = re.compile(r'q+\{([^\}]*)\}')
    begin_comment_pattern = re.compile(r'^ *#')
    end_comment_pattern = re.compile(r', *#(.*)$')

    def perl_quote_repl(match_object: re.Match[str]) -> str:
        # Quote characters inside a q{...} literal would confuse quote_pattern
        # later on, so they are neutralised before re-quoting the value.
        new_char = match_object.group(1).replace('"', '_').replace("'", '_')
        return f"'{new_char}'"

    for file_path in sorted(data_path.glob('*.pm')):
        # Explicit encoding keeps the parse independent of the platform locale,
        # matching the explicit utf-8 used when the generated module is written.
        content = file_path.read_text(encoding='utf-8').strip()
        if 'make_placeholder_map' in content:
            continue

        section = int(f'0{file_path.stem}', 16)
        content = ''.join(
            end_comment_pattern.sub(',', line).replace('~', '-')
            for line in content.splitlines()
            if line and begin_comment_pattern.match(line) is None
        )
        content = perl_quote.sub(perl_quote_repl, content)

        replace_chars: list[str] = []
        for index, char in enumerate(quote_pattern.findall(content)):
            # NOTE(review): the bound is inclusive, so index 0x80 of section 0 is
            # also forced to '_' -- confirm this is intended rather than `< 0x80`.
            if char == '[?]' or (section == 0 and index <= 0x80):  # noqa: PLR2004
                replace_chars.append('_')
            else:
                replace_chars.append(DISCORD_PATTERN.sub('_', char).lower())

        # Raised explicitly (not via `assert`) so the check survives `python -O`.
        if len(replace_chars) != 256:  # noqa: PLR2004
            raise AssertionError(f'Wrong size for {file_path.name}: {len(replace_chars)}')
        UNIDECODE_DICT[section] = replace_chars
if __name__ == '__main__':
    # Folder expected to hold the perl `.pm` table files, relative to the
    # current working directory.
    source_dir = Path('data/unidecode')

    # Work out which (if any) precondition fails, then report it and exit
    # with a non-zero status from a single place.
    problem = None
    if not source_dir.exists():
        problem = f'No data found at path: {source_dir}'
    elif not source_dir.is_dir():
        problem = f'Path "{source_dir}" is not a folder'
    if problem is not None:
        print(problem)
        sys.exit(1)

    generate_dict(source_dir)

    # Emit the collected table as an importable Python module: one line per
    # section, in the order the sections were inserted into UNIDECODE_DICT.
    with Path('breadtube_bot/unidecode_data.py').open(mode='w', encoding='utf-8') as output:
        output.write('UNIDECODE_DICT: dict[int, list[str]] = {\n')
        output.writelines(f' {section}: {chars},\n' for section, chars in UNIDECODE_DICT.items())
        output.write('}\n')