Subscription update

This commit is contained in:
BreadTube 2025-09-29 23:34:47 +09:00 committed by Corentin
commit f510cc5aef
4 changed files with 182 additions and 81 deletions

View file

@ -7,24 +7,30 @@ from pathlib import Path
import re import re
import time import time
import tomllib import tomllib
from typing import Any from typing import Any, TYPE_CHECKING
import traceback import traceback
from .config import Config from .config import Config
from .discord_manager import DiscordManager from .discord_manager import DiscordManager
from .logger import create_logger from .logger import create_logger
from .objects import (ChannelCategory, FileMime, Message, MessageReference, MessageReferenceType, Overwrite, from .objects import (ChannelCategory, FileMime, Message, MessageReference, MessageReferenceType, Overwrite,
OverwriteType, Permissions, Role, TextChannel) OverwriteType, Permissions, Role, TextChannel)
from .youtube_manager import YoutubeManager from .youtube_manager import YoutubeManager
from .youtube_subscription import SUBSCRIPTION_FILE_COLUMNS, SubscriptionHelper, Subscriptions from .youtube_subscription import SUBSCRIPTION_FILE_COLUMNS, SubscriptionHelper, SubscriptionInfo, Subscriptions
if TYPE_CHECKING:
from breadtube_bot.youtube_objects import SearchResultItem
class Bot: class Bot:
DEFAULT_MESSAGE_LIST_LIMIT = 50 DEFAULT_MESSAGE_LIST_LIMIT = 50
DISCORD_NAME_REGEX = r'([^a-z])'
INIT_MESSAGE = ('Bot initialized.\nThis is the current configuration used.\n' INIT_MESSAGE = ('Bot initialized.\nThis is the current configuration used.\n'
'You can upload a new one to update the configuration.') 'You can upload a new one to update the configuration.')
MAX_DOWNLOAD_SIZE: int = 50_000 MAX_DOWNLOAD_SIZE: int = 50_000
SUBS_LIST_MIN_SIZE = 50
SUBS_LIST_SHORTS_RATIO = 5
SUBS_LIST_VIDEO_RATIO = 2
class Task(Enum): class Task(Enum):
DELETE_MESSAGES = 1 DELETE_MESSAGES = 1
@ -40,7 +46,7 @@ class Bot:
def __init__(self, bot_token: str, guild_id: int, yt_api_key: str, config: Config | None = None, def __init__(self, bot_token: str, guild_id: int, yt_api_key: str, config: Config | None = None,
log_level: int = logging.INFO): log_level: int = logging.INFO):
self.config = config or Config() self.config: Config = config or Config()
self.guild_id = guild_id self.guild_id = guild_id
self.logger = create_logger('breadtube', log_level, stdout=True) self.logger = create_logger('breadtube', log_level, stdout=True)
@ -115,7 +121,7 @@ class Bot:
deny=Permissions.NONE)]}, deny=Permissions.NONE)]},
request_timeout=self.config.request_timeout) request_timeout=self.config.request_timeout)
def _get_bot_channel_messages(self) -> list[Message]: def _get_all_channel_messages(self, channel: TextChannel) -> list[Message]:
messages_id_delete_task: set[int] = set() messages_id_delete_task: set[int] = set()
for task_type, _, task_params in self.tasks: for task_type, _, task_params in self.tasks:
if task_type == self.Task.DELETE_MESSAGES: if task_type == self.Task.DELETE_MESSAGES:
@ -125,7 +131,7 @@ class Bot:
messages: list[Message] = [] messages: list[Message] = []
while True: while True:
message_batch = self.discord_manager.list_text_channel_messages( message_batch = self.discord_manager.list_text_channel_messages(
self.bot_channel, request_timeout=self.config.request_timeout, after_id=last_message_id) channel, request_timeout=self.config.request_timeout, after_id=last_message_id)
messages.extend([m for m in message_batch if m.id not in messages_id_delete_task]) messages.extend([m for m in message_batch if m.id not in messages_id_delete_task])
if len(message_batch) < self.DEFAULT_MESSAGE_LIST_LIMIT: if len(message_batch) < self.DEFAULT_MESSAGE_LIST_LIMIT:
break break
@ -133,7 +139,7 @@ class Bot:
return messages return messages
def _scan_bot_channel(self): # noqa: PLR0915 def _scan_bot_channel(self): # noqa: PLR0915
messages = self._get_bot_channel_messages() messages = self._get_all_channel_messages(self.bot_channel)
init_message_found = False init_message_found = False
new_config: Config | None = None new_config: Config | None = None
new_subscriptions: Subscriptions | None = None new_subscriptions: Subscriptions | None = None
@ -277,30 +283,9 @@ class Bot:
time.time() + self.config.bot_message_duration, time.time() + self.config.bot_message_duration,
list(delayed_delete.values()))) list(delayed_delete.values())))
def _init_subs(self): def _get_subscription_channel(self, subscription: SubscriptionInfo, channel_dict: dict[str, TextChannel],
categories, text_channel = self.discord_manager.list_channels( category_ranges: list[tuple[int, int, ChannelCategory]]) -> TextChannel:
self.guild_id, request_timeout=self.config.request_timeout) discord_name = re.sub(self.DISCORD_NAME_REGEX, '-', subscription.name.lower())
self.guild_text_channels = text_channel
self.guild_categories = categories
channel_dict: dict[str, TextChannel] = {c.name or '': c for c in self.guild_text_channels}
unmanaged_categories: set[str] = set(self.config.unmanaged_categories.split(','))
category_ranges: list[tuple[int, int, ChannelCategory]] = []
for category in self.guild_categories:
if category.name in unmanaged_categories:
self.logger.debug('Skipping unmanaged category: %s', category.name)
continue
range_info = (category.name or '').split('-')
if len(range_info) != 2: # noqa: PLR2004
self.logger.warning('Cannot compute range for category: %s', category.name)
continue
category_ranges.append((ord(range_info[0].lower()), ord(range_info[1].lower()), category))
category_ranges = sorted(category_ranges, key=operator.itemgetter(0))
name_regex = r'([^a-z])'
for sub_info in self._yt_subscriptions.values():
discord_name = sub_info.name.lower()
discord_name = re.sub(name_regex, '-', discord_name)
category_value = ord(discord_name[0]) category_value = ord(discord_name[0])
sub_channel: TextChannel | None = channel_dict.get(discord_name) sub_channel: TextChannel | None = channel_dict.get(discord_name)
if sub_channel is None: if sub_channel is None:
@ -322,21 +307,123 @@ class Bot:
allow=Permissions.VIEW_CHANNEL | Permissions.SEND_MESSAGES, allow=Permissions.VIEW_CHANNEL | Permissions.SEND_MESSAGES,
deny=Permissions.NONE)]}, deny=Permissions.NONE)]},
request_timeout=self.config.request_timeout) request_timeout=self.config.request_timeout)
return sub_channel
def _refresh_subscription(self, subscription: SubscriptionInfo):
    """Fetch the latest uploads of a subscription and merge them into its cached lists.

    New items returned by ``yt_manager.request_channel_videos`` are appended to
    ``subscription.shorts_list`` or ``subscription.video_list`` depending on
    ``yt_manager.is_shorts``. Both lists are then sorted by
    ``snippet.publishTime`` (newest first) and truncated, and
    ``subscription.last_update`` is set to the current time.

    Args:
        subscription: Subscription whose cached lists are updated in place.

    Raises:
        RuntimeError: Propagated from ``yt_manager.is_shorts``; the request
            helper may raise as well (not visible from this block).
    """
    # The YouTube Data API documents 0-50 as the acceptable range for the
    # `maxResults` parameter of `search.list`; never ask for more than that.
    youtube_max_results = 50
    video_count = self.config.youtube_channel_video_count
    request_timeout = self.config.request_timeout

    _, yt_video_info = self.yt_manager.request_channel_videos(
        channel_id=subscription.channel_id,
        max_results=min(youtube_max_results, self.SUBS_LIST_SHORTS_RATIO * video_count),
        request_timeout=request_timeout)

    # Ids already cached, so that each video is classified (one HTTP request) only once.
    video_ids = {v.id.videoId for v in subscription.shorts_list + subscription.video_list}
    for yt_info in yt_video_info.items:
        video_id = yt_info.id.videoId
        if video_id in video_ids:
            continue
        if self.yt_manager.is_shorts(video_id, request_timeout=request_timeout):
            subscription.shorts_list.append(yt_info)
        else:
            subscription.video_list.append(yt_info)
        video_ids.add(video_id)

    # NOTE(review): SUBS_LIST_MIN_SIZE is applied with `min`, i.e. it acts as an upper
    # bound on the cached list sizes despite its name -- confirm this is intended.
    internal_size = min(self.SUBS_LIST_MIN_SIZE, self.SUBS_LIST_SHORTS_RATIO * video_count)
    subscription.shorts_list = sorted(
        subscription.shorts_list, key=lambda x: x.snippet.publishTime, reverse=True)[:internal_size]

    internal_size = min(self.SUBS_LIST_MIN_SIZE, self.SUBS_LIST_VIDEO_RATIO * video_count)
    subscription.video_list = sorted(
        subscription.video_list, key=lambda x: x.snippet.publishTime, reverse=True)[:internal_size]

    subscription.last_update = time.time()
@staticmethod
def _video_message_content(video: SearchResultItem) -> str:
    """Build the message text associated with a YouTube search result item.

    Args:
        video: Search result item; only ``video.id.videoId`` is read.

    Returns:
        The URL string built from the video id.
    """
    # NOTE(review): YouTube's usual watch URL is `/watch?v=<id>`; confirm that the
    # `/video/<id>` form used here resolves as expected.
    video_id = video.id.videoId
    return f'https://www.youtube.com/video/{video_id}'
def _init_subs(self):
    """Synchronise the Discord channel of every subscription with its YouTube uploads.

    Steps, for the whole guild and then per subscription:

    1. List the guild channels and build the alphabetical category ranges from
       category names of the form ``<char>-<char>`` (unmanaged categories skipped).
    2. Resolve the subscription's text channel via ``_get_subscription_channel``.
    3. Fetch the YouTube channel info once, then refresh the cached video lists.
    4. If the channel does not end with the expected init message (the channel URL),
       wipe it and post that message; otherwise reconcile the posted video messages
       with the cached video list, deleting stale messages and posting missing videos.

    Raises:
        RuntimeError: When the YouTube API returns no channel info for a subscription.
    """
    self.logger.info('Initialize all subs')
    request_timeout = self.config.request_timeout
    categories, text_channel = self.discord_manager.list_channels(
        self.guild_id, request_timeout=request_timeout)
    self.guild_text_channels = text_channel
    self.guild_categories = categories
    channel_dict: dict[str, TextChannel] = {c.name or '': c for c in self.guild_text_channels}

    unmanaged_categories: set[str] = set(self.config.unmanaged_categories.split(','))
    category_ranges: list[tuple[int, int, ChannelCategory]] = []
    for category in self.guild_categories:
        if category.name in unmanaged_categories:
            self.logger.debug('Skipping unmanaged category: %s', category.name)
            continue
        range_info = (category.name or '').split('-')
        # `ord` only accepts single characters: a name such as "voice-chat" must be
        # reported like any other malformed range instead of raising TypeError.
        if len(range_info) != 2 or any(len(bound) != 1 for bound in range_info):  # noqa: PLR2004
            self.logger.warning('Cannot compute range for category: %s', category.name)
            continue
        category_ranges.append((ord(range_info[0].lower()), ord(range_info[1].lower()), category))
    category_ranges = sorted(category_ranges, key=operator.itemgetter(0))

    for sub_info in self._yt_subscriptions.values():
        try:
            sub_channel = self._get_subscription_channel(sub_info, channel_dict, category_ranges)
        except RuntimeError as error:
            self.logger.error(error)
            continue

        if sub_info.channel_info is None:
            _, channel_info = self.yt_manager.request_channel_info(
                sub_info.channel_id, request_timeout=request_timeout)
            if not channel_info.items:
                # Exceptions do not apply %-style arguments: format the message explicitly.
                raise RuntimeError(
                    f'No channel info return from YouTube API for channel: {sub_channel.name}')
            sub_info.channel_info = channel_info.items[0].snippet

        self._refresh_subscription(sub_info)
        sub_init_message = f'https://www.youtube.com/{sub_info.channel_info.customUrl}'
        sub_messages = self._get_all_channel_messages(sub_channel)
        # The init message is expected in last position of the listing
        # (presumably the oldest message -- verify against the listing order).
        if not sub_messages or sub_messages[-1].content != sub_init_message:
            self.logger.debug('Clearing sub channel: %s', sub_channel.name)
            for message in sub_messages:
                self.discord_manager.delete_message(message, request_timeout=request_timeout)
            _ = self.discord_manager.create_message(
                sub_channel, {'content': sub_init_message}, request_timeout=request_timeout)
        else:
            video_count = self.config.youtube_channel_video_count
            # Both sequences are reversed so that they are walked in the same direction.
            messages = list(reversed(sub_messages[:-1][:video_count]))
            yt_videos = list(reversed(sub_info.video_list[:video_count]))
            # Everything beyond the configured video count (init message excluded) is removed.
            immediate_delete: dict[int, Message] = {m.id: m for m in sub_messages[video_count:-1]}

            last_matching_index = 0
            stop_scan = False
            for yt_video in yt_videos:
                for index, message in enumerate(messages[last_matching_index:], start=last_matching_index):
                    if message.content != self._video_message_content(yt_video):
                        if last_matching_index != 0:
                            # A mismatch after at least one match ends the whole scan.
                            stop_scan = True
                            break
                        self.logger.debug('Unmatched video: %s', yt_video.id.videoId)
                        immediate_delete[message.id] = message
                    else:
                        self.logger.debug('Matched video: %s', yt_video.id.videoId)
                        last_matching_index = index + 1
                        break
                else:
                    # Inner loop exhausted without a match or a stop: no message left to compare.
                    self.logger.debug('All videos scanned')
                    break
                if stop_scan:
                    break

            for message in messages[last_matching_index:]:
                immediate_delete[message.id] = message
            for message in immediate_delete.values():
                try:
                    self.discord_manager.delete_message(message, request_timeout=request_timeout)
                except RuntimeError as error:
                    self.logger.error('Error deleting message %s from channel %s : %s',
                                      message.id, sub_channel.name, error)
            # NOTE(review): `last_matching_index` is an index into `messages` but is reused
            # here to slice `yt_videos` -- confirm both sequences stay aligned.
            for video in yt_videos[last_matching_index:]:
                _ = self.discord_manager.create_message(
                    sub_channel, {'content': self._video_message_content(video)},
                    request_timeout=request_timeout)
        sub_info.last_update = time.time()
def run(self): def run(self):
while True: while self.tasks:
if self.tasks:
self.tasks = sorted(self.tasks, key=operator.itemgetter(1), reverse=True) self.tasks = sorted(self.tasks, key=operator.itemgetter(1), reverse=True)
task_type, task_time, task_params = self.tasks.pop() task_type, task_time, task_params = self.tasks.pop()
sleep_time = task_time - time.time() sleep_time = task_time - time.time()
@ -374,4 +461,5 @@ class Bot:
except Exception as error: except Exception as error:
self.logger.error('Error initializing subscriptions : %s -> %s', self.logger.error('Error initializing subscriptions : %s -> %s',
error, traceback.format_exc().replace('\n', ' | ')) error, traceback.format_exc().replace('\n', ' | '))
time.sleep(1) self.tasks.append((
self.Task.INIT_SUBS, time.time() + self.config.youtube_channel_refresh_interval, None))

View file

@ -1,13 +1,14 @@
from __future__ import annotations from __future__ import annotations
from dataclasses import dataclass from dataclasses import dataclass
import http.client
import json import json
import time import time
from typing import TYPE_CHECKING from typing import TYPE_CHECKING
import urllib.error import urllib.error
import urllib.request import urllib.request
from .yt_objects import ChannelResult, SearchResult from .youtube_objects import ChannelResult, SearchResult
if TYPE_CHECKING: if TYPE_CHECKING:
@ -17,6 +18,7 @@ if TYPE_CHECKING:
class YoutubeManager: class YoutubeManager:
DEFAULT_DAILY_REQUESTS = 10_000 DEFAULT_DAILY_REQUESTS = 10_000
SHORTS_CHECK_STATUS = 303
@dataclass @dataclass
class RateLimit: class RateLimit:
@ -56,6 +58,15 @@ class YoutubeManager:
except TimeoutError as error: except TimeoutError as error:
raise RuntimeError(f'Timeout calling YT API ({url}): {error}') from error raise RuntimeError(f'Timeout calling YT API ({url}): {error}') from error
def is_shorts(self, video_id: str, request_timeout: float) -> bool:
    """Tell whether a video is a YouTube short by probing its ``/shorts/`` URL.

    A GET request is sent to ``https://www.youtube.com/shorts/<video_id>``; the video
    is reported as a short when the response status differs from
    ``SHORTS_CHECK_STATUS`` (presumably the redirect YouTube answers for regular
    videos -- verify against live behaviour).

    Args:
        video_id: YouTube video id inserted in the probed path.
        request_timeout: Timeout, in seconds, given to the HTTPS connection.

    Returns:
        ``True`` when the response status is not ``SHORTS_CHECK_STATUS``.

    Raises:
        RuntimeError: On any exception raised while performing the request.
    """
    connection = None
    try:
        connection = http.client.HTTPSConnection('www.youtube.com', timeout=request_timeout)
        connection.request('GET', f'/shorts/{video_id}')
        response = connection.getresponse()
        return response.status != self.SHORTS_CHECK_STATUS
    except Exception as error:
        raise RuntimeError(f'Exception calling YouTube shorts ({video_id}): {error}') from error
    finally:
        # Only the status line is needed: release the socket instead of leaking one
        # connection per checked video.
        if connection is not None:
            connection.close()
def request_channel_info(self, channel_id: str, request_timeout: float) -> tuple[ def request_channel_info(self, channel_id: str, request_timeout: float) -> tuple[
HTTPHeaders, ChannelResult]: HTTPHeaders, ChannelResult]:
url = ('https://www.googleapis.com/youtube/v3/channels?part=snippet' url = ('https://www.googleapis.com/youtube/v3/channels?part=snippet'
@ -65,7 +76,7 @@ class YoutubeManager:
def request_channel_videos(self, channel_id: str, max_results: int, request_timeout: float) -> tuple[
        HTTPHeaders, SearchResult]:
    """Query the YouTube Data API ``search`` endpoint for a channel's videos, ordered by date.

    Args:
        channel_id: YouTube channel id sent as ``channelId``.
        max_results: Value sent as ``maxResults``.
        request_timeout: Timeout forwarded to ``self._request``.

    Returns:
        The response headers and the payload parsed with ``SearchResult.from_dict``.
    """
    # Local import keeps this block self-contained within the file.
    from urllib.parse import urlencode

    # Percent-encode every value so that a malformed channel id (e.g. read from an
    # uploaded subscription file) cannot break the URL or inject query parameters.
    query = urlencode({
        'part': 'snippet',
        'channelId': channel_id,
        'maxResults': max_results,
        'order': 'date',
        'type': 'video',
        'key': self._api_key,
    })
    url = f'https://www.googleapis.com/youtube/v3/search?{query}'
    headers, info = self._request(url=url, request_timeout=request_timeout)
    return headers, SearchResult.from_dict(info)

View file

@ -1,6 +1,6 @@
from dataclasses import dataclass from dataclasses import dataclass, field
from .yt_objects import ChannelSnippet from .youtube_objects import ChannelSnippet, SearchResultItem
@dataclass @dataclass
@ -9,6 +9,8 @@ class SubscriptionInfo:
channel_id: str channel_id: str
last_update: float last_update: float
channel_info: ChannelSnippet | None = None channel_info: ChannelSnippet | None = None
shorts_list: list[SearchResultItem] = field(default_factory=list)
video_list: list[SearchResultItem] = field(default_factory=list)
Subscriptions = dict[str, SubscriptionInfo] Subscriptions = dict[str, SubscriptionInfo]