Implement subscription from RSS feed

This commit is contained in:
BreadTube 2026-01-09 00:25:10 +09:00
commit 835b9a42a1
Signed by untrusted user who does not match committer: corentin
GPG key ID: 48C87E27C6C917F4
11 changed files with 185 additions and 271 deletions

View file

@ -10,7 +10,7 @@ from pathlib import Path
import re
import time
import tomllib
from typing import Any, TYPE_CHECKING
from typing import Any
import traceback
from .config import Config
@ -19,10 +19,8 @@ from .logger import create_logger
from .objects import (ChannelCategory, FileMime, Message, MessageReference, MessageReferenceType, Overwrite,
OverwriteType, Permissions, Role, TextChannel)
from .youtube_manager import YoutubeManager
from .youtube_subscription import SUBSCRIPTION_FILE_COLUMNS, SubscriptionHelper, SubscriptionInfo, Subscriptions
if TYPE_CHECKING:
from breadtube_bot.youtube_objects import SearchResultItem
from .youtube_subscription import (
SUBSCRIPTION_FILE_COLUMNS, SubscriptionHelper, SubscriptionInfo, Subscriptions, VideoInfo)
class Bot:
@ -31,9 +29,6 @@ class Bot:
INIT_MESSAGE: str = ('Bot initialized.\nThis is the current configuration used.\n'
'You can upload a new one to update the configuration.')
MAX_DOWNLOAD_SIZE: int = 50_000
SUBS_LIST_MIN_SIZE: int = 50
SUBS_LIST_SHORTS_RATIO: int = 5
SUBS_LIST_VIDEO_RATIO: int = 2
SUBS_SAVE_PATH: Path = Path('/tmp/breadtube-bot_subs.json')
class Task(Enum):
@ -48,7 +43,7 @@ class Bot:
raise RuntimeError('Cannot current bot version')
return tomllib.loads(pyproject_path.read_text(encoding='utf-8'))['project']['version']
def __init__(self, bot_token: str, guild_id: int, yt_api_key: str, config: Config | None = None,
def __init__(self, bot_token: str, guild_id: int, config: Config | None = None,
log_level: int = logging.INFO):
self.config: Config = config or Config()
self.guild_id = guild_id
@ -93,10 +88,16 @@ class Bot:
raise RuntimeError("Couldn't initialize bot channel/role/permission")
self.bot_channel: TextChannel = bot_channel
self.yt_manager = YoutubeManager(api_key=yt_api_key, logger=self.logger)
self._yt_subscriptions: Subscriptions = {
name: SubscriptionInfo.from_dict(info) for name, info in json.loads(
self.SUBS_SAVE_PATH.read_text(encoding='utf-8')).items()} if self.SUBS_SAVE_PATH.exists() else {}
self.yt_manager = YoutubeManager(logger=self.logger)
self._yt_subscriptions: Subscriptions = {}
if self.SUBS_SAVE_PATH.exists():
try:
self._yt_subscriptions = {
name: SubscriptionInfo.from_dict(info) for name, info in json.loads(
self.SUBS_SAVE_PATH.read_text(encoding='utf-8')).items()}
except Exception:
self.logger.error('Cannot load saved subscriptions at path "%s" -> deleting', self.SUBS_SAVE_PATH)
self.SUBS_SAVE_PATH.unlink()
self._scan_bot_channel()
self.tasks.append((
self.Task.SCAN_BOT_CHANNEL, time.time() + self.config.bot_channel_scan_interval, None))
@ -322,59 +323,42 @@ class Bot:
request_timeout=self.config.request_timeout)
return sub_channel
def _refresh_subscription(self, subscription: SubscriptionInfo):
_, yt_video_info = self.yt_manager.request_channel_videos(
channel_id=subscription.channel_id,
max_results=self.SUBS_LIST_SHORTS_RATIO * self.config.youtube_channel_video_count,
request_timeout=self.config.request_timeout)
video_ids = {v.id.videoId for v in subscription.shorts_list + subscription.video_list}
yt_connection = http.client.HTTPSConnection('www.youtube.com', timeout=self.config.request_timeout)
for yt_info in yt_video_info.items:
if yt_info.id.videoId in video_ids:
continue
if self.yt_manager.is_shorts(yt_connection, yt_info.id.videoId):
subscription.shorts_list.append(yt_info)
else:
subscription.video_list.append(yt_info)
video_ids.add(yt_info.id.videoId)
internal_size = min(self.SUBS_LIST_MIN_SIZE,
self.SUBS_LIST_SHORTS_RATIO * self.config.youtube_channel_video_count)
subscription.shorts_list = sorted(
subscription.shorts_list, key=lambda x: x.snippet.publishTime, reverse=True)[:internal_size]
internal_size = min(self.SUBS_LIST_MIN_SIZE,
self.SUBS_LIST_VIDEO_RATIO * self.config.youtube_channel_video_count)
subscription.video_list = sorted(
subscription.video_list, key=lambda x: x.snippet.publishTime, reverse=True)[:internal_size]
def _refresh_subscription(self, connection: http.client.HTTPSConnection, subscription: SubscriptionInfo):
    """Fetch the channel's videos and merge the unseen ones into ``subscription``.

    The reply of ``self.yt_manager.request_channel_videos`` is unpacked as a
    3-tuple: the first element is ignored, the second fills
    ``subscription.channel_info`` when that is still ``None``, and the third
    is iterated as the fetched videos.

    Args:
        connection: HTTPS connection handed through to the YouTube manager.
        subscription: Subscription updated in place (``channel_info``,
            ``video_list`` and ``last_update``).
    """
    response = self.yt_manager.request_channel_videos(
        connection=connection, channel_id=subscription.channel_id)
    _, fetched_channel_info, fetched_videos = response
    if subscription.channel_info is None:
        subscription.channel_info = fetched_channel_info

    # Only videos whose id is not already stored count as new.
    known_ids: set[str] = {known.video_id for known in subscription.video_list}
    unseen = [candidate for candidate in fetched_videos if candidate.video_id not in known_ids]
    if unseen:
        # Newest first, capped at the configured number of videos per channel.
        merged = subscription.video_list + unseen
        merged.sort(key=lambda item: item.published, reverse=True)
        subscription.video_list = merged[:self.config.youtube_channel_video_count]
    # NOTE(review): kept at function level (runs even when nothing new arrived) — confirm.
    subscription.last_update = time.time()
def _video_message_content(self, video: VideoInfo, subscription: SubscriptionInfo) -> str:
    """Render ``self.config.youtube_channel_video_message`` for one video.

    Supported placeholders: ``{{video_id}}``, ``{{video_title}}`` (HTML
    entities unescaped), ``{{video_description}}``,
    ``{{video_publish_time}}`` (ISO format), ``{{channel_id}}`` and
    ``{{channel_title}}``.  The two channel placeholders fall back to
    ``NO_CHANNEL_ID`` / ``NO_CHANNEL_TITLE`` while
    ``subscription.channel_info`` is ``None``.

    Args:
        video: Video whose fields fill the ``video_*`` placeholders.
        subscription: Subscription providing the channel information.

    Returns:
        The template with every known placeholder substituted.
    """
    channel_info = subscription.channel_info
    substitutions: dict[str, str] = {
        '{{video_id}}': str(video.video_id),
        '{{video_title}}': str(html.unescape(video.title)),
        '{{video_description}}': str(video.description),
        '{{video_publish_time}}': video.published.isoformat(),
        '{{channel_id}}': (str(channel_info.channel_id)
                           if channel_info is not None else 'NO_CHANNEL_ID'),
        '{{channel_title}}': (str(channel_info.title)
                              if channel_info is not None else 'NO_CHANNEL_TITLE'),
    }
    # Substitute in a single pass: chained str.replace() calls would re-scan
    # already inserted text, so a title or description that itself contains
    # e.g. "{{channel_title}}" would be expanded a second time.
    placeholder_pattern = '|'.join(re.escape(placeholder) for placeholder in substitutions)
    return re.sub(placeholder_pattern, lambda match: substitutions[match.group(0)],
                  self.config.youtube_channel_video_message)
def _refresh_sub(self, subscription: SubscriptionInfo, channel_dict: dict[str, TextChannel],
category_ranges: list[tuple[int, int, ChannelCategory]]):
def _refresh_sub(self, connection: http.client.HTTPSConnection, subscription: SubscriptionInfo,
channel_dict: dict[str, TextChannel], category_ranges: list[tuple[int, int, ChannelCategory]]):
try:
sub_channel = self._get_subscription_channel(subscription, channel_dict, category_ranges)
except RuntimeError as error:
self.logger.error(error)
return
self._refresh_subscription(connection, subscription)
if subscription.channel_info is None:
_, channel_info = self.yt_manager.request_channel_info(
subscription.channel_id, request_timeout=self.config.request_timeout)
if not channel_info.items:
raise RuntimeError('No channel info return from YouTube API for channel: %s', sub_channel.name)
subscription.channel_info = channel_info.items[0].snippet
self._refresh_subscription(subscription)
sub_init_message = f'https://www.youtube.com/{subscription.channel_info.customUrl}'
raise RuntimeError('No channel info after refreshing subscription')
sub_init_message = f'https://www.youtube.com/{subscription.channel_info.url}'
sub_messages = self._get_all_channel_messages(sub_channel)
if not sub_messages or sub_messages[-1].content != sub_init_message:
self.logger.debug('Clearing sub channel: %s', sub_channel.name)
@ -391,14 +375,14 @@ class Bot:
stop_scan = False
for yt_video in yt_videos:
for index, message in enumerate(messages[last_matching_index:], start=last_matching_index):
if message.content != self._video_message_content(yt_video):
if message.content != self._video_message_content(yt_video, subscription):
if last_matching_index != 0:
stop_scan = True
break
self.logger.debug('Unmatched video: %s', yt_video.id.videoId)
self.logger.debug('Unmatched video: %s', yt_video.video_id)
immediate_delete[message.id] = message
else:
self.logger.debug('Matched video: %s', yt_video.id.videoId)
self.logger.debug('Matched video: %s', yt_video.video_id)
last_matching_index = index + 1
break
else:
@ -417,7 +401,7 @@ class Bot:
message.id, sub_channel.name, error)
for video in yt_videos[last_matching_index:]:
_ = self.discord_manager.create_message(
sub_channel, {'content': self._video_message_content(video)},
sub_channel, {'content': self._video_message_content(video, subscription)},
request_timeout=self.config.request_timeout)
subscription.last_update = time.time()
@ -443,15 +427,20 @@ class Bot:
category_ranges.append((ord(range_info[0].lower()), ord(range_info[1].lower()), category))
category_ranges = sorted(category_ranges, key=operator.itemgetter(0))
yt_connection = http.client.HTTPSConnection('www.youtube.com', timeout=self.config.request_timeout)
sorted_subs = sorted(self._yt_subscriptions.values(), key=lambda s: s.last_update)
for sub_info in sorted_subs:
try:
self._refresh_sub(sub_info, channel_dict, category_ranges)
self._refresh_sub(yt_connection, sub_info, channel_dict, category_ranges)
except RuntimeError as error:
self.logger.error('Refreshing subscription %s failed: %s', sub_info.channel_id, error)
except TimeoutError as error:
self.logger.error('Timeout error refreshing subcription: %s', error)
break
except Exception as error:
self.logger.error('Refreshing subscription %s unexpectedly failed: %s', sub_info.channel_id, error)
break
yt_connection.close()
self.logger.info('Subs refreshed')
def run(self):