From 40856d7cd9284d7844b6d1036850386cf68aec83 Mon Sep 17 00:00:00 2001 From: "Karina J. Kwiatek" Date: Mon, 5 Aug 2024 22:23:15 +0200 Subject: [PATCH 1/6] Separate out bot --- main.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 main.py diff --git a/main.py b/main.py new file mode 100644 index 0000000..e69de29 From dc80b8a4b4d0a9b2b3c1c246ab340d6493598dc3 Mon Sep 17 00:00:00 2001 From: "Karina J. Kwiatek" Date: Mon, 5 Aug 2024 22:27:18 +0200 Subject: [PATCH 2/6] Move main.py into src --- main.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 main.py diff --git a/main.py b/main.py deleted file mode 100644 index e69de29..0000000 From b6156e3fc67ce91e09de8afe59b2902b25263b6a Mon Sep 17 00:00:00 2001 From: "Karina J. Kwiatek" Date: Tue, 6 Aug 2024 01:14:38 +0200 Subject: [PATCH 3/6] Send blog posts from RSS feed into text channel --- requirements.in | 2 ++ requirements.txt | 8 +++++++ src/constants.py | 4 ++++ src/main.py | 13 ++++++++++- src/rss.py | 58 ++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 84 insertions(+), 1 deletion(-) create mode 100644 src/rss.py diff --git a/requirements.in b/requirements.in index 85de664..da9cb1f 100644 --- a/requirements.in +++ b/requirements.in @@ -1,2 +1,4 @@ python-dotenv discord.py +feedparser +beautifulsoup4 diff --git a/requirements.txt b/requirements.txt index af0553f..0d72f01 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,8 +14,12 @@ async-timeout==4.0.3 # via aiohttp attrs==24.1.0 # via aiohttp +beautifulsoup4==4.12.3 + # via -r requirements.in discord-py==2.4.0 # via -r requirements.in +feedparser==6.0.11 + # via -r requirements.in frozenlist==1.4.1 # via # aiohttp @@ -28,5 +32,9 @@ multidict==6.0.5 # yarl python-dotenv==1.0.1 # via -r requirements.in +sgmllib3k==1.0.0 + # via feedparser +soupsieve==2.5 + # via beautifulsoup4 yarl==1.9.4 # via aiohttp diff --git a/src/constants.py b/src/constants.py index d129ec4..a8c41d3 100644 --- a/src/constants.py +++ b/src/constants.py @@ -23,3 +23,7 @@ TEAM_CATEGORY_NAME = "Team Channels" TEAM_VOICE_CATEGORY_NAME = "Team Voice Channels" TEAM_LEADER_ROLE = "Team Supervisor" + +FEED_URL = "https://studentrobotics.org/feed.xml" +FEED_CHANNEL_NAME = "blog" +FEED_CHECK_INTERVAL = 10 # seconds diff --git a/src/main.py b/src/main.py index 9e48628..ffb5d27 100644 --- a/src/main.py +++ b/src/main.py @@ -1,3 +1,4 @@ +import asyncio import os import sys import logging @@ -6,6 +7,7 @@ from discord import Intents from src.bot import BotClient +from rss import post_check_timer logger = logging.getLogger("srbot") logger.setLevel(logging.INFO) @@ -24,4 +26,13 @@ exit(1) bot = BotClient(logger=logger, intents=intents) - bot.run(token) + loop = asyncio.get_event_loop() + +try: + loop.create_task(post_check_timer(bot)) + loop.run_until_complete(bot.start(token)) +except KeyboardInterrupt: + loop.run_until_complete(bot.close()) + # cancel all tasks lingering +finally: + loop.close() diff --git a/src/rss.py b/src/rss.py new file mode 100644 index 0000000..710bb8d --- /dev/null +++ b/src/rss.py @@ -0,0 +1,58 @@ +import asyncio +from typing import Optional + +import discord +import feedparser +from bs4 import BeautifulSoup +from feedparser import FeedParserDict + +from src.bot import BotClient +from src.constants import FEED_URL, FEED_CHECK_INTERVAL, FEED_CHANNEL_NAME + + +def get_feed_channel(bot: BotClient) -> discord.TextChannel: + for channel in bot.get_all_channels(): + if channel.name == FEED_CHANNEL_NAME: + return channel + + +async def get_last_blog_post(channel: discord.TextChannel) -> str | None: + # TODO: This doesn't work when the bot is restarted, store the URL instead + last_message: Optional[discord.Message] = channel.last_message + if last_message is not None and len(last_message.embeds) > 0: + return last_message.embeds[0].url + + return None + + +async def check_posts(bot: BotClient): + feed = feedparser.parse(FEED_URL) + channel = get_feed_channel(bot) + post = feed.entries[0] + newest_post_url = post.link + last_message_url = await get_last_blog_post(channel) + if newest_post_url != last_message_url: + await channel.send(embed=create_embed(post)) + + +def create_embed(post: FeedParserDict) -> discord.Embed: + soup = BeautifulSoup(post.content[0].value, 'html.parser') + + embed = discord.Embed( + title=post.title, + type="article", + url=post.link, + description=soup.p.text, + ) + + if len(post.media_thumbnail) > 0: + embed.set_image(url=post.media_thumbnail[0]['url']) + + return embed + + +async def post_check_timer(bot: BotClient): + await bot.wait_until_ready() + while True: + await check_posts(bot) + await asyncio.sleep(FEED_CHECK_INTERVAL) From caf06a7174e56d621e168dc7cad2aa910277a0a4 Mon Sep 17 00:00:00 2001 From: "Karina J. Kwiatek" Date: Tue, 6 Aug 2024 10:57:21 +0200 Subject: [PATCH 4/6] Keep track of seen blog posts --- .gitignore | 3 ++- src/rss.py | 25 +++++++++++++++---------- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/.gitignore b/.gitignore index 15cbfe1..7686201 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ .vscode/* log.log -.env \ No newline at end of file +.env +seen_posts.txt diff --git a/src/rss.py b/src/rss.py index 710bb8d..d181482 100644 --- a/src/rss.py +++ b/src/rss.py @@ -1,5 +1,6 @@ import asyncio -from typing import Optional +import os +from typing import List import discord import feedparser @@ -16,23 +17,27 @@ def get_feed_channel(bot: BotClient) -> discord.TextChannel: return channel -async def get_last_blog_post(channel: discord.TextChannel) -> str | None: - # TODO: This doesn't work when the bot is restarted, store the URL instead - last_message: Optional[discord.Message] = channel.last_message - if last_message is not None and len(last_message.embeds) > 0: - return last_message.embeds[0].url +def get_seen_posts() -> List[str]: + if os.path.exists('seen_posts.txt'): + with open('seen_posts.txt', 'r') as f: + return f.readlines() - return None + return [] + + +def add_seen_post(post_id: str) -> None: + with open('seen_posts.txt', 'a') as f: + f.write(post_id + '\n') async def check_posts(bot: BotClient): feed = feedparser.parse(FEED_URL) channel = get_feed_channel(bot) post = feed.entries[0] - newest_post_url = post.link - last_message_url = await get_last_blog_post(channel) - if newest_post_url != last_message_url: + + if post.id + "\n" not in get_seen_posts(): await channel.send(embed=create_embed(post)) + add_seen_post(post.id) def create_embed(post: FeedParserDict) -> discord.Embed: From 8eaa9079118aae68be0bfb68d727704769a4ab75 Mon Sep 17 00:00:00 2001 From: "Karina J. Kwiatek" Date: Tue, 6 Aug 2024 13:36:38 +0200 Subject: [PATCH 5/6] Use discordpy's tasks to check RSS --- src/bot.py | 18 ++++++++++++++++++ src/main.py | 13 +------------ src/rss.py | 21 +++------------------ 3 files changed, 22 insertions(+), 30 deletions(-) diff --git a/src/bot.py b/src/bot.py index 0e6b5d5..2f8e77d 100644 --- a/src/bot.py +++ b/src/bot.py @@ -15,6 +15,7 @@ ANNOUNCE_CHANNEL_NAME, WELCOME_CATEGORY_NAME, PASSWORDS_CHANNEL_NAME, + FEED_CHECK_INTERVAL, ) from src.commands.join import join from src.commands.team import ( @@ -26,6 +27,10 @@ create_team_channel, ) +from discord.ext import tasks + +from src.rss import check_posts + class BotClient(discord.Client): logger: logging.Logger @@ -66,6 +71,9 @@ async def setup_hook(self) -> None: self.tree.copy_global_to(guild=self.guild) await self.tree.sync(guild=self.guild) + async def setup_hook(self) -> None: + self.check_for_new_blog_posts.start() + async def on_ready(self) -> None: self.logger.info(f"{self.user} has connected to Discord!") guild = self.get_guild(self.guild.id) @@ -134,6 +142,16 @@ async def on_member_remove(self, member: discord.Member) -> None: await channel.delete() self.logger.info(f"Deleted channel '{channel.name}', because it has no users.") + @tasks.loop(seconds=FEED_CHECK_INTERVAL) + async def check_for_new_blog_posts(self): + self.logger.info("Checking for new blog posts") + await check_posts(self.get_guild(int(os.getenv('DISCORD_GUILD_ID')))) + + @check_for_new_blog_posts.before_loop + async def before_check_for_new_blog_posts(self): + await self.wait_until_ready() + + async def load_passwords(self) -> AsyncGenerator[Tuple[str, str], None]: """ Returns a mapping from role name to the password for that role. diff --git a/src/main.py b/src/main.py index ffb5d27..9e48628 100644 --- a/src/main.py +++ b/src/main.py @@ -1,4 +1,3 @@ -import asyncio import os import sys import logging @@ -7,7 +6,6 @@ from discord import Intents from src.bot import BotClient -from rss import post_check_timer logger = logging.getLogger("srbot") logger.setLevel(logging.INFO) @@ -26,13 +24,4 @@ exit(1) bot = BotClient(logger=logger, intents=intents) - loop = asyncio.get_event_loop() - -try: - loop.create_task(post_check_timer(bot)) - loop.run_until_complete(bot.start(token)) -except KeyboardInterrupt: - loop.run_until_complete(bot.close()) - # cancel all tasks lingering -finally: - loop.close() + bot.run(token) diff --git a/src/rss.py b/src/rss.py index d181482..2acb5a3 100644 --- a/src/rss.py +++ b/src/rss.py @@ -1,4 +1,3 @@ -import asyncio import os from typing import List @@ -7,14 +6,7 @@ from bs4 import BeautifulSoup from feedparser import FeedParserDict -from src.bot import BotClient -from src.constants import FEED_URL, FEED_CHECK_INTERVAL, FEED_CHANNEL_NAME - - -def get_feed_channel(bot: BotClient) -> discord.TextChannel: - for channel in bot.get_all_channels(): - if channel.name == FEED_CHANNEL_NAME: - return channel +from src.constants import FEED_URL, FEED_CHANNEL_NAME def get_seen_posts() -> List[str]: @@ -30,9 +22,9 @@ def add_seen_post(post_id: str) -> None: f.write(post_id + '\n') -async def check_posts(bot: BotClient): +async def check_posts(guild: discord.Guild) -> None: feed = feedparser.parse(FEED_URL) - channel = get_feed_channel(bot) + channel = discord.utils.get(guild.channels, name=FEED_CHANNEL_NAME) post = feed.entries[0] if post.id + "\n" not in get_seen_posts(): @@ -54,10 +46,3 @@ def create_embed(post: FeedParserDict) -> discord.Embed: embed.set_image(url=post.media_thumbnail[0]['url']) return embed - - -async def post_check_timer(bot: BotClient): - await bot.wait_until_ready() - while True: - await check_posts(bot) - await asyncio.sleep(FEED_CHECK_INTERVAL) From 87f5bca9e87597d6d72c764e77e1f37967b15dcc Mon Sep 17 00:00:00 2001 From: "Karina J. Kwiatek" Date: Tue, 6 Aug 2024 23:14:13 +0200 Subject: [PATCH 6/6] Fix lint errors --- requirements.in | 1 + requirements.txt | 4 ++++ script/requirements.txt | 18 ++++++++++++++++++ setup.cfg | 3 +++ src/bot.py | 24 ++++++++++++------------ src/rss.py | 11 +++++++---- 6 files changed, 45 insertions(+), 16 deletions(-) diff --git a/requirements.in b/requirements.in index da9cb1f..28b2b20c 100644 --- a/requirements.in +++ b/requirements.in @@ -2,3 +2,4 @@ python-dotenv discord.py feedparser beautifulsoup4 +types-beautifulsoup4 diff --git a/requirements.txt b/requirements.txt index 0d72f01..f3d3012 100644 --- a/requirements.txt +++ b/requirements.txt @@ -36,5 +36,9 @@ sgmllib3k==1.0.0 # via feedparser soupsieve==2.5 # via beautifulsoup4 +types-beautifulsoup4==4.12.0.20240511 + # via -r requirements.in +types-html5lib==1.1.11.20240806 + # via types-beautifulsoup4 yarl==1.9.4 # via aiohttp diff --git a/script/requirements.txt b/script/requirements.txt index 1cf361f..d68b71f 100644 --- a/script/requirements.txt +++ b/script/requirements.txt @@ -24,12 +24,16 @@ attrs==24.1.0 # via # -r script/../requirements.txt # aiohttp +beautifulsoup4==4.12.3 + # via -r script/../requirements.txt build==1.0.3 # via pip-tools click==8.1.7 # via pip-tools discord-py==2.4.0 # via -r script/../requirements.txt +feedparser==6.0.11 + # via -r script/../requirements.txt flake8==6.1.0 # via # -r script/requirements.in @@ -93,14 +97,28 @@ pyproject-hooks==1.0.0 # via build python-dotenv==1.0.1 # via -r script/../requirements.txt +sgmllib3k==1.0.0 + # via + # -r script/../requirements.txt + # feedparser six==1.16.0 # via flake8-tuple +soupsieve==2.5 + # via + # -r script/../requirements.txt + # beautifulsoup4 tomli==2.0.1 # via # build # mypy # pip-tools # pyproject-hooks +types-beautifulsoup4==4.12.0.20240511 + # via -r script/../requirements.txt +types-html5lib==1.1.11.20240806 + # via + # -r script/../requirements.txt + # types-beautifulsoup4 typing-extensions==4.7.1 # via mypy wheel==0.41.2 diff --git a/setup.cfg b/setup.cfg index d3f12e2..a6569d5 100644 --- a/setup.cfg +++ b/setup.cfg @@ -52,3 +52,6 @@ strict_equality = True scripts_are_modules = True warn_unused_configs = True + +[mypy-feedparser.*] +ignore_missing_imports = True diff --git a/src/bot.py b/src/bot.py index 2f8e77d..3f2f860 100644 --- a/src/bot.py +++ b/src/bot.py @@ -6,16 +6,19 @@ import discord from discord import app_commands +from discord.ext import tasks +from src.rss import check_posts from src.constants import ( SPECIAL_ROLE, VERIFIED_ROLE, CHANNEL_PREFIX, VOLUNTEER_ROLE, + FEED_CHANNEL_NAME, + FEED_CHECK_INTERVAL, ANNOUNCE_CHANNEL_NAME, WELCOME_CATEGORY_NAME, PASSWORDS_CHANNEL_NAME, - FEED_CHECK_INTERVAL, ) from src.commands.join import join from src.commands.team import ( @@ -27,10 +30,6 @@ create_team_channel, ) -from discord.ext import tasks - -from src.rss import check_posts - class BotClient(discord.Client): logger: logging.Logger @@ -41,6 +40,7 @@ class BotClient(discord.Client): welcome_category: discord.CategoryChannel announce_channel: discord.TextChannel passwords_channel: discord.TextChannel + feed_channel: discord.TextChannel def __init__( self, @@ -54,7 +54,7 @@ def __init__( self.tree = app_commands.CommandTree(self) guild_id = os.getenv('DISCORD_GUILD_ID') if guild_id is None or not guild_id.isnumeric(): - logger.error("Invalid guild ID") + self.logger.error("Invalid guild ID") exit(1) self.guild = discord.Object(id=int(guild_id)) team = Team() @@ -70,8 +70,6 @@ async def setup_hook(self) -> None: # This copies the global commands over to your guild. self.tree.copy_global_to(guild=self.guild) await self.tree.sync(guild=self.guild) - - async def setup_hook(self) -> None: self.check_for_new_blog_posts.start() async def on_ready(self) -> None: @@ -88,6 +86,7 @@ async def on_ready(self) -> None: welcome_category = discord.utils.get(guild.categories, name=WELCOME_CATEGORY_NAME) announce_channel = discord.utils.get(guild.text_channels, name=ANNOUNCE_CHANNEL_NAME) passwords_channel = discord.utils.get(guild.text_channels, name=PASSWORDS_CHANNEL_NAME) + feed_channel = discord.utils.get(guild.text_channels, name=FEED_CHANNEL_NAME) if ( verified_role is None @@ -96,6 +95,7 @@ async def on_ready(self) -> None: or welcome_category is None or announce_channel is None or passwords_channel is None + or feed_channel is None ): logging.error("Roles and channels are not set up") exit(1) @@ -106,6 +106,7 @@ async def on_ready(self) -> None: self.welcome_category = welcome_category self.announce_channel = announce_channel self.passwords_channel = passwords_channel + self.feed_channel = feed_channel async def on_member_join(self, member: discord.Member) -> None: name = member.display_name @@ -143,15 +144,14 @@ async def on_member_remove(self, member: discord.Member) -> None: self.logger.info(f"Deleted channel '{channel.name}', because it has no users.") @tasks.loop(seconds=FEED_CHECK_INTERVAL) - async def check_for_new_blog_posts(self): + async def check_for_new_blog_posts(self) -> None: self.logger.info("Checking for new blog posts") - await check_posts(self.get_guild(int(os.getenv('DISCORD_GUILD_ID')))) + await check_posts(self.feed_channel) @check_for_new_blog_posts.before_loop - async def before_check_for_new_blog_posts(self): + async def before_check_for_new_blog_posts(self) -> None: await self.wait_until_ready() - async def load_passwords(self) -> AsyncGenerator[Tuple[str, str], None]: """ Returns a mapping from role name to the password for that role. diff --git a/src/rss.py b/src/rss.py index 2acb5a3..90e4e33 100644 --- a/src/rss.py +++ b/src/rss.py @@ -6,7 +6,7 @@ from bs4 import BeautifulSoup from feedparser import FeedParserDict -from src.constants import FEED_URL, FEED_CHANNEL_NAME +from src.constants import FEED_URL def get_seen_posts() -> List[str]: @@ -22,9 +22,8 @@ def add_seen_post(post_id: str) -> None: f.write(post_id + '\n') -async def check_posts(guild: discord.Guild) -> None: +async def check_posts(channel: discord.TextChannel) -> None: feed = feedparser.parse(FEED_URL) - channel = discord.utils.get(guild.channels, name=FEED_CHANNEL_NAME) post = feed.entries[0] if post.id + "\n" not in get_seen_posts(): @@ -34,12 +33,16 @@ async def check_posts(guild: discord.Guild) -> None: def create_embed(post: FeedParserDict) -> discord.Embed: soup = BeautifulSoup(post.content[0].value, 'html.parser') + text = "" + + if soup.p: + text = soup.p.text embed = discord.Embed( title=post.title, type="article", url=post.link, - description=soup.p.text, + description=text, ) if len(post.media_thumbnail) > 0: