From 7ced687b714213445a4a89897762ada906859b54 Mon Sep 17 00:00:00 2001 From: Edward 'Toy' Facundo Date: Fri, 5 Jan 2024 14:44:01 +0000 Subject: [PATCH] add: adds opml podcast manipulation with tests --- .gitignore | 2 + podcasts/README.md | 55 +++++++++++ podcasts/opml.py | 180 ++++++++++++++++++++++++++++++++++++ podcasts/tests.py | 221 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 458 insertions(+) create mode 100644 podcasts/README.md create mode 100644 podcasts/opml.py create mode 100644 podcasts/tests.py diff --git a/.gitignore b/.gitignore index aa44ee2..e719d15 100644 --- a/.gitignore +++ b/.gitignore @@ -128,3 +128,5 @@ dmypy.json # Pyre type checker .pyre/ +# Podcasts Manager +*.opml \ No newline at end of file diff --git a/podcasts/README.md b/podcasts/README.md new file mode 100644 index 0000000..e51c6e2 --- /dev/null +++ b/podcasts/README.md @@ -0,0 +1,55 @@ +## Summary +The `OPMLManager` class is a class for managing OPML (Outline Processor Markup Language) files. It provides methods for reading, extracting feeds, adding and removing feeds, saving changes, and updating feed images. 
+ +## Example Usage +```python +# Create an instance of OPMLManager +manager = OPMLManager('path/to/opml/file.opml') + +# Read the OPML file and get the root element +opml_root = manager.read_opml() + +# Extract feeds from the OPML file +feeds = manager.extract_feeds() + +# Add a new feed to the OPML file +manager.add_feed('New Feed', 'https://example.com/feed.xml') + +# Remove a feed from the OPML file +manager.remove_feed_by_url('https://example.com/feed.xml') + +# Save the changes made to the OPML file +manager.save_opml() + +# Get the image URL for a feed +image_url = manager.get_image_url('https://example.com/feed.xml') + +# Download a feed image into a local cache directory +manager.update_image('https://example.com/image.jpg', 'path/to/cache') +``` + +## Code Analysis +### Main functionalities +The main functionalities of the `OPMLManager` class are: +- Reading an OPML file and getting the root element. +- Extracting feeds from the OPML file and returning a list of dictionaries containing feed information. +- Adding a new feed to the OPML file. +- Removing a feed from the OPML file based on its URL. +- Saving the changes made to the OPML file. +- Getting the image URL for a feed based on its URL. +- Updating the image for a feed by downloading it from a provided URL and saving it in a local cache directory. +___ +### Methods +- `__init__(self, file_path)`: Initializes the `OPMLManager` instance with the file path of the OPML file. +- `read_opml(self)`: Reads the OPML file and returns the root element. +- `extract_feeds(self)`: Extracts feeds from the OPML file and returns a list of dictionaries containing feed information. +- `add_feed(self, title, new_feed_url, type='rss', html=None, imageUrl=None)`: Adds a new feed to the OPML file. +- `remove_feed_by_url(self, feed_url)`: Removes a feed from the OPML file based on its URL. +- `save_opml(self)`: Saves the changes made to the OPML file. +- `get_image_url(self, feed_url)`: Returns the image URL for a feed based on its URL. 
"""
OPML feed management.

This module provides the OPMLManager class, which reads an OPML (Outline
Processor Markup Language) file, lists/adds/removes the feeds stored in it,
writes the document back to disk and caches feed images locally.
"""

import hashlib
import os
import os.path
import shutil
import xml.etree.ElementTree as ET


class OPMLManager:
    """
    A class for managing OPML (Outline Processor Markup Language) files.

    Attributes:
        file_path (str): The file path of the OPML file.
        opml_root (Element): The root element of the parsed OPML document.
        feeds (list): Result of the most recent extract_feeds() call
            (empty until extract_feeds() has been called).
    """

    class OPMLParseError(Exception):
        """Raised when the OPML file is not well-formed XML."""

    class CustomException(Exception):
        """Generic OPMLManager failure (name kept for backward compatibility)."""

    class ImageDownloadError(CustomException):
        """Raised when a feed image cannot be downloaded or cached.

        Derives from CustomException so that callers catching either
        exception type handle a failed image download.
        """

    def __init__(self, file_path):
        """
        Initializes the OPMLManager instance and parses the OPML file.

        Args:
            file_path (str): The file path of the OPML file.

        Raises:
            OPMLManager.OPMLParseError: If the file is not well-formed XML.
            OSError: If the file cannot be opened (e.g. it does not exist).
        """
        self.file_path = file_path
        self.feeds = []
        self.opml_root = self.read_opml()

    def read_opml(self):
        """
        Reads the OPML file and returns the root element.

        Returns:
            Element: The root element of the OPML file.

        Raises:
            OPMLManager.OPMLParseError: If the file is not well-formed XML
                (this includes an empty file).
        """
        try:
            return ET.parse(self.file_path).getroot()
        except ET.ParseError as e:
            # Chain the cause so the parser's position info stays in the traceback.
            raise self.OPMLParseError(f"Error parsing OPML file: {e}") from e

    def extract_feeds(self):
        """
        Extracts every <outline> element below the root as a feed dictionary.

        All outlines are reported, at any nesting depth. An outline that
        lacks an attribute (for example a grouping outline without xmlUrl)
        yields None for the corresponding key. The result is also stored in
        self.feeds.

        Returns:
            list: Dictionaries with the keys 'title', 'type', 'url',
            'imageUrl' and 'html'.
        """
        self.feeds = [
            {
                'title': outline.get('text'),
                'type': outline.get('type'),
                'url': outline.get('xmlUrl'),
                'imageUrl': outline.get('imageUrl'),
                'html': outline.get('htmlUrl'),
            }
            for outline in self.opml_root.findall(".//outline")
        ]
        return self.feeds

    def add_feed(self, title, new_feed_url, type='rss', html=None, imageUrl=None):
        """
        Adds a new feed to the in-memory OPML document.

        The feed is appended as a direct child of <body>; a <body> element is
        created when the document has none. Call save_opml() to persist.

        Args:
            title (str): The title of the new feed.
            new_feed_url (str): The URL of the new feed.
            type (str, optional): The type of the new feed. Defaults to 'rss'.
            html (str, optional): The HTML URL of the new feed. Defaults to None.
            imageUrl (str, optional): The image URL of the new feed. Defaults to None.

        Raises:
            ValueError: If any provided argument is not a string.
        """
        if not isinstance(title, str):
            raise ValueError("Title must be a string.")
        if not isinstance(new_feed_url, str):
            raise ValueError("New feed URL must be a string.")
        if not isinstance(type, str):
            raise ValueError("Type must be a string.")
        if html is not None and not isinstance(html, str):
            raise ValueError("HTML URL must be a string.")
        if imageUrl is not None and not isinstance(imageUrl, str):
            raise ValueError("Image URL must be a string.")

        attributes = {"text": title, "type": type, "xmlUrl": new_feed_url}
        if html is not None:
            attributes["htmlUrl"] = html
        if imageUrl is not None:
            attributes["imageUrl"] = imageUrl

        body = self.opml_root.find(".//body")
        if body is None:
            # Documents without <body> previously crashed with AttributeError.
            body = ET.SubElement(self.opml_root, "body")
        ET.SubElement(body, "outline", attributes)

    def remove_feed_by_url(self, feed_url):
        """
        Removes every feed with the given URL from the in-memory document.

        Feeds are matched at any depth below <body>, and each one is removed
        from its actual parent, so feeds grouped inside category outlines are
        handled as well.

        Args:
            feed_url (str): The URL of the feed to be removed.

        Returns:
            int: The number of outlines removed (0 when nothing matched or
            the document has no <body>).
        """
        body = self.opml_root.find(".//body")
        if body is None:
            return 0

        removed = 0
        # Snapshot the tree first so removal does not disturb the traversal.
        for parent in list(body.iter()):
            for child in list(parent):
                if child.tag == "outline" and child.get("xmlUrl") == feed_url:
                    parent.remove(child)
                    removed += 1
        return removed

    def save_opml(self):
        """
        Writes the in-memory document back to file_path.

        The file is written as UTF-8 with an XML declaration.
        """
        tree = ET.ElementTree(self.opml_root)
        tree.write(self.file_path, encoding="utf-8", xml_declaration=True)

    def get_image_url(self, feed_url):
        """
        Returns the image URL for a feed based on its URL.

        Args:
            feed_url (str): The URL of the feed.

        Returns:
            str: The imageUrl attribute of the first outline whose xmlUrl
            equals feed_url, or None when there is no such outline or it has
            no imageUrl attribute.
        """
        for outline in self.opml_root.findall(".//outline"):
            if outline.get("xmlUrl") == feed_url:
                return outline.get("imageUrl")
        return None

    @staticmethod
    def update_image(url, cache_directory, timeout=30):
        """
        Downloads an image into the cache directory unless it is already cached.

        The cache file is named after the SHA-256 hex digest of the URL.

        Args:
            url (str): The URL of the image.
            cache_directory (str): The directory to cache the image
                (created if it does not exist yet).
            timeout (float, optional): Timeout in seconds passed to the HTTP
                request. Defaults to 30.

        Returns:
            str: The path of the cached image file.

        Raises:
            OPMLManager.ImageDownloadError: If the download or the write to
                the cache fails. This is a subclass of CustomException.
        """
        file_name = hashlib.sha256(str(url).encode()).hexdigest()
        file_path = os.path.join(cache_directory, file_name)
        if os.path.isfile(file_path):
            return file_path

        # Third-party dependency, imported only when a download is needed so
        # that the rest of the module works without it.
        import requests

        # Download to a temporary name first: an interrupted transfer must
        # not leave a truncated file that later looks like a valid cache hit.
        partial_path = f"{file_path}.part"
        try:
            os.makedirs(cache_directory, exist_ok=True)
            with requests.get(url, stream=True, timeout=timeout) as response:
                response.raise_for_status()
                # Ask urllib3 to undo gzip/deflate transfer encoding.
                response.raw.decode_content = True
                with open(partial_path, 'wb') as out_file:
                    shutil.copyfileobj(response.raw, out_file)
            os.replace(partial_path, file_path)
        except (requests.RequestException, OSError) as e:
            try:
                os.remove(partial_path)
            except OSError:
                pass  # Best-effort cleanup; the partial file may not exist.
            # A staticmethod has no `self`; reference the class explicitly.
            raise OPMLManager.ImageDownloadError(f"Error updating image: {e}") from e
        return file_path
+ + # read_opml raises an OPMLParseError if the file cannot be parsed with a temporary invalid file + def test_read_opml_raises_opml_parse_error_if_file_cannot_be_parsed_with_temporary_invalid_file(self): + # Arrange + file_path = "temporary_invalid.opml" + with open(file_path, 'w') as f: + f.write("") + + # Act and Assert + with pytest.raises(OPMLManager.OPMLParseError): + opml_manager = OPMLManager(file_path) + + # Cleanup + os.remove(file_path) + + # add_feed raises a ValueError if any argument is not a string + def test_add_feed_raises_value_error_if_argument_not_string(self): + # Arrange + import tempfile + file_path = os.path.join(tempfile.gettempdir(), "test.opml") + with open(file_path, 'w') as f: + f.write('') + opml_manager = OPMLManager(file_path) + title = 123 + new_feed_url = "https://example.com/feed" + + # Act and Assert + with pytest.raises(ValueError): + opml_manager.add_feed(title, new_feed_url) + os.remove(file_path) + + # add_feed raises a TypeError if title or new_feed_url is not provided + def test_add_feed_raises_type_error_if_title_or_new_feed_url_not_provided(self): + # Arrange + file_path = "test.opml" + with open(file_path, 'w') as f: + f.write('') + opml_manager = OPMLManager(file_path) + + # Act and Assert + with pytest.raises(TypeError): + opml_manager.add_feed() + os.remove(file_path) + + # remove_feed_by_url removes a feed from the OPML file based on its URL + def test_remove_feed_by_url(self): + # Arrange + file_path = "test.opml" + with open(file_path, 'w') as f: + f.write('') + opml_manager = OPMLManager(file_path) + + # Act + opml_manager.remove_feed_by_url("https://feed1.com") + + # Assert + assert len(opml_manager.opml_root.findall(".//outline")) == 1 + assert opml_manager.opml_root.findall(".//outline")[0].get("xmlUrl") == "https://feed2.com" + assert opml_manager.opml_root.findall(".//outline")[0].get("imageUrl") == "https://image2.com" + assert opml_manager.opml_root.findall(".//outline")[0].get("htmlUrl") == 
"https://html2.com" + assert opml_manager.opml_root.findall(".//outline")[0].get("text") == "Feed 2" + assert opml_manager.opml_root.findall(".//outline")[0].get("type") == "rss" + + # save_opml saves the changes made to the OPML file + def test_save_opml_saves_changes(self): + # Arrange + file_path = "test.opml" + with open(file_path, 'w') as f: + f.write('') + opml_manager = OPMLManager(file_path) + opml_manager.add_feed("Test Feed", "https://example.com/rss", html="https://example.com", imageUrl="https://example.com/image.jpg") + + # Act + opml_manager.save_opml() + + # Assert + with open(file_path, 'r') as f: + content = f.read() + assert "Test Feed" in content and "https://example.com/rss" in content and "https://example.com" in content and "https://example.com/image.jpg" in content + + # get_image_url returns the image URL for a feed based on its URL + def test_get_image_url_returns_image_url(self): + # Arrange + file_path = "test.opml" + with open(file_path, 'w') as f: + f.write('') + opml_manager = OPMLManager(file_path) + + # Act + image_url = opml_manager.get_image_url("https://feed1.com") + + # Assert + assert image_url == "https://image1.com" + + # OPMLManager can be initialized with a non-existent file path + def test_initialized_with_nonexistent_file_path(self): + # Arrange + file_path = "nonexistent.opml" + with open(file_path, 'w') as f: + f.write('') + + # Act + opml_manager = OPMLManager(file_path) + + # Assert + assert isinstance(opml_manager, OPMLManager) + + # OPMLManager can be initialized with an empty OPML file + def test_initialized_with_empty_opml(self): + # Arrange + file_path = "empty.opml" + with open(file_path, 'w') as f: + f.write('') + + # Act + opml_manager = OPMLManager(file_path) + + # Assert + assert isinstance(opml_manager.opml_root, ET.Element) + + # OPMLManager can be initialized with an OPML file that contains no feeds + def test_initialized_with_empty_opml_file(self): + # Arrange + file_path = "empty.opml" + with 
open(file_path, 'w') as f: + f.write('') + + # Act + opml_manager = OPMLManager(file_path) + + # Assert + assert isinstance(opml_manager.opml_root, ET.Element) + + # add_feed can add a feed with only required arguments + def test_add_feed_with_only_required_arguments(self): + # Arrange + file_path = "test.opml" + with open(file_path, 'w') as f: + f.write('') + opml_manager = OPMLManager(file_path) + + # Act + opml_manager.add_feed("Test Feed", "https://example.com/feed") + + # Assert + assert len(opml_manager.opml_root.findall(".//outline")) == 1 + assert opml_manager.opml_root.findall(".//outline")[-1].get("text") == "Test Feed" + assert opml_manager.opml_root.findall(".//outline")[-1].get("type") == "rss" + assert opml_manager.opml_root.findall(".//outline")[-1].get("xmlUrl") == "https://example.com/feed" + assert opml_manager.opml_root.findall(".//outline")[-1].get("htmlUrl") is None + assert opml_manager.opml_root.findall(".//outline")[-1].get("imageUrl") is None + + # add_feed can add a feed with all optional arguments + def test_add_feed_with_all_optional_arguments(self): + # Arrange + file_path = "test.opml" + opml_manager = OPMLManager(file_path) + + # Act + opml_manager.add_feed("Test Feed", "https://example.com/feed", type="rss", html="https://example.com", imageUrl="https://example.com/image.jpg") + feeds = opml_manager.extract_feeds() + + # Assert + assert any(feed['title'] == "Test Feed" for feed in feeds) + assert any(feed['type'] == "rss" for feed in feeds) + assert any(feed['url'] == "https://example.com/feed" for feed in feeds) + assert any(feed['imageUrl'] == "https://example.com/image.jpg" for feed in feeds) + assert any(feed['html'] == "https://example.com" for feed in feeds) \ No newline at end of file