-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add: adds opml podcast manipulation with tests
- Loading branch information
Showing
5 changed files
with
460 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -128,3 +128,5 @@ dmypy.json | |
# Pyre type checker | ||
.pyre/ | ||
|
||
# Podcasts Manager | ||
*.opml |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
## Summary | ||
The `OPMLManager` class manages OPML (Outline Processor Markup Language) files. It provides methods for reading the file, extracting, adding and removing feeds, saving changes, looking up a feed's image URL, and caching feed images locally. | ||
|
||
## Example Usage | ||
```python | ||
# Create an instance of OPMLManager | ||
manager = OPMLManager('path/to/opml/file.opml') | ||
|
||
# Read the OPML file and get the root element | ||
opml_root = manager.read_opml() | ||
|
||
# Extract feeds from the OPML file | ||
feeds = manager.extract_feeds() | ||
|
||
# Add a new feed to the OPML file | ||
manager.add_feed('New Feed', 'https://example.com/feed.xml') | ||
|
||
# Remove a feed from the OPML file | ||
manager.remove_feed_by_url('https://example.com/feed.xml') | ||
|
||
# Save the changes made to the OPML file | ||
manager.save_opml() | ||
|
||
# Get the image URL for a feed | ||
image_url = manager.get_image_url('https://example.com/feed.xml') | ||
|
||
# Update the image for a feed | ||
manager.update_image('https://example.com/image.jpg', 'path/to/cache/directory') | ||
``` | ||
|
||
## Code Analysis | ||
### Main functionalities | ||
The main functionalities of the `OPMLManager` class are: | ||
- Reading an OPML file and getting the root element. | ||
- Extracting feeds from the OPML file and returning a list of dictionaries containing feed information. | ||
- Adding a new feed to the OPML file. | ||
- Removing a feed from the OPML file based on its URL. | ||
- Saving the changes made to the OPML file. | ||
- Getting the image URL for a feed based on its URL. | ||
- Updating the image for a feed by downloading it from a provided URL and saving it locally. | ||
___ | ||
### Methods | ||
- `__init__(self, file_path)`: Initializes the `OPMLManager` instance with the file path of the OPML file. | ||
- `read_opml(self)`: Reads the OPML file and returns the root element. | ||
- `extract_feeds(self)`: Extracts feeds from the OPML file and returns a list of dictionaries containing feed information. | ||
- `add_feed(self, title, new_feed_url, type='rss', html=None, imageUrl=None)`: Adds a new feed to the OPML file. | ||
- `remove_feed_by_url(self, feed_url)`: Removes a feed from the OPML file based on its URL. | ||
- `save_opml(self)`: Saves the changes made to the OPML file. | ||
- `get_image_url(self, feed_url)`: Returns the image URL for a feed based on its URL. | ||
- `update_image(url, cache_directory)`: Static method that downloads the image at `url` and saves it in `cache_directory`, skipping the download when the image is already cached. | ||
___ | ||
### Fields | ||
- `file_path (str)`: The file path of the OPML file. | ||
- `opml_root (Element)`: The root element of the OPML file. | ||
___ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,180 @@ | ||
import os | ||
import os.path | ||
import shutil | ||
import xml.etree.ElementTree as ET | ||
import requests | ||
import hashlib | ||
|
||
""" | ||
OPMLManager class for managing OPML files. | ||
This class provides methods for reading, extracting, adding, removing, and saving feeds in an OPML file. | ||
It also includes a method for updating the image associated with a feed URL. | ||
Attributes: | ||
file_path (str): The path to the OPML file. | ||
opml_root (Element): The root element of the OPML file. | ||
Methods: | ||
__init__(file_path): Initializes the OPMLManager object with the specified file path. | ||
read_opml(): Reads the OPML file and returns the root element. | ||
extract_feeds(): Extracts the feeds from the OPML file and returns a list of dictionaries. | ||
add_feed(title, new_feed_url, type='rss', html=None, imageUrl=None): Adds a new feed to the OPML file. | ||
remove_feed_by_url(feed_url): Removes a feed from the OPML file based on its URL. | ||
save_opml(): Saves the changes made to the OPML file. | ||
get_image_url(feed_url): Returns the image URL associated with a feed URL. | ||
update_image(url, cache_directory): Downloads the image at the given URL and caches it in cache_directory. | ||
""" | ||
|
||
|
||
class OPMLManager:
    """
    A class for managing OPML (Outline Processor Markup Language) files.

    The file is parsed once on construction; feeds are then read or edited
    on the in-memory tree and persisted explicitly with ``save_opml``.

    Attributes:
        file_path (str): The file path of the OPML file.
        opml_root (Element): The root element of the OPML file.
        feeds (list): Result of the most recent ``extract_feeds`` call
            (only set once ``extract_feeds`` has been called).
    """

    class OPMLParseError(Exception):
        """Raised when the OPML file is not well-formed or lacks a <body>."""

    class CustomException(Exception):
        """Raised when caching an image fails for an unexpected reason."""

    class ImageDownloadError(CustomException):
        """Raised when the image server answers with an error status.

        Subclasses ``CustomException`` so callers that catch the generic
        error also catch this more specific one.
        """

    def __init__(self, file_path):
        """
        Initializes the OPMLManager instance with the file path of the OPML file.

        Args:
            file_path (str): The file path of the OPML file.

        Raises:
            OPMLParseError: If the file is not well-formed XML.
        """
        self.file_path = file_path
        self.opml_root = self.read_opml()

    def read_opml(self):
        """
        Reads the OPML file and returns the root element.

        Returns:
            Element: The root element of the OPML file.

        Raises:
            OPMLParseError: If the file is not well-formed XML.
        """
        try:
            return ET.parse(self.file_path).getroot()
        except ET.ParseError as e:
            # Chain the cause so the parser's position info is not lost.
            raise self.OPMLParseError(f"Error parsing OPML file: {e}") from e

    def extract_feeds(self):
        """
        Extracts feeds from the OPML file.

        Every <outline> element at any depth is reported, including folder
        outlines, whose missing attributes come back as None.

        Returns:
            list: One dictionary per outline with the keys 'title', 'type',
            'url', 'imageUrl' and 'html'.
        """
        feeds = [
            {
                'title': outline.get('text'),
                'type': outline.get('type'),
                'url': outline.get('xmlUrl'),
                'imageUrl': outline.get('imageUrl'),
                'html': outline.get('htmlUrl'),
            }
            for outline in self.opml_root.findall(".//outline")
        ]
        self.feeds = feeds
        return feeds

    def add_feed(self, title, new_feed_url, type='rss', html=None, imageUrl=None):
        """
        Adds a new feed as a direct child of the OPML <body>.

        The change is in memory only until ``save_opml`` is called.

        Args:
            title (str): The title of the new feed.
            new_feed_url (str): The URL of the new feed.
            type (str, optional): The type of the new feed. Defaults to 'rss'.
            html (str, optional): The HTML URL of the new feed. Defaults to None.
            imageUrl (str, optional): The image URL of the new feed. Defaults to None.

        Raises:
            ValueError: If any provided argument is not a string.
            OPMLParseError: If the document has no <body> element.
        """
        if not isinstance(title, str):
            raise ValueError("Title must be a string.")
        if not isinstance(new_feed_url, str):
            raise ValueError("New feed URL must be a string.")
        if not isinstance(type, str):
            raise ValueError("Type must be a string.")
        if html is not None and not isinstance(html, str):
            raise ValueError("HTML URL must be a string.")
        if imageUrl is not None and not isinstance(imageUrl, str):
            raise ValueError("Image URL must be a string.")

        body = self.opml_root.find(".//body")
        if body is None:
            # Without this guard the failure is an opaque AttributeError.
            raise self.OPMLParseError("OPML file has no <body> element.")

        new_feed_attributes = {"text": title, "type": type, "xmlUrl": new_feed_url}
        if html is not None:
            new_feed_attributes["htmlUrl"] = html
        if imageUrl is not None:
            new_feed_attributes["imageUrl"] = imageUrl
        body.append(ET.Element("outline", new_feed_attributes))

    def remove_feed_by_url(self, feed_url):
        """
        Removes every feed whose xmlUrl matches, at any nesting depth.

        The change is in memory only until ``save_opml`` is called. Nothing
        happens when the document has no <body> or no feed matches.

        Args:
            feed_url (str): The URL of the feed to be removed.
        """
        body = self.opml_root.find(".//body")
        if body is None:
            return
        # Element.remove() only accepts direct children, so walk each
        # potential parent instead of calling body.remove() on a nested
        # match. Snapshots (list(...)) keep iteration safe while mutating.
        for parent in list(body.iter()):
            for child in list(parent):
                if child.tag == "outline" and child.get("xmlUrl") == feed_url:
                    parent.remove(child)

    def save_opml(self):
        """
        Saves the changes made to the OPML file.

        The file is written as UTF-8 with an XML declaration.
        """
        tree = ET.ElementTree(self.opml_root)
        tree.write(self.file_path, encoding="utf-8", xml_declaration=True)

    def get_image_url(self, feed_url):
        """
        Returns the image URL for a feed based on its URL.

        Args:
            feed_url (str): The URL of the feed.

        Returns:
            str: The imageUrl attribute of the first outline whose xmlUrl
            matches, or None when no outline matches (or the match has no
            imageUrl).
        """
        for outline in self.opml_root.findall(".//outline"):
            if outline.get("xmlUrl") == feed_url:
                return outline.get("imageUrl")
        return None

    @staticmethod
    def update_image(url, cache_directory, timeout=30):
        """
        Caches the image at ``url`` inside ``cache_directory``.

        The cache file is named after the SHA-256 hex digest of the URL. If
        that file already exists nothing is downloaded.

        Args:
            url (str): The URL of the image.
            cache_directory (str): The directory to cache the image
                (created if it does not exist).
            timeout (float, optional): Seconds to wait for the server.
                Defaults to 30.

        Returns:
            str: The path of the cached image file.

        Raises:
            ImageDownloadError: If the server answers with an error status.
            CustomException: If anything else goes wrong while downloading
                or writing the file.
        """
        file_name = hashlib.sha256(str(url).encode()).hexdigest()
        file_path = os.path.join(cache_directory, file_name)
        if os.path.isfile(file_path):
            return file_path

        # Download to a side file first so an interrupted transfer is never
        # mistaken for a valid cache entry on the next call.
        partial_path = file_path + ".part"
        try:
            os.makedirs(cache_directory, exist_ok=True)
            with requests.get(url, stream=True, timeout=timeout) as response:
                if not response.ok:
                    # This is a static method: there is no `self`, so the
                    # nested exception classes are reached via the class.
                    raise OPMLManager.ImageDownloadError(
                        'Was not possible to cache the file'
                    )
                # Have urllib3 undo gzip/deflate transfer encoding; the raw
                # stream is otherwise copied still compressed.
                response.raw.decode_content = True
                with open(partial_path, 'wb') as out_file:
                    shutil.copyfileobj(response.raw, out_file)
            os.replace(partial_path, file_path)
        except OPMLManager.ImageDownloadError:
            raise
        except Exception as e:
            raise OPMLManager.CustomException(f"Error updating image: {e}") from e
        finally:
            # Best-effort cleanup of a leftover partial download.
            if os.path.exists(partial_path):
                try:
                    os.remove(partial_path)
                except OSError:
                    pass
        return file_path
Oops, something went wrong.