Skip to content

Commit

Permalink
[motherless] improve and tidy patterns
Browse files (browse the repository at this point in the history)
  • Loading branch information
smackingpotato committed Nov 20, 2024
1 parent 9a792ab commit e035000
Showing 1 changed file with 11 additions and 11 deletions.
22 changes: 11 additions & 11 deletions gallery_dl/extractor/motherless.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -26,7 +26,7 @@ class MotherlessExtractor(Extractor):
class MotherlessMediaExtractor(MotherlessExtractor):
"""Extractor for a single image/video from motherless.com"""

pattern = ROOT_URL_PATTERN + "/((?!GV|GI|G)[A-Z0-9]+)$"
pattern = ROOT_URL_PATTERN + "/(?!G)([A-Z0-9]+)$"
example = "https://motherless.com/ABC123"
directory_fmt = ("{category}",)

Expand Down Expand Up @@ -107,8 +107,8 @@ def items(self):

page = self.request(f"{self.root}/G{self.gallery_id}").text
data = {
"gallery_id" : self.gallery_id,
"gallery_title": get_gallery_name_from_homepage(page),
"gallery_id" : self.gallery_id,
"gallery_title": get_gallery_name_from_homepage(page),
"uploader": get_gallery_uploader(page),
"count": get_gallery_image_count(page)}

Expand Down Expand Up @@ -137,7 +137,7 @@ def items(self):
self.gallery_id = re.match(self.pattern, self.url).group(1)
page = self.request(f"{self.root}/G{self.gallery_id}").text
data = {
"gallery_id" : self.gallery_id,
"gallery_id" : self.gallery_id,
"gallery_title": get_gallery_name_from_homepage(page),
"uploader": get_gallery_uploader(page),
"count": get_gallery_video_count(page)}
Expand Down Expand Up @@ -167,11 +167,11 @@ def items(self):
self.gallery_id = re.match(self.pattern, self.url).group(1)
page = self.request(f"{self.root}/G{self.gallery_id}").text
data = {
"gallery_id" : self.gallery_id,
"gallery_title": get_gallery_name_from_homepage(page),
"gallery_id" : self.gallery_id,
"gallery_title": get_gallery_name_from_homepage(page),
"uploader": get_gallery_uploader(page),
"count": get_gallery_image_count(page) + get_gallery_video_count(page)}

yield Message.Directory, data

for id, url, extension, title, num in get_images(self):
Expand Down Expand Up @@ -205,10 +205,10 @@ def get_images(extractor):
page = extractor.request(f"{extractor.root}/GI{extractor.gallery_id}?page={n}").text
page_count = 0

for result in re.finditer(f' src="https:\/\/cdn5-thumbs\.motherlessmedia\.com\/thumbs\/([A-Z0-9]+?)\.(jpg|gif)"[\s\S]+?alt="(.+)"', page):
for result in re.finditer(f' src="https://cdn5-thumbs\.motherlessmedia\.com/thumbs/([A-Z0-9]+?)\.([a-zA-Z]+)"[\s\S]+?alt="(.+)"', page):
id = result.group(1)
url = f"https://cdn5-images.motherlessmedia.com/images/{id}.jpg"
extension = result.group(2)
url = f"https://cdn5-images.motherlessmedia.com/images/{id}.{extension}"
title = result.group(3)
page_count += 1

Expand All @@ -228,7 +228,7 @@ def get_videos(extractor):
page = extractor.request(f"{extractor.root}/GV{extractor.gallery_id}?page={n}").text
page_count = 0

for result in re.finditer(f'thumbs\/([A-Z0-9]+?)-strip\.jpg" alt="(.+)"', page):
for result in re.finditer('thumbs/([A-Z0-9]+?)-strip\.jpg" alt="(.+)"', page):
id = result.group(1)
url = f"https://cdn5-videos.motherlessmedia.com/videos/{id}.mp4"
title = result.group(2)
Expand Down Expand Up @@ -274,7 +274,7 @@ def get_media_date(page_data):
return (datetime.now(timezone.utc) - timedelta(days=days_ago)).replace(hour=0, minute=0, second=0, microsecond=0)

def get_media_uploader(page_data):
username_html = re.search('class="username">\s+(.+[^\s])\s+<\/span>', page_data).group(1)
username_html = re.search('class="username">\s+(.+\S)\s+</span>', page_data).group(1)
return text.remove_html(username_html)

def get_image_id(image_url):
Expand Down

0 comments on commit e035000

Please sign in to comment.