From e0350009ded3344964249867e219cb9586f5b5a3 Mon Sep 17 00:00:00 2001 From: Smacking Potato Date: Wed, 20 Nov 2024 00:51:13 +0000 Subject: [PATCH] [motherless] improve and tidy patterns --- gallery_dl/extractor/motherless.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/gallery_dl/extractor/motherless.py b/gallery_dl/extractor/motherless.py index d13122fa6a..1054e4482e 100644 --- a/gallery_dl/extractor/motherless.py +++ b/gallery_dl/extractor/motherless.py @@ -26,7 +26,7 @@ class MotherlessExtractor(Extractor): class MotherlessMediaExtractor(MotherlessExtractor): """Extractor for a single image/video from motherless.com""" - pattern = ROOT_URL_PATTERN + "/((?!GV|GI|G)[A-Z0-9]+)$" + pattern = ROOT_URL_PATTERN + "/(?!G)([A-Z0-9]+)$" example = "https://motherless.com/ABC123" directory_fmt = ("{category}",) @@ -107,8 +107,8 @@ def items(self): page = self.request(f"{self.root}/G{self.gallery_id}").text data = { - "gallery_id" : self.gallery_id, - "gallery_title": get_gallery_name_from_homepage(page), + "gallery_id" : self.gallery_id, + "gallery_title": get_gallery_name_from_homepage(page), "uploader": get_gallery_uploader(page), "count": get_gallery_image_count(page)} @@ -137,7 +137,7 @@ def items(self): self.gallery_id = re.match(self.pattern, self.url).group(1) page = self.request(f"{self.root}/G{self.gallery_id}").text data = { - "gallery_id" : self.gallery_id, + "gallery_id" : self.gallery_id, "gallery_title": get_gallery_name_from_homepage(page), "uploader": get_gallery_uploader(page), "count": get_gallery_video_count(page)} @@ -167,11 +167,11 @@ def items(self): self.gallery_id = re.match(self.pattern, self.url).group(1) page = self.request(f"{self.root}/G{self.gallery_id}").text data = { - "gallery_id" : self.gallery_id, - "gallery_title": get_gallery_name_from_homepage(page), + "gallery_id" : self.gallery_id, + "gallery_title": get_gallery_name_from_homepage(page), "uploader": get_gallery_uploader(page), "count": get_gallery_image_count(page) + get_gallery_video_count(page)} - + yield Message.Directory, data for id, url, extension, title, num in get_images(self): @@ -205,10 +205,10 @@ def get_images(extractor): page = extractor.request(f"{extractor.root}/GI{extractor.gallery_id}?page={n}").text page_count = 0 - for result in re.finditer(f' src="https:\/\/cdn5-thumbs\.motherlessmedia\.com\/thumbs\/([A-Z0-9]+?)\.(jpg|gif)"[\s\S]+?alt="(.+)"', page): + for result in re.finditer(f' src="https://cdn5-thumbs\.motherlessmedia\.com/thumbs/([A-Z0-9]+?)\.([a-zA-Z]+)"[\s\S]+?alt="(.+)"', page): id = result.group(1) - url = f"https://cdn5-images.motherlessmedia.com/images/{id}.jpg" extension = result.group(2) + url = f"https://cdn5-images.motherlessmedia.com/images/{id}.{extension}" title = result.group(3) page_count += 1 @@ -228,7 +228,7 @@ def get_videos(extractor): page = extractor.request(f"{extractor.root}/GV{extractor.gallery_id}?page={n}").text page_count = 0 - for result in re.finditer(f'thumbs\/([A-Z0-9]+?)-strip\.jpg" alt="(.+)"', page): + for result in re.finditer('thumbs/([A-Z0-9]+?)-strip\.jpg" alt="(.+)"', page): id = result.group(1) url = f"https://cdn5-videos.motherlessmedia.com/videos/{id}.mp4" title = result.group(2) @@ -274,7 +274,7 @@ def get_media_date(page_data): return (datetime.now(timezone.utc) - timedelta(days=days_ago)).replace(hour=0, minute=0, second=0, microsecond=0) def get_media_uploader(page_data): - username_html = re.search('class="username">\s+(.+[^\s])\s+<\/span>', page_data).group(1) + username_html = re.search('class="username">\s+(.+\S)\s+', page_data).group(1) return text.remove_html(username_html) def get_image_id(image_url):