From df14d1936175a36e4e4c73b61ba3185d866b95e2 Mon Sep 17 00:00:00 2001 From: Saurabh Shrivastava Date: Fri, 9 Dec 2016 20:08:27 +0530 Subject: [PATCH 1/9] Picking name from property meta tag. The property meta tag contains name of the video This fix extracts that and clips the string to obtain title of the video. (Probable fix for : https://github.com/abhishek-vinjamoori/SubtitleExtractor/issues/5) --- netflix.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/netflix.py b/netflix.py index cf0455d..31c0158 100644 --- a/netflix.py +++ b/netflix.py @@ -129,7 +129,18 @@ def getTitle(self): s = int("deliberateError") except: - self.title = "Netflixsubtitles" + + try: + # + s = self.soupObject.find("meta", attrs={"property": "og:title"}) + self.title = str(s['content']) + self.title = self.title[6:] #slicing "Watch " + self.title = self.title[:-17] #slicing " Online | Netflix" + if not self.title: + s = int("deliberateError") + + except: + self.title = "Netflixsubtitles" pass From 0fc10c8b9eb410da045d4c4638934cabc1c7327f Mon Sep 17 00:00:00 2001 From: Saurabh Shrivastava Date: Fri, 9 Dec 2016 20:21:22 +0530 Subject: [PATCH 2/9] Update netflix.py --- netflix.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/netflix.py b/netflix.py index 31c0158..3d2fa4d 100644 --- a/netflix.py +++ b/netflix.py @@ -137,7 +137,7 @@ def getTitle(self): self.title = self.title[6:] #slicing "Watch " self.title = self.title[:-17] #slicing " Online | Netflix" if not self.title: - s = int("deliberateError") + s = int("deliberateError") except: self.title = "Netflixsubtitles" From 3ceb11418295f9bdc17c1e685b647bf420e28e03 Mon Sep 17 00:00:00 2001 From: Saurabh Shrivastava Date: Fri, 9 Dec 2016 20:23:18 +0530 Subject: [PATCH 3/9] Picking name from property meta tag. The property meta tag contains name of the video This fix extracts that and clips the string to obtain title of the video. (Probable fix for : #5) --- netflix.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/netflix.py b/netflix.py index 3d2fa4d..153e1f2 100644 --- a/netflix.py +++ b/netflix.py @@ -32,7 +32,7 @@ def getSubtitles(self): The main function which uses helper functions to get the subtitles """ - # self.createSoupObject() + # self.createSoupObject() check = self.loginNetflix() self.title = "NetflixCaptions" @@ -137,7 +137,7 @@ def getTitle(self): self.title = self.title[6:] #slicing "Watch " self.title = self.title[:-17] #slicing " Online | Netflix" if not self.title: - s = int("deliberateError") + s = int("deliberateError") #corrected indentation except: self.title = "Netflixsubtitles" From 7bc04867127df3e8c331133aaa4b93910731b626 Mon Sep 17 00:00:00 2001 From: Saurabh Shrivastava Date: Fri, 9 Dec 2016 22:26:50 +0530 Subject: [PATCH 4/9] Added alternate naming option plus slicing. In case twitter tag fails, added og:title as backup. Sliced words -"Watch " and " Amazon Video" for better naming. --- newamazon.py | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/newamazon.py b/newamazon.py index bd20938..e35575b 100644 --- a/newamazon.py +++ b/newamazon.py @@ -335,7 +335,11 @@ def getTitle(self): """ This function returns the title of the video. This is also used for naming the file. - --> Extracting the value from here + --> Extracting the value from here + + OR + + --> Extracting the value from here """ @@ -344,13 +348,27 @@ def getTitle(self): s = self.soupObject.find("meta", attrs={"name": "twitter:title"}) self.title = str(s['content']) self.title = self.title.replace("/", "") + self.title = self.title[6:] #slicing "Watch " + self.title = self.title[:-16] #slicing " - Amazon Video" self.title = self.title.strip() if not self.title: s = int("deliberateError") # except except: - self.title = "Amazonsubtitles" + + try: + s = self.soupObject.find("meta", attrs={"property": "og:title"}) + self.title = str(s['content']) + self.title = self.title.replace("/", "") + self.title = self.title[6:] #slicing "Watch " + self.title = self.title[:-16] #slicing " - Amazon Video" + self.title = self.title.strip() + if not self.title: + s = int("deliberateError") + + except: + self.title = "Amazonsubtitles" pass @@ -388,4 +406,4 @@ def standardFunctionCalls(self): self.deleteUnnecessaryfiles() return returnValue - pass \ No newline at end of file + pass From fbb8c183f7adb989d7ee15c08f474715052d199f Mon Sep 17 00:00:00 2001 From: Saurabh Shrivastava Date: Wed, 21 Dec 2016 23:46:24 +0530 Subject: [PATCH 5/9] Better Naming Handling. Checks and replaces "Watch" "Online | Netflix" only if present. --- netflix.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/netflix.py b/netflix.py index 153e1f2..a3861a0 100644 --- a/netflix.py +++ b/netflix.py @@ -125,6 +125,10 @@ def getTitle(self): s = self.soupObject.find("meta", attrs={"name": "twitter:title"}) self.title = str(s['content']) self.title = self.title.strip() + temp_title = self.title.split() + if temp_title[0]=='Watch' or temp[0] == "watch": + self.title = ' '.join(self.title.split()[1:]) + self.title = self.title.replace(" Online | Netflix","") if not self.title: s = int("deliberateError") @@ -134,8 +138,11 @@ def getTitle(self): # s = self.soupObject.find("meta", attrs={"property": "og:title"}) self.title = str(s['content']) - self.title = self.title[6:] #slicing "Watch " - self.title = self.title[:-17] #slicing " Online | Netflix" + self.title = self.title.strip() + temp_title = self.title.split() + if temp_title[0]=='Watch' or temp[0] == "watch": + self.title = ' '.join(self.title.split()[1:]) + self.title = self.title.replace(" Online | Netflix","") if not self.title: s = int("deliberateError") #corrected indentation From 6846f760e32962f65774d25d686630e8ca3fc6a0 Mon Sep 17 00:00:00 2001 From: Saurabh Shrivastava Date: Wed, 21 Dec 2016 23:54:37 +0530 Subject: [PATCH 6/9] Better Naming Handling. Replaces "Watch" and "- Amazon Video" only if present. --- newamazon.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/newamazon.py b/newamazon.py index e35575b..d916c76 100644 --- a/newamazon.py +++ b/newamazon.py @@ -348,8 +348,10 @@ def getTitle(self): s = self.soupObject.find("meta", attrs={"name": "twitter:title"}) self.title = str(s['content']) self.title = self.title.replace("/", "") - self.title = self.title[6:] #slicing "Watch " - self.title = self.title[:-16] #slicing " - Amazon Video" + temp_title = self.title.split() + if temp_title[0]=='Watch' or temp[0] == "watch": + self.title = ' '.join(self.title.split()[1:]) + self.title = self.title.replace("- Amazon Video","") self.title = self.title.strip() if not self.title: s = int("deliberateError") @@ -361,8 +363,10 @@ def getTitle(self): s = self.soupObject.find("meta", attrs={"property": "og:title"}) self.title = str(s['content']) self.title = self.title.replace("/", "") - self.title = self.title[6:] #slicing "Watch " - self.title = self.title[:-16] #slicing " - Amazon Video" + temp_title = self.title.split() + if temp_title[0]=='Watch' or temp[0] == "watch": + self.title = ' '.join(self.title.split()[1:]) + self.title = self.title.replace("- Amazon Video","") self.title = self.title.strip() if not self.title: s = int("deliberateError") From 4184534a26437a56a47f556939e73bb5a9cfa3de Mon Sep 17 00:00:00 2001 From: Saurabh Shrivastava Date: Sat, 24 Dec 2016 13:15:16 +0530 Subject: [PATCH 7/9] Corrected Indentation --- newamazon.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/newamazon.py b/newamazon.py index d916c76..210c168 100644 --- a/newamazon.py +++ b/newamazon.py @@ -24,7 +24,7 @@ def __init__(self, url, testMode): self.requestsFileName = "iDoNotExistDefinitelyOnThisComputerFolder.html" self.videoType = "" - # Parameters requireed for Obtaining the URL + # Parameters requireed for Obtaining the URLittititi self.parametersDict = { "PreURL": "https://atv-ps.amazon.com/cdp/catalog/GetPlaybackResources?", "asin": "", @@ -360,16 +360,16 @@ def getTitle(self): except: try: - s = self.soupObject.find("meta", attrs={"property": "og:title"}) - self.title = str(s['content']) - self.title = self.title.replace("/", "") - temp_title = self.title.split() - if temp_title[0]=='Watch' or temp[0] == "watch": - self.title = ' '.join(self.title.split()[1:]) - self.title = self.title.replace("- Amazon Video","") - self.title = self.title.strip() - if not self.title: - s = int("deliberateError") + s = self.soupObject.find("meta", attrs={"property": "og:title"}) + self.title = str(s['content']) + self.title = self.title.replace("/", "") + temp_title = self.title.split() + if temp_title[0]=='Watch' or temp[0] == "watch": + self.title = ' '.join(self.title.split()[1:]) + self.title = self.title.replace("- Amazon Video","") + self.title = self.title.strip() + if not self.title: + s = int("deliberateError") except: self.title = "Amazonsubtitles" From c61ef93cb82f783901bda2079098acdd42020522 Mon Sep 17 00:00:00 2001 From: Saurabh Shrivastava Date: Sat, 24 Dec 2016 13:45:33 +0530 Subject: [PATCH 8/9] Cleaning up the file. --- newamazon.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/newamazon.py b/newamazon.py index 210c168..0463053 100644 --- a/newamazon.py +++ b/newamazon.py @@ -24,7 +24,7 @@ def __init__(self, url, testMode): self.requestsFileName = "iDoNotExistDefinitelyOnThisComputerFolder.html" self.videoType = "" - # Parameters requireed for Obtaining the URLittititi + # Parameters requireed for Obtaining the URL self.parametersDict = { "PreURL": "https://atv-ps.amazon.com/cdp/catalog/GetPlaybackResources?", "asin": "", @@ -336,9 +336,7 @@ def getTitle(self): This function returns the title of the video. This is also used for naming the file. --> Extracting the value from here - OR - --> Extracting the value from here """ From 36ad833a1f586aedbda60561b1355e35e830e1d9 Mon Sep 17 00:00:00 2001 From: Saurabh Shrivastava Date: Sat, 24 Dec 2016 13:47:32 +0530 Subject: [PATCH 9/9] Removing extra whitespaces. --- netflix.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/netflix.py b/netflix.py index a3861a0..866765a 100644 --- a/netflix.py +++ b/netflix.py @@ -32,7 +32,7 @@ def getSubtitles(self): The main function which uses helper functions to get the subtitles """ - # self.createSoupObject() + # self.createSoupObject() check = self.loginNetflix() self.title = "NetflixCaptions" @@ -144,7 +144,7 @@ def getTitle(self): self.title = ' '.join(self.title.split()[1:]) self.title = self.title.replace(" Online | Netflix","") if not self.title: - s = int("deliberateError") #corrected indentation + s = int("deliberateError") except: self.title = "Netflixsubtitles"