From 06b5f38aaed047ace0f5d65a550172685eb04eb5 Mon Sep 17 00:00:00 2001 From: Georg Teufelberger Date: Sat, 19 Sep 2020 01:12:42 +0200 Subject: [PATCH 01/39] Update naming in `--help` Argument for `--history` is now called "FILE" --- vo-scraper.py | 1 + 1 file changed, 1 insertion(+) diff --git a/vo-scraper.py b/vo-scraper.py index 79da956..9557f7e 100755 --- a/vo-scraper.py +++ b/vo-scraper.py @@ -704,6 +704,7 @@ def setup_arg_parser(): ) parser.add_argument( "-hs", "--history", + metavar="FILE", help="A file to which the scraper saves the IDs of downloaded videos to. The scraper will skip downloads if the corresponding ID exists in the specified file." ) parser.add_argument( From b61af623b8bc62ef88bc23cafc50f5978aeabb3c Mon Sep 17 00:00:00 2001 From: Georg Teufelberger Date: Sat, 19 Sep 2020 02:10:40 +0200 Subject: [PATCH 02/39] Add option to only download latest video Use via `--latest` flag. Gets overridden by `--all`. --- vo-scraper.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/vo-scraper.py b/vo-scraper.py index 9557f7e..e6cc8c4 100755 --- a/vo-scraper.py +++ b/vo-scraper.py @@ -75,6 +75,7 @@ # Boolean flags download_all = False +download_latest = False verbose = False print_src = False @@ -311,6 +312,7 @@ def vo_scrapper(vo_link, user, passw): """ global user_agent global download_all + global download_latest global video_quality global quality_dict @@ -341,6 +343,9 @@ def vo_scrapper(vo_link, user, passw): if download_all: # Add all available videos to the selected choice = list(range(len(vo_json_data['episodes']))) + elif download_latest: + # Only add newest video to the selected + choice = [0] else: # Let user pick videos try: @@ -634,6 +639,7 @@ def apply_args(args): global verbose global download_all + global download_latest global video_quality global print_src global file_to_print_src_to @@ -651,6 +657,7 @@ def apply_args(args): # Set global variable according to input download_all = args.all + download_latest = args.latest 
video_quality = args.quality # Check for printing flag @@ -725,6 +732,11 @@ def setup_arg_parser(): action="store_true", help="Skip checking whether there's a connection to video.ethz.ch or the internet in general." ) + parser.add_argument( + "--latest", + action="store_true", + help="Only downloads the latest video from each passed lecture." + ) parser.add_argument( "-su", "--skip-update-check", action="store_true", From 6dd1a73cb86310683377873f2ad97aad1317cca3 Mon Sep 17 00:00:00 2001 From: Georg Teufelberger Date: Sat, 19 Sep 2020 13:15:34 +0200 Subject: [PATCH 03/39] Add CI configuration Tests for: - Syntax Errors - Same version number in Python and VERSION file - Downloading an unprotected video --- .gitlab-ci.yml | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 .gitlab-ci.yml diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 0000000..960103f --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,29 @@ +image: python:3.8-slim + +stages: + - pre-test + - tests + +python-compile-test: + stage: pre-test + script: + # Check whether script is syntax error free + - python3 -m py_compile vo-scraper.py + +ensure-same-version: + stage: pre-test + script: + # Ensure verion numbers in `VERSION` and `vo-scraper.py` match + - grep -q $(sed -n "s/^.*program_version = '\(.*\)'$/\1/p" vo-scraper.py) VERSION + +# Download unprotected video +unprotected-recording: + stage: tests + needs: [python-compile-test] + script: + # Install dependency + - pip3 install requests + # Download video + - python3 vo-scraper.py --quality low --latest https://video.ethz.ch/lectures/d-infk/2020/spring/252-0028-00L.html + # Compare checksums + - echo $(sha1sum Lecture\ Recordings/Digital\ Design\ and\ Computer\ Architecture/2020-03-12_low-3ebf562d.mp4) | grep -q f80bcc1c215cebf64a4da7f9623406fb1309e512 From 01dfcc4d759f7f975248ed94e47426da612db490 Mon Sep 17 00:00:00 2001 From: Georg Teufelberger Date: Sat, 19 Sep 2020 13:31:59 +0200 Subject: 
[PATCH 04/39] Test download for lectures protected with PWD --- .gitlab-ci.yml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 960103f..fbff38e 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -27,3 +27,15 @@ unprotected-recording: - python3 vo-scraper.py --quality low --latest https://video.ethz.ch/lectures/d-infk/2020/spring/252-0028-00L.html # Compare checksums - echo $(sha1sum Lecture\ Recordings/Digital\ Design\ and\ Computer\ Architecture/2020-03-12_low-3ebf562d.mp4) | grep -q f80bcc1c215cebf64a4da7f9623406fb1309e512 + +# Download 'PWD' protected video +pwd-protected-recording: + stage: tests + needs: [python-compile-test] + script: + # Install dependency + - pip3 install requests + # Download video + - python3 vo-scraper.py --quality low --latest --file $PWD_LINK_FILE + # Compare checksums + - echo $(sha1sum Lecture\ Recordings/Introduction\ to\ Machine\ Learning/2020-05-27\ -\ Tutorial_low-1898f0cc.mp4) | grep -q dce9f9aeb00693b6dbce49b113c10d2f84a29b70 From a964cff2c1898ebed42efff90dae926575165cf6 Mon Sep 17 00:00:00 2001 From: Georg Teufelberger Date: Sat, 19 Sep 2020 13:37:32 +0200 Subject: [PATCH 05/39] Test download for lectures protected with `ETH` --- .gitlab-ci.yml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index fbff38e..d7cf8fa 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -39,3 +39,15 @@ pwd-protected-recording: - python3 vo-scraper.py --quality low --latest --file $PWD_LINK_FILE # Compare checksums - echo $(sha1sum Lecture\ Recordings/Introduction\ to\ Machine\ Learning/2020-05-27\ -\ Tutorial_low-1898f0cc.mp4) | grep -q dce9f9aeb00693b6dbce49b113c10d2f84a29b70 + +# Download 'ETH' protected video +eth-protected-recording: + stage: tests + needs: [python-compile-test] + script: + # Install dependency + - pip3 install requests + # Download video + - python3 vo-scraper.py --quality low --latest --file $ETH_LINK_FILE + # Compare 
checksums + - echo $(sha1sum Lecture\ Recordings/Advanced\ Systems\ Lab/2020-03-19_low-fd29952f.mp4) | grep -q efd4a1779a29da08c0186ed6121fc10bfa7e8e83 From f37578ae25342c1ed639a04426d57ba56d17f4cd Mon Sep 17 00:00:00 2001 From: Georg Teufelberger Date: Fri, 25 Sep 2020 22:05:42 +0200 Subject: [PATCH 06/39] Add emojis Makes the readme look a bit more colourful --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 3ea0d84..36ff628 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# vo-scraper +# vo-scraper 🎓🎥 A python script for ETH students to download lecture videos from [video.ethz.ch](https://video.ethz.ch/). From 36a7db165bb18fa5dfab8f3f61dd0d62219ccbc5 Mon Sep 17 00:00:00 2001 From: Georg Teufelberger Date: Fri, 25 Sep 2020 22:28:39 +0200 Subject: [PATCH 07/39] Put parameters in alphabetical order `--latest` was at wrong position --- vo-scraper.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/vo-scraper.py b/vo-scraper.py index e6cc8c4..49cb02a 100755 --- a/vo-scraper.py +++ b/vo-scraper.py @@ -714,6 +714,11 @@ def setup_arg_parser(): metavar="FILE", help="A file to which the scraper saves the IDs of downloaded videos to. The scraper will skip downloads if the corresponding ID exists in the specified file." ) + parser.add_argument( + "--latest", + action="store_true", + help="Only downloads the latest video from each passed lecture." + ) parser.add_argument( "-p", "--print-source", metavar="FILE", @@ -732,11 +737,6 @@ def setup_arg_parser(): action="store_true", help="Skip checking whether there's a connection to video.ethz.ch or the internet in general." ) - parser.add_argument( - "--latest", - action="store_true", - help="Only downloads the latest video from each passed lecture." 
- ) parser.add_argument( "-su", "--skip-update-check", action="store_true", @@ -784,7 +784,7 @@ def remove_illegal_characters(str): # # =============================================================== -if __name__ == '__main__': +if __name__ == '__main__': # Setup parser parser = setup_arg_parser() args = parser.parse_args() From 399984545316d6639db573c2320c2b514bbaa4d4 Mon Sep 17 00:00:00 2001 From: Georg Teufelberger Date: Fri, 25 Sep 2020 22:29:02 +0200 Subject: [PATCH 08/39] Trim trailing whitespaces --- vo-scraper.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/vo-scraper.py b/vo-scraper.py index 49cb02a..0af27d4 100755 --- a/vo-scraper.py +++ b/vo-scraper.py @@ -249,14 +249,14 @@ def make_range(item, max_episode_number): Keyword arguments: item -- a string in the form of 'x..z' or 'x..y..z' max_episode_number -- The highest episode number to have an upperbound for the range of episodes - + Returns: A range from x to z, with step size y, 1 if y wasn't provided """ if len(item.split('..')) == 2: # user passed something like 'x..z', so step size is 1 lower_bound, upper_bound = item.split('..') - step = 1 + step = 1 else: # user passed something like 'x..y..z', so step size is y lower_bound, step, upper_bound = item.split('..') @@ -274,7 +274,7 @@ def get_user_choice(max_episode_number): Keyword arguments: max_episode_number -- The highest episode number to have an upperbound for the range of episodes - + Returns: A list containg the user picked choices """ @@ -288,9 +288,9 @@ def get_user_choice(max_episode_number): choice.append(int(elem)) else: choice += make_range(elem, max_episode_number) - + # make elements of `choice` unique - choice = set(choice) + choice = set(choice) # sort them, to download in order and not randomly choice = sorted(choice) @@ -784,7 +784,7 @@ def remove_illegal_characters(str): # # =============================================================== -if __name__ == '__main__': +if __name__ == '__main__': # 
Setup parser parser = setup_arg_parser() args = parser.parse_args() From cebb7bac7a6e9b3bbc1db1d15f028d1ea92e8d56 Mon Sep 17 00:00:00 2001 From: Georg Teufelberger Date: Fri, 25 Sep 2020 23:08:38 +0200 Subject: [PATCH 09/39] Perform verbosity check as early as possible --- vo-scraper.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/vo-scraper.py b/vo-scraper.py index 0af27d4..d1da46a 100755 --- a/vo-scraper.py +++ b/vo-scraper.py @@ -637,7 +637,6 @@ def apply_args(args): - history """ - global verbose global download_all global download_latest global video_quality @@ -646,10 +645,6 @@ def apply_args(args): global directory_prefix global history_file - # Enable verbose for debugging - verbose = args.verbose - print_information("Verbose enabled", verbose_only=True) - # Check if user wants to submit bug report and exit if(args.bug == True): print_information("If you found a bug you can raise an issue here: ") @@ -789,6 +784,10 @@ def remove_illegal_characters(str): parser = setup_arg_parser() args = parser.parse_args() + # Enable verbose for debugging + verbose = args.verbose + print_information("Verbose enabled", verbose_only=True) + # Check for version flag if args.version: print_information(program_version) sys.exit() From 2e8fd12930a92a6d6315a0774a3d77e8e1fe6e7d Mon Sep 17 00:00:00 2001 From: Georg Teufelberger Date: Fri, 25 Sep 2020 23:11:10 +0200 Subject: [PATCH 10/39] Add support for parameter file This file can contain any parameters accepted by the scraper. Useful if you don't want to type out all the parameters every time you perform a download. 
--- vo-scraper.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/vo-scraper.py b/vo-scraper.py index d1da46a..e714647 100755 --- a/vo-scraper.py +++ b/vo-scraper.py @@ -82,6 +82,7 @@ # Location of text files file_to_print_src_to = "" history_file = "" +PARAMETER_FILE = "parameters.txt" quality_dict = { 'high' : 0, @@ -714,6 +715,11 @@ def setup_arg_parser(): action="store_true", help="Only downloads the latest video from each passed lecture." ) + parser.add_argument( + "--parameter-file", + metavar="FILE", + help="Pass the name of the file to read parameters from. If the flag is not set parser will try to read parameters from `parameters.txt`" + ) parser.add_argument( "-p", "--print-source", metavar="FILE", @@ -793,6 +799,25 @@ def remove_illegal_characters(str): print_information(program_version) sys.exit() + # If a parameter file was passed, use that instead of default + if args.parameter_file: + PARAMETER_FILE = args.parameter_file + # Read parameters if file exists + if os.path.isfile(PARAMETER_FILE): + with open(PARAMETER_FILE) as f: + # Read file and remove trailing whitespaces and newlines + parameters = [x.strip() for x in f.readlines()] + # Split strings with spaces + parameters = [words for segments in parameters for words in segments.split()] + # Add parameters list + sys.argv += parameters + # Parse args again as we might have added some + args = parser.parse_args() + else: + # Print when no parameter file was found + # If no `--parameter-file` was passsed, this only prints when verbosity is turned on + print_information("No parameter file found at location: "+PARAMETER_FILE, verbose_only=not bool(args.parameter_file), type='warning') + # Apply commands from input apply_args(args) From 5c0f4ac285b9938bb94612108a51f6a9f9bc28d0 Mon Sep 17 00:00:00 2001 From: Georg Teufelberger Date: Fri, 25 Sep 2020 23:32:09 +0200 Subject: [PATCH 11/39] Rename test stage --- .gitlab-ci.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 
deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index d7cf8fa..a6add4e 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -2,7 +2,7 @@ image: python:3.8-slim stages: - pre-test - - tests + - basic-download-test python-compile-test: stage: pre-test @@ -18,7 +18,7 @@ ensure-same-version: # Download unprotected video unprotected-recording: - stage: tests + stage: basic-download-test needs: [python-compile-test] script: # Install dependency @@ -30,7 +30,7 @@ unprotected-recording: # Download 'PWD' protected video pwd-protected-recording: - stage: tests + stage: basic-download-test needs: [python-compile-test] script: # Install dependency @@ -42,7 +42,7 @@ pwd-protected-recording: # Download 'ETH' protected video eth-protected-recording: - stage: tests + stage: basic-download-test needs: [python-compile-test] script: # Install dependency From 0dfacb0171acfc4311ef3cbcc57349d9072f4de2 Mon Sep 17 00:00:00 2001 From: Georg Teufelberger Date: Fri, 25 Sep 2020 23:32:46 +0200 Subject: [PATCH 12/39] Add test for parameter file with default name --- .gitlab-ci.yml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index a6add4e..ab08faf 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -3,6 +3,7 @@ image: python:3.8-slim stages: - pre-test - basic-download-test + - feature-test python-compile-test: stage: pre-test @@ -51,3 +52,17 @@ eth-protected-recording: - python3 vo-scraper.py --quality low --latest --file $ETH_LINK_FILE # Compare checksums - echo $(sha1sum Lecture\ Recordings/Advanced\ Systems\ Lab/2020-03-19_low-fd29952f.mp4) | grep -q efd4a1779a29da08c0186ed6121fc10bfa7e8e83 + +# Test parameter file +default-parameter-file: + stage: feature-test + needs: [unprotected-recording] + script: + # Install dependency + - pip3 install requests + # Add parameter file + - printf -- "--quality low\n--latest\n" > parameters.txt + # Download video + - python3 vo-scraper.py 
https://video.ethz.ch/lectures/d-infk/2020/spring/252-0028-00L.html + # Compare checksums + - echo $(sha1sum Lecture\ Recordings/Digital\ Design\ and\ Computer\ Architecture/2020-03-12_low-3ebf562d.mp4) | grep -q f80bcc1c215cebf64a4da7f9623406fb1309e512 From 8ee8d4dcce87d2052737d75a9917233cab7951ba Mon Sep 17 00:00:00 2001 From: Georg Teufelberger Date: Fri, 25 Sep 2020 23:41:08 +0200 Subject: [PATCH 13/39] Adjust comment in CI --- .gitlab-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index ab08faf..48aac09 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -53,7 +53,7 @@ eth-protected-recording: # Compare checksums - echo $(sha1sum Lecture\ Recordings/Advanced\ Systems\ Lab/2020-03-19_low-fd29952f.mp4) | grep -q efd4a1779a29da08c0186ed6121fc10bfa7e8e83 -# Test parameter file +# Test default named parameter file default-parameter-file: stage: feature-test needs: [unprotected-recording] From 37b7b8b87edb20211e65273a0468a761b525e5aa Mon Sep 17 00:00:00 2001 From: Georg Teufelberger Date: Fri, 25 Sep 2020 23:41:31 +0200 Subject: [PATCH 14/39] Add test for parameter file with custom name --- .gitlab-ci.yml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 48aac09..c1ab930 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -66,3 +66,17 @@ default-parameter-file: - python3 vo-scraper.py https://video.ethz.ch/lectures/d-infk/2020/spring/252-0028-00L.html # Compare checksums - echo $(sha1sum Lecture\ Recordings/Digital\ Design\ and\ Computer\ Architecture/2020-03-12_low-3ebf562d.mp4) | grep -q f80bcc1c215cebf64a4da7f9623406fb1309e512 + +# Test custom named parameter file +custom-parameter-file: + stage: feature-test + needs: [unprotected-recording] + script: + # Install dependency + - pip3 install requests + # Add parameter file + - printf -- "--quality low\n--latest\n" > parameters2.txt + # Download video + - python3 vo-scraper.py --parameter-file 
parameters2.txt https://video.ethz.ch/lectures/d-infk/2020/spring/252-0028-00L.html + # Compare checksums + - echo $(sha1sum Lecture\ Recordings/Digital\ Design\ and\ Computer\ Architecture/2020-03-12_low-3ebf562d.mp4) | grep -q f80bcc1c215cebf64a4da7f9623406fb1309e512 From ee06df082a1a3b6a8a4cb5d1c647c6cf167867e3 Mon Sep 17 00:00:00 2001 From: Georg Teufelberger Date: Fri, 25 Sep 2020 23:56:41 +0200 Subject: [PATCH 15/39] Remove useless statement I never tested on Windows and just copied the statement from the original scraper. So I had no idea what its exact use was --- vo-scraper.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/vo-scraper.py b/vo-scraper.py index e714647..7a6c5ff 100755 --- a/vo-scraper.py +++ b/vo-scraper.py @@ -871,5 +871,3 @@ def remove_illegal_characters(str): # Print summary and exit print_information(str(link_counter) + " files found, " + str(download_counter) + " downloaded and " + str(skip_counter) + " skipped") - if platform == "win32": - input('\nEOF') # So Windows users also see the output (apparently) From 93ffff711797cb17e39c8020a3342c9c732407fe Mon Sep 17 00:00:00 2001 From: Georg Teufelberger Date: Wed, 30 Sep 2020 14:04:27 +0200 Subject: [PATCH 16/39] Add info about live streams --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index 36ff628..fee5895 100644 --- a/README.md +++ b/README.md @@ -48,6 +48,12 @@ You may find this example of ranges useful: | `1..3..` | `1 3 5 [...]` | Every other episodes starting from the second (i.e.. all the second episodes of the week) | | `..3..` | `0 3 6 [...]` | Every third episodes, starting from the beginning | +### Q: Can I use it to download live streams? + +#### A: No + +Downloading live streams is not supported. + ### Q: How do I pass a file with links to multiple lectures? 
#### A: Use `--file ` From 1974259959ecddb5d2fbe7963c5386939c33cb9d Mon Sep 17 00:00:00 2001 From: Georg Teufelberger Date: Wed, 30 Sep 2020 14:04:50 +0200 Subject: [PATCH 17/39] Add info about third party platforms --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index fee5895..c4fa75f 100644 --- a/README.md +++ b/README.md @@ -54,6 +54,12 @@ You may find this example of ranges useful: Downloading live streams is not supported. +### Q: Can I use it to download lecture recordings from other platforms (e.g. Zoom)? + +#### A: No + +Downloading is only supported for recorded lectures on [video.ethz.ch](https://video.ethz.ch/). Other platforms such as Zoom, Moodle, and Polybox are not supported. + ### Q: How do I pass a file with links to multiple lectures? #### A: Use `--file ` From c2d6e105c513c7f065e226aba8e232326878445b Mon Sep 17 00:00:00 2001 From: Georg Teufelberger Date: Wed, 30 Sep 2020 14:08:05 +0200 Subject: [PATCH 18/39] Add info about parameter file --- README.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/README.md b/README.md index c4fa75f..9da1a2d 100644 --- a/README.md +++ b/README.md @@ -79,6 +79,25 @@ Additionally you can also add a username and password at the end of the link sep **Note:** This is **NOT** recommended for your NETHZ account password for security reasons! +### Q: I don't like having to pass all those parameters each time I download recordings. Is there a better way? + +#### A: Yes + +You can can create a file called `parameters.txt` in which you put all your parameters. As long as you keep it in the same directory in which you call the scraper, it will automatically detect the file and read the parameters from there. 
+ +**Example:** + +If you create a file called `parameters.txt` with the following content + +``` +--all +--quality low +``` + +and then run `python3 vo-scraper.py ` in that directory it will download all recordings (`--all`) from that lecture in low quality (`--quality low`) without you having to pass any parameters. + +If you want to use a different name for the parameter file, you can pass the parameter `--parameter-file `. Ironically, you cannot do this via `parameters.txt` :P + ### Q: How does it acquire the videos? #### A: Like so: From 78236f8600c3d80ed7ba9bd335efa0788b4e2fd9 Mon Sep 17 00:00:00 2001 From: Georg Teufelberger Date: Wed, 30 Sep 2020 18:01:58 +0200 Subject: [PATCH 19/39] Display hints after running scraper --- vo-scraper.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/vo-scraper.py b/vo-scraper.py index 7a6c5ff..c79bd5e 100755 --- a/vo-scraper.py +++ b/vo-scraper.py @@ -22,6 +22,7 @@ import json # For handling json files import argparse # For parsing commandline arguments import getpass # For getting the user password +import random # For selecting a random hint # Check whether `requests` is installed @@ -102,6 +103,7 @@ class bcolors: 'error' : f"({bcolors.ERROR}ERR{bcolors.ENDC})" } +HINT_LIST = [] # =============================================================== # _____ _ _ # | ___| _ _ _ __ ___ | |_ (_) ___ _ __ ___ @@ -701,6 +703,11 @@ def setup_arg_parser(): "-d", "--destination", help="Directory where to save the files to. By default this is the folder \"Lecture Recordings/\" of the current working directory." ) + parser.add_argument( + "--disable-hints", + action="store_true", + help="If set no hints will be displayed if the scraper finished running" + ) parser.add_argument( "-f", "--file", help="A file with links to all the lectures you want to download. Each lecture link should be on a new line. See README.md for details." 
@@ -869,5 +876,9 @@ def remove_illegal_characters(str): for (file_name, video_src_link, episode_name) in video_src_collection: downloader(file_name, video_src_link, episode_name) + if not args.disable_hints and HINT_LIST: + print() + print("Hint:", random.choice(HINT_LIST)) + # Print summary and exit print_information(str(link_counter) + " files found, " + str(download_counter) + " downloaded and " + str(skip_counter) + " skipped") From 0130063490c1ef7424e90c733334d8731af67ce3 Mon Sep 17 00:00:00 2001 From: Georg Teufelberger Date: Thu, 1 Oct 2020 10:09:22 +0200 Subject: [PATCH 20/39] Auto format --- vo-scraper.py | 75 ++++++++++++++++++++++++++------------------------- 1 file changed, 39 insertions(+), 36 deletions(-) diff --git a/vo-scraper.py b/vo-scraper.py index c79bd5e..c8818d0 100755 --- a/vo-scraper.py +++ b/vo-scraper.py @@ -16,7 +16,10 @@ # ======================================================================== # Import urllib.request, urllib.parse, os, sys, http.client -import urllib.request, os, sys, http.client +import urllib.request +import os +import sys +import http.client from urllib.request import Request, urlopen from sys import platform import json # For handling json files @@ -49,9 +52,9 @@ # Links to repo gitlab_repo_page = "https://gitlab.ethz.ch/tgeorg/vo-scraper/" -gitlab_issue_page = gitlab_repo_page+"issues" -gitlab_changelog_page = gitlab_repo_page+"-/tags/v" -remote_version_link = gitlab_repo_page+"raw/master/VERSION" +gitlab_issue_page = gitlab_repo_page + "issues" +gitlab_changelog_page = gitlab_repo_page + "-/tags/v" +remote_version_link = gitlab_repo_page + "raw/master/VERSION" program_version = '1.3.0' # For web requests @@ -132,7 +135,7 @@ def print_information(str, type='info', verbose_only=False): print(print_type_dict[type], str) elif verbose: # Always print with tag - print(print_type_dict[type],str) + print(print_type_dict[type], str) def get_credentials(user, passw): """Gets user credentials and returns them @@ -142,7 
+145,7 @@ def get_credentials(user, passw): passw -- The password passed from a text file """ if not user: - user = input("Enter your username: ") + user = input("Enter your username: ") if not passw: passw = getpass.getpass() @@ -168,8 +171,8 @@ def acquire_login_cookie(protection, vo_link, user, passw): (user, passw) = get_credentials(user, passw) # Setup headers and content to send - headers = {"User-Agent": user_agent, "Referer": vo_link+".html"} - data = { "__charset__": "utf-8", "j_validate": True, "j_username": user, "j_password": passw} + headers = {"User-Agent": user_agent, "Referer": vo_link + ".html"} + data = {"__charset__": "utf-8", "j_validate": True, "j_username": user, "j_password": passw} # Request login-cookie r = requests.post("https://video.ethz.ch/j_security_check", headers=headers, data=data) @@ -190,11 +193,11 @@ def acquire_login_cookie(protection, vo_link, user, passw): (user, passw) = get_credentials(user, passw) # Setup headers and content to send - headers = {"Referer": vo_link+".html", "User-Agent":user_agent} - data = { "__charset__": "utf-8", "username": user, "password": passw } + headers = {"Referer": vo_link + ".html", "User-Agent": user_agent} + data = {"__charset__": "utf-8", "username": user, "password": passw} # Get login cookie - r = requests.post(vo_link+".series-login.json", headers=headers, data=data) + r = requests.post(vo_link + ".series-login.json", headers=headers, data=data) # Put login cookie in cookie_jar cookie_jar = r.cookies @@ -269,7 +272,7 @@ def make_range(item, max_episode_number): upper_bound = int(upper_bound) if upper_bound else max_episode_number step = int(step) - return range(lower_bound, upper_bound+1, step) + return range(lower_bound, upper_bound + 1, step) def get_user_choice(max_episode_number): """ @@ -383,7 +386,7 @@ def vo_scrapper(vo_link, user, passw): for item_nr in choice: # Get link to video metadata json file item = vo_json_data['episodes'][item_nr] - video_info_link = 
video_info_prefix+item['id'] + video_info_link = video_info_prefix + item['id'] # Download the video metadata file # Use login-cookie if provided otherwise make request without cookie @@ -407,8 +410,8 @@ def vo_scrapper(vo_link, user, passw): versions = list() print_information("Available versions:", verbose_only=True) for vid_version in video_json_data['streams'][0]['sources']['mp4']: - versions.append((counter, vid_version['res']['w']*vid_version['res']['h'])) - print_information(str(counter) + ": " + "%4d" %vid_version['res']['w'] + "x" + "%4d" %vid_version['res']['h'], verbose_only=True) + versions.append((counter, vid_version['res']['w'] * vid_version['res']['h'])) + print_information(str(counter) + ": " + "%4d" % vid_version['res']['w'] + "x" + "%4d" % vid_version['res']['h'], verbose_only=True) counter += 1 versions.sort(key=lambda tup: tup[1], reverse=True) # Now it's sorted: high -> medium -> low @@ -421,7 +424,7 @@ def vo_scrapper(vo_link, user, passw): continue lecture_title = vo_json_data['title'] - episode_title = vo_json_data["episodes"][item_nr]["title"] + episode_title = vo_json_data["episodes"][item_nr]["title"] # If video and lecture title overlap, remove lecture title from video title if episode_title.startswith(lecture_title): @@ -431,15 +434,15 @@ def vo_scrapper(vo_link, user, passw): episode_name = item['createdAt'][:-6] + " " + lecture_title + episode_title # Append date - episode_title = item['createdAt'][:-6]+episode_title + episode_title = item['createdAt'][:-6] + episode_title # Generate a pseudo hash by using part of the filename of the online version (which appears to be a UUID) - pseudo_hash = video_src_link.replace('https://oc-vp-dist-downloads.ethz.ch/mh_default_org/oaipmh-mmp/','')[:8] + pseudo_hash = video_src_link.replace('https://oc-vp-dist-downloads.ethz.ch/mh_default_org/oaipmh-mmp/', '')[:8] print_information(pseudo_hash, verbose_only=True) # Filename is `directory/