-
Notifications
You must be signed in to change notification settings - Fork 148
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Anwar, Malik Aqeel
committed
Aug 5, 2020
1 parent
5a73277
commit ed9dffc
Showing
4 changed files
with
69 additions
and
48 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,11 +3,14 @@ | |
# Email: [email protected] | ||
|
||
# Code resued from https://stackoverflow.com/questions/38511444/python-download-files-from-google-drive-using-url | ||
import requests,os | ||
# Make sure you run this from parent folder and not from utils folder i.e. | ||
# python utils/fetch_dataset.py | ||
|
||
import argparse
import os
import urllib.request
from zipfile import ZipFile

import requests

from utils.aux_functions import print_orderly
|
||
parser = argparse.ArgumentParser( | ||
description="Download dataset - Python code to download associated datasets" | ||
) | ||
|
@@ -18,42 +21,48 @@ | |
help="Name of the dataset - Details on available datasets can be found at GitHub Page", | ||
) | ||
args = parser.parse_args() | ||
|
||
|
||
def download_file_from_google_drive(id, destination):
    """Fetch the Google Drive file with the given *id* and store it at *destination*.

    Google serves large files behind a virus-scan interstitial; when the first
    request comes back with a confirmation cookie, a second request carrying
    that token retrieves the actual payload.
    """
    endpoint = "https://docs.google.com/uc?export=download"
    session = requests.Session()

    response = session.get(endpoint, params={"id": id}, stream=True)
    token = get_confirm_token(response)
    if token:
        # Replay the request with the confirmation token to get the real file.
        response = session.get(
            endpoint, params={"id": id, "confirm": token}, stream=True
        )

    save_response_content(response, destination)
|
||
|
||
def get_confirm_token(response):
    """Return Google Drive's download-warning token from *response* cookies, or None."""
    return next(
        (
            value
            for key, value in response.cookies.items()
            if key.startswith("download_warning")
        ),
        None,
    )
|
||
|
||
def save_response_content(response, destination):
    """Stream the body of *response* to the file at *destination* in 32 KiB chunks."""
    chunk_size = 32768

    print(destination)
    with open(destination, "wb") as sink:
        # Empty chunks are keep-alives; filter() drops them before writing.
        for piece in filter(None, response.iter_content(chunk_size)):
            sink.write(piece)
|
||
|
||
def download(t_url):
    """Return the text resource at *t_url* as a list of lines.

    Fix: the original ran the raw bytes through str(), which yields the
    literal "b'...'" repr — hence its split on the two-character sequence
    "\\n" and the caller's scrubbing of "b'" and quote artifacts. Decoding
    the payload produces clean lines directly (the caller's scrubbing
    becomes a harmless no-op). The response is also closed deterministically
    now instead of being leaked.
    """
    with urllib.request.urlopen(t_url) as response:
        data = response.read()
    return data.decode("utf-8").split("\n")
|
||
|
||
def Convert(lst): | ||
it = iter(lst) | ||
res_dct = dict(zip(it, it)) | ||
|
@@ -62,19 +71,22 @@ def Convert(lst): | |
|
||
if __name__ == "__main__":
    # Fetch the latest download_links.txt from GitHub and turn it into a
    # {dataset_name: google_drive_file_id} mapping.
    print_orderly("Download dataset", 60)
    link = "https://raw.githubusercontent.com/aqeelanwar/MaskTheFace/master/datasets/download_links.txt"
    links_dict = Convert(
        download(link)[0]
        .replace(":", "\n")
        .replace("b'", "")
        .replace("'", "")
        .replace(" ", "")
        .split("\n")
    )
    file_id = links_dict[args.dataset]

    # Download the zip next to the other datasets, then unpack and delete it.
    destination = "datasets/_.zip"
    print("Downloading: ", args.dataset)
    download_file_from_google_drive(file_id, destination)

    print("Extracting: ", args.dataset)
    with ZipFile(destination, "r") as zipObj:
        # Extract into the zip's own directory. os.path.dirname understands
        # "/" on every platform, unlike the previous
        # destination.rsplit(os.path.sep, 1)[0], which broke on Windows where
        # os.path.sep is "\\" and the rsplit returned the full path unchanged.
        zipObj.extractall(os.path.dirname(destination))

    os.remove(destination)
    print_orderly("Done", 60)
|