Skip to content

Commit

Permalink
fix : download git repo issue #15
Browse files Browse the repository at this point in the history
  • Loading branch information
Zakongjampa committed Feb 6, 2023
1 parent be34e01 commit 7e9dda4
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
.vscode/
**/.ipynb_checkpoints
.env/
48 changes: 48 additions & 0 deletions script/catalog.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
from github import Github
import requests
import os
import csv
import re



def download_csv(repo):
    """Download ``data/catalog.csv`` from the given GitHub repository.

    Args:
        repo: Repository object exposing ``get_contents(path, ref=...)``
            (presumably a PyGithub ``Repository`` -- confirm against caller).

    Returns:
        The HTTP response returned by ``requests.get`` for the file's
        download URL, taken from the ``master`` branch.

    Raises:
        requests.HTTPError: If the download answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within the timeout.
    """
    catalog_file = repo.get_contents("data/catalog.csv", ref="master")
    # A timeout keeps the script from hanging forever on a stalled connection.
    catalog_csv = requests.get(
        catalog_file.download_url, stream=True, timeout=30
    )
    # Fail loudly rather than handing an HTTP error page to the CSV parser.
    catalog_csv.raise_for_status()
    return catalog_csv


def get_repos_in_catalog(catalog):
    """Collect the unique pecha ids listed in the downloaded catalog.

    The first column of each scanned row is searched for a bracketed id
    (e.g. ``[P000001]``); the text between the brackets is taken as the id.
    A message is printed for every id that shows up more than once.

    Args:
        catalog: Response-like object whose ``content`` attribute holds the
            UTF-8 encoded CSV bytes.

    Returns:
        set: The distinct pecha ids found. Rows that are empty, or whose
        first column carries no bracketed id, are skipped.
    """
    repos_in_catalog = set()
    # Raw string avoids invalid-escape warnings; the non-greedy group stops at
    # the first closing bracket so later brackets in the cell are ignored.
    id_pattern = re.compile(r"\[(.+?)\]")

    data = catalog.content.decode('utf-8')
    pechas = list(csv.reader(data.split("\n"), delimiter=","))
    # The first row and the last three rows are deliberately not scanned.
    # NOTE(review): presumably a header plus trailing non-data lines -- confirm.
    for pecha in pechas[1:-3]:
        if not pecha:
            continue  # blank line in the CSV yields an empty row
        match = id_pattern.search(pecha[0])
        if match is None:
            continue  # first column has no bracketed id
        pecha_id = match.group(1)
        if pecha_id in repos_in_catalog:
            print(f"Pecha is already in catalog {pecha_id}")
        else:
            repos_in_catalog.add(pecha_id)
    return repos_in_catalog


def get_existing_pecha(repo):
    """Placeholder that has no implementation yet.

    Args:
        repo: Currently unused.

    Returns:
        None: Always, because the body does nothing.
    """
    return None

if __name__ == "__main__":
    # Build the GitHub client from the GitHubToken environment variable
    # (os.environ.get yields None when the variable is unset).
    client = Github(os.environ.get('GitHubToken'))
    catalog_repo = client.get_repo("OpenPecha-Data/catalog")

    # Download the catalog file and derive the set of pecha ids it lists.
    catalog_response = download_csv(catalog_repo)
    catalog_ids = get_repos_in_catalog(catalog_response)

    # get_existing_pecha is still a stub, so this is None for now.
    existing_ids = get_existing_pecha(catalog_repo)
    # save(catalog_csv,"catalog.txt")
15 changes: 15 additions & 0 deletions script/catalog_info.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from dataclasses import dataclass, field
from typing import Any

@dataclass(frozen=True)
class catalog_info:
    """Immutable (frozen) record holding the fields of a catalog entry.

    NOTE(review): presumably one instance per pecha listed in the catalog;
    field meanings are inferred from their names -- confirm against the
    catalog schema.
    """

    pecha_id: str
    title: str
    # No constraint on the value; typing.Any is the proper annotation
    # (the builtin function ``any`` is not a type).
    volume: Any
    author: str
    source_id: int
    creation_date: str
    legacy_id: int




0 comments on commit 7e9dda4

Please sign in to comment.