-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
15 changed files
with
362 additions
and
60 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
|
||
|
||
|
||
from fastapi.responses import JSONResponse | ||
|
||
|
||
def error_repo_not_found() -> JSONResponse:
    """Build the JSON response sent when a repository cannot be found.

    Returns:
        A ``JSONResponse`` with status code 401 whose body is
        ``{"error": "Repository not found"}`` and whose ``x-error-code`` /
        ``x-error-message`` headers carry the same error.
    """
    message = "Repository not found"
    error_headers = {
        "x-error-code": "RepoNotFound",
        "x-error-message": message,
    }
    return JSONResponse(
        status_code=401,
        headers=error_headers,
        content={"error": message},
    )
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
|
||
|
||
class RepoMeta(object):
    """Plain container for the metadata fields reported for a repository.

    Every field starts with a neutral default and is expected to be filled
    in by the caller; ``to_dict`` exports the fields as a dictionary.
    """

    def __init__(self) -> None:
        # Identification of the repository and revision.
        self._id = None
        self.id = None
        self.author = None
        self.sha = None
        self.lastModified = None
        # Access flags.
        self.private = False
        self.gated = False
        self.disabled = False
        # Descriptive information and counters.
        self.tags = []
        self.description = ""
        self.paperswithcode_id = None
        self.downloads = 0
        self.likes = 0
        self.cardData = None
        self.siblings = None
        self.createdAt = None

    def to_dict(self):
        """Return the metadata as a dict keyed by field name.

        The keys appear in a fixed order, and the values are the current
        attribute values (not copies).
        """
        exported_fields = (
            "_id",
            "id",
            "author",
            "sha",
            "lastModified",
            "private",
            "gated",
            "disabled",
            "tags",
            "description",
            "paperswithcode_id",
            "downloads",
            "likes",
            "cardData",
            "siblings",
            "createdAt",
        )
        return {field: getattr(self, field) for field in exported_fields}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,167 @@ | ||
# coding=utf-8 | ||
# Copyright 2024 XiaHan | ||
# | ||
# Use of this source code is governed by an MIT-style | ||
# license that can be found in the LICENSE file or at | ||
# https://opensource.org/licenses/MIT. | ||
import hashlib | ||
import io | ||
import os | ||
import re | ||
from typing import Any, Dict, List, Union | ||
import gitdb | ||
from git import Commit, Optional, Repo, Tree | ||
from gitdb.base import OStream | ||
import yaml | ||
|
||
from olah.mirror.meta import RepoMeta | ||
class LocalMirrorRepo(object):
    """Read-only accessor for a repository mirrored as a local git checkout.

    The instance opens the git repository at ``path`` and answers metadata
    and file queries for individual commits.
    """

    # Optional leading whitespace followed by a ``---`` ... ``---`` block.
    # Always applied with ``.match`` so only a card at the very start of the
    # README is recognised.
    _CARD_PATTERN = re.compile(r"\s*---(.*?)---", flags=re.S)

    def __init__(self, path: str, repo_type: str, org: str, repo: str) -> None:
        """Open the git repository located at ``path``.

        Args:
            path: Filesystem path of the local git repository.
            repo_type: Repository type label; stored but not interpreted here.
            org: Organisation / owner part of the repository id.
            repo: Repository name part of the repository id.
        """
        self._path = path
        self._repo_type = repo_type
        self._org = org
        self._repo = repo

        self._git_repo = Repo(self._path)

    def _sha256(self, text: Union[str, bytes]) -> str:
        """Return the hex SHA-256 digest of ``text``.

        ``str`` input is encoded as UTF-8 first; ``bytes`` and ``bytearray``
        are hashed as-is.

        Raises:
            TypeError: If ``text`` is neither ``str`` nor bytes-like.
        """
        if isinstance(text, (bytes, bytearray)):
            data = text
        elif isinstance(text, str):
            data = text.encode("utf-8")
        else:
            raise TypeError("Invalid sha256 param type.")
        return hashlib.sha256(data).hexdigest()

    def _match_card(self, readme: str) -> str:
        """Return the text between the leading ``---`` markers, or ``""``."""
        match = self._CARD_PATTERN.match(readme)
        return match.group(1) if match else ""

    def _remove_card(self, readme: str) -> str:
        """Return ``readme`` with only its leading ``---`` card block removed.

        Later ``---`` pairs (for example markdown horizontal rules) are left
        untouched, so this strips exactly what ``_match_card`` recognises.
        """
        match = self._CARD_PATTERN.match(readme)
        return readme[match.end():] if match else readme

    def _get_readme(self, commit: "Commit") -> str:
        """Return the decoded ``README.md`` of ``commit``, or ``""`` if absent."""
        if "README.md" not in commit.tree:
            return ""
        raw: bytes = commit.tree["README.md"].data_stream.read()
        return raw.decode()

    def _get_description(self, commit: "Commit") -> str:
        """Return the README of ``commit`` without its leading card block."""
        return self._remove_card(self._get_readme(commit))

    def _get_entry_files(self, tree, include_dir=False) -> List[str]:
        """Recursively collect the paths of all entries below ``tree``.

        Args:
            tree: Iterable of entries exposing ``type`` and ``path``; entries
                whose ``type`` is ``"tree"`` are themselves iterable.
            include_dir: When true, directory paths are listed as well (after
                their contents), at every nesting level.
        """
        out_paths = []
        for entry in tree:
            if entry.type == "tree":
                # Propagate the flag so nested directories are listed too.
                out_paths.extend(self._get_entry_files(entry, include_dir))
                if include_dir:
                    out_paths.append(entry.path)
            else:
                out_paths.append(entry.path)
        return out_paths

    def _get_tree_files(self, commit: "Commit") -> List[str]:
        """Return the paths of all non-directory entries in ``commit``."""
        return self._get_entry_files(commit.tree)

    def _get_earliest_commit(self) -> Optional["Commit"]:
        """Return the commit with the smallest commit date among the commits
        yielded by ``iter_commits()``, or ``None`` if there are none."""
        return min(
            self._git_repo.iter_commits(),
            key=lambda commit: commit.committed_datetime,
            default=None,
        )

    @staticmethod
    def _format_timestamp(moment) -> str:
        """Format a datetime as ``YYYY-MM-DDTHH:MM:SS.ffffffZ``.

        The trailing ``Z`` denotes UTC, so timezone-aware values are converted
        to UTC before formatting. Naive values are formatted unchanged.
        """
        from datetime import timezone

        if moment.tzinfo is not None:
            moment = moment.astimezone(timezone.utc)
        return moment.strftime("%Y-%m-%dT%H:%M:%S.%fZ")

    def get_meta(self, commit_hash: str) -> Optional[Dict[str, Any]]:
        """Build the metadata dictionary for ``commit_hash``.

        Returns:
            The ``RepoMeta.to_dict()`` mapping for the commit, or ``None``
            when the revision cannot be resolved (``gitdb.exc.BadName``).
        """
        try:
            commit = self._git_repo.commit(commit_hash)
        except gitdb.exc.BadName:
            return None

        # Read the README once; both the description and the card use it.
        readme = self._get_readme(commit)
        meta = RepoMeta()

        meta._id = self._sha256(f"{self._org}/{self._repo}/{commit.hexsha}")
        meta.id = f"{self._org}/{self._repo}"
        meta.author = self._org
        meta.sha = commit.hexsha
        # NOTE(review): this reports the date of HEAD, not of the requested
        # commit -- confirm that is intended.
        meta.lastModified = self._format_timestamp(
            self._git_repo.head.commit.committed_datetime
        )
        meta.private = False
        meta.gated = False
        meta.disabled = False
        meta.tags = []
        meta.description = self._remove_card(readme)
        meta.paperswithcode_id = None
        meta.downloads = 0
        meta.likes = 0
        # NOTE(security): yaml.CLoader is a full (non-safe) loader and the
        # README comes from repository content. Consider yaml.CSafeLoader /
        # yaml.safe_load if the mirrored repositories are not fully trusted.
        meta.cardData = yaml.load(self._match_card(readme), Loader=yaml.CLoader)
        meta.siblings = [{"rfilename": p} for p in self._get_tree_files(commit)]
        meta.createdAt = self._format_timestamp(
            self._get_earliest_commit().committed_datetime
        )
        return meta.to_dict()

    def _resolve_entry(self, path: str, tree: "Tree"):
        """Walk ``tree`` along ``path`` and return the entry found, or ``None``.

        ``path`` is normalised with ``os.path.normpath`` and backslashes are
        treated as separators. Lookup fails when a component is missing or
        when a non-final component is not a directory.
        """
        norm_p = os.path.normpath(path).replace("\\", "/")
        entry = None
        current = tree
        for part in norm_p.split("/"):
            if current is None:
                # The previous component was not a directory.
                return None
            entry = next((item for item in current if item.name == part), None)
            if entry is None:
                return None
            current = entry if entry.type == "tree" else None
        return entry

    def _resolve_blob(self, path: str, tree: "Tree"):
        """Return the entry at ``path`` if its type is ``"blob"``, else ``None``."""
        entry = self._resolve_entry(path, tree)
        if entry is None or entry.type != "blob":
            return None
        return entry

    def _contain_path(self, path: str, tree: "Tree") -> bool:
        """Return whether ``path`` names an entry (file or directory) in ``tree``."""
        return self._resolve_entry(path, tree) is not None

    def get_file_head(self, commit_hash: str, path: str) -> Optional[Dict[str, Any]]:
        """Return header values describing the file at ``path`` in a commit.

        Returns:
            A dict with ``content-length`` (size as a string),
            ``x-repo-commit`` (the commit sha) and ``etag`` (SHA-256 of the
            content), or ``None`` when the revision cannot be resolved or
            ``path`` is not a file in that commit.
        """
        try:
            commit = self._git_repo.commit(commit_hash)
        except gitdb.exc.BadName:
            return None

        blob = self._resolve_blob(path, commit.tree)
        if blob is None:
            return None

        stream = blob.data_stream
        header = {}
        header["content-length"] = str(stream.size)
        header["x-repo-commit"] = commit.hexsha
        header["etag"] = self._sha256(stream.read())
        return header

    def get_file(self, commit_hash: str, path: str) -> Optional["OStream"]:
        """Return the content of the file at ``path`` in a commit, in chunks.

        NOTE(review): despite the annotation, the value returned on success
        is a generator yielding ``bytes`` chunks of at most 4096 bytes.

        Returns:
            The chunk generator, or ``None`` when the revision cannot be
            resolved or ``path`` is not a file in that commit.
        """
        try:
            commit = self._git_repo.commit(commit_hash)
        except gitdb.exc.BadName:
            return None

        def stream_wrapper(file_bytes: bytes):
            file_stream = io.BytesIO(file_bytes)
            # read() returns b"" at end of data, which ends the iteration.
            for chunk in iter(lambda: file_stream.read(4096), b""):
                yield chunk

        blob = self._resolve_blob(path, commit.tree)
        if blob is None:
            return None
        # The content is read eagerly here; only the chunking is lazy.
        return stream_wrapper(blob.data_stream.read())
|
||
|
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
Oops, something went wrong.