From e21571257dc585d4b18178f43209b085adb3614b Mon Sep 17 00:00:00 2001 From: Daniel Garijo Date: Wed, 31 Mar 2021 21:54:05 +0200 Subject: [PATCH 1/2] Fix #186 --- .gitignore | 1 + src/somef/cli.py | 12 ++++++++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index a95bce69..00facf06 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,4 @@ Lib/* Scripts/* .idea/* test.json +out.json diff --git a/src/somef/cli.py b/src/somef/cli.py index e123fe0d..14fa7c1c 100644 --- a/src/somef/cli.py +++ b/src/somef/cli.py @@ -347,7 +347,8 @@ def get_path(obj, path): print(dockerfiles) if len(notebooks) > 0: - filtered_resp["hasExecutableNotebook"] = [convert_to_raw_usercontent(x, owner, repo_name, repo_ref) for x in notebooks] + filtered_resp["hasExecutableNotebook"] = [convert_to_raw_usercontent(x, owner, repo_name, repo_ref) for x in + notebooks] if len(dockerfiles) > 0: filtered_resp["hasBuildFile"] = [convert_to_raw_usercontent(x, owner, repo_name, repo_ref) for x in dockerfiles] if len(docs) > 0: @@ -369,6 +370,7 @@ def get_path(obj, path): def convert_to_raw_usercontent(partial, owner, repo_name, repo_ref): return f"https://raw.githubusercontent.com/{owner}/{repo_name}/{repo_ref}/{urllib.parse.quote(partial)}" + ## Function takes readme text as input and divides it into excerpts ## Returns the extracted excerpts def create_excerpts(string_list): @@ -595,16 +597,18 @@ def merge(header_predictions, predictions, citations, dois, binder_links, long_t def format_output(git_data, repo_data): print("formatting output") for i in git_data.keys(): + # print(i) + # print(git_data[i]) if i == 'description': if 'description' not in repo_data.keys(): repo_data['description'] = [] - repo_data['description'].append({'excerpt': git_data[i], 'confidence': [1.0], 'technique': 'GitHub API'}) + if git_data[i] != "": + repo_data['description'].append({'excerpt': git_data[i], 'confidence': [1.0], 'technique': 'GitHub API'}) else: if i == 'hasExecutableNotebook' or i == 'hasBuildFile' or i == 'hasDocumentation': repo_data[i] = {'excerpt': git_data[i], 'confidence': [1.0], 'technique': 'File Exploration'} - else: + elif git_data[i] != "" and git_data[i] != []: repo_data[i] = {'excerpt': git_data[i], 'confidence': [1.0], 'technique': 'GitHub API'} - return repo_data From e12e6ade4955ebff96c59060269671d6a13653f4 Mon Sep 17 00:00:00 2001 From: Daniel Garijo Date: Wed, 31 Mar 2021 23:17:04 +0200 Subject: [PATCH 2/2] Fix #185 --- src/somef/cli.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/somef/cli.py b/src/somef/cli.py index 14fa7c1c..e013eba7 100644 --- a/src/somef/cli.py +++ b/src/somef/cli.py @@ -603,7 +603,8 @@ def format_output(git_data, repo_data): if 'description' not in repo_data.keys(): repo_data['description'] = [] if git_data[i] != "": - repo_data['description'].append({'excerpt': git_data[i], 'confidence': [1.0], 'technique': 'GitHub API'}) + repo_data['description'].append( + {'excerpt': git_data[i], 'confidence': [1.0], 'technique': 'GitHub API'}) else: if i == 'hasExecutableNotebook' or i == 'hasBuildFile' or i == 'hasDocumentation': repo_data[i] = {'excerpt': git_data[i], 'confidence': [1.0], 'technique': 'File Exploration'} @@ -674,13 +675,18 @@ def average_confidence(x): reverse=True) descriptions_text = [x["excerpt"] for x in descriptions] + published_date = "" + try: + published_date = format_date(release_path(["datePublished"])) + except: + print("Published date is not available") + codemeta_output = { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "@type": "SoftwareSourceCode", "license": data_path(["license", "excerpt", "url"]), "codeRepository": "git+" + code_repository + ".git", "dateCreated": format_date(data_path(["dateCreated", "excerpt"])), - "datePublished": format_date(release_path(["datePublished"])), "dateModified": format_date(data_path(["dateModified", "excerpt"])), "downloadUrl": data_path(["downloadUrl", "excerpt"]), "issueTracker": code_repository + "/issues", @@ -698,6 +704,8 @@ def average_confidence(x): } ] } + if published_date != "": + codemeta_output["datePublished"] = published_date pruned_output = {}