Skip to content

Commit

Permalink
Merge pull request #3 from Rosna/master
Browse files Browse the repository at this point in the history
added contributor field in data/csv
  • Loading branch information
dgarijo authored Jul 26, 2019
2 parents f20076a + d836e5b commit 46cdbd6
Show file tree
Hide file tree
Showing 6 changed files with 3,101 additions and 3,082 deletions.
615 changes: 317 additions & 298 deletions data/citation.csv

Large diffs are not rendered by default.

674 changes: 337 additions & 337 deletions data/description.csv

Large diffs are not rendered by default.

1,860 changes: 930 additions & 930 deletions data/installation.csv

Large diffs are not rendered by default.

2,272 changes: 1,136 additions & 1,136 deletions data/invocation.csv

Large diffs are not rendered by default.

742 changes: 371 additions & 371 deletions data/none.csv

Large diffs are not rendered by default.

20 changes: 10 additions & 10 deletions helper_scripts/splitcsvcategory.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,38 +8,38 @@
# Sort the rows in place by URL, then by category.
df.sort_values(by=['URL', 'category'], inplace=True)
# Write the rows labelled 'none' to ../data/none.csv, keeping only the
# URL, contributor and excerpt columns.
none_rows = df[df['category'] == 'none'][['URL', 'contributor', 'excerpt']]
if os.path.exists('../data/none.csv'):
    # Merge with the rows already on disk. pd.concat is used because
    # DataFrame.append was removed in pandas 2.0.
    df_none = pd.read_csv('../data/none.csv')
    df_none = pd.concat([df_none, none_rows])
    # Keep one row per distinct excerpt, then overwrite the file.
    df_none.drop_duplicates(subset="excerpt").to_csv(path_or_buf='../data/none.csv', index=False)
else:
    # No file yet: create it from the current rows.
    none_rows.to_csv(path_or_buf='../data/none.csv', index=False)


# Write the rows labelled 'description' to ../data/description.csv, keeping
# only the URL, contributor and excerpt columns.
description_rows = df[df['category'] == 'description'][['URL', 'contributor', 'excerpt']]
if os.path.exists('../data/description.csv'):
    # Merge with the rows already on disk. pd.concat is used because
    # DataFrame.append was removed in pandas 2.0.
    df_description = pd.read_csv('../data/description.csv')
    df_description = pd.concat([df_description, description_rows])
    # Keep one row per distinct excerpt, then overwrite the file.
    df_description.drop_duplicates(subset="excerpt").to_csv(path_or_buf='../data/description.csv', index=False)
else:
    # No file yet: create it from the current rows.
    description_rows.to_csv(path_or_buf='../data/description.csv', index=False)


# Write the rows labelled 'installation' to ../data/installation.csv, keeping
# only the URL, contributor and excerpt columns.
installation_rows = df[df['category'] == 'installation'][['URL', 'contributor', 'excerpt']]
if os.path.exists('../data/installation.csv'):
    # Merge with the rows already on disk. pd.concat is used because
    # DataFrame.append was removed in pandas 2.0.
    df_installation = pd.read_csv('../data/installation.csv')
    df_installation = pd.concat([df_installation, installation_rows])
    # Keep one row per distinct excerpt, then overwrite the file.
    df_installation.drop_duplicates(subset="excerpt").to_csv(path_or_buf='../data/installation.csv', index=False)
else:
    # No file yet: create it from the current rows.
    installation_rows.to_csv(path_or_buf='../data/installation.csv', index=False)


# Write the rows labelled 'invocation' to ../data/invocation.csv, keeping
# only the URL, contributor and excerpt columns.
invocation_rows = df[df['category'] == 'invocation'][['URL', 'contributor', 'excerpt']]
if os.path.exists('../data/invocation.csv'):
    # Merge with the rows already on disk. pd.concat is used because
    # DataFrame.append was removed in pandas 2.0.
    df_invocation = pd.read_csv('../data/invocation.csv')
    df_invocation = pd.concat([df_invocation, invocation_rows])
    # Keep one row per distinct excerpt, then overwrite the file.
    df_invocation.drop_duplicates(subset="excerpt").to_csv(path_or_buf='../data/invocation.csv', index=False)
else:
    # No file yet: create it with the same three columns as the merge branch,
    # rather than dumping every column of df.
    invocation_rows.to_csv(path_or_buf='../data/invocation.csv', index=False)

# Write the rows labelled 'citation' to ../data/citation.csv, keeping only
# the URL, contributor and excerpt columns.
citation_rows = df[df['category'] == 'citation'][['URL', 'contributor', 'excerpt']]
if os.path.exists('../data/citation.csv'):
    # Read the same file that was just checked and that is written below.
    # The previous code read a hard-coded path under one user's home
    # directory, which fails (or loads a stale copy) on any other machine.
    df_citation = pd.read_csv('../data/citation.csv')
    # pd.concat is used because DataFrame.append was removed in pandas 2.0.
    df_citation = pd.concat([df_citation, citation_rows])
    # Keep one row per distinct excerpt, then overwrite the file.
    df_citation.drop_duplicates(subset="excerpt").to_csv(path_or_buf='../data/citation.csv', index=False)
else:
    # No file yet: create it from the current rows.
    citation_rows.to_csv(path_or_buf='../data/citation.csv', index=False)

0 comments on commit 46cdbd6

Please sign in to comment.