Merge pull request #3 from Rosna/master

Added a contributor field to the CSV files under data/
KnowledgeCaptureAndDiscovery · Jul 26, 2019 · 46cdbd6 · 46cdbd6
2 parents f20076a + d836e5b
commit 46cdbd6
Show file tree

Hide file tree

Showing 6 changed files with 3,101 additions and 3,082 deletions.
diff --git a/data/citation.csv b/data/citation.csv
diff --git a/data/description.csv b/data/description.csv
diff --git a/data/installation.csv b/data/installation.csv
diff --git a/data/invocation.csv b/data/invocation.csv
diff --git a/data/none.csv b/data/none.csv
diff --git a/helper_scripts/splitcsvcategory.py b/helper_scripts/splitcsvcategory.py
@@ -8,38 +8,38 @@
 df.sort_values(by=['URL', 'category'], inplace=True)
 if os.path.exists('../data/none.csv'):
     df_none=pd.read_csv('../data/none.csv')
-    df_none=df_none.append((df[df['category']=='none'])[['URL', 'excerpt']])
+    df_none=df_none.append((df[df['category']=='none'])[['URL','contributor', 'excerpt']])
     df_none.drop_duplicates(subset="excerpt").to_csv(path_or_buf='../data/none.csv', index=False)
 else:
-    (df[df['category']=='none']) [['URL', 'excerpt']].to_csv(path_or_buf='../data/none.csv', index=False)
+    (df[df['category']=='none']) [['URL','contributor', 'excerpt']].to_csv(path_or_buf='../data/none.csv', index=False)
 
 
 if os.path.exists('../data/description.csv'):
     df_description=pd.read_csv('../data/description.csv')
-    df_description=df_description.append((df[df['category']=='description']) [['URL', 'excerpt']])
+    df_description=df_description.append((df[df['category']=='description']) [['URL','contributor', 'excerpt']])
     df_description.drop_duplicates(subset="excerpt").to_csv(path_or_buf='../data/description.csv', index=False)
 else:
-    (df[df['category']=='description']) [['URL', 'excerpt']].to_csv(path_or_buf='../data/description.csv', index=False)
+    (df[df['category']=='description']) [['URL', 'contributor','excerpt']].to_csv(path_or_buf='../data/description.csv', index=False)
 
 
 if os.path.exists('../data/installation.csv'):
     df_installation=pd.read_csv('../data/installation.csv')
-    df_installation=df_installation.append((df[df['category']=='installation'])[['URL', 'excerpt']])
+    df_installation=df_installation.append((df[df['category']=='installation'])[['URL','contributor', 'excerpt']])
     df_installation.drop_duplicates(subset="excerpt").to_csv(path_or_buf='../data/installation.csv', index=False)
 else:
-    (df[df['category']=='installation'])[['URL', 'excerpt']].to_csv(path_or_buf='../data/installation.csv', index=False)
+    (df[df['category']=='installation'])[['URL','contributor', 'excerpt']].to_csv(path_or_buf='../data/installation.csv', index=False)
 
 
 if os.path.exists('../data/invocation.csv'):
     df_invocation=pd.read_csv('../data/invocation.csv')
-    df_invocation=df_invocation.append((df[df['category']=='invocation'])[['URL', 'excerpt']])
+    df_invocation=df_invocation.append((df[df['category']=='invocation'])[['URL','contributor', 'excerpt']])
     df_invocation.drop_duplicates(subset="excerpt").to_csv(path_or_buf='../data/invocation.csv', index=False)
 else:
-    (df[df['category']=='invocation']).to_csv(path_or_buf='../data/invocation.csv', index=False)
+    (df[df['category']=='invocation'])[['URL', 'contributor','excerpt']].to_csv(path_or_buf='../data/invocation.csv', index=False)
 
 if os.path.exists('../data/citation.csv'):
     df_citation=pd.read_csv('~/Documents/ISI2019/SM2KG/data/citation.csv')
-    df_citation=df_citation.append((df[df['category']=='citation'])[['URL', 'excerpt']])
+    df_citation=df_citation.append((df[df['category']=='citation'])[['URL', 'contributor','excerpt']])
     df_citation.drop_duplicates(subset="excerpt").to_csv(path_or_buf='../data/citation.csv', index=False)
 else:
-    (df[df['category']=='citation'])[['URL', 'excerpt']].to_csv(path_or_buf='../data/citation.csv', index=False)
+    (df[df['category']=='citation'])[['URL','contributor','excerpt']].to_csv(path_or_buf='../data/citation.csv', index=False)