Merge pull request #102 from code4nagoya/issue101

patients.csv を3分割(patients1.csv, patients2.csv, patients3.csv) するようにした
code4nagoya · Sep 9, 2022 · 708a0e7
2 parents 8d84467 + 8678c60
commit 708a0e7
Show file tree

Hide file tree

Showing 2 changed files with 48 additions and 41 deletions.
diff --git a/build_json.py b/build_json.py
@@ -99,44 +99,45 @@
     '110代':'110代',
 }
 
-with open('data/patients.csv', 'r', encoding="utf-8") as csvfile:
-    reader = csv.DictReader(csvfile)
-    for row in reader:
-        patients_list.append(row)
-        dt = row['date']
-        patients_date_num_dic.setdefault(dt, 0)
-        patients_date_num_dic[dt] += 1
-
-        patients_date_place_dic.setdefault(dt, {})
-        placeDic = patients_date_place_dic[dt]
-
-        if row['住居地'] in cities:
-            cityCode = cities[row['住居地']]
-        else:
-            cityCode = 'NOT_FOUND'
-            print('住居地 ' + row['住居地'] + ' to NOT_FOUND')
-
-        placeDic.setdefault(cityCode, 0)
-        placeDic[cityCode] += 1
-
-        if row['sex'] in sexes:
-            sex = sexes[row['sex']]
-        else:
-            sex = 'その他'
-            print('sex ' + row['sex'] + ' to その他')
-
-        if row['age'] in ages:
-            age = ages[row['age']]
-        else:
-            age = 'その他'
-            print('age ' + row['age'] + ' to その他')
-
-        patients_date_age_sex_dic.setdefault(dt, {})
-        ageDic = patients_date_age_sex_dic[dt]
-        ageDic.setdefault(age, {})
-        sexDic = ageDic[age]
-        sexDic.setdefault(sex, 0)
-        sexDic[sex] += 1
+for csvFile in ['data/patients1.csv', 'data/patients2.csv', 'data/patients3.csv']:
+    with open(csvFile, 'r', encoding="utf-8") as csvfile:
+        reader = csv.DictReader(csvfile)
+        for row in reader:
+            patients_list.append(row)
+            dt = row['date']
+            patients_date_num_dic.setdefault(dt, 0)
+            patients_date_num_dic[dt] += 1
+
+            patients_date_place_dic.setdefault(dt, {})
+            placeDic = patients_date_place_dic[dt]
+
+            if row['住居地'] in cities:
+                cityCode = cities[row['住居地']]
+            else:
+                cityCode = 'NOT_FOUND'
+                print('住居地 ' + row['住居地'] + ' to NOT_FOUND')
+
+            placeDic.setdefault(cityCode, 0)
+            placeDic[cityCode] += 1
+
+            if row['sex'] in sexes:
+                sex = sexes[row['sex']]
+            else:
+                sex = 'その他'
+                print('sex ' + row['sex'] + ' to その他')
+
+            if row['age'] in ages:
+                age = ages[row['age']]
+            else:
+                age = 'その他'
+                print('age ' + row['age'] + ' to その他')
+
+            patients_date_age_sex_dic.setdefault(dt, {})
+            ageDic = patients_date_age_sex_dic[dt]
+            ageDic.setdefault(age, {})
+            sexDic = ageDic[age]
+            sexDic.setdefault(sex, 0)
+            sexDic[sex] += 1
 
 
 # 日付のリストを生成

diff --git a/scrape_patients.py b/scrape_patients.py
@@ -36,6 +36,7 @@
 import requests
 import pdfplumber
 import math
+import numpy as np
 
 base_url = "https://www.pref.aichi.jp"
 
@@ -187,10 +188,15 @@ def convert_pdf(FILE_PATHs):
     print("データ加工開始")
 
     print("CSV へ出力開始")
-    p = pathlib.Path("./data/patients.csv")
-    p.parent.mkdir(parents=True, exist_ok=True)
 
-    df.to_csv(p, encoding="utf_8")
+
+    dfCount = 1
+    for df_split in np.array_split(df, 3):
+        p = pathlib.Path("./data/patients" + str(dfCount) + ".csv")
+        p.parent.mkdir(parents=True, exist_ok=True)
+        df_split.to_csv(p, encoding="utf_8")
+        dfCount = dfCount + 1
+
     print("CSV へ出力終了")
 
     return df