MobilityNet · shankari · Mar 29, 2021 · Mar 9, 2021 · Mar 10, 2021 · Mar 12, 2021
diff --git a/bin/.DS_Store b/bin/.DS_Store
diff --git a/bin/dump_data_to_file.py b/bin/dump_data_to_file.py
@@ -0,0 +1,164 @@
+import json
+import os
+import math
+import requests
+import time
+import arrow
+import argparse
+import sys; sys.path.append("..")
+from emeval.input.spec_details import ServerSpecDetails
+from emeval.input.phone_view import PhoneView
+
+
+def dump_data_to_file(data, spec_id, user, key, start_ts, end_ts, out_dir):
+    """
+    Writes JSON-serializable `data` (e.g. dict, list of dicts) to a file, indented by 4 spaces.
+    Missing directories are created on the way; an existing file of the same name is overwritten.
+    The file ends up at the following location below `out_dir`:
+
+    out_dir
+    └── user
+        └── spec_id
+            └── key (every "/" replaced by "~")
+                └── {floor(start_ts)}_{ceil(end_ts)}.json
+    """
+    # a key such as "background/location" would otherwise be split into two nested
+    # directories by os.path.join, so its slashes are swapped for tildes first
+    key_dir_name = "~".join(key.split("/"))
+    target_dir = os.path.join(out_dir, user, spec_id, key_dir_name)
+    os.makedirs(target_dir, exist_ok=True)
+
+    # timestamps are rounded outwards so that the file name covers the whole range
+    file_name = "{}_{}.json".format(math.floor(start_ts), math.ceil(end_ts))
+    out_file = os.path.join(target_dir, file_name)
+    print(f"Creating {out_file=}...")
+    with open(out_file, "w") as fp:
+        json.dump(data, fp, indent=4)
+
+
+def get_all_spec_ids(datastore_url, spec_user):
+    """
+    Retrieves the set of all spec_id's on the E-Mission Server instance being used by script.
+
+    Queries every "config/evaluation_spec" entry stored for `spec_user` between
+    timestamp 0 and now, and collects the `data.label.id` field of each entry.
+
+    :param datastore_url: URL of the E-Mission Server instance to query
+    :param spec_user: the user the specs are stored under
+    :return: a set of spec ids (duplicates across entries are collapsed)
+    """
+    # time.time() is used for "now" instead of `arrow.get().timestamp`: the latter is
+    # a property in arrow < 1.0 but a method in arrow >= 1.0, where passing it
+    # uncalled would hand a bound method to retrieve_data instead of a number
+    now_ts = time.time()
+    spec_data = ServerSpecDetails(datastore_url, spec_user).retrieve_data(
+        spec_user, ["config/evaluation_spec"], 0, now_ts)
+
+    return {s["data"]["label"]["id"] for s in spec_data}
+
+
+def parse_args():
+    """
+    Builds the command line interface of the script and parses `sys.argv` with it.
+
+    Returns the `argparse.Namespace` holding `out_dir`, `datastore_url`, `spec_user`,
+    `spec_id`, `key`, `user`, `start_ts` and `end_ts`. Options that are not given and
+    have no default are `None`.
+    """
+    cli = argparse.ArgumentParser(
+        description="Script that retrieves data from an E-Mission Server instance and dumps it into a hierarchical collection of JSON files.")
+
+    # (flag, keyword arguments for add_argument), registered in this order.
+    # The last four options belong together: if one of them is specified, the
+    # others in that group must also be specified (not enforced by the parser).
+    option_table = [
+        ("--out-dir", dict(
+            type=str,
+            default="data",
+            help="The name of the directory that data will be dumped to. Will be created if not already present. "
+                 "[default: data]")),
+        ("--datastore-url", dict(
+            type=str,
+            default="http://localhost:8080",
+            help="The URL of the E-Mission Server instance from which data will be pulled. "
+                 "[default: http://localhost:8080]")),
+        ("--spec-user", dict(
+            type=str,
+            default="[email protected]",
+            help="The user associated with retrieving specs. "
+                 "[default: [email protected]]")),
+        ("--spec-id", dict(
+            type=str,
+            help="The particular spec to retrieve data for. "
+                 "If not specified, data will be retrieved for all specs on the specified datastore instance.")),
+        ("--key", dict(
+            type=str,
+            help="The time series key to be used if a single call to the E-Mission Server instance is to be made. "
+                 "--user, --start-ts, and --end-ts must also be specified.")),
+        ("--user", dict(
+            type=str,
+            help="The user to be used if a single call to the E-Mission Server instance is to be made. "
+                 "--key, --start-ts, and --end-ts must also be specified.")),
+        ("--start-ts", dict(
+            type=float,
+            help="The starting timestamp from which to pull data if a single call to the E-Mission Server instance is to be made. "
+                 "--key, --user, and --end-ts must also be specified.")),
+        ("--end-ts", dict(
+            type=float,
+            help="The ending timestamp from which to pull data if a single call to the E-Mission Server instance is to be made. "
+                 "--key, --user, and --start-ts must also be specified.")),
+    ]
+    for flag, settings in option_table:
+        cli.add_argument(flag, **settings)
+
+    return cli.parse_args()
+
+
+def _dump_slash_keyed_entries(entries, spec_id, user, start_ts, end_ts, out_dir):
+    """
+    Dumps every value of the dict `entries` whose key contains a slash into its own file.
+
+    Only slash-containing keys are dumped; other keys of the same dict
+    (e.g. "role", "battery_df") are skipped.
+    """
+    for key in [k for k in entries if "/" in k]:
+        dump_data_to_file(entries[key], spec_id, user, key, start_ts, end_ts, out_dir)
+
+
+def run_full_pipeline(datastore_url, spec_user, spec_ids, out_dir):
+    """
+    Dumps the spec and all phone view data of every spec in `spec_ids` below `out_dir`.
+
+    For each spec id, the spec entry itself is dumped first (under `spec_user`, key
+    "config/evaluation_spec", time range 0 -> sys.maxsize). Afterwards a PhoneView is
+    built per spec and, for every phone, the slash-keyed entries of the phone itself
+    (evaluation time range of the spec) and of each of its calibration and evaluation
+    ranges (time range of the respective range) are dumped.
+
+    :param datastore_url: URL of the E-Mission Server instance to read from
+    :param spec_user: the user the specs are stored under
+    :param spec_ids: any sized iterable of spec ids (a list or a set both work)
+    :param out_dir: root directory of the dump
+    """
+    # spec_ids may be a set (see get_all_spec_ids), which cannot be indexed with [0]
+    if len(spec_ids) == 1:
+        pipeline_target = next(iter(spec_ids))
+    else:
+        pipeline_target = "all specs in datastore"
+    print(f"Running full pipeline for {pipeline_target}...")
+
+    # collect ServerSpecDetails objects, dump specs
+    sds = []
+    for s_id in spec_ids:
+        sd = ServerSpecDetails(datastore_url, spec_user, s_id)
+        sds.append(sd)
+        dump_data_to_file(
+            sd.curr_spec_entry,
+            sd.CURR_SPEC_ID,
+            spec_user,
+            "config/evaluation_spec",
+            0,
+            sys.maxsize,
+            out_dir)
+
+    # build and dump phone view maps
+    for sd in sds:
+        pv = PhoneView(sd)
+        for phone_os, phone_map in pv.map().items():
+            for phone_label, phone_detail_map in phone_map.items():
+                # entries stored directly on the phone span the whole evaluation
+                _dump_slash_keyed_entries(
+                    phone_detail_map,
+                    sd.CURR_SPEC_ID,
+                    phone_label,
+                    sd.eval_start_ts,
+                    sd.eval_end_ts,
+                    out_dir)
+                # entries stored on a range only span that range
+                for ranges in [phone_detail_map["calibration_ranges"], phone_detail_map["evaluation_ranges"]]:
+                    for r in ranges:
+                        _dump_slash_keyed_entries(
+                            r,
+                            sd.CURR_SPEC_ID,
+                            phone_label,
+                            r["start_ts"],
+                            r["end_ts"],
+                            out_dir)
+
+
+if __name__ == "__main__":
+    args = parse_args()
+
+    # verify spec_id is valid if specified
+    # (explicit check instead of `assert`, which is stripped when running with -O)
+    spec_ids = get_all_spec_ids(args.datastore_url, args.spec_user)
+    if args.spec_id:
+        if args.spec_id not in spec_ids:
+            sys.exit(f"spec_id `{args.spec_id}` not found within current datastore instance")
+        spec_ids = [args.spec_id]
+
+    # enforce that --key, --user, --start-ts, and --end-ts are all specified if one of these arguments is specified
+    # The parsed values are inspected rather than sys.argv, so that `--key=value` and
+    # abbreviated options accepted by argparse (e.g. `--ke`) are recognized as well.
+    # `is not None` is used because 0 is a legitimate timestamp.
+    single_call_args = {
+        "--key": args.key,
+        "--user": args.user,
+        "--start-ts": args.start_ts,
+        "--end-ts": args.end_ts,
+    }
+    specified = [name for name, value in single_call_args.items() if value is not None]
+    if specified and len(specified) != len(single_call_args):
+        sys.exit("all of --key, --user, --start-ts, and --end-ts must be specified")
+
+    # if --key, etc are specified, just call retrieve_data from an anonymous ServerSpecDetails instance
+    if specified:
+        # the query does not depend on the spec id, so it is made once and the
+        # same result is dumped below every requested spec
+        data = ServerSpecDetails(args.datastore_url, args.user).retrieve_data(args.user, [args.key], args.start_ts, args.end_ts)
+        for s_id in spec_ids:
+            dump_data_to_file(data, s_id, args.user, args.key, args.start_ts, args.end_ts, args.out_dir)
+    else:
+        run_full_pipeline(args.datastore_url, args.spec_user, spec_ids, args.out_dir)
diff --git a/emeval/input/phone_view.py b/emeval/input/phone_view.py
@@ -92,12 +92,12 @@ def fill_transitions(self):
             print("Reading data for %s phones" % phoneOS)
             for phone_label in phone_map:
                 print("Loading transitions for phone %s" % phone_label)
-                curr_phone_transitions = self.spec_details.retrieve_data_from_server(
+                curr_phone_transitions = self.spec_details.retrieve_data(
                     phone_label, ["manual/evaluation_transition"],
                     self.spec_details.eval_start_ts, self.spec_details.eval_end_ts)
                 curr_phone_role = phone_map[phone_label]
                 phone_map[phone_label] = {"role": curr_phone_role}
-                phone_map[phone_label]["transitions"] = curr_phone_transitions
+                phone_map[phone_label]["manual/evaluation_transition"] = curr_phone_transitions
 
     """
     Inputs:
@@ -114,7 +114,7 @@ def filter_transitions(self, start_tt, end_tt, start_ti, end_ti,
             print("Processing data for %s phones" % phoneOS)
             for phone_label in phone_map:
                 print("Processing transitions for phone %s" % phone_label)
-                curr_phone_transitions = [t["data"] for t in phone_map[phone_label]["transitions"]]
+                curr_phone_transitions = [t["data"] for t in phone_map[phone_label]["manual/evaluation_transition"]]
                 # print(curr_phone_transitions)
                 curr_calibration_transitions = [t for t in curr_phone_transitions
                     if (t["transition"] in [start_tt, end_tt, start_ti, end_ti]) and
@@ -279,57 +279,37 @@ def fill_battery_df(self, storage_key):
             for phone_label in phone_map:
                 curr_calibration_ranges = phone_map[phone_label]["{}_ranges".format(storage_key)]
                 for r in curr_calibration_ranges:
-                    battery_entries = self.spec_details.retrieve_data_from_server(phone_label, ["background/battery"], r["start_ts"], r["end_ts"])
+                    battery_entries = self.spec_details.retrieve_data(phone_label, ["background/battery"], r["start_ts"], r["end_ts"])
                     # ios entries before running the pipeline are marked with battery_level_ratio, which is a float from 0 ->1
                     # convert it to % to be consistent with android and easier to understand
                     if phoneOS == "ios":
                         for e in battery_entries:
                             if "battery_level_pct" not in e["data"]:
                                 e["data"]["battery_level_pct"] = e["data"]["battery_level_ratio"] * 100
                                 del e["data"]["battery_level_ratio"]
-                    r["battery_entries"] = battery_entries
+                    r["background/battery"] = battery_entries
                     battery_df = pd.DataFrame([e["data"] for e in battery_entries])
                     if len(battery_df) > 0:
                         battery_df["hr"] = (battery_df.ts-r["start_ts"])/3600.0
                     r["battery_df"] = battery_df
 
-    def _read_until_done(self, phone_label, key, start_ts, end_ts):
-        all_done = False
-        location_entries = []
-        curr_start_ts = start_ts
-        prev_retrieved_count = 0
-
-        while not all_done:
-            print("About to retrieve data for %s from %s -> %s" % (phone_label, curr_start_ts, end_ts))
-            curr_location_entries = self.spec_details.retrieve_data_from_server(phone_label, [key], curr_start_ts, end_ts)
-            print("Retrieved %d entries with timestamps %s..." % (len(curr_location_entries), [cle["data"]["ts"] for cle in curr_location_entries[0:10]]))
-            if len(curr_location_entries) == 0 or len(curr_location_entries) == 1:
-                all_done = True
-            else:
-                location_entries.extend(curr_location_entries)
-                new_start_ts = curr_location_entries[-1]["metadata"]["write_ts"]
-                assert new_start_ts > curr_start_ts
-                curr_start_ts = new_start_ts
-                prev_retrieved_count = len(curr_location_entries)
-        return location_entries
-
     def fill_location_df(self, storage_key):
         for phoneOS, phone_map in self.phone_view_map.items():
             print("Processing data for %s phones" % phoneOS)
             for phone_label in phone_map:
                 curr_calibration_ranges = phone_map[phone_label]["{}_ranges".format(storage_key)]
                 for r in curr_calibration_ranges:
-                    r["location_entries"] = self._read_until_done(phone_label,
+                    r["background/location"] = self.spec_details.read_until_done(phone_label,
                         "background/location",
                         r["start_ts"], r["end_ts"])
-                    r["filtered_location_entries"] = self._read_until_done(
+                    r["background/filtered_location"] = self.spec_details.read_until_done(
                         phone_label,
                         "background/filtered_location",
                         r["start_ts"], r["end_ts"])
                     location_df = pd.DataFrame([e["data"] for e in
-                        r["location_entries"]])
+                        r["background/location"]])
                     filtered_location_df = pd.DataFrame([e["data"] for e in
-                        r["filtered_location_entries"]])
+                        r["background/filtered_location"]])
                     if len(location_df) > 0:
                         location_df["hr"] = (location_df.ts-r["start_ts"])/3600.0
                     if len(filtered_location_df) > 0:
@@ -350,15 +330,15 @@ def fill_motion_activity_df(self, storage_key):
 
                     while not all_done:
                         print("About to retrieve data for %s from %s -> %s" % (phone_label, curr_start_ts, r["end_ts"]))
-                        curr_motion_activity_entries = self.spec_details.retrieve_data_from_server(phone_label, ["background/motion_activity"], curr_start_ts, r["end_ts"])
+                        curr_motion_activity_entries = self.spec_details.retrieve_data(phone_label, ["background/motion_activity"], curr_start_ts, r["end_ts"])
                         print("Retrieved %d entries with timestamps %s..." % (len(curr_motion_activity_entries), [cle["metadata"]["write_ts"] for cle in curr_motion_activity_entries[0:10]]))
                         if len(curr_motion_activity_entries) == 0 or len(curr_motion_activity_entries) == 1 or len(curr_motion_activity_entries) == prev_retrieved_count:
                             all_done = True
                         else:
                             motion_activity_entries.extend(curr_motion_activity_entries)
                             curr_start_ts = curr_motion_activity_entries[-1]["metadata"]["write_ts"]
                             prev_retrieved_count = len(curr_motion_activity_entries)
-                    r["motion_activity_entries"] = motion_activity_entries
+                    r["background/motion_activity"] = motion_activity_entries
                     motion_activity_df = pd.DataFrame([e["data"] for e in motion_activity_entries])
                     if "ts" not in motion_activity_df.columns:
                         print("motion activity has not been processed, copying write_ts -> ts")
@@ -373,11 +353,11 @@ def fill_transition_df(self, storage_key):
             for phone_label in phone_map:
                 curr_calibration_ranges = phone_map[phone_label]["{}_ranges".format(storage_key)]
                 for r in curr_calibration_ranges:
-                    transition_entries = self.spec_details.retrieve_data_from_server(
+                    transition_entries = self.spec_details.retrieve_data(
                         phone_label, ["statemachine/transition"], r["start_ts"], r["end_ts"])
                     # ios entries before running the pipeline are marked with battery_level_ratio, which is a float from 0 ->1
                     # convert it to % to be consistent with android and easier to understand
-                    r["transition_entries"] = transition_entries
+                    r["statemachine/transition"] = transition_entries
                     transition_df = pd.DataFrame([e["data"] for e in transition_entries])
                     if "ts" in transition_df.columns:
                         if "fmt_time" not in transition_df.columns:
@@ -418,7 +398,7 @@ def fill_eval_role_maps(self):
 
     def fill_accuracy_control_trip_ranges(self):
         for phoneOS, phone_map in self.accuracy_control_maps.items():
-            curr_control_transitions = [t["data"] for t in phone_map["transitions"]] # from control phone
+            curr_control_transitions = [t["data"] for t in phone_map["manual/evaluation_transition"]] # from control phone
             curr_evaluation_ranges = phone_map["evaluation_ranges"] # from this phone
             trip_type_check = lambda t: t["transition"] in ["START_EVALUATION_TRIP", "STOP_EVALUATION_TRIP", 4, 5]
             trip_time_check = lambda t, r: t["ts"] >= r["start_ts"] and t["ts"] <= r["end_ts"]
@@ -441,7 +421,7 @@ def fill_accuracy_control_trip_ranges(self):
 
     def fill_accuracy_control_section_ranges(self):
         for phoneOS, phone_map in self.accuracy_control_maps.items():
-            curr_control_transitions = [t["data"] for t in phone_map["transitions"]] # from control phone
+            curr_control_transitions = [t["data"] for t in phone_map["manual/evaluation_transition"]] # from control phone
             curr_evaluation_ranges = phone_map["evaluation_ranges"] # from this phone
             trip_type_check = lambda t: t["transition"] in ["START_EVALUATION_SECTION", "STOP_EVALUATION_SECTION", 6, 7]
             trip_time_check = lambda t, r: t["ts"] >= r["start_ts"] and t["ts"] <= r["end_ts"]