diff --git a/.gitignore b/.gitignore index 87f73237..18ca98fa 100644 --- a/.gitignore +++ b/.gitignore @@ -106,3 +106,7 @@ ENV/ # mypy .mypy_cache/ +# DS store +.DS_Store +**/.DS_Store + diff --git a/Data_exploration_template.ipynb b/Data_exploration_template.ipynb index 088cfc4f..3f2f00aa 100644 --- a/Data_exploration_template.ipynb +++ b/Data_exploration_template.ipynb @@ -74,6 +74,15 @@ "import importlib" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import arrow" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -89,9 +98,17 @@ "metadata": {}, "outputs": [], "source": [ - "DATASTORE_URL = \"http://cardshark.cs.berkeley.edu\"\n", "AUTHOR_EMAIL = \"shankari@eecs.berkeley.edu\"\n", - "sd = eisd.SpecDetails(DATASTORE_URL, AUTHOR_EMAIL, \"train_bus_ebike_mtv_ucb\")" + "\n", + "# If using ServerSpecDetails, data can alternatively be retrieved as such:\n", + "# DATASTORE_LOC = \"http://localhost:8080\"\n", + "# sd = eisd.ServerSpecDetails(DATASTORE_LOC, AUTHOR_EMAIL, \"train_bus_ebike_mtv_ucb\")\n", + "\n", + "# You must run `cd bin/ && python dump_data_to_file.py --spec-id train_bus_ebike_mtv_ucb`\n", + "# before using this notebook!\n", + "\n", + "DATASTORE_LOC = \"bin/data/\"\n", + "sd = eisd.FileSpecDetails(DATASTORE_LOC, AUTHOR_EMAIL, \"train_bus_ebike_mtv_ucb\")" ] }, { @@ -139,7 +156,7 @@ " for r in phone_detail_map[\"evaluation_ranges\"]:\n", " print(8 * ' ', 30 * \"=\")\n", " print(8 * ' ',r.keys())\n", - " print(8 * ' ',r[\"trip_id\"], r[\"eval_common_trip_id\"], r[\"eval_role\"], len(r[\"evaluation_trip_ranges\"]))\n", + " print(8 * ' ',r[\"trip_id\"], r[\"eval_common_trip_id\"], r[\"eval_role\"], len(r[\"evaluation_trip_ranges\"]), arrow.get(r[\"start_ts\"]))\n", " for tr in r[\"evaluation_trip_ranges\"]:\n", " print(12 * ' ', 30 * \"-\")\n", " print(12 * ' ',tr[\"trip_id\"], tr.keys())\n", @@ -160,7 +177,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "scrolled": true + }, "outputs": [], "source": [ "ev = eiev.EvaluationView()\n", @@ -240,7 +259,8 @@ " print(12 * ' ', 30 * \"-\")\n", " print(12 * ' ',tr[\"trip_id\"], tr.keys())\n", " # I am not printing the actual trajectories since that would be too long, only displaying modes\n", - " gt_trip = sd.get_ground_truth_for_trip(tr[\"trip_id_base\"])\n", + " print(tr)\n", + " gt_trip = sd.get_ground_truth_for_trip(tr[\"trip_id_base\"], tr[\"start_ts\"], tr[\"end_ts\"])\n", " print(12 * ' ', eisd.SpecDetails.get_concat_trajectories(gt_trip)[\"properties\"])" ] }, diff --git a/Evaluate_power_vs_classification.ipynb b/Evaluate_power_vs_classification.ipynb index dce443fe..f1b4d728 100644 --- a/Evaluate_power_vs_classification.ipynb +++ b/Evaluate_power_vs_classification.ipynb @@ -113,11 +113,11 @@ "metadata": {}, "outputs": [], "source": [ - "DATASTORE_URL = \"http://cardshark.cs.berkeley.edu\"\n", + "DATASTORE_LOC = \"bin/data/\"\n", "AUTHOR_EMAIL = \"shankari@eecs.berkeley.edu\"\n", - "sd_la = eisd.SpecDetails(DATASTORE_URL, AUTHOR_EMAIL, \"unimodal_trip_car_bike_mtv_la\")\n", - "sd_sj = eisd.SpecDetails(DATASTORE_URL, AUTHOR_EMAIL, \"car_scooter_brex_san_jose\")\n", - "sd_ucb = eisd.SpecDetails(DATASTORE_URL, AUTHOR_EMAIL, \"train_bus_ebike_mtv_ucb\")" + "sd_la = eisd.FileSpecDetails(DATASTORE_LOC, AUTHOR_EMAIL, \"unimodal_trip_car_bike_mtv_la\")\n", + "sd_sj = eisd.FileSpecDetails(DATASTORE_LOC, AUTHOR_EMAIL, \"car_scooter_brex_san_jose\")\n", + "sd_ucb = eisd.FileSpecDetails(DATASTORE_LOC, AUTHOR_EMAIL, \"train_bus_ebike_mtv_ucb\")" ] }, { @@ -327,8 +327,8 @@ "metadata": {}, "outputs": [], "source": [ - "r2q_map = {\"power_control\": 0, \"HAMFDC\": 1, \"MAHFDC\": 2, \"HAHFDC\": 3, \"accuracy_control\": 4}\n", - "q2r_map = {0: \"power\", 1: \"HAMFDC\", 2: \"MAHFDC\", 3: \"HAHFDC\", 4: \"accuracy\"}" + "r2q_map = {\"power_control\": 0, \"HAMFDC\": 1, \"MAHFDC\": 2, \"HAHFDC\": 3, \"MAMFDC\": 4, \"accuracy_control\": 5}\n", + "q2r_map = {0: \"power\", 1: \"HAMFDC\", 2: \"MAHFDC\", 3: \"HAHFDC\", 4: \"MAMFDC\", 5: \"accuracy\"}" ] }, { @@ -760,13 +760,6 @@ "source": [ "check_outlier(pv_la.map()['android']['ucb-sdb-android-2'][\"evaluation_ranges\"][0], 0, \"suburb_city_driving_weekend_0\", \"AUTOMOTIVE\")" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -785,7 +778,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.8.2" } }, "nbformat": 4, diff --git a/Evaluate_power_vs_motion_activity.ipynb b/Evaluate_power_vs_motion_activity.ipynb index c65019e1..b3c330ac 100644 --- a/Evaluate_power_vs_motion_activity.ipynb +++ b/Evaluate_power_vs_motion_activity.ipynb @@ -113,11 +113,11 @@ "metadata": {}, "outputs": [], "source": [ - "DATASTORE_URL = \"http://cardshark.cs.berkeley.edu\"\n", + "DATASTORE_LOC = \"bin/data/\"\n", "AUTHOR_EMAIL = \"shankari@eecs.berkeley.edu\"\n", - "sd_la = eisd.SpecDetails(DATASTORE_URL, AUTHOR_EMAIL, \"unimodal_trip_car_bike_mtv_la\")\n", - "sd_sj = eisd.SpecDetails(DATASTORE_URL, AUTHOR_EMAIL, \"car_scooter_brex_san_jose\")\n", - "sd_ucb = eisd.SpecDetails(DATASTORE_URL, AUTHOR_EMAIL, \"train_bus_ebike_mtv_ucb\")" + "sd_la = eisd.FileSpecDetails(DATASTORE_LOC, AUTHOR_EMAIL, \"unimodal_trip_car_bike_mtv_la\")\n", + "sd_sj = eisd.FileSpecDetails(DATASTORE_LOC, AUTHOR_EMAIL, \"car_scooter_brex_san_jose\")\n", + "sd_ucb = eisd.FileSpecDetails(DATASTORE_LOC, AUTHOR_EMAIL, \"train_bus_ebike_mtv_ucb\")" ] }, { @@ -960,7 +960,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.8.2" } }, "nbformat": 4, diff --git a/Evaluate_power_vs_trip_start_end.ipynb b/Evaluate_power_vs_trip_start_end.ipynb index 4afdde18..e9249be4 100644 --- a/Evaluate_power_vs_trip_start_end.ipynb +++ b/Evaluate_power_vs_trip_start_end.ipynb @@ -120,11 +120,11 @@ "metadata": {}, "outputs": [], "source": [ - "DATASTORE_URL = \"http://cardshark.cs.berkeley.edu\"\n", + "DATASTORE_LOC = \"bin/data/\"\n", "AUTHOR_EMAIL = \"shankari@eecs.berkeley.edu\"\n", - "sd_la = eisd.SpecDetails(DATASTORE_URL, AUTHOR_EMAIL, \"unimodal_trip_car_bike_mtv_la\")\n", - "sd_sj = eisd.SpecDetails(DATASTORE_URL, AUTHOR_EMAIL, \"car_scooter_brex_san_jose\")\n", - "sd_ucb = eisd.SpecDetails(DATASTORE_URL, AUTHOR_EMAIL, \"train_bus_ebike_mtv_ucb\")" + "sd_la = eisd.FileSpecDetails(DATASTORE_LOC, AUTHOR_EMAIL, \"unimodal_trip_car_bike_mtv_la\")\n", + "sd_sj = eisd.FileSpecDetails(DATASTORE_LOC, AUTHOR_EMAIL, \"car_scooter_brex_san_jose\")\n", + "sd_ucb = eisd.FileSpecDetails(DATASTORE_LOC, AUTHOR_EMAIL, \"train_bus_ebike_mtv_ucb\")" ] }, { @@ -323,8 +323,8 @@ "metadata": {}, "outputs": [], "source": [ - "r2q_map = {\"power_control\": 0, \"HAMFDC\": 1, \"MAHFDC\": 2, \"HAHFDC\": 3, \"accuracy_control\": 4}\n", - "q2r_map = {0: \"power\", 1: \"HAMFDC\", 2: \"MAHFDC\", 3: \"HAHFDC\", 4: \"accuracy\"}" + "r2q_map = {\"power_control\": 0, \"HAMFDC\": 1, \"MAHFDC\": 2, \"HAHFDC\": 3, \"MAMFDC\": 4, \"accuracy_control\": 5}\n", + "q2r_map = {0: \"power\", 1: \"HAMFDC\", 2: \"MAHFDC\", 3: \"HAHFDC\", 4: \"MAMFDC\", 5: \"accuracy\"}" ] }, { @@ -398,7 +398,7 @@ "metadata": {}, "outputs": [], "source": [ - "ifig, ax_array = plt.subplots(nrows=2,ncols=3,figsize=(9,6), sharex=False, sharey=True)\n", + "ifig, ax_array = plt.subplots(nrows=2,ncols=3,figsize=(16,6), sharex=False, sharey=True)\n", "timeline_list = [\"train_bus_ebike_mtv_ucb\", \"car_scooter_brex_san_jose\", \"unimodal_trip_car_bike_mtv_la\"]\n", "for i, tl in enumerate(timeline_list):\n", " tradeoff_df.query(\"timeline == @tl & phone_os == 'android'\").boxplot(ax = ax_array[0][i], column=[\"count_diff\"], by=[\"quality\"])\n", @@ -587,7 +587,7 @@ "metadata": {}, "outputs": [], "source": [ - "ifig, ax_array = plt.subplots(nrows=4,ncols=3,figsize=(10,10), sharex=False, sharey=True)\n", + "ifig, ax_array = plt.subplots(nrows=4,ncols=3,figsize=(16,10), sharex=False, sharey=True)\n", "timeline_list = [\"train_bus_ebike_mtv_ucb\", \"car_scooter_brex_san_jose\", \"unimodal_trip_car_bike_mtv_la\"]\n", "for i, tl in enumerate(timeline_list):\n", " tradeoff_df.query(\"timeline == @tl & phone_os == 'android'\").boxplot(ax = ax_array[0][i], column=[\"start_diff_mins\"], by=[\"quality\"])\n", @@ -1015,7 +1015,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.8.2" } }, "nbformat": 4, diff --git a/Evaluations_power_boxplots.ipynb b/Evaluations_power_boxplots.ipynb index 3c265fd8..d09f6fae 100644 --- a/Evaluations_power_boxplots.ipynb +++ b/Evaluations_power_boxplots.ipynb @@ -88,11 +88,11 @@ "metadata": {}, "outputs": [], "source": [ - "DATASTORE_URL = \"http://cardshark.cs.berkeley.edu\"\n", + "DATASTORE_LOC = \"bin/data/\"\n", "AUTHOR_EMAIL = \"shankari@eecs.berkeley.edu\"\n", - "sd_la = eisd.SpecDetails(DATASTORE_URL, AUTHOR_EMAIL, \"unimodal_trip_car_bike_mtv_la\")\n", - "sd_sj = eisd.SpecDetails(DATASTORE_URL, AUTHOR_EMAIL, \"car_scooter_brex_san_jose\")\n", - "sd_ucb = eisd.SpecDetails(DATASTORE_URL, AUTHOR_EMAIL, \"train_bus_ebike_mtv_ucb\")" + "sd_la = eisd.FileSpecDetails(DATASTORE_LOC, AUTHOR_EMAIL, \"unimodal_trip_car_bike_mtv_la\")\n", + "sd_sj = eisd.FileSpecDetails(DATASTORE_LOC, AUTHOR_EMAIL, \"car_scooter_brex_san_jose\")\n", + "sd_ucb = eisd.FileSpecDetails(DATASTORE_LOC, AUTHOR_EMAIL, \"train_bus_ebike_mtv_ucb\")" ] }, { @@ -135,7 +135,7 @@ "metadata": {}, "outputs": [], "source": [ - "ios_loc_entries = sd_sj.retrieve_data_from_server(\"ucb-sdb-ios-1\", [\"background/location\"],\n", + "ios_loc_entries = sd_sj.retrieve_data(\"ucb-sdb-ios-1\", [\"background/location\"],\n", " arrow.get(\"2019-08-07T14:50:57.445000-07:00\").timestamp,\n", " arrow.get(\"2019-08-07T15:00:16.787000-07:00\").timestamp)\n", "ios_location_df = pd.DataFrame([e[\"data\"] for e in ios_loc_entries])" @@ -147,7 +147,7 @@ "metadata": {}, "outputs": [], "source": [ - "android_loc_entries = sd_sj.retrieve_data_from_server(\"ucb-sdb-android-1\", [\"background/location\"],\n", + "android_loc_entries = sd_sj.retrieve_data(\"ucb-sdb-android-1\", [\"background/location\"],\n", " arrow.get(\"2019-08-07T14:50:57.445000-07:00\").timestamp,\n", " arrow.get(\"2019-08-07T15:00:16.787000-07:00\").timestamp)\n", "android_location_df = pd.DataFrame([e[\"data\"] for e in android_loc_entries])" diff --git a/bin/dump_data_to_file.py b/bin/dump_data_to_file.py new file mode 100644 index 00000000..0fc3cfb9 --- /dev/null +++ b/bin/dump_data_to_file.py @@ -0,0 +1,206 @@ +import json +import os +import math +import requests +import time +import arrow +import argparse +import sys; sys.path.append("..") +from emeval.input.spec_details import ServerSpecDetails +from emeval.input.phone_view import PhoneView + + +def dump_data_to_file(data, spec_id, user, key, start_ts, end_ts, out_dir): + """ + Accepts serializable data (e.g. dict, array of dicts) and dumps it into an output file. + Dumped file are created recursively in the folder name specified by `out_dir` as such: + + out_dir + └── user + └── spec_id + └── key + └── {start_ts}_{end_ts}.json + """ + # key could have a slash if it corresponds to a key_list argument in SpecDetails::retrieve_data + # slashes are invalid in directory names, so replace with tilde + out_path = os.path.join(out_dir, user, spec_id, key.replace("/", "~")) + os.makedirs(out_path, exist_ok=True) + + out_file = os.path.join(out_path, f"{math.floor(start_ts)}_{math.ceil(end_ts)}.json") + print(f"Creating {out_file=}...") + with open(out_file, "w") as f: + json.dump(data, f, indent=4) + + +def make_call_to_server(datastore_url, author_email, user, key, start_ts, end_ts): + """ + Makes a direct call to the E-Mission Server instance based on the specified user/key/start_ts/end_ts. + """ + return ServerSpecDetails(datastore_url, author_email).retrieve_one_batch(user, [key], start_ts, end_ts) + + +def get_all_spec_ids(datastore_url, author_email): + """ + Retrieves list of all spec_id's on E-Mission Server instance being used by script. + """ + spec_data = make_call_to_server( + datastore_url, + author_email, + author_email, + "config/evaluation_spec", + 0, + sys.maxsize) + + spec_ids = [s["data"]["label"]["id"] for s in spec_data] + + return set(spec_ids) + + +def run_full_pipeline(datastore_url, author_email, spec_ids, out_dir): + """ + Runs the full data retrieval pipeline in the event that a user/key/start_ts/end_ts combination isn't provided. + """ + print(f"Running full pipeline for {spec_ids[0] if len(spec_ids) == 1 else 'all specs in datastore'}...") + + # collect ServerSpecDetails objects, dump specs + sds = [] + for s_id in spec_ids: + sd = ServerSpecDetails(datastore_url, author_email, s_id) + sds.append(sd) + dump_data_to_file( + sd.curr_spec_entry, + sd.CURR_SPEC_ID, + author_email, + "config/evaluation_spec", + 0, + sys.maxsize, + out_dir) + + # build and dump phone view maps + for sd in sds: + pv = PhoneView(sd) + for phone_os, phone_map in pv.map().items(): + for phone_label, phone_detail_map in phone_map.items(): + for key in [k for k in phone_detail_map.keys() if "/" in k]: + dump_data_to_file( + phone_detail_map[key], + sd.CURR_SPEC_ID, + phone_label, + key, + sd.eval_start_ts, + sd.eval_end_ts, + out_dir) + for ranges in [phone_detail_map["evaluation_ranges"], phone_detail_map["calibration_ranges"]]: + for r in ranges: + for key in [k for k in r.keys() if "/" in k]: + dump_data_to_file( + r[key], + sd.CURR_SPEC_ID, + phone_label, + key, + r["start_ts"], + r["end_ts"], + out_dir) + + +def parse_args(): + """ + Defines command line arguments for script. + """ + parser = argparse.ArgumentParser( + description="Script that retrieves data from an E-Mission Server instance " + "and dumps it into a hierarchical collection of JSON files.") + + parser.add_argument("--out-dir", + type=str, + default="data", + help="The name of the directory that data will be dumped to. " + "Will be created if not already present. " + "[default: data]") + + parser.add_argument("--datastore-url", + type=str, + default="http://localhost:8080", + help="The URL of the E-Mission Server instance from which data will be pulled. " + "[default: http://localhost:8080]") + + parser.add_argument("--author-email", + type=str, + default="shankari@eecs.berkeley.edu", + help="The user associated with retrieving specs. " + "This is usually the email of a spec author. " + "[default: shankari@eecs.berkeley.edu]") + + parser.add_argument("--spec-id", + type=str, + help="The particular spec to retrieve data for. " + "If not specified, data will be retrieved for all specs " + "on the specified datastore instance.") + + # if one of these arguments is specified, the others in this group must also be specified + parser.add_argument("--key", + type=str, + help="The time series key to be used if a single call " + "to the E-Mission Server instance is to be made. " + "--user, --start-ts, and --end-ts must also be specified.") + + parser.add_argument("--user", + type=str, + help="The user to be used if a single call to the E-Mission Server instance is to be made. " + "--key, --start-ts, and --end-ts must also be specified.") + + parser.add_argument("--start-ts", + type=float, + help="The starting timestamp from which to pull data if a single call " + "to the E-Mission Server instance is to be made. " + "--key, --user, and --end-ts must also be specified.") + + parser.add_argument("--end-ts", + type=float, + help="The ending timestamp from which to pull data if a single call " + "to the E-Mission Server instance is to be made. " + "--key, --user, and --start-ts must also be specified.") + + return parser.parse_args() + + +if __name__ == "__main__": + args = parse_args() + + # enforce that --key, --user, --start-ts, and --end-ts are all specifed if one of these arguments is specified + cond_req_args = ["--key", "--user", "--start-ts", "--end-ts"] + for arg in cond_req_args: + if arg in sys.argv: + assert set(a for a in cond_req_args if a != arg) <= set(sys.argv),\ + "all of --key --user, --start-ts, and --end-ts must be specified" + + # verify spec_id is valid if specified + spec_ids = get_all_spec_ids(args.datastore_url, args.author_email) + if args.spec_id: + assert args.spec_id in spec_ids,\ + f"spec_id `{args.spec_id}` not found within current datastore instance" + + spec_ids = [args.spec_id] + + # if --key, etc are specified, just call retrieve_data from an anonymous ServerSpecDetails instance + if args.key: + for s_id in spec_ids: + data = make_call_to_server( + args.datastore_url, + args.author_email, + args.user, + args.key, + args.start_ts, + args.end_ts) + + dump_data_to_file( + data, + s_id, + args.user, + args.key, + args.start_ts, + args.end_ts, + args.out_dir) + + else: + run_full_pipeline(args.datastore_url, args.author_email, spec_ids, args.out_dir) diff --git a/emeval/analysed/phone_view.py b/emeval/analysed/phone_view.py index 57e248aa..37e79524 100644 --- a/emeval/analysed/phone_view.py +++ b/emeval/analysed/phone_view.py @@ -25,24 +25,24 @@ def create_analysed_view(input_view, analysis_datastore, location_key, trip_key, (input_view.spec_details.CURR_SPEC_ID)) asd = av.spec_details # Overwrite the result so that we can read from the analysis datastore - asd.DATASTORE_URL = analysis_datastore + asd.DATASTORE_LOC = analysis_datastore for phone_os, phone_map in av.map().items(): print(15 * "=*") print(phone_os, phone_map.keys()) for phone_label, phone_detail_map in phone_map.items(): print(4 * ' ', 15 * "-*") print(4 * ' ', phone_label, phone_detail_map["role"], phone_detail_map.keys()) - phone_detail_map["location_entries"] = av.spec_details.retrieve_data_from_server( + phone_detail_map["location_entries"] = av.spec_details.retrieve_data( phone_label, [location_key], av.spec_details.eval_start_ts, arrow.now().timestamp) location_df = pd.DataFrame([e["data"] for e in phone_detail_map["location_entries"]]) if len(location_df) > 0: location_df["hr"] = (location_df.ts-r["start_ts"])/3600.0 phone_detail_map["location_df"] = location_df - phone_detail_map["sensed_trip_ranges"] = av.spec_details.retrieve_data_from_server( + phone_detail_map["sensed_trip_ranges"] = av.spec_details.retrieve_data( phone_label, [trip_key], av.spec_details.eval_start_ts, arrow.now().timestamp) - phone_detail_map["sensed_section_ranges"] = av.spec_details.retrieve_data_from_server( + phone_detail_map["sensed_section_ranges"] = av.spec_details.retrieve_data( phone_label, [section_key], av.spec_details.eval_start_ts, arrow.now().timestamp) diff --git a/emeval/input/phone_view.py b/emeval/input/phone_view.py index 375e7107..274d276b 100644 --- a/emeval/input/phone_view.py +++ b/emeval/input/phone_view.py @@ -33,8 +33,17 @@ import pandas as pd import emeval.validate.phone_view as evpv + TIME_SYNC_FUZZ = 0 # seconds = 1 minute +# keys +EVAL_TRANSITION = "manual/evaluation_transition" +BG_BATTERY = "background/battery" +BG_LOCATION = "background/location" +BG_FILTERED_LOCATION = "background/filtered_location" +BG_MOTION_ACTIVITY = "background/motion_activity" +SM_TRANSITION = "statemachine/transition" + class PhoneView: def __init__(self, spec_details): self.phone_view_map = {} @@ -92,12 +101,12 @@ def fill_transitions(self): print("Reading data for %s phones" % phoneOS) for phone_label in phone_map: print("Loading transitions for phone %s" % phone_label) - curr_phone_transitions = self.spec_details.retrieve_data_from_server( - phone_label, ["manual/evaluation_transition"], + curr_phone_transitions = self.spec_details.retrieve_data( + phone_label, [EVAL_TRANSITION], self.spec_details.eval_start_ts, self.spec_details.eval_end_ts) curr_phone_role = phone_map[phone_label] phone_map[phone_label] = {"role": curr_phone_role} - phone_map[phone_label]["transitions"] = curr_phone_transitions + phone_map[phone_label][EVAL_TRANSITION] = curr_phone_transitions """ Inputs: @@ -114,7 +123,7 @@ def filter_transitions(self, start_tt, end_tt, start_ti, end_ti, print("Processing data for %s phones" % phoneOS) for phone_label in phone_map: print("Processing transitions for phone %s" % phone_label) - curr_phone_transitions = [t["data"] for t in phone_map[phone_label]["transitions"]] + curr_phone_transitions = [t["data"] for t in phone_map[phone_label][EVAL_TRANSITION]] # print(curr_phone_transitions) curr_calibration_transitions = [t for t in curr_phone_transitions if (t["transition"] in [start_tt, end_tt, start_ti, end_ti]) and @@ -279,7 +288,7 @@ def fill_battery_df(self, storage_key): for phone_label in phone_map: curr_calibration_ranges = phone_map[phone_label]["{}_ranges".format(storage_key)] for r in curr_calibration_ranges: - battery_entries = self.spec_details.retrieve_data_from_server(phone_label, ["background/battery"], r["start_ts"], r["end_ts"]) + battery_entries = self.spec_details.retrieve_data(phone_label, [BG_BATTERY], r["start_ts"], r["end_ts"]) # ios entries before running the pipeline are marked with battery_level_ratio, which is a float from 0 ->1 # convert it to % to be consistent with android and easier to understand if phoneOS == "ios": @@ -287,49 +296,29 @@ def fill_battery_df(self, storage_key): if "battery_level_pct" not in e["data"]: e["data"]["battery_level_pct"] = e["data"]["battery_level_ratio"] * 100 del e["data"]["battery_level_ratio"] - r["battery_entries"] = battery_entries + r[BG_BATTERY] = battery_entries battery_df = pd.DataFrame([e["data"] for e in battery_entries]) if len(battery_df) > 0: battery_df["hr"] = (battery_df.ts-r["start_ts"])/3600.0 r["battery_df"] = battery_df - def _read_until_done(self, phone_label, key, start_ts, end_ts): - all_done = False - location_entries = [] - curr_start_ts = start_ts - prev_retrieved_count = 0 - - while not all_done: - print("About to retrieve data for %s from %s -> %s" % (phone_label, curr_start_ts, end_ts)) - curr_location_entries = self.spec_details.retrieve_data_from_server(phone_label, [key], curr_start_ts, end_ts) - print("Retrieved %d entries with timestamps %s..." % (len(curr_location_entries), [cle["data"]["ts"] for cle in curr_location_entries[0:10]])) - if len(curr_location_entries) == 0 or len(curr_location_entries) == 1: - all_done = True - else: - location_entries.extend(curr_location_entries) - new_start_ts = curr_location_entries[-1]["metadata"]["write_ts"] - assert new_start_ts > curr_start_ts - curr_start_ts = new_start_ts - prev_retrieved_count = len(curr_location_entries) - return location_entries - def fill_location_df(self, storage_key): for phoneOS, phone_map in self.phone_view_map.items(): print("Processing data for %s phones" % phoneOS) for phone_label in phone_map: curr_calibration_ranges = phone_map[phone_label]["{}_ranges".format(storage_key)] for r in curr_calibration_ranges: - r["location_entries"] = self._read_until_done(phone_label, - "background/location", + r[BG_LOCATION] = self.spec_details.retrieve_data(phone_label, + [BG_LOCATION], r["start_ts"], r["end_ts"]) - r["filtered_location_entries"] = self._read_until_done( + r[BG_FILTERED_LOCATION] = self.spec_details.retrieve_data( phone_label, - "background/filtered_location", + [BG_FILTERED_LOCATION], r["start_ts"], r["end_ts"]) location_df = pd.DataFrame([e["data"] for e in - r["location_entries"]]) + r[BG_LOCATION]]) filtered_location_df = pd.DataFrame([e["data"] for e in - r["filtered_location_entries"]]) + r[BG_FILTERED_LOCATION]]) if len(location_df) > 0: location_df["hr"] = (location_df.ts-r["start_ts"])/3600.0 if len(filtered_location_df) > 0: @@ -343,27 +332,12 @@ def fill_motion_activity_df(self, storage_key): for phone_label in phone_map: curr_calibration_ranges = phone_map[phone_label]["{}_ranges".format(storage_key)] for r in curr_calibration_ranges: - all_done = False - motion_activity_entries = [] - curr_start_ts = r["start_ts"] - prev_retrieved_count = 0 - - while not all_done: - print("About to retrieve data for %s from %s -> %s" % (phone_label, curr_start_ts, r["end_ts"])) - curr_motion_activity_entries = self.spec_details.retrieve_data_from_server(phone_label, ["background/motion_activity"], curr_start_ts, r["end_ts"]) - print("Retrieved %d entries with timestamps %s..." % (len(curr_motion_activity_entries), [cle["metadata"]["write_ts"] for cle in curr_motion_activity_entries[0:10]])) - if len(curr_motion_activity_entries) == 0 or len(curr_motion_activity_entries) == 1 or len(curr_motion_activity_entries) == prev_retrieved_count: - all_done = True - else: - motion_activity_entries.extend(curr_motion_activity_entries) - curr_start_ts = curr_motion_activity_entries[-1]["metadata"]["write_ts"] - prev_retrieved_count = len(curr_motion_activity_entries) - r["motion_activity_entries"] = motion_activity_entries - motion_activity_df = pd.DataFrame([e["data"] for e in motion_activity_entries]) + r[BG_MOTION_ACTIVITY] = self.spec_details.retrieve_data(phone_label, [BG_MOTION_ACTIVITY], r["start_ts"], r["end_ts"]) + motion_activity_df = pd.DataFrame([e["data"] for e in r[BG_MOTION_ACTIVITY]]) if "ts" not in motion_activity_df.columns: print("motion activity has not been processed, copying write_ts -> ts") - motion_activity_df["ts"] = [e["metadata"]["write_ts"] for e in motion_activity_entries] - motion_activity_df["fmt_time"] = [arrow.get(e["metadata"]["write_ts"]).to(self.spec_details.eval_tz) for e in motion_activity_entries] + motion_activity_df["ts"] = [e["metadata"]["write_ts"] for e in r[BG_MOTION_ACTIVITY]] + motion_activity_df["fmt_time"] = [arrow.get(e["metadata"]["write_ts"]).to(self.spec_details.eval_tz) for e in r[BG_MOTION_ACTIVITY]] motion_activity_df["hr"] = (motion_activity_df.ts-r["start_ts"])/3600.0 r["motion_activity_df"] = motion_activity_df @@ -373,11 +347,11 @@ def fill_transition_df(self, storage_key): for phone_label in phone_map: curr_calibration_ranges = phone_map[phone_label]["{}_ranges".format(storage_key)] for r in curr_calibration_ranges: - transition_entries = self.spec_details.retrieve_data_from_server( - phone_label, ["statemachine/transition"], r["start_ts"], r["end_ts"]) + transition_entries = self.spec_details.retrieve_data( + phone_label, [SM_TRANSITION], r["start_ts"], r["end_ts"]) # ios entries before running the pipeline are marked with battery_level_ratio, which is a float from 0 ->1 # convert it to % to be consistent with android and easier to understand - r["transition_entries"] = transition_entries + r[SM_TRANSITION] = transition_entries transition_df = pd.DataFrame([e["data"] for e in transition_entries]) if "ts" in transition_df.columns: if "fmt_time" not in transition_df.columns: @@ -418,7 +392,7 @@ def fill_eval_role_maps(self): def fill_accuracy_control_trip_ranges(self): for phoneOS, phone_map in self.accuracy_control_maps.items(): - curr_control_transitions = [t["data"] for t in phone_map["transitions"]] # from control phone + curr_control_transitions = [t["data"] for t in phone_map[EVAL_TRANSITION]] # from control phone curr_evaluation_ranges = phone_map["evaluation_ranges"] # from this phone trip_type_check = lambda t: t["transition"] in ["START_EVALUATION_TRIP", "STOP_EVALUATION_TRIP", 4, 5] trip_time_check = lambda t, r: t["ts"] >= r["start_ts"] and t["ts"] <= r["end_ts"] @@ -441,7 +415,7 @@ def fill_accuracy_control_trip_ranges(self): def fill_accuracy_control_section_ranges(self): for phoneOS, phone_map in self.accuracy_control_maps.items(): - curr_control_transitions = [t["data"] for t in phone_map["transitions"]] # from control phone + curr_control_transitions = [t["data"] for t in phone_map[EVAL_TRANSITION]] # from control phone curr_evaluation_ranges = phone_map["evaluation_ranges"] # from this phone trip_type_check = lambda t: t["transition"] in ["START_EVALUATION_SECTION", "STOP_EVALUATION_SECTION", 6, 7] trip_time_check = lambda t, r: t["ts"] >= r["start_ts"] and t["ts"] <= r["end_ts"] diff --git a/emeval/input/spec_details.py b/emeval/input/spec_details.py index b90493e7..22d49485 100644 --- a/emeval/input/spec_details.py +++ b/emeval/input/spec_details.py @@ -6,54 +6,32 @@ import requests import shapely as shp import geojson as gj +from abc import ABC, abstractmethod +import os +import json +import sys +import math -class SpecDetails: - def __init__(self, datastore_url, author_email, spec_id): - self.DATASTORE_URL = datastore_url + +class SpecDetails(ABC): + def __init__(self, datastore_loc, author_email, spec_id=None): + self.DATASTORE_LOC = datastore_loc self.AUTHOR_EMAIL = author_email - self.CURR_SPEC_ID = spec_id - self.curr_spec_entry = self.get_current_spec() - self.populate_spec_details(self.curr_spec_entry) + # make spec_id optional if instance is only being used to call retrieve_data + if spec_id: + self.CURR_SPEC_ID = spec_id + self.curr_spec_entry = self.get_current_spec() + self.populate_spec_details(self.curr_spec_entry) - def retrieve_data_from_server(self, user_label, key_list, start_ts, end_ts): - post_msg = { - "user": user_label, - "key_list": key_list, - "start_time": start_ts, - "end_time": end_ts - } - print("About to retrieve messages using %s" % post_msg) - try: - response = requests.post(self.DATASTORE_URL+"/datastreams/find_entries/timestamp", json=post_msg) - print("response = %s" % response) - response.raise_for_status() - ret_list = response.json()["phone_data"] - except Exception as e: - print("Got %s error %s, retrying" % (type(e).__name__, e)) - time.sleep(10) - response = requests.post(self.DATASTORE_URL+"/datastreams/find_entries/timestamp", json=post_msg) - print("response = %s" % response) - response.raise_for_status() - ret_list = response.json()["phone_data"] - # write_ts may not be the same as data.ts, specially in the case of - # transitions, where we first generate the data.ts in javascript and - # then pass it down to the native code to store - # normally, this doesn't matter because it is a microsecond difference, but - # it does matter in this case because we store several entries in quick - # succession and we want to find the entries within a particular range. - # Putting it into the "data" object makes the write_ts accessible in the - # subsequent dataframes, etc - for e in ret_list: - e["data"]["write_ts"] = e["metadata"]["write_ts"] - print("Found %d entries" % len(ret_list)) - return ret_list + @abstractmethod + def retrieve_data(self, user, key_list, start_ts, end_ts): + pass - def retrieve_all_data_from_server(self, user_label, key_list): - return self.retrieve_data_from_server(user_label, key_list, 0, - arrow.get().timestamp) + def retrieve_all_data(self, user, key_list): + return self.retrieve_data(user, key_list, 0, sys.maxsize) def get_current_spec(self): - all_spec_entry_list = self.retrieve_all_data_from_server(self.AUTHOR_EMAIL, ["config/evaluation_spec"]) + all_spec_entry_list = self.retrieve_all_data(self.AUTHOR_EMAIL, ["config/evaluation_spec"]) curr_spec_entry = None for s in all_spec_entry_list: if s["data"]["label"]["id"] == self.CURR_SPEC_ID: @@ -95,7 +73,6 @@ def get_ground_truth_for_trip(self, trip_id, start_ts, end_ts): return tl - @staticmethod def get_concat_trajectories(trip): coords_list = [] @@ -126,7 +103,6 @@ def get_shapes_for_leg(gt_leg): else: return {"loc": shp.geometry.shape(gt_leg["loc"]["geometry"])} - @classmethod def get_geojson_for_leg(cls, gt_leg): if gt_leg["type"] == "TRAVEL": @@ -138,4 +114,71 @@ def get_geojson_for_leg(cls, gt_leg): else: gt_leg["loc"]["properties"]["style"] = {"color": "purple", "fillColor": "purple"} return gt_leg["loc"] - + + +class ServerSpecDetails(SpecDetails): + def retrieve_one_batch(self, user, key_list, start_ts, end_ts): + post_body = { + "user": user, + "key_list": key_list, + "start_time": start_ts, + "end_time": end_ts + } + + print(f"Retrieving data for: {post_body=}") + try: + response = requests.post(f"{self.DATASTORE_LOC}/datastreams/find_entries/timestamp", json=post_body) + print(f"{response=}") + response.raise_for_status() + data = response.json()["phone_data"] + except Exception as e: + print(f"Got {type(e).__name__}: {e}, retrying...") + time.sleep(10) + response = requests.post(f"{self.DATASTORE_LOC}/datastreams/find_entries/timestamp", json=post_body) + print(f"{response=}") + response.raise_for_status() + data = response.json()["phone_data"] + + for e in data: + e["data"]["write_ts"] = e["metadata"]["write_ts"] + + print(f"Found {len(data)} entries") + return data + + def retrieve_data(self, user, key_list, start_ts, end_ts): + all_done = False + location_entries = [] + curr_start_ts = start_ts + prev_retrieved_count = 0 + + while not all_done: + print("Retrieving data for %s from %s -> %s" % (user, curr_start_ts, end_ts)) + curr_location_entries = self.retrieve_one_batch(user, key_list, curr_start_ts, end_ts) + #print("Retrieved %d entries with timestamps %s..." % (len(curr_location_entries), [cle["data"]["ts"] for cle in curr_location_entries[0:10]])) + if len(curr_location_entries) == 0 or len(curr_location_entries) == 1: + all_done = True + else: + location_entries.extend(curr_location_entries) + new_start_ts = curr_location_entries[-1]["metadata"]["write_ts"] + assert new_start_ts > curr_start_ts + curr_start_ts = new_start_ts + prev_retrieved_count = len(curr_location_entries) + return location_entries + + +class FileSpecDetails(SpecDetails): + def retrieve_data(self, user, key_list, start_ts, end_ts): + data = [] + for key in key_list: + data_file = os.path.join( + os.getcwd(), + self.DATASTORE_LOC, + f"{user}/{self.CURR_SPEC_ID}/{key.replace('/', '~')}/{math.floor(start_ts)}_{math.ceil(end_ts)}.json") + assert os.path.isfile(data_file), f"not found: {data_file=}" + with open(data_file, "r") as f: + d = json.load(f) + if isinstance(d, list): + data.extend(d) + else: + data.append(d) + return data diff --git a/test/__init__.py b/test/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/test/testSpecDetails.py b/test/testSpecDetails.py new file mode 100644 index 00000000..6193716c --- /dev/null +++ b/test/testSpecDetails.py @@ -0,0 +1,26 @@ +# Standard imports +import unittest +import unittest.mock as um +import sys +import os +import arrow +import logging + +# Our imports +import emeval.input.spec_details as eisd + +class TestSpecDetails(unittest.TestCase): + def setUp(self): + pass + + @um.patch.multiple('emeval.input.spec_details.SpecDetails', + get_current_spec = um.DEFAULT, + populate_spec_details = um.DEFAULT) + def testCreation(self, get_current_spec, populate_spec_details): + testsd = eisd.SpecDetails("foo", "bar", "baz") + get_current_spec.assert_called_once() + populate_spec_details.assert_called_once() + +if __name__ == '__main__': + etc.configLogging() + unittest.main() diff --git a/trajectory_evaluation.ipynb b/trajectory_evaluation.ipynb index 4a9f3e72..f8c8ed18 100644 --- a/trajectory_evaluation.ipynb +++ b/trajectory_evaluation.ipynb @@ -65,11 +65,11 @@ "metadata": {}, "outputs": [], "source": [ - "DATASTORE_URL = \"http://cardshark.cs.berkeley.edu\"\n", + "DATASTORE_LOC = \"bin/data/\"\n", "AUTHOR_EMAIL = \"shankari@eecs.berkeley.edu\"\n", - "sd_la = eisd.SpecDetails(DATASTORE_URL, AUTHOR_EMAIL, \"unimodal_trip_car_bike_mtv_la\")\n", - "sd_sj = eisd.SpecDetails(DATASTORE_URL, AUTHOR_EMAIL, \"car_scooter_brex_san_jose\")\n", - "sd_ucb = eisd.SpecDetails(DATASTORE_URL, AUTHOR_EMAIL, \"train_bus_ebike_mtv_ucb\")" + "sd_la = eisd.FileSpecDetails(DATASTORE_LOC, AUTHOR_EMAIL, \"unimodal_trip_car_bike_mtv_la\")\n", + "sd_sj = eisd.FileSpecDetails(DATASTORE_LOC, AUTHOR_EMAIL, \"car_scooter_brex_san_jose\")\n", + "sd_ucb = eisd.FileSpecDetails(DATASTORE_LOC, AUTHOR_EMAIL, \"train_bus_ebike_mtv_ucb\")" ] }, { @@ -158,7 +158,7 @@ "}\n", "\n", "for t in test_cases.values():\n", - " t[\"gt_shapes\"] = gpd.GeoSeries(eisd.SpecDetails.get_shapes_for_leg(t[\"ground_truth\"]))" + " t[\"gt_shapes\"] = gpd.GeoSeries(eisd.FileSpecDetails.get_shapes_for_leg(t[\"ground_truth\"]))" ] }, { @@ -231,7 +231,7 @@ "outputs": [], "source": [ "# t = \"commuter_rail_aboveground\"\n", - "# gt_gj = eisd.SpecDetails.get_geojson_for_leg(test_cases[t][\"ground_truth\"])\n", + "# gt_gj = eisd.FileSpecDetails.get_geojson_for_leg(test_cases[t][\"ground_truth\"])\n", "# print(gt_gj.features[2])\n", "# gt_gj.features[2] = ezgj.get_geojson_for_linestring(emd.filter_ground_truth_linestring(test_cases[t][\"gt_shapes\"]))\n", "# curr_map = ezgj.get_map_for_geojson(gt_gj)\n", @@ -274,7 +274,7 @@ "outputs": [], "source": [ "# curr_map = ezpv.display_map_detail_from_df(manual_check_points)\n", - "# curr_map.add_child(folium.GeoJson(eisd.SpecDetails.get_geojson_for_leg(t[\"ground_truth\"])))" + "# curr_map.add_child(folium.GeoJson(eisd.FileSpecDetails.get_geojson_for_leg(t[\"ground_truth\"])))" ] }, { @@ -342,7 +342,7 @@ " # This is a Shapely LineString\n", " \n", " section_gt_leg = pv.spec_details.get_ground_truth_for_leg(tr[\"trip_id_base\"], sr[\"trip_id_base\"])\n", - " section_gt_shapes = gpd.GeoSeries(eisd.SpecDetails.get_shapes_for_leg(section_gt_leg))\n", + " section_gt_shapes = gpd.GeoSeries(eisd.FileSpecDetails.get_shapes_for_leg(section_gt_leg))\n", " if len(section_gt_shapes) == 1:\n", " print(\"No ground truth route for %s %s, must be polygon, skipping...\" % (tr[\"trip_id_base\"], sr[\"trip_id_base\"]))\n", " assert section_gt_leg[\"type\"] != \"TRAVEL\", \"For %s, %s, %s, %s, %s found type %s\" % (phone_os, phone_label, r_idx, tr_idx, sr_idx, section_gt_leg[\"type\"])\n", @@ -364,9 +364,12 @@ " filtered_gt_linestring = emd.filter_ground_truth_linestring(utm_section_gt_shapes)\n", " meter_dist = filtered_us_gpdf.geometry.distance(filtered_gt_linestring)\n", " ne = len(meter_dist)\n", + " filtered_section_geo_df = section_geo_df.loc[filtered_us_gpdf.index]\n", " curr_spatial_error_df = gpd.GeoDataFrame({\"error\": meter_dist,\n", " \"ts\": section_geo_df.ts,\n", " \"geometry\": section_geo_df.geometry,\n", + " \"ts\": filtered_section_geo_df.ts,\n", + " \"geometry\": filtered_section_geo_df.geometry,\n", " \"phone_os\": np.repeat(phone_os, ne),\n", " \"phone_label\": np.repeat(phone_label, ne),\n", " \"role\": np.repeat(r[\"eval_role_base\"], ne),\n", @@ -405,8 +408,8 @@ "metadata": {}, "outputs": [], "source": [ - "r2q_map = {\"power_control\": 0, \"HAMFDC\": 1, \"MAHFDC\": 2, \"HAHFDC\": 3, \"accuracy_control\": 4}\n", - "q2r_map = {0: \"power\", 1: \"HAMFDC\", 2: \"MAHFDC\", 3: \"HAHFDC\", 4: \"accuracy\"}" + "r2q_map = {\"power_control\": 0, \"HAMFDC\": 1, \"MAHFDC\": 2, \"HAHFDC\": 3, \"MAMFDC\": 4, \"accuracy_control\": 5}\n", + "q2r_map = {0: \"power\", 1: \"HAMFDC\", 2: \"MAHFDC\", 3: \"HAHFDC\", 4: \"MAMFDC\", 5: \"accuracy\"}" ] }, { @@ -442,7 +445,7 @@ "metadata": {}, "outputs": [], "source": [ - "ifig, ax_array = plt.subplots(nrows=1,ncols=2,figsize=(8,2), sharey=True)\n", + "ifig, ax_array = plt.subplots(nrows=1,ncols=2,figsize=(16,6), sharey=True)\n", "\n", "spatial_errors_df.query(\"phone_os == 'android' & quality > 0\").boxplot(ax = ax_array[0], column=[\"error\"], by=[\"quality\"], showfliers=False)\n", "ax_array[0].set_title('android')\n", @@ -466,7 +469,7 @@ "metadata": {}, "outputs": [], "source": [ - "ifig, ax_array = plt.subplots(nrows=1,ncols=2,figsize=(8,2), sharey=True)\n", + "ifig, ax_array = plt.subplots(nrows=1,ncols=2,figsize=(16,6), sharey=True)\n", "\n", "spatial_errors_df.query(\"phone_os == 'android' & quality > 0\").boxplot(ax = ax_array[0], column=[\"error\"], by=[\"quality\"])\n", "ax_array[0].set_title('android')\n", @@ -497,7 +500,7 @@ "metadata": {}, "outputs": [], "source": [ - "ifig, ax_array = plt.subplots(nrows=2,ncols=3,figsize=(12,6), sharex=False, sharey=False)\n", + "ifig, ax_array = plt.subplots(nrows=2,ncols=3,figsize=(15,6), sharex=False, sharey=False)\n", "timeline_list = [\"train_bus_ebike_mtv_ucb\", \"car_scooter_brex_san_jose\", \"unimodal_trip_car_bike_mtv_la\"]\n", "for i, tl in enumerate(timeline_list):\n", " spatial_errors_df.query(\"timeline == @tl & phone_os == 'android' & quality > 0\").boxplot(ax = ax_array[0][i], column=[\"error\"], by=[\"quality\"])\n", @@ -670,7 +673,7 @@ "source": [ "gt_leg = sd_ucb.get_ground_truth_for_leg(\"berkeley_to_mtv_SF_express_bus\", \"express_bus\"); print(gt_leg[\"id\"])\n", "curr_map = ezgj.get_map_for_geojson(sd_ucb.get_geojson_for_leg(gt_leg), name=\"ground_truth\")\n", - "ezgj.get_fg_for_loc_df(emd.linestring_to_geo_df(eisd.SpecDetails.get_shapes_for_leg(gt_leg)[\"route\"]),\n", + "ezgj.get_fg_for_loc_df(emd.linestring_to_geo_df(eisd.FileSpecDetails.get_shapes_for_leg(gt_leg)[\"route\"]),\n", " name=\"gt_points\", color=\"green\").add_to(curr_map)\n", "\n", "name_err_time = lambda lr: \"%d: %d, %s, %s\" % (lr[\"index\"], lr[\"df_idx\"], lr[\"error\"], sd_ucb.fmt(lr[\"ts\"], \"MM-DD HH:mm:ss\"))\n", @@ -701,7 +704,7 @@ "source": [ "gt_leg = sd_ucb.get_ground_truth_for_leg(\"berkeley_to_mtv_SF_express_bus\", \"express_bus\"); print(gt_leg[\"id\"])\n", "curr_map = ezgj.get_map_for_geojson(sd_ucb.get_geojson_for_leg(gt_leg), name=\"ground_truth\")\n", - "ezgj.get_fg_for_loc_df(emd.linestring_to_geo_df(eisd.SpecDetails.get_shapes_for_leg(gt_leg)[\"route\"]),\n", + "ezgj.get_fg_for_loc_df(emd.linestring_to_geo_df(eisd.FileSpecDetails.get_shapes_for_leg(gt_leg)[\"route\"]),\n", " name=\"gt_points\", color=\"green\").add_to(curr_map)\n", "\n", "name_err_time = lambda lr: \"%d: %d, %s, %s\" % (lr[\"index\"], lr[\"df_idx\"], lr[\"error\"], sd_ucb.fmt(lr[\"ts\"], \"MM-DD HH:mm:ss\"))\n", @@ -753,7 +756,7 @@ "source": [ "gt_leg = sd_ucb.get_ground_truth_for_leg(\"mtv_to_berkeley_sf_bart\", \"commuter_rail_aboveground\"); print(gt_leg[\"id\"])\n", "curr_map = ezgj.get_map_for_geojson(sd_ucb.get_geojson_for_leg(gt_leg), name=\"ground_truth\")\n", - "ezgj.get_fg_for_loc_df(emd.linestring_to_geo_df(eisd.SpecDetails.get_shapes_for_leg(gt_leg)[\"route\"]),\n", + "ezgj.get_fg_for_loc_df(emd.linestring_to_geo_df(eisd.FileSpecDetails.get_shapes_for_leg(gt_leg)[\"route\"]),\n", " name=\"gt_points\", color=\"green\").add_to(curr_map)\n", "\n", "name_err_time = lambda lr: \"%d: %d, %s, %s\" % (lr[\"index\"], lr[\"df_idx\"], lr[\"error\"], sd_ucb.fmt(lr[\"ts\"], \"MM-DD HH:mm:ss\"))\n", @@ -868,7 +871,7 @@ "source": [ "gt_leg = sd_ucb.get_ground_truth_for_leg(\"mtv_to_berkeley_sf_bart\", \"walk_to_bus\"); print(gt_leg[\"id\"])\n", "curr_map = ezgj.get_map_for_geojson(sd_ucb.get_geojson_for_leg(gt_leg), name=\"ground_truth\")\n", - "ezgj.get_fg_for_loc_df(emd.linestring_to_geo_df(eisd.SpecDetails.get_shapes_for_leg(gt_leg)[\"route\"]),\n", + "ezgj.get_fg_for_loc_df(emd.linestring_to_geo_df(eisd.FileSpecDetails.get_shapes_for_leg(gt_leg)[\"route\"]),\n", " name=\"gt_points\", color=\"green\").add_to(curr_map)\n", "\n", "name_err_time = lambda lr: \"%d: %d, %s, %s\" % (lr[\"index\"], lr[\"df_idx\"], lr[\"error\"], sd_ucb.fmt(lr[\"ts\"], \"MM-DD HH:mm:ss\"))\n", @@ -946,7 +949,7 @@ "source": [ "gt_leg = sd_ucb.get_ground_truth_for_leg(\"berkeley_to_mtv_SF_express_bus\", \"light_rail_below_above_ground\"); print(gt_leg[\"id\"])\n", "curr_map = ezgj.get_map_for_geojson(sd_ucb.get_geojson_for_leg(gt_leg), name=\"ground_truth\")\n", - "ezgj.get_fg_for_loc_df(emd.linestring_to_geo_df(eisd.SpecDetails.get_shapes_for_leg(gt_leg)[\"route\"]),\n", + "ezgj.get_fg_for_loc_df(emd.linestring_to_geo_df(eisd.FileSpecDetails.get_shapes_for_leg(gt_leg)[\"route\"]),\n", " name=\"gt_points\", color=\"green\").add_to(curr_map)\n", "\n", "name_err_time = lambda lr: \"%d: %d, %s, %s\" % (lr[\"index\"], lr[\"df_idx\"], lr[\"error\"], sd_ucb.fmt(lr[\"ts\"], \"MM-DD HH:mm:ss\"))\n", @@ -990,7 +993,7 @@ "source": [ "gt_leg = sd_ucb.get_ground_truth_for_leg(\"mtv_to_berkeley_sf_bart\", \"subway_underground\"); print(gt_leg[\"id\"])\n", "curr_map = ezgj.get_map_for_geojson(sd_ucb.get_geojson_for_leg(gt_leg), name=\"ground_truth\")\n", - "ezgj.get_fg_for_loc_df(emd.linestring_to_geo_df(eisd.SpecDetails.get_shapes_for_leg(gt_leg)[\"route\"]),\n", + "ezgj.get_fg_for_loc_df(emd.linestring_to_geo_df(eisd.FileSpecDetails.get_shapes_for_leg(gt_leg)[\"route\"]),\n", " name=\"gt_points\", color=\"green\").add_to(curr_map)\n", "\n", "name_err_time = lambda lr: \"%d: %d, %s, %s\" % (lr[\"index\"], lr[\"df_idx\"], lr[\"error\"], sd_ucb.fmt(lr[\"ts\"], \"MM-DD HH:mm:ss\"))\n", @@ -1013,7 +1016,7 @@ "outputs": [], "source": [ "gt_leg = sd_ucb.get_ground_truth_for_leg(\"mtv_to_berkeley_sf_bart\", \"subway_underground\"); gt_leg[\"id\"]\n", - "eisd.SpecDetails.get_shapes_for_leg(gt_leg)[\"route\"].is_simple" + "eisd.FileSpecDetails.get_shapes_for_leg(gt_leg)[\"route\"].is_simple" ] }, { @@ -1119,7 +1122,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.8.2" } }, "nbformat": 4, diff --git a/trajectory_evaluation_spatio_temporal.ipynb b/trajectory_evaluation_spatio_temporal.ipynb index ffae0282..c8c66aa2 100644 --- a/trajectory_evaluation_spatio_temporal.ipynb +++ b/trajectory_evaluation_spatio_temporal.ipynb @@ -66,11 +66,11 @@ "metadata": {}, "outputs": [], "source": [ - "DATASTORE_URL = \"http://cardshark.cs.berkeley.edu\"\n", + "DATASTORE_LOC = \"bin/data/\"\n", "AUTHOR_EMAIL = \"shankari@eecs.berkeley.edu\"\n", - "sd_la = eisd.SpecDetails(DATASTORE_URL, AUTHOR_EMAIL, \"unimodal_trip_car_bike_mtv_la\")\n", - "sd_sj = eisd.SpecDetails(DATASTORE_URL, AUTHOR_EMAIL, \"car_scooter_brex_san_jose\")\n", - "sd_ucb = eisd.SpecDetails(DATASTORE_URL, AUTHOR_EMAIL, \"train_bus_ebike_mtv_ucb\")" + "sd_la = eisd.FileSpecDetails(DATASTORE_LOC, AUTHOR_EMAIL, \"unimodal_trip_car_bike_mtv_la\")\n", + "sd_sj = eisd.FileSpecDetails(DATASTORE_LOC, AUTHOR_EMAIL, \"car_scooter_brex_san_jose\")\n", + "sd_ucb = eisd.FileSpecDetails(DATASTORE_LOC, AUTHOR_EMAIL, \"train_bus_ebike_mtv_ucb\")" ] }, { @@ -135,7 +135,7 @@ " for (sr_idx, sr) in enumerate(tr[\"evaluation_section_ranges\"]):\n", " # This is a Shapely LineString\n", " section_gt_leg = pv.spec_details.get_ground_truth_for_leg(tr[\"trip_id_base\"], sr[\"trip_id_base\"])\n", - " section_gt_shapes = gpd.GeoSeries(eisd.SpecDetails.get_shapes_for_leg(section_gt_leg))\n", + " section_gt_shapes = gpd.GeoSeries(eisd.FileSpecDetails.get_shapes_for_leg(section_gt_leg))\n", " if len(section_gt_shapes) == 1:\n", " print(\"No ground truth route for %s %s, must be polygon, skipping...\" % (tr[\"trip_id_base\"], sr[\"trip_id_base\"]))\n", " assert section_gt_leg[\"type\"] != \"TRAVEL\", \"For %s, %s, %s, %s, %s found type %s\" % (phone_os, phone_label, r_idx, tr_idx, sr_idx, section_gt_leg[\"type\"])\n", @@ -214,7 +214,7 @@ " for (sr_idx, sr) in enumerate(tr[\"evaluation_section_ranges\"]):\n", " # This is a Shapely LineString\n", " section_gt_leg = pv.spec_details.get_ground_truth_for_leg(tr[\"trip_id_base\"], sr[\"trip_id_base\"])\n", - " section_gt_shapes = gpd.GeoSeries(eisd.SpecDetails.get_shapes_for_leg(section_gt_leg))\n", + " section_gt_shapes = gpd.GeoSeries(eisd.FileSpecDetails.get_shapes_for_leg(section_gt_leg))\n", " if len(section_gt_shapes) == 1:\n", " print(\"No ground truth route for %s %s, must be polygon, skipping...\" % (tr[\"trip_id_base\"], sr[\"trip_id_base\"]))\n", " assert section_gt_leg[\"type\"] != \"TRAVEL\", \"For %s, %s, %s, %s, %s found type %s\" % (phone_os, phone_label, r_idx, tr_idx, sr_idx, section_gt_leg[\"type\"])\n", @@ -281,8 +281,8 @@ "metadata": {}, "outputs": [], "source": [ - "r2q_map = {\"power_control\": 0, \"HAMFDC\": 1, \"MAHFDC\": 2, \"HAHFDC\": 3, \"accuracy_control\": 4}\n", - "q2r_map = {0: \"power\", 1: \"HAMFDC\", 2: \"MAHFDC\", 3: \"HAHFDC\", 4: \"accuracy\"}" + "r2q_map = {\"power_control\": 0, \"HAMFDC\": 1, \"MAHFDC\": 2, \"HAHFDC\": 3, \"MAMFDC\": 4, \"accuracy_control\": 5}\n", + "q2r_map = {0: \"power\", 1: \"HAMFDC\", 2: \"MAHFDC\", 3: \"HAHFDC\", 4: \"MAMFDC\", 5: \"accuracy\"}" ] }, { @@ -660,7 +660,7 @@ "source": [ "gt_leg = sd_ucb.get_ground_truth_for_leg(\"berkeley_to_mtv_SF_express_bus\", \"express_bus\"); print(gt_leg[\"id\"])\n", "curr_map = ezgj.get_map_for_geojson(sd_ucb.get_geojson_for_leg(gt_leg), name=\"ground_truth\")\n", - "ezgj.get_fg_for_loc_df(emd.linestring_to_geo_df(eisd.SpecDetails.get_shapes_for_leg(gt_leg)[\"route\"]),\n", + "ezgj.get_fg_for_loc_df(emd.linestring_to_geo_df(eisd.FileSpecDetails.get_shapes_for_leg(gt_leg)[\"route\"]),\n", " name=\"gt_points\", color=\"green\").add_to(curr_map)\n", "\n", "name_err_time = lambda lr: \"%d: %d, %s, %s\" % (lr[\"index\"], lr[\"df_idx\"], lr[\"error\"], sd_ucb.fmt(lr[\"ts\"], \"MM-DD HH:mm:ss\"))\n", @@ -1000,7 +1000,7 @@ "def display_gt_and_controls(entry, loc_df_label, with_points=False):\n", " curr_map = folium.Map()\n", " print(\"Using ground truth %s\" % entry[\"ground_truth\"][\"leg\"][\"id\"])\n", - " gt_leg_gj = eisd.SpecDetails.get_geojson_for_leg(entry[\"ground_truth\"][\"leg\"])\n", + " gt_leg_gj = eisd.FileSpecDetails.get_geojson_for_leg(entry[\"ground_truth\"][\"leg\"])\n", " gt_leg_gj_feature = folium.GeoJson(gt_leg_gj, name=\"ground_truth\")\n", " curr_map.add_child(gt_leg_gj_feature)\n", " if with_points:\n", @@ -1036,7 +1036,7 @@ "metadata": {}, "outputs": [], "source": [ - "ezgj.get_map_for_geojson(eisd.SpecDetails.get_geojson_for_leg(e[\"ground_truth\"][\"leg\"]))" + "ezgj.get_map_for_geojson(eisd.FileSpecDetails.get_geojson_for_leg(e[\"ground_truth\"][\"leg\"]))" ] }, { @@ -1063,7 +1063,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.8.2" } }, "nbformat": 4, diff --git a/trajectory_evaluation_timestamp_metric_selection.ipynb b/trajectory_evaluation_timestamp_metric_selection.ipynb index 6b7526db..71b8fc85 100644 --- a/trajectory_evaluation_timestamp_metric_selection.ipynb +++ b/trajectory_evaluation_timestamp_metric_selection.ipynb @@ -112,11 +112,11 @@ "metadata": {}, "outputs": [], "source": [ - "DATASTORE_URL = \"http://cardshark.cs.berkeley.edu\"\n", + "DATASTORE_LOC = \"bin/data/\"\n", "AUTHOR_EMAIL = \"shankari@eecs.berkeley.edu\"\n", - "sd_la = eisd.SpecDetails(DATASTORE_URL, AUTHOR_EMAIL, \"unimodal_trip_car_bike_mtv_la\")\n", - "sd_sj = eisd.SpecDetails(DATASTORE_URL, AUTHOR_EMAIL, \"car_scooter_brex_san_jose\")\n", - "sd_ucb = eisd.SpecDetails(DATASTORE_URL, AUTHOR_EMAIL, \"train_bus_ebike_mtv_ucb\")" + "sd_la = eisd.FileSpecDetails(DATASTORE_LOC, AUTHOR_EMAIL, \"unimodal_trip_car_bike_mtv_la\")\n", + "sd_sj = eisd.FileSpecDetails(DATASTORE_LOC, AUTHOR_EMAIL, \"car_scooter_brex_san_jose\")\n", + "sd_ucb = eisd.FileSpecDetails(DATASTORE_LOC, AUTHOR_EMAIL, \"train_bus_ebike_mtv_ucb\")" ] }, { @@ -236,7 +236,7 @@ "def display_gt_and_controls(entry, loc_df_label, with_points=False):\n", " curr_map = folium.Map()\n", " print(\"Using ground truth %s\" % entry[\"ground_truth\"][\"leg\"][\"id\"])\n", - " gt_leg_gj = eisd.SpecDetails.get_geojson_for_leg(entry[\"ground_truth\"][\"leg\"])\n", + " gt_leg_gj = eisd.FileSpecDetails.get_geojson_for_leg(entry[\"ground_truth\"][\"leg\"])\n", " gt_leg_gj_feature = folium.GeoJson(gt_leg_gj, name=\"ground_truth\")\n", " curr_map.add_child(gt_leg_gj_feature)\n", " if with_points:\n", @@ -602,7 +602,7 @@ " for (sr_idx, sr) in enumerate(tr[\"evaluation_section_ranges\"]):\n", " # This is a Shapely LineString\n", " section_gt_leg = pv.spec_details.get_ground_truth_for_leg(tr[\"trip_id_base\"], sr[\"trip_id_base\"])\n", - " section_gt_shapes = gpd.GeoSeries(eisd.SpecDetails.get_shapes_for_leg(section_gt_leg))\n", + " section_gt_shapes = gpd.GeoSeries(eisd.FileSpecDetails.get_shapes_for_leg(section_gt_leg))\n", " if len(section_gt_shapes) == 1:\n", " print(\"No ground truth route for %s %s, must be polygon, skipping...\" % (tr[\"trip_id_base\"], sr[\"trip_id_base\"]))\n", " assert section_gt_leg[\"type\"] != \"TRAVEL\", \"For %s, %s, %s, %s, %s found type %s\" % (phone_os, phone_label, r_idx, tr_idx, sr_idx, section_gt_leg[\"type\"])\n", @@ -1156,7 +1156,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.8.2" } }, "nbformat": 4,