Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue 18 #54

Merged
merged 25 commits into from
Mar 29, 2021
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
3f5be09
add config.py and read_data_from_file.py
Mar 9, 2021
ed63666
finalize dump_data_to_file
Mar 10, 2021
4b0a640
update cli args
Mar 12, 2021
254f410
add capability to export raw data to file when additional parameters …
Mar 12, 2021
9cb24ee
successfully replicate behavior of SpecDetails, handle duplicate spec…
Mar 15, 2021
ec28776
significant refactors + add fill transitions functionality
Mar 16, 2021
d78e497
create v2 based on new ServerSpecDetails class and refactored PhoneVi…
Mar 17, 2021
06b8185
finish script refactor, create general structure for dumping based on…
Mar 18, 2021
6b909cf
move read_until_done to spec_details
Mar 19, 2021
ffa5d27
add FileSpecDetails, configure proper output from PhoneView map
Mar 19, 2021
c0eb143
Add simple unit test with a mocking example
shankari Mar 22, 2021
e8c9fe0
add in code review changes aside from read_until_done
Mar 22, 2021
5746c99
Merge pull request #10 from MobilityNet/add_simple_unit_test
Mar 22, 2021
80c8638
add documentation to parser for dump_data_to_file
Mar 22, 2021
9a7ff44
fix PhoneView to use time series keys from emission
Mar 23, 2021
1821b87
mostly working FileSpecDetails, constants added to PhoneView
Mar 24, 2021
bee4c2a
fully working FileSpecDetails
Mar 25, 2021
1252e2d
updated notebooks + incorporated changes on PR
Mar 26, 2021
24832dc
Restore full tree display
shankari Mar 26, 2021
e942ab8
remove outputs from Evaluations_power_boxplots
Mar 29, 2021
0cf5836
Merge branch 'issue18' of https://github.com/singhish/mobilitynet-ana…
Mar 29, 2021
46c67c8
revert _master notebooks
Mar 29, 2021
e2d72f8
remove .DS_Store
Mar 29, 2021
550ea7a
remove .DS_Store from subfolders
Mar 29, 2021
fb7007d
Add quotes around the `DATASTORE_LOC` as well
shankari Mar 29, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 23 additions & 11 deletions Data_exploration_template.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,15 @@
"import importlib"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import arrow"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand All @@ -89,9 +98,9 @@
"metadata": {},
"outputs": [],
"source": [
"DATASTORE_URL = \"http://cardshark.cs.berkeley.edu\"\n",
"DATASTORE_URL = \"bin/data\"\n",
"AUTHOR_EMAIL = \"[email protected]\"\n",
"sd = eisd.SpecDetails(DATASTORE_URL, AUTHOR_EMAIL, \"train_bus_ebike_mtv_ucb\")"
"sd = eisd.FileSpecDetails(DATASTORE_URL, AUTHOR_EMAIL, \"train_bus_ebike_mtv_ucb\")"
shankari marked this conversation as resolved.
Show resolved Hide resolved
]
},
{
Expand Down Expand Up @@ -139,13 +148,13 @@
" for r in phone_detail_map[\"evaluation_ranges\"]:\n",
" print(8 * ' ', 30 * \"=\")\n",
" print(8 * ' ',r.keys())\n",
" print(8 * ' ',r[\"trip_id\"], r[\"eval_common_trip_id\"], r[\"eval_role\"], len(r[\"evaluation_trip_ranges\"]))\n",
" for tr in r[\"evaluation_trip_ranges\"]:\n",
" print(12 * ' ', 30 * \"-\")\n",
" print(12 * ' ',tr[\"trip_id\"], tr.keys())\n",
" for sr in tr[\"evaluation_section_ranges\"]:\n",
" print(16 * ' ', 30 * \"~\")\n",
" print(16 * ' ',sr[\"trip_id\"], sr.keys())"
" print(8 * ' ',r[\"trip_id\"], r[\"eval_common_trip_id\"], r[\"eval_role\"], len(r[\"evaluation_trip_ranges\"]), arrow.get(r[\"start_ts\"]))\n",
" #for tr in r[\"evaluation_trip_ranges\"]:\n",
" # print(12 * ' ', 30 * \"-\")\n",
" # print(12 * ' ',tr[\"trip_id\"], tr.keys())\n",
" # for sr in tr[\"evaluation_section_ranges\"]:\n",
" # print(16 * ' ', 30 * \"~\")\n",
" # print(16 * ' ',sr[\"trip_id\"], sr.keys())"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Restore

]
},
{
Expand All @@ -160,7 +169,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"ev = eiev.EvaluationView()\n",
Expand Down Expand Up @@ -240,7 +251,8 @@
" print(12 * ' ', 30 * \"-\")\n",
" print(12 * ' ',tr[\"trip_id\"], tr.keys())\n",
" # I am not printing the actual trajectories since that would be too long, only displaying modes\n",
" gt_trip = sd.get_ground_truth_for_trip(tr[\"trip_id_base\"])\n",
" print(tr)\n",
" gt_trip = sd.get_ground_truth_for_trip(tr[\"trip_id_base\"], tr[\"start_ts\"], tr[\"end_ts\"])\n",
" print(12 * ' ', eisd.SpecDetails.get_concat_trajectories(gt_trip)[\"properties\"])"
]
},
Expand Down
Binary file added bin/.DS_Store
Binary file not shown.
206 changes: 206 additions & 0 deletions bin/dump_data_to_file.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
import json
import os
import math
import requests
import time
import arrow
import argparse
import sys; sys.path.append("..")
from emeval.input.spec_details import ServerSpecDetails
from emeval.input.phone_view import PhoneView


def dump_data_to_file(data, spec_id, user, key, start_ts, end_ts, out_dir):
    """
    Dump serializable data (e.g. a dict, or a list of dicts) into an output file.

    Dumped files are created recursively under the folder named by `out_dir` as:

    out_dir
    └── user
        └── spec_id
            └── key
                └── {start_ts}_{end_ts}.json
    """
    # `key` can contain a slash when it corresponds to a key_list argument in
    # SpecDetails::retrieve_data; slashes are invalid in directory names, so
    # substitute a tilde before building the path.
    sanitized_key = key.replace("/", "~")
    target_dir = os.path.join(out_dir, user, spec_id, sanitized_key)
    os.makedirs(target_dir, exist_ok=True)

    # Widen the window to whole seconds: floor the start, ceil the end.
    file_name = "%d_%d.json" % (math.floor(start_ts), math.ceil(end_ts))
    out_file = os.path.join(target_dir, file_name)
    # Same output as the f-string debug specifier f"{out_file=}".
    print(f"Creating out_file={out_file!r}...")
    with open(out_file, "w") as fp:
        json.dump(data, fp, indent=4)


def make_call_to_server(datastore_url, author_email, user, key, start_ts, end_ts):
    """
    Makes a direct call to the E-Mission Server instance based on the specified
    user/key/start_ts/end_ts, returning a single batch of entries.
    """
    # An "anonymous" ServerSpecDetails (constructed without a spec id) is
    # sufficient for raw data retrieval.
    spec_details = ServerSpecDetails(datastore_url, author_email)
    return spec_details.retrieve_one_batch(user, [key], start_ts, end_ts)


def get_all_spec_ids(datastore_url, author_email):
    """
    Retrieves the set of all spec_id's on the E-Mission Server instance being
    used by this script.
    """
    # Specs are stored under the author's own user with this fixed key; query
    # the maximal time range so nothing is missed.
    spec_entries = make_call_to_server(
        datastore_url,
        author_email,
        author_email,
        "config/evaluation_spec",
        0,
        sys.maxsize)

    return {entry["data"]["label"]["id"] for entry in spec_entries}


def run_full_pipeline(datastore_url, author_email, spec_ids, out_dir):
    """
    Runs the full data retrieval pipeline in the event that a
    user/key/start_ts/end_ts combination isn't provided.

    :param datastore_url: URL of the E-Mission Server instance to pull from
    :param author_email: email of the spec author, used to retrieve specs
    :param spec_ids: collection of spec ids to process (a list, or a set when
                     produced by get_all_spec_ids)
    :param out_dir: root directory that dumped JSON files are written to
    """
    # BUGFIX: spec_ids is a set when the caller didn't pass --spec-id, and
    # sets do not support indexing -- so peek at the single element with
    # next(iter(...)) instead of spec_ids[0], which raised TypeError.
    if len(spec_ids) == 1:
        print(f"Running full pipeline for {next(iter(spec_ids))}...")
    else:
        print("Running full pipeline for all specs in datastore...")

    # collect ServerSpecDetails objects, dump specs
    sds = []
    for s_id in spec_ids:
        sd = ServerSpecDetails(datastore_url, author_email, s_id)
        sds.append(sd)
        dump_data_to_file(
            sd.curr_spec_entry,
            sd.CURR_SPEC_ID,
            author_email,
            "config/evaluation_spec",
            0,
            sys.maxsize,
            out_dir)

    # build and dump phone view maps
    for sd in sds:
        pv = PhoneView(sd)
        for phone_os, phone_map in pv.map().items():
            for phone_label, phone_detail_map in phone_map.items():
                # only time series keys (which contain a slash) are dumped
                for key in [k for k in phone_detail_map.keys() if "/" in k]:
                    dump_data_to_file(
                        phone_detail_map[key],
                        sd.CURR_SPEC_ID,
                        phone_label,
                        key,
                        sd.eval_start_ts,
                        sd.eval_end_ts,
                        out_dir)
                for ranges in [phone_detail_map["evaluation_ranges"], phone_detail_map["calibration_ranges"]]:
                    for r in ranges:
                        for key in [k for k in r.keys() if "/" in k]:
                            dump_data_to_file(
                                r[key],
                                sd.CURR_SPEC_ID,
                                phone_label,
                                key,
                                r["start_ts"],
                                r["end_ts"],
                                out_dir)


def parse_args():
    """
    Defines command line arguments for this script and parses them.

    Returns the populated argparse.Namespace.
    """
    parser = argparse.ArgumentParser(
        description="Script that retrieves data from an E-Mission Server instance "
                    "and dumps it into a hierarchical collection of JSON files.")

    # (flag, type, default, help text); a default of None means the flag is
    # optional with no preset value. For the last four flags: if one of them
    # is specified, the others in that group must also be specified.
    arg_specs = [
        ("--out-dir", str, "data",
         "The name of the directory that data will be dumped to. "
         "Will be created if not already present. "
         "[default: data]"),
        ("--datastore-url", str, "http://localhost:8080",
         "The URL of the E-Mission Server instance from which data will be pulled. "
         "[default: http://localhost:8080]"),
        ("--author-email", str, "[email protected]",
         "The user associated with retrieving specs. "
         "This is usually the email of a spec author. "
         "[default: [email protected]]"),
        ("--spec-id", str, None,
         "The particular spec to retrieve data for. "
         "If not specified, data will be retrieved for all specs "
         "on the specified datastore instance."),
        ("--key", str, None,
         "The time series key to be used if a single call "
         "to the E-Mission Server instance is to be made. "
         "--user, --start-ts, and --end-ts must also be specified."),
        ("--user", str, None,
         "The user to be used if a single call to the E-Mission Server instance is to be made. "
         "--key, --start-ts, and --end-ts must also be specified."),
        ("--start-ts", float, None,
         "The starting timestamp from which to pull data if a single call "
         "to the E-Mission Server instance is to be made. "
         "--key, --user, and --end-ts must also be specified."),
        ("--end-ts", float, None,
         "The ending timestamp from which to pull data if a single call "
         "to the E-Mission Server instance is to be made. "
         "--key, --user, and --start-ts must also be specified."),
    ]

    for flag, arg_type, default, help_text in arg_specs:
        parser.add_argument(flag, type=arg_type, default=default, help=help_text)

    return parser.parse_args()


if __name__ == "__main__":
    args = parse_args()

    # Enforce that --key, --user, --start-ts, and --end-ts are all specified
    # if any one of them is specified. Use explicit sys.exit rather than
    # `assert`, which is silently stripped when running under `python -O`.
    cond_req_args = ["--key", "--user", "--start-ts", "--end-ts"]
    for arg in cond_req_args:
        if arg in sys.argv:
            if not set(a for a in cond_req_args if a != arg) <= set(sys.argv):
                sys.exit("all of --key, --user, --start-ts, and --end-ts must be specified")

    # verify spec_id is valid if specified
    spec_ids = get_all_spec_ids(args.datastore_url, args.author_email)
    if args.spec_id:
        if args.spec_id not in spec_ids:
            sys.exit(f"spec_id `{args.spec_id}` not found within current datastore instance")

        spec_ids = [args.spec_id]

    # if --key, etc are specified, just call retrieve_data from an anonymous
    # ServerSpecDetails instance
    if args.key:
        # The retrieved payload depends only on user/key/start_ts/end_ts --
        # not on the spec id -- so make the network call once, hoisted out of
        # the loop, instead of repeating the identical request per spec.
        data = make_call_to_server(
            args.datastore_url,
            args.author_email,
            args.user,
            args.key,
            args.start_ts,
            args.end_ts)

        for s_id in spec_ids:
            dump_data_to_file(
                data,
                s_id,
                args.user,
                args.key,
                args.start_ts,
                args.end_ts,
                args.out_dir)

    else:
        run_full_pipeline(args.datastore_url, args.author_email, spec_ids, args.out_dir)
Loading