Included maximum_stored_model_count parameter in trip_model config file
Decided on a threshold for the number of stored models per user, above which redundant (older) models are deleted. This was agreed to be 3 and is defined as maximum_stored_model_count in conf/analysis/trip_model.conf.json.sample.

The related files have been updated to read this config value.
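
For reference, a minimal sketch of how a caller picks up the new value; this assumes a deployed conf/analysis/trip_model.conf.json derived from the sample file, and the getter added in config.py below:

import emission.analysis.modelling.trip_model.config as eamtc

# Returns 3 with the sample config. The storage layer uses this value to cap
# how many models are retained per user before older entries are deleted.
maximum_stored_model_count = eamtc.get_maximum_stored_model_count()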
Mahadik, Mukul Chandrakant authored and committed Jan 29, 2024
1 parent b1948dd commit 6f4ac50
Showing 4 changed files with 16 additions and 20 deletions.
1 change: 1 addition & 0 deletions conf/analysis/trip_model.conf.json.sample
@@ -2,6 +2,7 @@
   "model_type": "greedy",
   "model_storage": "document_database",
   "minimum_trips": 14,
+  "maximum_stored_model_count": 3,
   "model_parameters": {
     "greedy": {
       "metric": "od_similarity",
8 changes: 6 additions & 2 deletions emission/analysis/modelling/trip_model/config.py
@@ -75,5 +75,9 @@ def get_minimum_trips():
         raise TypeError(msg)
     return minimum_trips
 
-
-
+def get_maximum_stored_model_count():
+    maximum_stored_model_count = get_config_value_or_raise('maximum_stored_model_count')
+    if not isinstance(maximum_stored_model_count, int):
+        msg = f"config key 'maximum_stored_model_count' not an integer in config file {config_filename}"
+        raise TypeError(msg)
+    return maximum_stored_model_count
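
A short usage sketch of the getter's two paths; the misconfigured case is hypothetical, shown only to illustrate the isinstance guard:

import emission.analysis.modelling.trip_model.config as eamtc

limit = eamtc.get_maximum_stored_model_count()  # -> 3 with the sample config

# Hypothetical misconfiguration: setting "maximum_stored_model_count": "3"
# (a string) would fail the isinstance check above and raise
# TypeError: config key 'maximum_stored_model_count' not an integer in config file ...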
16 changes: 4 additions & 12 deletions emission/storage/modifiable/builtin_model_storage.py
@@ -6,14 +6,11 @@
 
 import emission.core.get_database as edb
 import emission.storage.modifiable.abstract_model_storage as esma
-
+import emission.analysis.modelling.trip_model.config as eamtc
 import emission.core.wrapper.entry as ecwe
 import emission.core.wrapper.wrapperbase as ecwb
 
 class BuiltinModelStorage(esma.ModelStorage):
-    # TODO: Discuss how to decide on model_count limit
-    K_MODEL_COUNT = 10
-
     def __init__(self, user_id):
         super(BuiltinModelStorage, self).__init__(user_id)
         self.key_query = lambda key: {"metadata.key": key}
@@ -64,30 +61,25 @@ def trim_model_entries(self, key:str):
         The flow of model insertion function calls is:
         eamur.update_trip_model() -> eamums.save_model() -> esma.upsert_model() -> esma.trim_model_entries()
         """
-
         current_model_count = edb.get_model_db().count_documents({"user_id": self.user_id})
         logging.debug("Before trimming, model count for user %s = %s" % (self.user_id, current_model_count))
         find_query = {"user_id": self.user_id, "metadata.key": key}
         result_it = edb.get_model_db().find(find_query).sort("metadata.write_ts", -1)
         result_list = list(result_it)
-
-        if current_model_count >= self.K_MODEL_COUNT:
+        maximum_stored_model_count = eamtc.get_maximum_stored_model_count()
+        if current_model_count >= maximum_stored_model_count:
             # Specify the last or minimum timestamp of Kth model entry
-            write_ts_limit = result_list[self.K_MODEL_COUNT - 1]['metadata']['write_ts']
+            write_ts_limit = result_list[maximum_stored_model_count - 1]['metadata']['write_ts']
             logging.debug(f"Write ts limit = {write_ts_limit}")
-
             filter_clause = {
                 "user_id" : self.user_id,
                 "metadata.key" : key,
                 "metadata.write_ts" : { "$lte" : write_ts_limit }
             }
-
             models_to_delete = edb.get_model_db().delete_many(filter_clause)
-
             if models_to_delete.deleted_count > 0:
                 logging.debug(f"{models_to_delete.deleted_count} documents deleted successfully\n")
             else:
                 logging.debug("No documents found or none deleted\n")
-
         new_model_count = edb.get_model_db().count_documents({"user_id": self.user_id})
         logging.debug("After trimming, model count for user %s = %s" % (self.user_id, new_model_count))
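
The trim keeps the stored models for a (user_id, key) pair bounded: entries are sorted by metadata.write_ts descending, the write_ts of the Nth-newest entry becomes the cutoff, and everything at or below the cutoff is deleted. A self-contained sketch of the same pattern against a plain pymongo collection (the collection argument stands in for edb.get_model_db(); trim_to_limit is a hypothetical wrapper name, field names mirror the diff):

from pymongo.collection import Collection

def trim_to_limit(models: Collection, user_id, key: str, maximum_stored_model_count: int) -> int:
    """Delete the Nth-newest model entry and everything older, so the
    stored count for this user/key stays bounded by the config limit."""
    current_model_count = models.count_documents({"user_id": user_id})
    find_query = {"user_id": user_id, "metadata.key": key}
    # Sort newest first: index N-1 is the Nth-newest entry
    result_list = list(models.find(find_query).sort("metadata.write_ts", -1))
    if current_model_count >= maximum_stored_model_count:
        write_ts_limit = result_list[maximum_stored_model_count - 1]['metadata']['write_ts']
        filter_clause = {
            "user_id": user_id,
            "metadata.key": key,
            "metadata.write_ts": {"$lte": write_ts_limit},
        }
        return models.delete_many(filter_clause).deleted_count
    return 0

Per the docstring above, trim_model_entries() runs as part of esma.upsert_model(), which is how the test below can assert that the per-user model count never exceeds maximum_stored_model_count.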
11 changes: 5 additions & 6 deletions emission/tests/storageTests/TestModelStorage.py
@@ -21,7 +21,7 @@
 import emission.analysis.modelling.trip_model.run_model as eamur
 import emission.storage.timeseries.abstract_timeseries as esta
 import emission.tests.modellingTests.modellingTestAssets as etmm
-from emission.storage.modifiable.builtin_model_storage import BuiltinModelStorage as esmb
+import emission.analysis.modelling.trip_model.config as eamtc
 
 # Test imports
 import emission.tests.common as etc
@@ -98,7 +98,7 @@ def testTrimModelEntries(self):
         Took this code from emission.tests.modellingTests.TestRunGreedyModel.py
         with the objective of inserting multiple models into the model_db.
         The test involves building and inserting 20 models, which is greater than
-        the K_MODEL_COUNT (= 10) limit defined in emission.storage.modifiable.builtin_model_storage.py
+        the maximum_stored_model_count (= 3) limit defined in conf/analysis/trip_model.conf.json.sample
         train a model, save it, load it, and use it for prediction, using
         the high-level training/testing API provided via
@@ -107,7 +107,6 @@ def testTrimModelEntries(self):
         for clustering, use the default greedy similarity binning model
         """
 
-        # pass along debug model configuration
         greedy_model_config = {
             "metric": "od_similarity",
@@ -116,7 +115,7 @@ def testTrimModelEntries(self):
             "clustering_way": 'origin-destination',
             "incremental_evaluation": False
         }
-
+        maximum_stored_model_count = eamtc.get_maximum_stored_model_count()
         logging.debug(f'(TRAIN) creating a model based on trips in database')
         for i in range(20):
             logging.debug(f"Creating dummy model no. {i}")
@@ -128,10 +127,10 @@ def testTrimModelEntries(self):
                 model_config=greedy_model_config
             )
             current_model_count = edb.get_model_db().count_documents({"user_id": self.user_id})
-            if i <= (esmb.K_MODEL_COUNT - 1):
+            if i <= (maximum_stored_model_count - 1):
                 self.assertEqual(current_model_count, i+1)
             else:
-                self.assertEqual(current_model_count, esmb.K_MODEL_COUNT)
+                self.assertEqual(current_model_count, maximum_stored_model_count)
 
 if __name__ == '__main__':
     import emission.tests.common as etc
