Update clustering
acmiyaguchi committed Dec 11, 2019
1 parent 085cc7b commit a39edc4
Showing 2 changed files with 18 additions and 10 deletions.
8 changes: 8 additions & 0 deletions NOTES.md
@@ -145,3 +145,11 @@ for i in {0..6}; do scripts/run-command subgraph pageview --artifact-path sample
```bash
gsutil -m rsync -d -r data/design_matrix gs://wiki-forecast-data/design_matrix
```

```bash
docker run -v `pwd`:/app -v `realpath ../wikipedia-dump`:/app/data -it trmf

pip install pandas pyarrow
pip install -e external/exp-trmf-nips16/python
python -m wikicast.trmf_forecast
```
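The pandas and pyarrow installs above suggest the forecasting entry point reads the parquet design matrix synced earlier. As a rough illustration only, here is a minimal sketch of loading that data inside the container; the path and the use of `pd.read_parquet` are assumptions, not taken from `wikicast.trmf_forecast`:

```python
# Sketch only: assumes the design matrix synced above is stored as parquet under
# data/design_matrix; the actual input handling lives in wikicast.trmf_forecast.
import pandas as pd

# pyarrow is the parquet engine pandas picks up once it is installed
design_matrix = pd.read_parquet("data/design_matrix")

print(design_matrix.shape)
print(design_matrix.head())
```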
20 changes: 10 additions & 10 deletions wikicast/experiment_clustering.py
@@ -90,9 +90,9 @@ def run_trial(data, output, window_size, num_windows):
     # return_train_score=False,
     # )
     search_ridge = ridge
-    # results += run_ablation(
-    #     "ridge regression", search_ridge, train, validate, test, features_dict
-    # )
+    results += run_ablation(
+        "ridge regression", search_ridge, train, validate, test, features_dict
+    )
     # write_search_results(search_ridge, f"{output}/ridge-random.csv")

     # use lbfgs when the dataset is small, does not require a learning rate
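This hunk replaces the commented-out ridge ablation with a live call. For context, a minimal sketch of what a `run_ablation`-style helper with this signature could do is shown below; it is a hypothetical stand-in, not the repository's implementation, and it assumes `train`/`validate`/`test` are aligned per-page window matrices and that `features_dict` maps a label to a list of feature arrays:

```python
# Hypothetical sketch of a run_ablation-style helper; the real one is defined
# elsewhere in wikicast. Assumes train/validate/test are (n_pages, window_size)
# arrays and features_dict maps a label to a list of per-page feature arrays.
import numpy as np
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error


def run_ablation_sketch(name, model, train, validate, test, features_dict):
    results = []
    for label, features in features_dict.items():
        # fit on the training window (plus the chosen features), predict the next one
        model.fit(np.hstack([train] + features), validate)
        pred = model.predict(np.hstack([validate] + features))
        rmse = np.sqrt(mean_squared_error(test, pred))
        results.append({"model": name, "features": label, "rmse": rmse})
    return results


# e.g. results += run_ablation_sketch("ridge regression", Ridge(), train, validate, test, features_dict)
```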
@@ -143,18 +143,18 @@ def best_nn_random(params, output, **kwargs):
     layers = [(64, 32, 64, 16)]
     params = {
         "hidden_layer_sizes": layers,
-        #"alpha": stats.reciprocal(1e-3, 1e6),
+        "alpha": [0.002, 20],
     }
-    search = best_nn_grid(params, f"{output}/nn-grid-no-regularization.csv")
+    search = best_nn_grid(params, f"{output}/nn-grid-regularization.csv")

     # layers = [
-    #     np.hstack([train] + features).shape[1],
-    #     (128, 8, 128),
-    #     (16, 8, 8, 8),
+    #     #np.hstack([train] + features).shape[1],
+    #     #(128, 8, 128),
+    #     #(16, 8, 8, 8),
     #     (64, 32, 64, 16),
     # ]
-    # params = {"hidden_layer_sizes": layers, "alpha": stats.reciprocal(1e2, 1e8)}
-    # search = best_nn_random(params, f"{output}/nn-grid-layers-best.csv")
+    # params = {"hidden_layer_sizes": layers, "alpha": stats.reciprocal(1e-4, 1e2)}
+    # search = best_nn_random(params, f"{output}/nn-grid-layers-best.csv", n_iter=10)

     best_nn = search.best_estimator_
     results += run_ablation(
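The hunk is truncated here, but the visible changes narrow the MLP search: the grid now sweeps `alpha` over `[0.002, 20]` and writes to `nn-grid-regularization.csv`, while the commented-out randomized search keeps a tighter `reciprocal(1e-4, 1e2)` range with `n_iter=10`. A sketch of how such helpers could be built, assuming they wrap scikit-learn's `GridSearchCV` / `RandomizedSearchCV` around an `MLPRegressor` and dump `cv_results_` to the given path (the repository's actual `best_nn_grid` / `best_nn_random` are not shown in this diff):

```python
# Hypothetical stand-ins for best_nn_grid / best_nn_random; the argument handling
# is assumed, only the scikit-learn calls themselves are standard.
import pandas as pd
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.neural_network import MLPRegressor


def best_nn_grid_sketch(params, path, X, y, **kwargs):
    # exhaustive search, e.g. params = {"hidden_layer_sizes": [(64, 32, 64, 16)], "alpha": [0.002, 20]}
    search = GridSearchCV(MLPRegressor(max_iter=1000), params, **kwargs)
    search.fit(X, y)
    pd.DataFrame(search.cv_results_).to_csv(path, index=False)
    return search


def best_nn_random_sketch(params, path, X, y, n_iter=10, **kwargs):
    # sampled search, e.g. params = {..., "alpha": scipy.stats.reciprocal(1e-4, 1e2)}
    search = RandomizedSearchCV(MLPRegressor(max_iter=1000), params, n_iter=n_iter, **kwargs)
    search.fit(X, y)
    pd.DataFrame(search.cv_results_).to_csv(path, index=False)
    return search
```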
