diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
new file mode 100644
index 0000000..513cabe
--- /dev/null
+++ b/.github/workflows/docs.yml
@@ -0,0 +1,50 @@
+name: website
+
+# build the documentation whenever there are new commits on main
+on:
+  push:
+    branches:
+      - main
+    # Alternative: only build for tags.
+    # tags:
+    #   - '*'
+
+# security: restrict permissions for CI jobs.
+permissions:
+  contents: read
+
+jobs:
+  # Build the documentation and upload the static HTML files as an artifact.
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v4
+        with:
+          python-version: '3.12'
+
+      # ADJUST THIS: install all dependencies (including pdoc)
+      - run: pip install -e .
+      - run: pip install pdoc
+      # ADJUST THIS: build your documentation into docs/.html/
+      # (the output directory must match the upload path below).
+      - run: pdoc --docformat google -o docs/.html src
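+      # Tip: for a quick local preview you can run pdoc without -o, e.g.
+      # `pdoc --docformat google src`, which serves the docs on localhost:8080
+      # by default.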
+
+      - uses: actions/upload-pages-artifact@v2
+        with:
+          # Upload the directory that pdoc built the HTML into.
+          path: docs/.html/
+
+  # Deploy the artifact to GitHub pages.
+  # This is a separate job so that only actions/deploy-pages has the necessary permissions.
+  deploy:
+    needs: build
+    runs-on: ubuntu-latest
+    permissions:
+      pages: write
+      id-token: write
+    environment:
+      name: github-pages
+      url: ${{ steps.deployment.outputs.page_url }}
+    steps:
+      - id: deployment
+        uses: actions/deploy-pages@v2
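+      # NOTE: for actions/deploy-pages to work, the repository's Pages settings
+      # must have the "Build and deployment" source set to "GitHub Actions".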
\ No newline at end of file
diff --git a/CLASSDESIGN.md b/CLASSDESIGN.md
new file mode 100644
index 0000000..cae0d99
--- /dev/null
+++ b/CLASSDESIGN.md
@@ -0,0 +1,14 @@
+# Class Design
+Here we will outline the different kinds of classes used in slune and how they interact with each other. There are 3 types:
+- 'Searcher' classes - these are the classes that are used to define and traverse a search space.
+- 'Logger' classes - these are the classes that are used to create and read log files.
+- 'Saver' classes - these are the classes that are used to save logs to files and read logs from files.
+
+The base module is where the base classes for each of these types are defined. The base classes are:
+- BaseSearcher
+- BaseLogger
+- BaseSaver
+
+To create a new searcher, logger or saver, you must inherit from the appropriate base class and implement the required methods. The required methods are marked with the '@abc.abstractmethod' decorator; Python will raise a TypeError at instantiation if they are not implemented. The compulsory methods allow for well-defined interactions between the different classes and should allow for any combination of searcher, logger and saver to be used together.
+
+Please read the docs for the base classes to see what methods are required to be implemented and how they should be implemented.
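+
+For example, a minimal custom logger might look like this (just a sketch: `LoggerList` is a hypothetical name, not part of slune):
+
+```python
+from slune.base import BaseLogger
+
+class LoggerList(BaseLogger):
+    """ Toy logger that keeps metrics in a plain list. """
+
+    def __init__(self):
+        super().__init__()
+        self.results = []
+
+    def log(self, metrics: dict):
+        # Store the metrics dict so a Saver can collate it later.
+        self.results.append(metrics)
+
+    def read_log(self, metric_name: str):
+        # Return every value logged so far for the given metric.
+        return [m[metric_name] for m in self.results if metric_name in m]
+```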
\ No newline at end of file
diff --git a/README.md b/README.md
index 92aa1ce..c83f99f 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,4 @@
+![PyPI - Version](https://img.shields.io/pypi/v/slune-lib)
[![license](https://img.shields.io/badge/License-MIT-purple.svg)](LICENSE)
![badge](https://img.shields.io/endpoint?url=https://gist.githubusercontent.com/h-0-0/4aa01e058fee448070c587f6967037e4/raw/CodeCovSlune.json)
@@ -111,7 +112,6 @@ Please check out the examples folder for notebooks detailing in more depth some
## Roadmap
- Make package user friendly:
- - Add documentation.
- Go through automation settings.
- Code of conduct.
- Contributing guidelines.
diff --git a/docs/.html/index.html b/docs/.html/index.html
new file mode 100644
index 0000000..0ef869b
--- /dev/null
+++ b/docs/.html/index.html
@@ -0,0 +1,7 @@
+<!-- generated pdoc index page: its markup was stripped in this diff and nothing recoverable remains -->
diff --git a/docs/.html/search.js b/docs/.html/search.js
new file mode 100644
index 0000000..bcd0f25
--- /dev/null
+++ b/docs/.html/search.js
@@ -0,0 +1,46 @@
+window.pdocSearch = (function(){
+/** elasticlunr - http://weixsong.github.io * Copyright (C) 2017 Oliver Nightingale * Copyright (C) 2017 Wei Song * MIT Licensed */
+// [The minified elasticlunr 0.9.5 bundle that follows the header above was
+// mangled past recovery in this diff and is omitted.]
+
+/* `docs` holds the pdoc-generated search records. The first records repeat
+ * the module docstring verbatim (the README and Class Design text rendered in
+ * src.html below). The remaining records carry the docstrings of the
+ * package's classes and helpers; their markup was stripped in this diff, so
+ * they are reproduced here in cleaned-up prose:
+ *
+ * BaseSearcher, BaseLogger, BaseSaver: protocols for searchers, loggers and
+ * savers. Their docstrings are rendered in full in src.html below.
+ *
+ * LoggerDefault (and its log method): logs metrics into a results data frame.
+ * Rendered in full in src/slune/loggers.html below.
+ *
+ * The CSV saver: saves the results of each run in a CSV file in a hierarchy
+ * of directories. Each directory is named after a parameter-value pair in the
+ * form "--parameter_name=value", so the path to a csv file encodes the
+ * configuration under which the results were obtained. With the single
+ * parameter "learning_rate" set to 0.01, results go in a csv file inside a
+ * directory named "--learning_rate=0.01"; with "learning_rate" 0.01 and
+ * "batch_size" 32 they go in "--learning_rate=0.01/--batch_size=32". Results
+ * are read back by searching for the directory that matches the requested
+ * parameters. Directories are created in the order the parameters are given,
+ * and if the root directory already contains directories matching a subset of
+ * the parameters, the tree is extended from the deepest matching directory:
+ * if "--learning_rate=0.01/--batch_size=32" exists and the parameters are
+ * ["--learning_rate=0.01", "--batch_size=32", "--num_epochs=10"], the path
+ * "--learning_rate=0.01/--batch_size=32/--num_epochs=10" is created, whereas
+ * ["--learning_rate=0.02", "--num_epochs=10", "--batch_size=32"] creates
+ * "--learning_rate=0.02/--batch_size=32/--num_epochs=10". Parallel runs that
+ * try to create the same directories are handled by waiting a random time
+ * (under 1 second) before creating each directory; this works well in
+ * practice but may occasionally fail when many jobs launch at once.
+ * Attributes: root_dir (str, default './tuning_results'), current_path (str),
+ * logger_instance (BaseLogger), and params (list, optional): strings of the
+ * form "--parameter_name=parameter_value"; if None, a path is created from
+ * the parameters given in the log.
+ *
+ * get_match(params): searches the root directory for a path that matches the
+ * parameters given. If only partial matches are found, returns the deepest
+ * matching directory (the one matching the most parameters) with the missing
+ * parameters appended; if no match is found, creates a path from the
+ * parameters in the order they are given.
+ *
+ * The csv-path helper: builds the path for the current run's csv file. It
+ * first calls get_match, then checks whether a results file already exists at
+ * that path and increments the file number if so: if
+ * "--learning_rate=0.01/--batch_size=32" already contains "results_0.csv",
+ * the new file is named "results_1.csv". Returns csv_file_path.
+ *
+ * save_collated(results): appends the collated results from the logger to the
+ * csv file if it exists, otherwise creates the file and saves the results to
+ * it. TODO: may assume too much about the format in which the logger returns
+ * results; it should only assume that results are saved to a csv file.
+ *
+ * SearcherGrid: given a dictionary of parameters and the values to try,
+ * structured as { "--parameter_name": [Value_1, Value_2, ...], ... }, creates
+ * the grid of all possible configurations and returns them one by one on each
+ * call to next_tune. Attributes: configs (dict), runs (int: if runs > 0, run
+ * each config 'runs' times; if runs = 0, run each config once even if it
+ * already exists; modified by check_existing_runs), grid (list of dict),
+ * grid_index (int), saver_exists (pointer to the saver's exists method).
+ *
+ * check_existing_runs / skip_existing_runs(grid_index): tally up existing
+ * runs for the current configuration and skip configs, or runs of a config,
+ * based on the number of runs wanted per config. Returns (grid_index,
+ * run_index) for the next configuration and run.
+ *
+ * next_tune: returns the next configuration to try, skipping existing runs if
+ * check_existing_runs has been called; raises an error at the end of the
+ * grid. To iterate through all configurations, use a for loop:
+ * for config in searcher: ...
+ *
+ * sbatchit(script_path, sbatch_path, searcher, cargs, saver): submits jobs
+ * based on the arguments given by the searcher. For each job it runs the
+ * script at script_path with the parameter values from the searcher plus the
+ * constant arguments cargs, submitting via the sbatch script at sbatch_path
+ * (examples in the templates folder). If given a Saver, it skips jobs that
+ * already have the desired number of existing runs; pass no Saver to rerun
+ * all jobs.
+ *
+ * The directory-search helper: searches the root directory for a path of
+ * directories matching the given strings (each of the form '--string=') in
+ * any order; returns the depth and path of the deepest partial match, or the
+ * root directory if nothing matches, with values stripped ('--string=value'
+ * becomes '--string=').
+ *
+ * The numeric-equivalence helper: replaces directories in og_path with
+ * existing directories under root_directory that have the same numerical
+ * value; returns the equivalent path with the root directory prepended.
+ *
+ * The csv-search helper, signature "(dirs: List[str], root_directory:
+ * Optional[str] = '.') -> List[str]": finds all csv files in subdirectories
+ * of the root directory whose path contains a directory matching one of each
+ * of the parameters given; returns the list of matching paths.
+ */
+let docs = [/* pdoc search records omitted */];
+
+ // mirrored in build-search-index.js (part 1)
+ // Also split on html tags. this is a cheap heuristic, but good enough.
+ elasticlunr.tokenizer.setSeperator(/[\s\-.;&_'"=,()]+|<[^>]*>/);
+
+ let searchIndex;
+ if (docs._isPrebuiltIndex) {
+ console.info("using precompiled search index");
+ searchIndex = elasticlunr.Index.load(docs);
+ } else {
+ console.time("building search index");
+ // mirrored in build-search-index.js (part 2)
+ searchIndex = elasticlunr(function () {
+ this.pipeline.remove(elasticlunr.stemmer);
+ this.pipeline.remove(elasticlunr.stopWordFilter);
+ this.addField("qualname");
+ this.addField("fullname");
+ this.addField("annotation");
+ this.addField("default_value");
+ this.addField("signature");
+ this.addField("bases");
+ this.addField("doc");
+ this.setRef("fullname");
+ });
+ for (let doc of docs) {
+ searchIndex.addDoc(doc);
+ }
+ console.timeEnd("building search index");
+ }
+
+ return (term) => searchIndex.search(term, {
+ fields: {
+ qualname: {boost: 4},
+ fullname: {boost: 2},
+ annotation: {boost: 2},
+ default_value: {boost: 2},
+ signature: {boost: 2},
+ bases: {boost: 2},
+ doc: {boost: 1},
+ },
+ expand: true
+ });
+})();
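+
+// Example usage (assuming this file is loaded in a pdoc page):
+//   window.pdocSearch("Searcher")  // ranked records whose qualname/doc match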
\ No newline at end of file
diff --git a/docs/.html/src.html b/docs/.html/src.html
new file mode 100644
index 0000000..81d7a74
--- /dev/null
+++ b/docs/.html/src.html
@@ -0,0 +1,432 @@
+<!-- pdoc page chrome (head, navigation, search box) omitted: markup was stripped in this diff -->
+src API documentation
+
+src
+
slune (= slurm + tune!)
+
+
A super simplistic python package for performing hyperparameter tuning (or more generally launching jobs and saving results) on a cluster using SLURM. Takes advantage of the fact that lots of jobs (including hyperparameter tuning) are embarrassingly parallel! With slune you can divide your compute into lots of separately scheduled jobs meaning that each small job can get running on your cluster more quickly, speeding up your workflow! Often significantly!
+
+
Slune is super-easy to use! We have helper functions which can execute everything you need done for you. Letting you speed up your work without wasting time.
+
+
Slune is barebones by design. This means that you can easily write code to integrate with slune if you want to do something a bit different! You can also work out what each function is doing pretty easily.
+
+
Slune is flexible. In designing this package I've tried to make as few assumptions as possible meaning that it can be used for lots of stuff outside hyperparameter tuning! (or also within!) For example, you can get slune to give you paths for where to save things, submit lots of jobs in parallel for any sort of script and do grid search! and there's more to come!
+
+
Usage
+
+
Let's go through a quick example of how we can use slune ... first let's define a model that we want to train:
+
+
+
+# Simple Regularized Linear Regression without using external libraries
+
+# Function to compute the mean of a list
+def mean(values):
+    return sum(values) / float(len(values))
+
+# Function to compute the covariance between two lists
+def covariance(x, mean_x, y, mean_y):
+    covar = 0.0
+    for i in range(len(x)):
+        covar += (x[i] - mean_x) * (y[i] - mean_y)
+    return covar
+
+# Function to compute the variance of a list
+def variance(values, mean):
+    return sum((x - mean) ** 2 for x in values)
+
+# Function to compute coefficients for a simple regularized linear regression
+def coefficients_regularized(x, y, alpha):
+    mean_x, mean_y = mean(x), mean(y)
+    var_x = variance(x, mean_x)
+    covar = covariance(x, mean_x, y, mean_y)
+    b1 = (covar + alpha * var_x) / (var_x + alpha)
+    b0 = mean_y - b1 * mean_x
+    return b0, b1
+
+# Function to make predictions with a simple regularized linear regression model
+def linear_regression_regularized(train_X, train_y, test_X, alpha):
+    b0, b1 = coefficients_regularized(train_X, train_y, alpha)
+    predictions = [b0 + b1 * x for x in test_X]
+    return predictions
+
+# ------------------
+# The above is code for a simple regularized linear regression model that we want to train.
+# Now let's fit the model and use slune to save how well our model performs!
+# ------------------
+
+if __name__ == "__main__":
+    # First let's load in the value for the regularization parameter alpha that has been passed to this script from the command line. We will use the slune helper function lsargs to do this.
+    # lsargs returns a tuple of the python path and a list of arguments passed to the script. We can then use this to get the alpha value.
+    from slune import lsargs
+    python_path, args = lsargs()
+    alpha = float(args[0])
+
+    # Mock training dataset, function is y = 1 + 1 * x
+    X = [1, 2, 3, 4, 5]
+    y = [2, 3, 4, 5, 6]
+
+    # Mock test dataset
+    test_X = [6, 7, 8]
+    test_y = [7, 8, 9]
+    test_predictions = linear_regression_regularized(X, y, test_X, alpha)
+
+    # Now let's load in a function that we can use to get a saver object that uses the default method of logging (we call this object a slog = saver + logger). The saving will be coordinated by a csv saver object which saves and reads results from csv files stored in a hierarchy of directories.
+    from slune import get_csv_slog
+    csv_slog = get_csv_slog(params=args)
+
+    # Let's now calculate the mean squared error of our predictions and log it!
+    # (mean() calls len(), so give it a list rather than a generator.)
+    mse = mean([(test_y[i] - test_predictions[i]) ** 2 for i in range(len(test_y))])
+    csv_slog.log({'mse': mse})
+
+    # Let's now save our logged results!
+    csv_slog.save_collated()
+
+
+
+
Now let's write some code that will submit some jobs to train our model using different hyperparameters!!
+
+
+
+# Let's now load in a function that will coordinate our search! We're going to do a grid search.
+# SearcherGrid is the class we can use to coordinate a grid search. We pass it a dictionary of hyperparameters and the values we want to try for each hyperparameter. We also pass it the number of runs we want to do for each combination of hyperparameters.
+from slune.searchers import SearcherGrid
+grid_searcher = SearcherGrid({'alpha': [0.25, 0.5, 0.75]}, runs=1)
+
+# Let's now import a function which will submit a job for our model. The script_path specifies the path to the script that contains the model we want to train. The template_path specifies the path to the template script that we want to specify the job with, and cargs is a list of constant arguments we want to pass to the script for each tuning.
+# We set slog to None as we want to run every job, even ones that have already been run before.
+from slune import sbatchit
+script_path = 'model.py'
+template_path = 'template.sh'
+sbatchit(script_path, template_path, grid_searcher, cargs=[], slog=None)
+
+
+
+
Now we've submitted our jobs we just wait for them to finish 🕛🕐🕑🕒🕓🕔🕕🕖🕗🕘🕙🕚🕛. Once they're finished we can read the results!
Amazing! 🥳 We have successfully used slune to train our model. I hope this gives you a good flavour of how you can use slune and how easy it is to use!
+
+
Please check out the examples folder for notebooks detailing in more depth some potential ways you can use slune. The docs are not yet up and running 😢 but they are coming soon!
+
+
Roadmap
+
+
+
Make package user friendly:

- Go through automation settings.
- Code of conduct.
- Contributing guidelines.
- Add to pypi.

Still in early stages! First thing on the horizon is better integration with SLURM:

- Set-up notifications for job completion, failure, etc.
- Auto job naming, job output naming and job output location saving.
- Auto save logged results when finishing a job.
- Automatically re-submit failed jobs.
- Tools for monitoring and cancelling jobs.

Then it will be looking at adding more savers, loggers and searchers! For example integration with tensorboard, saving to one csv file (as opposed to a hierarchy of csv files in different directories) and different search methods like random search and cross validation. It would perhaps also be beneficial to be able to interface with other languages like R and Julia. Finally, more helper functions!
+
+
+
However, I am trying to keep this package as bloatless as possible to make it easy for you to tweak and configure to your individual needs. It's written in a simple and compartmentalized manner for this reason. You can of course use the helper functions and let slune handle everything under the hood, but, you can also very quickly and easily write your own classes to work with other savers, loggers and searchers to do as you please.
+
+
Installation
+
+
To install the latest version use:
+
+
+
pip install slune-lib
+
+
+
+
To install the latest dev version use (CURRENTLY RECOMMENDED):
+
+
+
# With https
+pip install "git+https://github.com/h-aze/slune.git#egg=slune-lib"
+
+
+
+
Class Design

(This part of the module docstring repeats CLASSDESIGN.md verbatim; see that file earlier in this diff.)

+import abc
+
+class BaseSearcher(metaclass=abc.ABCMeta):
+    """ Base class for all Searchers.
+
+    This must be subclassed to create different Searcher classes.
+    Please name your searcher class Searcher<SearcherName>
+    Outlines a protocol for creating a search space and creating configurations from it.
+    Methods document what they should do once implemented.
+
+    """
+    @abc.abstractmethod
+    def __init__(self, *args, **kwargs):
+        """ Initialises the searcher. """
+
+        pass
+
+    @abc.abstractmethod
+    def __len__(self, *args, **kwargs):
+        """ Returns the number of configurations defined by the search space of the searcher. """
+
+        pass
+
+    @abc.abstractmethod
+    def next_tune(self, *args, **kwargs):
+        """ Returns the next configuration to try. """
+
+        pass
+
+    @abc.abstractmethod
+    def check_existing_runs(self, *args, **kwargs):
+        """ Used to tell searcher to check if there are existing runs in storage.
+
+        If there are existing runs, the searcher should skip them
+        based on the number of runs we would like for each job.
+        This may require a 'runs' attribute to be set in the searcher.
+        It will probably also require access to a Saver object,
+        so we can use its saving protocol to check if there are existing runs.
+        In this case it is advised that this function takes a Saver object as an argument,
+        and that the searcher is initialized with a 'runs' attribute.
+
+        """
+
+        pass
+
+    def __iter__(self):
+        """ Makes the searcher iterable, so we can use it in a for loop.
+
+        Feel free to override this method if needed.
+
+        """
+
+        return self
+
+    def __next__(self):
+        """ Makes the searcher iterable, so we can use it in a for loop.
+
+        Feel free to override this method if needed.
+
+        """
+
+        try:
+            return self.next_tune()
+        except:
+            raise StopIteration
+
+class BaseLogger(metaclass=abc.ABCMeta):
+    """ Base class for all Loggers.
+
+    This must be subclassed to implement different Logger classes.
+    Please name your logger class Logger<LoggerName>.
+    Outlines a protocol for logging metrics and reading from the logs.
+    Methods document what they should do once implemented.
+
+    """
+    @abc.abstractmethod
+    def __init__(self, *args, **kwargs):
+        """ Initialises the logger. """
+
+        pass
+
+    @abc.abstractmethod
+    def log(self, *args, **kwargs):
+        """ Logs the metric/s for the current hyperparameter configuration.
+
+        Should store metrics in some way so we can later save it using a Saver.
+
+        """
+
+        pass
+
+    @abc.abstractmethod
+    def read_log(self, *args, **kwargs):
+        """ Returns value of a metric from the log based on a selection criteria. """
+
+        pass
+
+class BaseSaver(metaclass=abc.ABCMeta):
+    """ Base class for all savers.
+
+    This must be subclassed to implement different Saver classes.
+    Please name your saver class Saver<SaverName>.
+    Outlines a protocol for saving and reading results to/from storage.
+    Methods document what they should do once implemented.
+
+    """
+
+    @abc.abstractmethod
+    def __init__(self, logger_instance: BaseLogger, *args, **kwargs):
+        """ Initialises the saver.
+
+        Assigns the logger instance to self.logger and makes its methods accessible through self.log and self.read_log.
+
+        Args:
+            - logger_instance (BaseLogger): Instance of a logger class that inherits from BaseLogger.
+
+        """
+
+        # Given a class that inherits from BaseLogger we make it accessible through self.logger and make its methods accessible through self.log and self.read_log
+        self.logger = logger_instance
+        self.log = self.logger.log
+        self.read_log = self.logger.read_log
+
+    @abc.abstractmethod
+    def save_collated(self, *args, **kwargs):
+        """ Saves the current results in logger to storage. """
+
+        pass
+
+    @abc.abstractmethod
+    def read(self, *args, **kwargs):
+        """ Reads results from storage. """
+
+        pass
+
+    @abc.abstractmethod
+    def exists(self, *args, **kwargs):
+        """ Checks if results already exist in storage.
+
+        Should return integer indicating the number of runs that exist in storage for the given parameters.
+
+        """
+
+        pass
+
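+As a quick illustration of the iterator protocol above, here is a sketch (hypothetical class, not part of the package) of a concrete searcher and how a for loop would drive it:
+
+from slune.base import BaseSearcher
+
+class SearcherList(BaseSearcher):
+    """ Toy searcher that walks a fixed list of configurations. """
+    def __init__(self, configs):
+        self.configs = list(configs)
+        self.i = 0
+    def __len__(self):
+        return len(self.configs)
+    def next_tune(self):
+        # Raises IndexError when exhausted; __next__ turns that into StopIteration.
+        config = self.configs[self.i]
+        self.i += 1
+        return config
+    def check_existing_runs(self, saver):
+        pass  # nothing to skip in this toy example
+
+for config in SearcherList([{'alpha': 0.25}, {'alpha': 0.5}]):
+    print(config)
+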
+<!-- The per-member sections that followed (class BaseSearcher, BaseLogger, BaseSaver and their methods) repeated the source excerpts and docstrings from the listing above and are omitted. -->
\ No newline at end of file
diff --git a/docs/.html/src/slune/loggers.html b/docs/.html/src/slune/loggers.html
new file mode 100644
index 0000000..455b6c7
--- /dev/null
+++ b/docs/.html/src/slune/loggers.html
@@ -0,0 +1,244 @@
+src.slune.loggers API documentation
+
+import pandas as pd
+from slune.base import BaseLogger
+
+class LoggerDefault(BaseLogger):
+    """ Logs metric/s in a data frame.
+
+    Stores the metric/s in a data frame that we can later save in storage.
+    Logs by creating data frame out of the metrics and then appending it to the current results data frame.
+
+    Attributes:
+        - results (pd.DataFrame): Data frame containing all the metrics logged so far.
+            Each row stores all the metrics that were given in a call to the 'log' method,
+            each column title is a metric name.
+            The first column is always the time stamp at which 'log' is called.
+
+    """
+
+    def __init__(self, *args, **kwargs):
+        """ Initialises the logger. """
+        super(LoggerDefault, self).__init__(*args, **kwargs)
+        # Raise warning if any arguments are given
+        if args or kwargs:
+            raise Warning(f"Arguments {args} and keyword arguments {kwargs} are ignored")
+        # Initialise results data frame
+        self.results = pd.DataFrame()
+
+    def log(self, metrics: dict):
+        """ Logs the metric/s given.
+
+        Stores them in a data frame that we can later save in storage.
+        All metrics provided will be saved as a row in the results data frame,
+        the first column is always the time stamp at which log is called.
+
+        Args:
+            - metrics (dict): Metrics to be logged, keys are metric names and values are metric values.
+                Each metric should only have one value! So please log as soon as you get a metric.
+
+        """
+        # Get current time stamp
+        time_stamp = pd.Timestamp.now()
+        # Add time stamp to metrics dictionary
+        metrics['time_stamp'] = time_stamp
+        # Convert metrics dictionary to a dataframe
+        metrics_df = pd.DataFrame(metrics, index=[0])
+        # Append metrics dataframe to results dataframe
+        self.results = pd.concat([self.results, metrics_df], ignore_index=True)
+
+    def read_log(self, data_frame: pd.DataFrame, metric_name: str, select_by: str = 'max') -> float:
+        """ Reads log and returns value according to select_by.
+
+        Reads the values for given metric for given log and chooses metric value to return based on select_by.
+
+        Args:
+            - data_frame (pd.DataFrame): Data frame containing the metric to be read.
+            - metric_name (str): Name of the metric to be read.
+            - select_by (str, optional): How to select the 'best' metric, currently can select by 'min' or 'max'.
+
+        Returns:
+            - value (float): Minimum or maximum value of the metric.
+
+        TODO:
+            - Add more options for select_by.
+            - Should be able to return other types than float?
+
+        """
+        # Get the metric column
+        metric_col = data_frame[metric_name]
+        # Get the index of the minimum or maximum value
+        if select_by == 'max':
+            index = metric_col.idxmax()
+        elif select_by == 'min':
+            index = metric_col.idxmin()
+        else:
+            raise ValueError(f"select_by must be 'min' or 'max', got {select_by}")
+        # Get the value of the metric
+        value = metric_col.iloc[index]
+        return value
\ No newline at end of file
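
A quick usage sketch of LoggerDefault (editorial example, not part of this diff; assumes the package is importable as slune):

    from slune.loggers import LoggerDefault

    logger = LoggerDefault()
    logger.log({'loss': 0.42})   # stored as a time-stamped row in logger.results
    logger.log({'loss': 0.35})
    best = logger.read_log(logger.results, 'loss', select_by='min')  # -> 0.35
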
diff --git a/docs/.html/src/slune/savers.html b/docs/.html/src/slune/savers.html
new file mode 100644
index 0000000..6f8b8fc
--- /dev/null
+++ b/docs/.html/src/slune/savers.html
@@ -0,0 +1,244 @@
+src.slune.savers API documentation
+
+from typing import List, Optional
+import os
+import pandas as pd
+from slune.utils import find_directory_path, get_all_paths, get_numeric_equiv
+from slune.base import BaseSaver, BaseLogger
+import random
+import time
+
+class SaverCsv(BaseSaver):
+    """ Saves the results of each run in a CSV file in a hierarchy of directories.
+
+    Each directory is named after a parameter - value pair in the form "--parameter_name=value".
+    The paths to csv files then define the configuration under which the results were obtained,
+    for example if we only have one parameter "learning_rate" with value 0.01 used to obtain the results,
+    to save those results we would create a directory named "--learning_rate=0.01" and save the results in a csv file in that directory.
+
+    If we have multiple parameters, for example "learning_rate" with value 0.01 and "batch_size" with value 32,
+    we would create a directory named "--learning_rate=0.01" with a subdirectory named "--batch_size=32",
+    and save the results in a csv file in that subdirectory.
+
+    We use this structure to then read the results from the csv files by searching for the directory that matches the parameters we want,
+    and then reading the csv file in that directory.
+
+    The order in which we create the directories is determined by the order in which the parameters are given,
+    so if we are given ["--learning_rate=0.01", "--batch_size=32"] we would create the directories in the following order:
+    "--learning_rate=0.01/--batch_size=32".
+
+    The directory structure generated will also depend on existing directories in the root directory,
+    if there are existing directories in the root directory that match some subset of the parameters given,
+    we will create the directory tree from the deepest matching directory.
+
+    For example if we only have the following path in the root directory:
+    "--learning_rate=0.01/--batch_size=32"
+    and we are given the parameters ["--learning_rate=0.01", "--batch_size=32", "--num_epochs=10"],
+    we will create the path:
+    "--learning_rate=0.01/--batch_size=32/--num_epochs=10".
+    On the other hand if we are given the parameters ["--learning_rate=0.02", "--num_epochs=10", "--batch_size=32"],
+    we will create the path:
+    "--learning_rate=0.02/--batch_size=32/--num_epochs=10".
+
+    Handles parallel runs trying to create the same directories by waiting a random time (under 1 second) before creating the directory.
+    Should work pretty well in practice, however, may occasionally fail depending on the number of jobs launched at the same time.
+
+    Attributes:
+        - root_dir (str): Path to the root directory where we will store the csv files.
+        - current_path (str): Path to the csv file where we will store the results for the current run.
+
+    """
+
+    def __init__(self, logger_instance: BaseLogger, params: List[str] = None, root_dir: Optional[str] = os.path.join('.', 'tuning_results')):
+        """ Initialises the csv saver.
+
+        Args:
+            - logger_instance (BaseLogger): Instance of a logger class that inherits from BaseLogger.
+            - params (list, optional): List of strings containing the parameters used, in form ["--parameter_name=parameter_value", ...], default is None.
+                If None, we will create a path using the parameters given in the log.
+            - root_dir (str, optional): Path to the root directory where we will store the csv files, default is './tuning_results'.
+
+        """
+        super(SaverCsv, self).__init__(logger_instance)
+        self.root_dir = root_dir
+        if params != None:
+            self.current_path = self.get_path(params)
+
+    def strip_params(self, params: List[str]) -> List[str]:
+        """ Strips the parameter values.
+
+        Strips the parameter values from the list of parameters given,
+        ie. ["--parameter_name=parameter_value", ...] -> ["--parameter_name=", ...]
+
+        Also gets rid of blank spaces.
+
+        Args:
+            - params (list of str): List of strings containing the parameters used, in form ["--parameter_name=parameter_value", ...].
+
+        Returns:
+            - stripped_params (list of str): List of strings containing the parameters used, in form ["--parameter_name=", ...].
+
+        """
+        stripped_params = [p.split('=')[0].strip() for p in params]
+        return stripped_params
+
+    def get_match(self, params: List[str]) -> str:
+        """ Searches the root directory for a path that matches the parameters given.
+
+        If only partial matches are found, returns the deepest matching directory with the missing parameters appended.
+        By deepest we mean the directory with the most parameters matching.
+        If no matches are found creates a path using the parameters.
+        Creates path using parameters in the order they are given,
+        ie. ["--learning_rate=0.01", "--batch_size=32"] -> "--learning_rate=0.01/--batch_size=32".
+
+        If we find a partial match, we add the missing parameters to the end of the path,
+        ie. if we have the path "--learning_rate=0.01" in the root
+        and are given the parameters ["--learning_rate=0.01", "--batch_size=32"],
+        we will create the path "--learning_rate=0.01/--batch_size=32".
+
+        Args:
+            - params (list of str): List of strings containing the arguments used, in form ["--argument_name=argument_value", ...].
+
+        Returns:
+            - match (str): Path to the directory that matches the parameters given.
+
+        """
+        # First check if there is a directory with path matching some subset of the arguments
+        stripped_params = [p.split('=')[0].strip() + '=' for p in params]  # Strip the params of whitespace and everything after the '='
+        if len(set(stripped_params)) != len(stripped_params):
+            raise ValueError(f"Duplicate parameters found in {stripped_params}")
+        match = find_directory_path(stripped_params, root_directory=self.root_dir)
+        # Add on missing parameters
+        if match == self.root_dir:
+            match = os.path.join(*stripped_params)
+        else:
+            missing_params = [p for p in stripped_params if p not in match]
+            if missing_params != []:
+                match = [match] + missing_params
+                match = os.path.join(*match)
+        # Take the root directory out of the match
+        match = match.replace(self.root_dir, '')
+        if match.startswith(os.path.sep):
+            match = match[1:]
+        # Now we add back in the values we stripped out
+        match = match.split(os.path.sep)
+        match = [[p for p in params if m in p][0] for m in match]
+        # Check if there is an existing path with the same numerical values, if so use that instead
+        match = get_numeric_equiv(os.path.join(*match), root_directory=self.root_dir)
+        return match
+
+    def get_path(self, params: List[str]) -> str:
+        """ Creates a path using the parameters.
+
+        Does this by first checking for existing paths in the root directory that match the parameters given.
+
+        Check get_match for how we create the path,
+        once we have the path we check if there is already a csv file with results in that path,
+        if there is we increment the number of the results file name that we will use.
+
+        For example if we get back the path "--learning_rate=0.01/--batch_size=32",
+        and there exists a csv file named "results_0.csv" in the final directory,
+        we will name our csv file "results_1.csv".
+
+        Args:
+            - params (list of str): List of strings containing the arguments used, in form ["--argument_name=argument_value", ...].
+
+        Returns:
+            - csv_file_path (str): Path to the csv file where we will store the results for the current run.
+
+        """
+        # Check if root directory exists, if not create it
+        if not os.path.exists(self.root_dir):
+            time.sleep(random.random())  # Wait a random amount of time under 1 second to avoid multiple processes creating the same directory
+            os.makedirs(self.root_dir)
+        # Get path of directory where we should store our csv of results
+        dir_path = self.get_match(params)
+        # Check if directory exists, if not create it
+        if not os.path.exists(dir_path):
+            csv_file_number = 0
+        # If it does exist, check if there is already a csv file with results,
+        # if there is find the name of the last csv file and increment the number
+        else:
+            csv_files = [f for f in os.listdir(dir_path) if f.endswith('.csv')]
+            if len(csv_files) > 0:
+                last_csv_file = max(csv_files)
+                # Check that the last csv file starts with "results_"
+                if not last_csv_file.startswith('results_'):
+                    raise ValueError('Found csv file in directory that doesn\'t start with "results_"')
+                csv_file_number = int(last_csv_file.split('_')[1][:-4]) + 1
+            else:
+                csv_file_number = 0
+        # Create path name for a new csv file where we can later store results
+        csv_file_path = os.path.join(dir_path, f'results_{csv_file_number}.csv')
+        return csv_file_path
+
+    def save_collated_from_results(self, results: pd.DataFrame):
+        """ Saves results to csv file.
+
+        If the csv file already exists,
+        we append the collated results from the logger to the end of the csv file.
+        If the csv file does not exist,
+        we create it and save the results to it.
+
+        Args:
+            - results (pd.DataFrame): Data frame containing the results to be saved.
+
+        TODO:
+            - Could be making too many assumptions about the format in which we get the results from the logger,
+              should be able to work with any logger.
+              We should only be assuming that we are saving results to a csv file.
+
+        """
+        # If path does not exist, create it
+        # Remove the csv file name from the path
+        dir_path = self.current_path.split(os.path.sep)[:-1]
+        dir_path = os.path.join(*dir_path)
+        if not os.path.exists(dir_path):
+            time.sleep(random.random())  # Wait a random amount of time under 1 second to avoid multiple processes creating the same directory
+            os.makedirs(dir_path)
+        # If csv file already exists, append results to the end
+        if os.path.exists(self.current_path):
+            results = pd.concat([pd.read_csv(self.current_path), results])
+            results.to_csv(self.current_path, mode='w', index=False)
+        # If csv file does not exist, create it
+        else:
+            results.to_csv(self.current_path, index=False)
+
+    def save_collated(self):
+        """ Saves results to csv file. """
+        self.save_collated_from_results(self.logger.results)
+
+    def read(self, params: List[str], metric_name: str, select_by: str = 'max', avg: bool = True) -> (List[str], float):
+        """ Finds the min/max value of a metric from all csv files in the root directory that match the parameters given.
+
+        Args:
+            - params (list of str): Contains the parameters used, in form ["--parameter_name=parameter_value", ...].
+            - metric_name (string): Name of the metric to be read.
+            - select_by (string, optional): How to select the 'best' value for the metric from a log file, currently can select by 'min' or 'max'.
+            - avg (bool, optional): Whether to average the metric over all runs, default is True.
+
+        Returns:
+            - best_params (list of str): Contains the arguments used to get the 'best' value of the metric (determined by select_by).
+            - best_value (float): Best value of the metric (determined by select_by).
+
+        """
+        # Get all paths that match the parameters given
+        paths = get_all_paths(params, root_directory=self.root_dir)
+        if paths == []:
+            raise ValueError(f"No paths found matching {params}")
+        # Read the metric from each path
+        values = {}
+        # Do averaging for different runs of same params if avg is True, otherwise just read the metric from each path
+        if avg:
+            paths_same_params = set([os.path.join(*p.split(os.path.sep)[:-1]) for p in paths])
+            for path in paths_same_params:
+                runs = get_all_paths(path.split(os.path.sep), root_directory=self.root_dir)
+                cumsum = 0
+                for r in runs:
+                    df = pd.read_csv(r)
+                    cumsum += self.read_log(df, metric_name, select_by)
+                avg_of_runs = cumsum / len(runs)
+                values[path] = avg_of_runs
+        else:
+            for path in paths:
+                df = pd.read_csv(path)
+                values[os.path.join(*path.split(os.path.sep)[:-1])] = self.read_log(df, metric_name, select_by)
+        # Get the key of the min/max value
+        if select_by == 'min':
+            best_params = min(values, key=values.get)
+        elif select_by == 'max':
+            best_params = max(values, key=values.get)
+        else:
+            raise ValueError(f"select_by must be 'min' or 'max', got {select_by}")
+        # Find the best value of the metric from the key
+        best_value = values[best_params]
+        # Format the path into a list of arguments
+        best_params = best_params.replace(self.root_dir, '')
+        if best_params.startswith(os.path.sep):
+            best_params = best_params[1:]
+        best_params = best_params.split(os.path.sep)
+        return best_params, best_value
+
+    def exists(self, params: List[str]) -> int:
+        """ Checks if results already exist in storage.
+
+        Args:
+            - params (list of str): Contains the parameters used, in form ["--parameter_name=parameter_value", ...].
+
+        Returns:
+            - num_runs (int): Number of runs that exist in storage for the given parameters.
+
+        """
+        # Get all paths that match the parameters given
+        paths = get_all_paths(params, root_directory=self.root_dir)
+        return len(paths)
+
+    def get_current_path(self) -> str:
+        """ Getter function for the current_path attribute.
+
+        Returns:
+            - current_path (str): Path to the csv file where we will store the results for the current run.
+
+        """
+        return self.current_path
\ No newline at end of file
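
To make the directory logic above concrete, a minimal usage sketch (editorial example, not part of this diff; assumes the package is importable as slune and pairs SaverCsv with LoggerDefault):

    from slune.loggers import LoggerDefault
    from slune.savers import SaverCsv

    saver = SaverCsv(LoggerDefault(), params=['--learning_rate=0.01', '--batch_size=32'])
    saver.log({'loss': 0.42})  # forwarded to the attached logger
    saver.save_collated()      # writes e.g. ./tuning_results/--learning_rate=0.01/--batch_size=32/results_0.csv
    best_params, best_loss = saver.read(['--learning_rate=0.01', '--batch_size=32'], 'loss', select_by='min')
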
diff --git a/docs/.html/src/slune/searchers.html b/docs/.html/src/slune/searchers.html
new file mode 100644
index 0000000..95b7faf
--- /dev/null
+++ b/docs/.html/src/slune/searchers.html
@@ -0,0 +1,244 @@
+src.slune.searchers API documentation
+
+from typing import List, Tuple
+from slune.base import BaseSearcher, BaseSaver
+from slune.utils import dict_to_strings
+
+class SearcherGrid(BaseSearcher):
+    """ Searcher for grid search.
+
+    Given dictionary of parameters and values to try, creates grid of all possible configurations,
+    and returns them one by one for each call to next_tune.
+
+    Attributes:
+        - configs (dict): Parameters and values to create grid from.
+            Structure of dictionary should be: { "--parameter_name" : [Value_1, Value_2, ...], ... }
+        - runs (int): Controls search based on number of runs we want for each config.
+            if runs > 0 -> run each config 'runs' times.
+            if runs = 0 -> run each config once even if it already exists.
+            This behavior is modified if we use check_existing_runs, see the method's description.
+        - grid (list of dict): List of dictionaries, each containing one combination of argument values.
+        - grid_index (int): Index of the current configuration in the grid.
+        - saver_exists (function): Pointer to the saver's exists method, used to check if there are existing runs.
+
+    """
+
+    def __init__(self, configs: dict, runs: int = 0):
+        """ Initialises the searcher.
+
+        Args:
+            - configs (dict): Dictionary of parameters and values to try.
+                Structure of dictionary should be: { "--parameter_name" : [Value_1, Value_2, ...], ... }
+            - runs (int, optional): Controls search based on number of runs we want for each config.
+                if runs > 0 -> run each config 'runs' times.
+                if runs = 0 -> run each config once even if it already exists.
+                This behavior is modified if we use check_existing_runs, see the method's description.
+
+        """
+        super().__init__()
+        self.runs = runs
+        self.configs = configs
+        self.grid = self.get_grid(configs)
+        self.grid_index = None
+        self.saver_exists = None
+
+    def __len__(self):
+        """ Returns the number of configurations defined by search space.
+
+        This may not be accurate if we use check_existing_runs,
+        as we may skip configurations; see the method's description.
+
+        Returns:
+            - num_configs (int): Number of configurations defined by search space.
+
+        """
+        return len(self.grid) * self.runs
+
+    def get_grid(self, param_dict: dict) -> List:
+        """ Creates search grid.
+
+        Generates all possible combinations of values for each argument in the given dictionary using recursion.
+
+        Args:
+            - param_dict (dict): A dictionary where keys are argument names and values are lists of values.
+
+        Returns:
+            - all_combinations (list): A list of dictionaries, each containing one combination of argument values.
+
+        """
+        # Helper function to recursively generate combinations
+        def generate_combinations(param_names, current_combination, all_combinations):
+            if not param_names:
+                # If there are no more parameters to combine, add the current combination to the result
+                all_combinations.append(dict(current_combination))
+                return
+
+            param_name = param_names[0]
+            param_values = param_dict[param_name]
+
+            for value in param_values:
+                current_combination[param_name] = value
+                # Recursively generate combinations for the remaining parameters
+                generate_combinations(param_names[1:], current_combination, all_combinations)
+
+        # Start with an empty combination and generate all combinations
+        all_combinations = []
+        generate_combinations(list(param_dict.keys()), {}, all_combinations)
+
+        return all_combinations
+
+ 92defcheck_existing_runs(self,saver:BaseSaver):
+ 93""" We save a pointer to the savers exists method to check if there are existing runs.
+ 94
+ 95 If there are n existing runs:
+ 96 n < runs -> run the remaining runs
+ 97 n >= runs -> skip all runs
+ 98
+ 99 Args:
+100 - saver (BaseSaver): Pointer to the savers exists method, used to check if there are existing runs.
+101
+102 """
+103
+104ifself.runs!=0:
+105self.saver_exists=saver.exists
+106else:
+107raiseValueError("Won't check for existing runs if runs = 0, Set runs > 0.")
+108
+109defskip_existing_runs(self,grid_index:int)->Tuple[int,int]:
+110""" Skips runs if they are in storage already.
+111
+112 Will check if there are existing runs for the current configuration,
+113 if there are existing runs we tally them up
+114 and skip configs or runs of a config based on the number of runs we want for each config.
+115
+116 Args:
+117 - grid_index (int): Index of the current configuration in the grid.
+118
+119 Returns:
+120 - grid_index (int): Index of the next configuration in the grid.
+121 - run_index (int): Index of the next run for the current configuration.
+122 """
+123ifself.saver_exists!=None:
+124# Check if there are existing runs, if so skip them
+125existing_runs=self.saver_exists(dict_to_strings(self.grid[grid_index]))
+126ifself.runs-existing_runs>0:
+127run_index=existing_runs
+128returngrid_index,run_index
+129else:
+130grid_index+=1
+131run_index=0
+132returnself.skip_existing_runs(grid_index)
+133else:
+134ifgrid_index==len(self.grid):
+135raiseIndexError('Reached end of grid, no more configurations to try.')
+136returngrid_index,0
+137
+138defnext_tune(self)->dict:
+139""" Returns the next configuration to try.
+140
+141 Will skip existing runs if check_existing_runs has been called.
+142 For more information on how this works check the methods descriptions for check_existing_runs and skip_existing_runs.
+143 Will raise an error if we have reached the end of the grid.
+144 To iterate through all configurations, use a for loop like so:
+145 for config in searcher: ...
+146
+147 Returns:
+148 - next_config (dict): The next configuration to try.
+149 """
+150# If this is the first call to next_tune, set grid_index to 0
+151ifself.grid_indexisNone:
+152self.grid_index=0
+153self.grid_index,self.run_index=self.skip_existing_runs(self.grid_index)
+154elifself.run_index<self.runs-1:
+155self.run_index+=1
+156else:
+157self.grid_index+=1
+158self.grid_index,self.run_index=self.skip_existing_runs(self.grid_index)
+159# If we have reached the end of the grid, raise an error
+160ifself.grid_index==len(self.grid):
+161raiseIndexError('Reached end of grid, no more configurations to try.')
+162# Return the next configuration to try
+163next_config=dict_to_strings(self.grid[self.grid_index])
+164returnnext_config
+
+class SearcherGrid(slune.base.BaseSearcher):
+
+Searcher for grid search.
+
+Given a dictionary of parameters and values to try, creates a grid of all possible configurations,
+and returns them one by one for each call to next_tune.
+
+Attributes:
+ - configs (dict): Parameters and values to create grid from.
+   Structure of dictionary should be: { "--parameter_name" : [Value_1, Value_2, ...], ... }
+ - runs (int): Controls search based on number of runs we want for each config.
+   if runs > 0 -> run each config 'runs' times.
+   if runs = 0 -> run each config once even if it already exists.
+   This behavior is modified if we want to use check_existing_runs, see the method descriptions.
+ - grid (list of dict): List of dictionaries, each containing one combination of argument values.
+ - grid_index (int): Index of the current configuration in the grid.
+ - saver_exists (function): Pointer to the saver's exists method, used to check if there are existing runs.
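+
+A minimal sketch with hypothetical parameter names:
+
+    from slune.searchers import SearcherGrid
+    searcher = SearcherGrid({'--lr': [0.1, 0.01], '--batch_size': [32, 64]}, runs=1)
+    len(searcher)         # 4: four configurations, one run each
+    searcher.next_tune()  # ['--lr=0.1', '--batch_size=32']
+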
+def __init__(self, configs: dict, runs: int = 0):
+
+Initialises the searcher.
+
+Arguments:
+ - configs (dict): Dictionary of parameters and values to try.
+   Structure of dictionary should be: { "--parameter_name" : [Value_1, Value_2, ...], ... }
+ - runs (int, optional): Controls search based on number of runs we want for each config.
+   if runs > 0 -> run each config 'runs' times.
+   if runs = 0 -> run each config once even if it already exists.
+   This behavior is modified if we want to use check_existing_runs, see the method descriptions.
+
+def get_grid(self, param_dict: dict) -> List:
+
+Creates search grid.
+
+Generates all possible combinations of values for each argument in the given dictionary using recursion.
+
+Arguments:
+ - param_dict (dict): A dictionary where keys are argument names and values are lists of values.
+
+Returns:
+ - all_combinations (list): A list of dictionaries, each containing one combination of argument values.
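+
+For example, given a SearcherGrid instance searcher (hypothetical parameters):
+
+    searcher.get_grid({'--lr': [0.1, 0.01], '--seed': [0]})
+    # [{'--lr': 0.1, '--seed': 0}, {'--lr': 0.01, '--seed': 0}]
+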
+def check_existing_runs(self, saver: BaseSaver):
+
+We save a pointer to the saver's exists method to check if there are existing runs.
+
+If there are n existing runs:
+ n < runs -> run the remaining runs
+ n >= runs -> skip all runs
+
+Arguments:
+ - saver (BaseSaver): Saver whose exists method is used to check if there are existing runs.
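+
+A minimal sketch of wiring a saver into the searcher (hypothetical parameters):
+
+    from slune import get_csv_slog
+    from slune.searchers import SearcherGrid
+    searcher = SearcherGrid({'--lr': [0.1, 0.01]}, runs=2)
+    searcher.check_existing_runs(get_csv_slog())
+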
+def skip_existing_runs(self, grid_index: int) -> Tuple[int, int]:
+
+Skips runs if they are in storage already.
+
+Will check if there are existing runs for the current configuration;
+if there are, we tally them up and skip configs or runs of a config
+based on the number of runs we want for each config.
+For example, with runs=3 and one run already stored for configuration 0,
+skip_existing_runs(0) returns (0, 1): stay on configuration 0 and start at run index 1.
+
+Arguments:
+ - grid_index (int): Index of the current configuration in the grid.
+
+Returns:
+ - grid_index (int): Index of the next configuration in the grid.
+ - run_index (int): Index of the next run for the current configuration.
+
+def next_tune(self) -> dict:
+
+Returns the next configuration to try.
+
+Will skip existing runs if check_existing_runs has been called.
+For more information on how this works check the method descriptions for check_existing_runs and skip_existing_runs.
+Will raise an error if we have reached the end of the grid.
+To iterate through all configurations, use a for loop like so:
+ for config in searcher: ...
+
+Returns:
+ - next_config (dict): The next configuration to try.
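+
+A minimal sketch of exhausting the grid (hypothetical parameters):
+
+    from slune.searchers import SearcherGrid
+    searcher = SearcherGrid({'--lr': [0.1, 0.01]}, runs=1)
+    for config in searcher:
+        print(config)  # ['--lr=0.1'], then ['--lr=0.01']
+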
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/docs/.html/src/slune/slune.html b/docs/.html/src/slune/slune.html
new file mode 100644
index 0000000..668107c
--- /dev/null
+++ b/docs/.html/src/slune/slune.html
@@ -0,0 +1,614 @@
+src.slune.slune API documentation
+
+from typing import List, Optional, Union
+from slune.base import BaseSearcher, BaseSaver
+import subprocess
+import sys
+from slune.savers.csv import SaverCsv
+from slune.loggers.default import LoggerDefault
+
+def submit_job(sh_path: str, args: List[str]):
+    """ Submits a job using the specified Bash script.
+
+    Args:
+        - sh_path (string): Path to the Bash script to be run.
+
+        - args (list of str): List of strings containing the arguments to be passed to the Bash script.
+
+    """
+    try:
+        # Run the Bash script using subprocess
+        command = [sh_path] + args
+        subprocess.run(['sbatch'] + command, check=True)
+    except subprocess.CalledProcessError as e:
+        print(f"Error running sbatch: {e}")
+
+def sbatchit(script_path: str, sbatch_path: str, searcher: BaseSearcher, cargs: Optional[List] = [], saver: Optional[BaseSaver] = None):
+    """ Submits jobs based on arguments given by searcher.
+
+    For each job, runs the script stored at script_path with the parameter values given by searcher
+    and the arguments given by cargs.
+
+    Uses the sbatch script with path sbatch_path to submit each job to the cluster.
+
+    If given a Saver object, uses it to check if there are existing runs for each job and skips them,
+    based on the number of runs we would like for each job (which is stored in the searcher).
+
+    Args:
+        - script_path (str): Path to the script (of the model) to be run for each job.
+
+        - sbatch_path (str): Path to the sbatch script that will be used to submit each job.
+            Examples of sbatch scripts can be found in the templates folder.
+
+        - searcher (Searcher): Searcher object used to retrieve changing arguments for each job.
+
+        - cargs (list, optional): Contains arguments to be passed to the script for every job.
+
+        - saver (Saver, optional): Saver object used if we want to check if there are existing runs so we don't rerun.
+            Can simply not give a Saver object if you want to rerun all jobs.
+
+    """
+    if saver is not None:
+        searcher.check_existing_runs(saver)
+    # Submit a job for each set of arguments given by the searcher
+    for args in searcher:
+        # Submit job
+        submit_job(sbatch_path, [script_path] + cargs + args)
+
+def lsargs() -> (str, List[str]):
+    """ Returns the script name and a list of the arguments passed to the script."""
+    args = sys.argv
+    return args[0], args[1:]
+
+def garg(args: List[str], arg_names: Union[str, List[str]]) -> Union[str, List[str]]:
+    """ Finds the argument(s) with name arg_names in the list of arguments args and returns its value(s).
+
+    Args:
+        - args (list of str): List of strings containing the arguments to be searched.
+
+        - arg_names (str or list of str): String or list of strings containing the names of the arguments to be searched for.
+
+    Returns:
+        - arg_value (str or list of str): String or list of strings containing the values of the arguments found.
+
+    """
+    def single_garg(arg_name):
+        # Check if arg_name is a string
+        if type(arg_name) != str:
+            raise TypeError(f"arg_name must be a string, got {type(arg_name)}")
+        # Find index of argument
+        arg_index = [i for i, arg in enumerate(args) if arg_name in arg]
+        # Raise a ValueError if the argument is not found
+        if not arg_index:
+            raise ValueError(f"Argument {arg_name} not found in arguments {args}")
+        # Return value of argument
+        if len(arg_index) > 1:
+            raise ValueError(f"Multiple arguments with name {arg_name} found in arguments {args}")
+        return args[arg_index[0]].split("=")[1]
+    if type(arg_names) == list:
+        return [single_garg(arg_name) for arg_name in arg_names]
+    else:
+        return single_garg(arg_names)
+
+def get_csv_slog(params: Optional[dict] = None, root_dir: Optional[str] = 'slune_results') -> BaseSaver:
+    """ Returns a SaverCsv object with the given parameters and root directory.
+
+    Args:
+        - params (dict, optional): Dictionary of parameters to be passed to the SaverCsv object, default is None.
+
+        - root_dir (str, optional): Path to the root directory to be used by the SaverCsv object, default is 'slune_results'.
+
+    Returns:
+        - SaverCsv (Saver): Saver object with the given parameters and root directory.
+            Initialized with a LoggerDefault object as its logger.
+
+    """
+    return SaverCsv(LoggerDefault(), params=params, root_dir=root_dir)
+
+def submit_job(sh_path: str, args: List[str]):
+
+Submits a job using the specified Bash script.
+
+Arguments:
+ - sh_path (string): Path to the Bash script to be run.
+ - args (list of str): List of strings containing the arguments to be passed to the Bash script.
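+
+A minimal sketch (hypothetical script paths; requires a cluster where sbatch is available):
+
+    from slune import submit_job
+    submit_job('templates/gpu_job.sh', ['model.py', '--lr=0.01'])
+    # equivalent to: sbatch templates/gpu_job.sh model.py --lr=0.01
+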
+def sbatchit(script_path: str, sbatch_path: str, searcher: BaseSearcher, cargs: Optional[List] = [], saver: Optional[BaseSaver] = None):
+
+Submits jobs based on arguments given by searcher.
+
+For each job, runs the script stored at script_path with the parameter values given by searcher
+and the arguments given by cargs.
+
+Uses the sbatch script with path sbatch_path to submit each job to the cluster.
+
+If given a Saver object, uses it to check if there are existing runs for each job and skips them,
+based on the number of runs we would like for each job (which is stored in the searcher).
+
+Arguments:
+ - script_path (str): Path to the script (of the model) to be run for each job.
+ - sbatch_path (str): Path to the sbatch script that will be used to submit each job.
+   Examples of sbatch scripts can be found in the templates folder.
+ - searcher (Searcher): Searcher object used to retrieve changing arguments for each job.
+ - cargs (list, optional): Contains arguments to be passed to the script for every job.
+ - saver (Saver, optional): Saver object used if we want to check if there are existing runs so we don't rerun.
+   Can simply not give a Saver object if you want to rerun all jobs.
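+
+A minimal sketch of a full submission (hypothetical paths and parameters):
+
+    from slune import sbatchit, get_csv_slog
+    from slune.searchers import SearcherGrid
+    searcher = SearcherGrid({'--lr': [0.1, 0.01]}, runs=2)
+    sbatchit('model.py', 'templates/gpu_job.sh', searcher, saver=get_csv_slog())
+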
+def lsargs() -> (str, List[str]):
+
+Returns the script name and a list of the arguments passed to the script.
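+
+For example, inside a script launched as python model.py --lr=0.01:
+
+    from slune import lsargs
+    script, args = lsargs()  # ('model.py', ['--lr=0.01'])
+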
+def garg(args: List[str], arg_names: Union[str, List[str]]) -> Union[str, List[str]]:
+
+Finds the argument(s) with name arg_names in the list of arguments args and returns its value(s).
+
+Arguments:
+ - args (list of str): List of strings containing the arguments to be searched.
+ - arg_names (str or list of str): String or list of strings containing the names of the arguments to be searched for.
+
+Returns:
+ - arg_value (str or list of str): String or list of strings containing the values of the arguments found.
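+
+For example:
+
+    from slune import garg
+    garg(['--lr=0.01', '--batch_size=32'], '--lr')                    # '0.01'
+    garg(['--lr=0.01', '--batch_size=32'], ['--lr', '--batch_size'])  # ['0.01', '32']
+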
+def get_csv_slog(params: Optional[dict] = None, root_dir: Optional[str] = 'slune_results') -> BaseSaver:
+
+Returns a SaverCsv object with the given parameters and root directory.
+
+Arguments:
+ - params (dict, optional): Dictionary of parameters to be passed to the SaverCsv object, default is None.
+ - root_dir (str, optional): Path to the root directory to be used by the SaverCsv object, default is 'slune_results'.
+
+Returns:
+ - SaverCsv (Saver): Saver object with the given parameters and root directory.
+   Initialized with a LoggerDefault object as its logger.
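+
+A minimal sketch of logging from inside a job script (hypothetical metric; log is inherited from the saver's logger, and the params format of '--key=value' strings is an assumption):
+
+    from slune import get_csv_slog
+    slog = get_csv_slog(params=['--lr=0.01'])
+    slog.log({'accuracy': 0.9})
+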
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/docs/.html/src/slune/utils.html b/docs/.html/src/slune/utils.html
new file mode 100644
index 0000000..5a28ba6
--- /dev/null
+++ b/docs/.html/src/slune/utils.html
@@ -0,0 +1,721 @@
+src.slune.utils API documentation
+
+import os
+from typing import List, Optional, Tuple
+
+def find_directory_path(strings: List[str], root_directory: Optional[str] = '.') -> Tuple[int, str]:
+    """ Searches the root directory for a path of directories that matches the strings given in any order.
+    If only a partial match is found, returns the deepest matching path.
+    If no matches are found returns root_directory.
+    Returns a stripped matching path of directories, i.e. where we convert '--string=value' to '--string='.
+
+    Args:
+        - strings (list of str): List of strings to be matched in any order. Each string in list must be in the form '--string='.
+        - root_directory (string, optional): Path to the root directory to be searched, default is current working directory.
+
+    Returns:
+        - max_depth (int): Depth of the deepest matching path.
+        - max_path (string): Path of the deepest matching path.
+
+    """
+    def _find_directory_path(curr_strings, curr_root, depth, max_depth, max_path):
+        dir_list = [entry.name for entry in os.scandir(curr_root) if entry.is_dir()]
+        stripped_dir_list = [d.split('=')[0].strip() + "=" for d in dir_list]
+        stripped_dir_list = list(set(stripped_dir_list))
+        for string in curr_strings:
+            if string in stripped_dir_list:
+                dir_list = [d for d in dir_list if d.startswith(string)]
+                for d in dir_list:
+                    new_depth, new_path = _find_directory_path([s for s in curr_strings if s != string], os.path.join(curr_root, d), depth + 1, max_depth, max_path)
+                    if new_depth > max_depth:
+                        max_depth, max_path = new_depth, new_path
+        if depth > max_depth:
+            max_depth, max_path = depth, curr_root
+        return max_depth, max_path
+
+    max_depth, max_path = _find_directory_path(strings, root_directory, 0, -1, '')
+    if max_depth > 0:
+        max_path = max_path[len(root_directory):]
+        dirs = max_path[1:].split(os.path.sep)
+        dirs = [d.split('=')[0].strip() + "=" for d in dirs]
+        max_path = os.path.join(*dirs)
+        max_path = os.path.join(root_directory, max_path)
+    return max_path
+
+def get_numeric_equiv(og_path: str, root_directory: Optional[str] = '.') -> str:
+    """ Replaces directories in path with existing directories with the same numerical value.
+
+    Args:
+        - og_path (str): Path we want to check against existing paths, must be a subdirectory of root_directory and each directory must have form '--string=value'.
+        - root_directory (str, optional): Path to the root directory to be searched, default is current working directory.
+
+    Returns:
+        - equiv (str): Path with values changed to match existing directories if values are numerically equivalent, with root directory at beginning.
+
+    """
+    def is_numeric(s):
+        try:
+            float(s)
+            return True
+        except ValueError:
+            return False
+
+    dirs = og_path.split(os.path.sep)
+    equiv = root_directory
+    for d in dirs:
+        next_dir = os.path.join(equiv, d)
+        if os.path.exists(next_dir):
+            equiv = next_dir
+        else:
+            # If the directory doesn't exist, check if there's a directory with the same numerical value
+            dir_value = d.split('=')[1]
+            if is_numeric(dir_value):
+                dir_value = float(dir_value)
+                if os.path.exists(equiv):
+                    existing_dirs = [entry.name for entry in os.scandir(equiv) if entry.is_dir()]
+                    for existing_dir in existing_dirs:
+                        existing_dir_value = existing_dir.split('=')[1]
+                        if is_numeric(existing_dir_value) and float(existing_dir_value) == dir_value:
+                            equiv = os.path.join(equiv, existing_dir)
+                            break
+                    # If there is no directory with the same numerical value
+                    # we just keep the directory as is and move on to the next one
+                    else:
+                        equiv = next_dir
+                else:
+                    # If the directory doesn't exist we just keep the directory as is and move on to the next one
+                    equiv = next_dir
+            # Otherwise we just keep the directory as is and move on to the next one
+            else:
+                equiv = next_dir
+    return equiv
+
+def dict_to_strings(d: dict) -> List[str]:
+    """ Converts a dictionary into a list of strings in the form of '--key=value'.
+
+    Args:
+        - d (dict): Dictionary to be converted.
+
+    Returns:
+        - out (list of str): List of strings in the form of '--key=value'.
+
+    """
+    out = []
+    for key, value in d.items():
+        if key.startswith('--'):
+            out.append('{}={}'.format(key, value))
+        else:
+            out.append('--{}={}'.format(key, value))
+    return out
+
+def find_csv_files(root_directory: Optional[str] = '.') -> List[str]:
+    """ Recursively finds all csv files in all subdirectories of the root directory and returns their paths.
+
+    Args:
+        - root_directory (str, optional): Path to the root directory to be searched, default is current working directory.
+
+    Returns:
+        - csv_files (list of str): List of strings containing the paths to all csv files found.
+
+    """
+    csv_files = []
+    for root, dirs, files in os.walk(root_directory):
+        for file in files:
+            if file.endswith('.csv'):
+                csv_files.append(os.path.join(root, file))
+    return csv_files
+
+def get_all_paths(dirs: List[str], root_directory: Optional[str] = '.') -> List[str]:
+    """ Finds all possible paths of csv files that have a directory matching one of each of all the parameters given.
+
+    Finds all paths of csv files in all subdirectories of the root directory that have a directory in their path matching one of each of all the parameters given.
+
+    Args:
+        - dirs (list of str): List of directory names we want returned paths to have in their path.
+        - root_directory (str, optional): Path to the root directory to be searched, default is current working directory.
+
+    Returns:
+        - matches (list of str): List of strings containing the paths to all csv files found.
+
+    """
+    all_csv = find_csv_files(root_directory)
+    matches = []
+    for csv in all_csv:
+        path = csv.split(os.path.sep)
+        if all([p in path for p in dirs]):
+            matches.append(csv)
+    return matches
+
+def find_directory_path(strings: List[str], root_directory: Optional[str] = '.') -> Tuple[int, str]:
+
+Searches the root directory for a path of directories that matches the strings given in any order.
+If only a partial match is found, returns the deepest matching path.
+If no matches are found returns root_directory.
+Returns a stripped matching path of directories, i.e. where we convert '--string=value' to '--string='.
+
+Arguments:
+ - strings (list of str): List of strings to be matched in any order. Each string in list must be in the form '--string='.
+ - root_directory (string, optional): Path to the root directory to be searched, default is current working directory.
+
+Returns:
+ - max_depth (int): Depth of the deepest matching path.
+ - max_path (string): Path of the deepest matching path.
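+
+For example, if slune_results/--lr=0.01/--seed=1 already exists (hypothetical tree):
+
+    from slune.utils import find_directory_path
+    find_directory_path(['--seed=', '--lr='], root_directory='slune_results')
+    # 'slune_results/--lr=/--seed='
+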
+def get_numeric_equiv(og_path: str, root_directory: Optional[str] = '.') -> str:
+
+Replaces directories in path with existing directories with the same numerical value.
+
+Arguments:
+ - og_path (str): Path we want to check against existing paths, must be a subdirectory of root_directory and each directory must have form '--string=value'.
+ - root_directory (str, optional): Path to the root directory to be searched, default is current working directory.
+
+Returns:
+ - equiv (str): Path with values changed to match existing directories if values are numerically equivalent, with root directory at beginning.
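+
+For example, if slune_results/--lr=0.1 already exists (hypothetical tree):
+
+    from slune.utils import get_numeric_equiv
+    get_numeric_equiv('--lr=0.10', root_directory='slune_results')
+    # 'slune_results/--lr=0.1', since 0.10 and 0.1 are numerically equal
+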
+def dict_to_strings(d: dict) -> List[str]:
+
+Converts a dictionary into a list of strings in the form of '--key=value'.
+
+Arguments:
+ - d (dict): Dictionary to be converted.
+
+Returns:
+ - out (list of str): List of strings in the form of '--key=value'.
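+
+For example:
+
+    from slune.utils import dict_to_strings
+    dict_to_strings({'--lr': 0.01, 'batch_size': 32})
+    # ['--lr=0.01', '--batch_size=32']
+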
+def find_csv_files(root_directory: Optional[str] = '.') -> List[str]:
+
+Recursively finds all csv files in all subdirectories of the root directory and returns their paths.
+
+Arguments:
+ - root_directory (str, optional): Path to the root directory to be searched, default is current working directory.
+
+Returns:
+ - csv_files (list of str): List of strings containing the paths to all csv files found.
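+
+For example (hypothetical file names):
+
+    from slune.utils import find_csv_files
+    find_csv_files('slune_results')
+    # e.g. ['slune_results/--lr=0.01/--seed=1/results.csv', ...]
+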
+def get_all_paths(dirs: List[str], root_directory: Optional[str] = '.') -> List[str]:
+
+Finds all possible paths of csv files that have a directory matching one of each of all the parameters given.
+
+Finds all paths of csv files in all subdirectories of the root directory that have a directory in their path matching one of each of all the parameters given.
+
+Arguments:
+ - dirs (list of str): List of directory names we want returned paths to have in their path.
+ - root_directory (str, optional): Path to the root directory to be searched, default is current working directory.
+
+Returns:
+ - matches (list of str): List of strings containing the paths to all csv files found.
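+
+For example (hypothetical results tree):
+
+    from slune.utils import get_all_paths
+    get_all_paths(['--lr=0.01', '--seed=1'], root_directory='slune_results')
+    # only the csv paths whose directories include both --lr=0.01 and --seed=1
+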
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/src/__init__.py b/src/__init__.py
index b1cfe26..08bfb69 100644
--- a/src/__init__.py
+++ b/src/__init__.py
@@ -1 +1,4 @@
-# Empty
\ No newline at end of file
+"""
+.. include:: ../README.md
+.. include:: ../CLASSDESIGN.md
+"""
\ No newline at end of file
diff --git a/src/slune/__init__.py b/src/slune/__init__.py
index 494b924..36709c2 100644
--- a/src/slune/__init__.py
+++ b/src/slune/__init__.py
@@ -1,11 +1,14 @@
# from .slune import submit_job, sbatchit
# __all__ = ['slune', 'base', 'utils', 'loggers', 'savers', 'searchers' ]
-from .searchers import grid
-from .savers import csv
-from .loggers import default
+from .searchers import *
+from .savers import *
+from .loggers import *
from .slune import submit_job, sbatchit, lsargs, garg, get_csv_slog
from . import base, utils
-__all__ = ['submit_job', 'sbatchit', 'lsargs', 'garg', 'get_csv_slog',
- 'base', 'utils', 'default', 'grid', 'csv']
\ No newline at end of file
+# __all__ = ['submit_job', 'sbatchit', 'lsargs', 'garg', 'get_csv_slog',
+ # 'base', 'utils', 'default', 'grid', 'csv']
+
+import importlib.metadata
+__version__ = importlib.metadata.version("slune-lib")
\ No newline at end of file
diff --git a/src/slune/loggers/__init__.py b/src/slune/loggers/__init__.py
index bbfbd67..00ce26a 100644
--- a/src/slune/loggers/__init__.py
+++ b/src/slune/loggers/__init__.py
@@ -1,3 +1,3 @@
from .default import LoggerDefault
-__all__ = ['LoggerDefault']
\ No newline at end of file
+# __all__ = ['LoggerDefault']
\ No newline at end of file
diff --git a/src/slune/savers/__init__.py b/src/slune/savers/__init__.py
index 09c1bd6..4863312 100644
--- a/src/slune/savers/__init__.py
+++ b/src/slune/savers/__init__.py
@@ -1,3 +1,3 @@
from .csv import SaverCsv
-__all__ = ['SaverCsv']
\ No newline at end of file
+# __all__ = ['SaverCsv']
\ No newline at end of file
diff --git a/src/slune/searchers/__init__.py b/src/slune/searchers/__init__.py
index 91a9141..e084d00 100644
--- a/src/slune/searchers/__init__.py
+++ b/src/slune/searchers/__init__.py
@@ -1,3 +1,3 @@
from .grid import SearcherGrid
-__all__ = ['SearcherGrid']
\ No newline at end of file
+# __all__ = ['SearcherGrid']
\ No newline at end of file
diff --git a/src/slune/slune.py b/src/slune/slune.py
index ad8be97..c3ef5f1 100644
--- a/src/slune/slune.py
+++ b/src/slune/slune.py
@@ -1,5 +1,5 @@
from typing import List, Optional, Union
-from slune.base import Searcher, Saver
+from slune.base import BaseSearcher, BaseSaver
import subprocess
import sys
from slune.savers.csv import SaverCsv
@@ -22,7 +22,7 @@ def submit_job(sh_path: str, args: List[str]):
except subprocess.CalledProcessError as e:
print(f"Error running sbatch: {e}")
-def sbatchit(script_path: str, sbatch_path: str, searcher: Searcher, cargs: Optional[List]=[], saver: Optional[Saver]=None):
+def sbatchit(script_path: str, sbatch_path: str, searcher: BaseSearcher, cargs: Optional[List]=[], saver: Optional[BaseSaver]=None):
""" Submits jobs based on arguments given by searcher.
For each job runs the script stored at script_path with selected parameter values given by searcher
@@ -91,7 +91,7 @@ def single_garg(arg_name):
else:
return single_garg(arg_names)
-def get_csv_slog(params: Optional[dict]= None, root_dir: Optional[str]='slune_results') -> Saver:
+def get_csv_slog(params: Optional[dict]= None, root_dir: Optional[str]='slune_results') -> BaseSaver:
""" Returns a SaverCsv object with the given parameters and root directory.
Args: