diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
new file mode 100644
index 0000000..513cabe
--- /dev/null
+++ b/.github/workflows/docs.yml
@@ -0,0 +1,50 @@
+name: website
+
+# build the documentation whenever there are new commits on main
+on:
+  push:
+    branches:
+      - main
+    # Alternative: only build for tags.
+    # tags:
+    #   - '*'
+
+# security: restrict permissions for CI jobs.
+permissions:
+  contents: read
+
+jobs:
+  # Build the documentation and upload the static HTML files as an artifact.
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v4
+        with:
+          python-version: '3.12'
+
+      # ADJUST THIS: install all dependencies (including pdoc)
+      - run: pip install -e .
+      - run: pip install pdoc
+      # ADJUST THIS: build your documentation into docs/.
+      # We use a custom build script for pdoc itself, ideally you just run `pdoc -o docs/ ...` here.
+      - run: pdoc --docformat google -o docs/.html src
+
+      - uses: actions/upload-pages-artifact@v2
+        with:
+          path: docs/
+
+  # Deploy the artifact to GitHub pages.
+  # This is a separate job so that only actions/deploy-pages has the necessary permissions.
+  deploy:
+    needs: build
+    runs-on: ubuntu-latest
+    permissions:
+      pages: write
+      id-token: write
+    environment:
+      name: github-pages
+      url: ${{ steps.deployment.outputs.page_url }}
+    steps:
+      - id: deployment
+        uses: actions/deploy-pages@v2
\ No newline at end of file
diff --git a/CLASSDESIGN.md b/CLASSDESIGN.md
new file mode 100644
index 0000000..cae0d99
--- /dev/null
+++ b/CLASSDESIGN.md
@@ -0,0 +1,14 @@
+# Class Design
+Here we will outline the different kinds of classes that are used in slune and how they interact with each other. There are 3 types:
+- 'Searcher' classes - used to define and traverse a search space.
+- 'Logger' classes - used to create and read log files.
+- 'Saver' classes - used to save logs to files and read logs from files.
+
+The base module is where the base classes for each of these types are defined. The base classes are:
+- BaseSearcher
+- BaseLogger
+- BaseSaver
+
+To create a new searcher, logger or saver, you must inherit from the appropriate base class and implement the required methods. The required methods will have the '@abc.abstractmethod' decorator above them and will throw errors if they are not implemented. The compulsory methods allow for well-defined interactions between the different classes and should allow for any combination of searcher, logger and saver to be used together.
+
+Please read the docs for the base classes to see which methods must be implemented and how.
\ No newline at end of file
diff --git a/README.md b/README.md
index 92aa1ce..c83f99f 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,4 @@
+![PyPI - Version](https://img.shields.io/pypi/v/slune-lib)
 [![license](https://img.shields.io/badge/License-MIT-purple.svg)](LICENSE)
 ![badge](https://img.shields.io/endpoint?url=https://gist.githubusercontent.com/h-0-0/4aa01e058fee448070c587f6967037e4/raw/CodeCovSlune.json)
 
@@ -111,7 +112,6 @@ Please check out the examples folder for notebooks detailing in more depth some
 
 ## Roadmap
 - Make package user friendly:
-    - Add documentation.
     - Go through automation settings.
     - Code of conduct.
     - Contributing guidelines.
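CLASSDESIGN.md above describes the subclassing contract. As a rough sketch of what that looks like in practice (hypothetical code, not from this repository; only the method names `next_tune` and `check_existing_runs` come from the `BaseSearcher` docs later in this diff):

```python
from slune.base import BaseSearcher

class SearcherExample(BaseSearcher):
    """Hypothetical searcher: walks a fixed list of configurations."""

    def __init__(self, configs: list, runs: int = 0):
        super().__init__()
        self.configs = configs   # e.g. [{'--alpha': 0.25}, {'--alpha': 0.5}]
        self.runs = runs         # how many runs we want per configuration
        self.index = 0

    def next_tune(self, *args, **kwargs):
        # Return the next configuration to try; raise when exhausted.
        if self.index >= len(self.configs):
            raise IndexError('Reached the end of the search space.')
        config = self.configs[self.index]
        self.index += 1
        return config

    def check_existing_runs(self, saver, *args, **kwargs):
        # Keep a handle on the saver's `exists` method so runs can be skipped.
        self.saver_exists = saver.exists
```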
diff --git a/docs/.html/index.html b/docs/.html/index.html new file mode 100644 index 0000000..0ef869b --- /dev/null +++ b/docs/.html/index.html @@ -0,0 +1,7 @@ + + + + + + + diff --git a/docs/.html/search.js b/docs/.html/search.js new file mode 100644 index 0000000..bcd0f25 --- /dev/null +++ b/docs/.html/search.js @@ -0,0 +1,46 @@ +window.pdocSearch = (function(){ +/** elasticlunr - http://weixsong.github.io * Copyright (C) 2017 Oliver Nightingale * Copyright (C) 2017 Wei Song * MIT Licensed */!function(){function e(e){if(null===e||"object"!=typeof e)return e;var t=e.constructor();for(var n in e)e.hasOwnProperty(n)&&(t[n]=e[n]);return t}var t=function(e){var n=new t.Index;return n.pipeline.add(t.trimmer,t.stopWordFilter,t.stemmer),e&&e.call(n,n),n};t.version="0.9.5",lunr=t,t.utils={},t.utils.warn=function(e){return function(t){e.console&&console.warn&&console.warn(t)}}(this),t.utils.toString=function(e){return void 0===e||null===e?"":e.toString()},t.EventEmitter=function(){this.events={}},t.EventEmitter.prototype.addListener=function(){var e=Array.prototype.slice.call(arguments),t=e.pop(),n=e;if("function"!=typeof t)throw new TypeError("last argument must be a function");n.forEach(function(e){this.hasHandler(e)||(this.events[e]=[]),this.events[e].push(t)},this)},t.EventEmitter.prototype.removeListener=function(e,t){if(this.hasHandler(e)){var n=this.events[e].indexOf(t);-1!==n&&(this.events[e].splice(n,1),0==this.events[e].length&&delete this.events[e])}},t.EventEmitter.prototype.emit=function(e){if(this.hasHandler(e)){var t=Array.prototype.slice.call(arguments,1);this.events[e].forEach(function(e){e.apply(void 0,t)},this)}},t.EventEmitter.prototype.hasHandler=function(e){return e in this.events},t.tokenizer=function(e){if(!arguments.length||null===e||void 0===e)return[];if(Array.isArray(e)){var n=e.filter(function(e){return null===e||void 0===e?!1:!0});n=n.map(function(e){return t.utils.toString(e).toLowerCase()});var i=[];return n.forEach(function(e){var n=e.split(t.tokenizer.seperator);i=i.concat(n)},this),i}return e.toString().trim().toLowerCase().split(t.tokenizer.seperator)},t.tokenizer.defaultSeperator=/[\s\-]+/,t.tokenizer.seperator=t.tokenizer.defaultSeperator,t.tokenizer.setSeperator=function(e){null!==e&&void 0!==e&&"object"==typeof e&&(t.tokenizer.seperator=e)},t.tokenizer.resetSeperator=function(){t.tokenizer.seperator=t.tokenizer.defaultSeperator},t.tokenizer.getSeperator=function(){return t.tokenizer.seperator},t.Pipeline=function(){this._queue=[]},t.Pipeline.registeredFunctions={},t.Pipeline.registerFunction=function(e,n){n in t.Pipeline.registeredFunctions&&t.utils.warn("Overwriting existing registered function: "+n),e.label=n,t.Pipeline.registeredFunctions[n]=e},t.Pipeline.getRegisteredFunction=function(e){return e in t.Pipeline.registeredFunctions!=!0?null:t.Pipeline.registeredFunctions[e]},t.Pipeline.warnIfFunctionNotRegistered=function(e){var n=e.label&&e.label in this.registeredFunctions;n||t.utils.warn("Function is not registered with pipeline. 
This may cause problems when serialising the index.\n",e)},t.Pipeline.load=function(e){var n=new t.Pipeline;return e.forEach(function(e){var i=t.Pipeline.getRegisteredFunction(e);if(!i)throw new Error("Cannot load un-registered function: "+e);n.add(i)}),n},t.Pipeline.prototype.add=function(){var e=Array.prototype.slice.call(arguments);e.forEach(function(e){t.Pipeline.warnIfFunctionNotRegistered(e),this._queue.push(e)},this)},t.Pipeline.prototype.after=function(e,n){t.Pipeline.warnIfFunctionNotRegistered(n);var i=this._queue.indexOf(e);if(-1===i)throw new Error("Cannot find existingFn");this._queue.splice(i+1,0,n)},t.Pipeline.prototype.before=function(e,n){t.Pipeline.warnIfFunctionNotRegistered(n);var i=this._queue.indexOf(e);if(-1===i)throw new Error("Cannot find existingFn");this._queue.splice(i,0,n)},t.Pipeline.prototype.remove=function(e){var t=this._queue.indexOf(e);-1!==t&&this._queue.splice(t,1)},t.Pipeline.prototype.run=function(e){for(var t=[],n=e.length,i=this._queue.length,o=0;n>o;o++){for(var r=e[o],s=0;i>s&&(r=this._queue[s](r,o,e),void 0!==r&&null!==r);s++);void 0!==r&&null!==r&&t.push(r)}return t},t.Pipeline.prototype.reset=function(){this._queue=[]},t.Pipeline.prototype.get=function(){return this._queue},t.Pipeline.prototype.toJSON=function(){return this._queue.map(function(e){return t.Pipeline.warnIfFunctionNotRegistered(e),e.label})},t.Index=function(){this._fields=[],this._ref="id",this.pipeline=new t.Pipeline,this.documentStore=new t.DocumentStore,this.index={},this.eventEmitter=new t.EventEmitter,this._idfCache={},this.on("add","remove","update",function(){this._idfCache={}}.bind(this))},t.Index.prototype.on=function(){var e=Array.prototype.slice.call(arguments);return this.eventEmitter.addListener.apply(this.eventEmitter,e)},t.Index.prototype.off=function(e,t){return this.eventEmitter.removeListener(e,t)},t.Index.load=function(e){e.version!==t.version&&t.utils.warn("version mismatch: current "+t.version+" importing "+e.version);var n=new this;n._fields=e.fields,n._ref=e.ref,n.documentStore=t.DocumentStore.load(e.documentStore),n.pipeline=t.Pipeline.load(e.pipeline),n.index={};for(var i in e.index)n.index[i]=t.InvertedIndex.load(e.index[i]);return n},t.Index.prototype.addField=function(e){return this._fields.push(e),this.index[e]=new t.InvertedIndex,this},t.Index.prototype.setRef=function(e){return this._ref=e,this},t.Index.prototype.saveDocument=function(e){return this.documentStore=new t.DocumentStore(e),this},t.Index.prototype.addDoc=function(e,n){if(e){var n=void 0===n?!0:n,i=e[this._ref];this.documentStore.addDoc(i,e),this._fields.forEach(function(n){var o=this.pipeline.run(t.tokenizer(e[n]));this.documentStore.addFieldLength(i,n,o.length);var r={};o.forEach(function(e){e in r?r[e]+=1:r[e]=1},this);for(var s in r){var u=r[s];u=Math.sqrt(u),this.index[n].addToken(s,{ref:i,tf:u})}},this),n&&this.eventEmitter.emit("add",e,this)}},t.Index.prototype.removeDocByRef=function(e){if(e&&this.documentStore.isDocStored()!==!1&&this.documentStore.hasDoc(e)){var t=this.documentStore.getDoc(e);this.removeDoc(t,!1)}},t.Index.prototype.removeDoc=function(e,n){if(e){var n=void 0===n?!0:n,i=e[this._ref];this.documentStore.hasDoc(i)&&(this.documentStore.removeDoc(i),this._fields.forEach(function(n){var o=this.pipeline.run(t.tokenizer(e[n]));o.forEach(function(e){this.index[n].removeToken(e,i)},this)},this),n&&this.eventEmitter.emit("remove",e,this))}},t.Index.prototype.updateDoc=function(e,t){var t=void 
0===t?!0:t;this.removeDocByRef(e[this._ref],!1),this.addDoc(e,!1),t&&this.eventEmitter.emit("update",e,this)},t.Index.prototype.idf=function(e,t){var n="@"+t+"/"+e;if(Object.prototype.hasOwnProperty.call(this._idfCache,n))return this._idfCache[n];var i=this.index[t].getDocFreq(e),o=1+Math.log(this.documentStore.length/(i+1));return this._idfCache[n]=o,o},t.Index.prototype.getFields=function(){return this._fields.slice()},t.Index.prototype.search=function(e,n){if(!e)return[];e="string"==typeof e?{any:e}:JSON.parse(JSON.stringify(e));var i=null;null!=n&&(i=JSON.stringify(n));for(var o=new t.Configuration(i,this.getFields()).get(),r={},s=Object.keys(e),u=0;u0&&t.push(e);for(var i in n)"docs"!==i&&"df"!==i&&this.expandToken(e+i,t,n[i]);return t},t.InvertedIndex.prototype.toJSON=function(){return{root:this.root}},t.Configuration=function(e,n){var e=e||"";if(void 0==n||null==n)throw new Error("fields should not be null");this.config={};var i;try{i=JSON.parse(e),this.buildUserConfig(i,n)}catch(o){t.utils.warn("user configuration parse failed, will use default configuration"),this.buildDefaultConfig(n)}},t.Configuration.prototype.buildDefaultConfig=function(e){this.reset(),e.forEach(function(e){this.config[e]={boost:1,bool:"OR",expand:!1}},this)},t.Configuration.prototype.buildUserConfig=function(e,n){var i="OR",o=!1;if(this.reset(),"bool"in e&&(i=e.bool||i),"expand"in e&&(o=e.expand||o),"fields"in e)for(var r in e.fields)if(n.indexOf(r)>-1){var s=e.fields[r],u=o;void 0!=s.expand&&(u=s.expand),this.config[r]={boost:s.boost||0===s.boost?s.boost:1,bool:s.bool||i,expand:u}}else t.utils.warn("field name in user configuration not found in index instance fields");else this.addAllFields2UserConfig(i,o,n)},t.Configuration.prototype.addAllFields2UserConfig=function(e,t,n){n.forEach(function(n){this.config[n]={boost:1,bool:e,expand:t}},this)},t.Configuration.prototype.get=function(){return this.config},t.Configuration.prototype.reset=function(){this.config={}},lunr.SortedSet=function(){this.length=0,this.elements=[]},lunr.SortedSet.load=function(e){var t=new this;return t.elements=e,t.length=e.length,t},lunr.SortedSet.prototype.add=function(){var e,t;for(e=0;e1;){if(r===e)return o;e>r&&(t=o),r>e&&(n=o),i=n-t,o=t+Math.floor(i/2),r=this.elements[o]}return r===e?o:-1},lunr.SortedSet.prototype.locationFor=function(e){for(var t=0,n=this.elements.length,i=n-t,o=t+Math.floor(i/2),r=this.elements[o];i>1;)e>r&&(t=o),r>e&&(n=o),i=n-t,o=t+Math.floor(i/2),r=this.elements[o];return r>e?o:e>r?o+1:void 0},lunr.SortedSet.prototype.intersect=function(e){for(var t=new lunr.SortedSet,n=0,i=0,o=this.length,r=e.length,s=this.elements,u=e.elements;;){if(n>o-1||i>r-1)break;s[n]!==u[i]?s[n]u[i]&&i++:(t.add(s[n]),n++,i++)}return t},lunr.SortedSet.prototype.clone=function(){var e=new lunr.SortedSet;return e.elements=this.toArray(),e.length=e.elements.length,e},lunr.SortedSet.prototype.union=function(e){var t,n,i;this.length>=e.length?(t=this,n=e):(t=e,n=this),i=t.clone();for(var o=0,r=n.toArray();o\"PyPI\n\"license\"\n\"badge\"

\n\n

\"badge\"\n\"badge\"\n\"badge\"

\n\n

slune (= slurm + tune!)

\n\n

A super simplistic python package for performing hyperparameter tuning (or more generally launching jobs and saving results) on a cluster using SLURM. Takes advantage of the fact that lots of jobs (including hyperparameter tuning) are embarrassingly parallel! With slune you can divide your compute into lots of separately scheduled jobs meaning that each small job can get running on your cluster more quickly, speeding up your workflow! Often significantly!

\n\n

Slune is super-easy to use! We have helper functions which can execute everything you need done for you, letting you speed up your work without wasting time.

\n\n

Slune is barebones by design. This means that you can easily write code to integrate with slune if you want to do something a bit different! You can also work out what each function is doing pretty easily.

\n\n

Slune is flexible. In designing this package I've tried to make as few assumptions as possible, meaning that it can be used for lots of stuff outside hyperparameter tuning (and within it!). For example, you can get slune to give you paths for where to save things, submit lots of jobs in parallel for any sort of script and do grid search. And there's more to come!

\n\n

Usage

\n\n

Let's go through a quick example of how we can use slune ... first let's define a model that we want to train:

\n\n
\n
# Simple Regularized Linear Regression without using external libraries\n\n# Function to compute the mean of a list\ndef mean(values):\n    return sum(values) / float(len(values))\n\n# Function to compute the covariance between two lists\ndef covariance(x, mean_x, y, mean_y):\n    covar = 0.0\n    for i in range(len(x)):\n        covar += (x[i] - mean_x) * (y[i] - mean_y)\n    return covar\n\n# Function to compute the variance of a list\ndef variance(values, mean):\n    return sum((x - mean) ** 2 for x in values)\n\n# Function to compute coefficients for a simple regularized linear regression\ndef coefficients_regularized(x, y, alpha):\n    mean_x, mean_y = mean(x), mean(y)\n    var_x = variance(x, mean_x)\n    covar = covariance(x, mean_x, y, mean_y)\n    b1 = (covar + alpha * var_x) / (var_x + alpha)\n    b0 = mean_y - b1 * mean_x\n    return b0, b1\n\n# Function to make predictions with a simple regularized linear regression model\ndef linear_regression_regularized(train_X, train_y, test_X, alpha):\n    b0, b1 = coefficients_regularized(train_X, train_y, alpha)\n    predictions = [b0 + b1 * x for x in test_X]\n    return predictions\n\n# ------------------\n# The above is code for a simple regularized linear regression model that we want to train.\n# Now let's fit the model and use slune to save how well our model performs!\n# ------------------\n\nif __name__ == "__main__":\n    # First let's load in the value for the regularization parameter alpha that has been passed to this script from the command line. We will use the slune helper function lsargs to do this. \n    # lsargs returns a tuple of the python path and a list of arguments passed to the script. We can then use this to get the alpha value.\n    from slune import lsargs\n    python_path, args = lsargs()\n    alpha = float(args[0])\n\n    # Mock training dataset, function is y = 1 + 1 * x\n    X = [1, 2, 3, 4, 5]\n    y = [2, 3, 4, 5, 6]\n\n    # Mock test dataset\n    test_X = [6, 7, 8]\n    test_y = [7, 8, 9]\n    test_predictions = linear_regression_regularized(X, y, test_X, alpha)\n\n    # Next, let's load in a function that we can use to get a saver object that uses the default method of logging (we call this object a slog = saver + logger). The saving will be coordinated by a csv saver object which saves and reads results from csv files stored in a hierarchy of directories.\n    from slune import get_csv_slog\n    csv_slog = get_csv_slog(params = args)\n\n    # Let's now calculate the mean squared error of our predictions and log it!\n    mse = mean([(test_y[i] - test_predictions[i])**2 for i in range(len(test_y))])\n    csv_slog.log({'mse': mse})\n\n    # Let's now save our logged results!\n    csv_slog.save_collated()\n
\n
\n\n

Now let's write some code that will submit some jobs to train our model using different hyperparameters!!

\n\n
\n
# Let's now load in a function that will coordinate our search! We're going to do a grid search.\n# SearcherGrid is the class we can use to coordinate a grid search. We pass it a dictionary of hyperparameters and the values we want to try for each hyperparameter. We also pass it the number of runs we want to do for each combination of hyperparameters.\nfrom slune.searchers import SearcherGrid\ngrid_searcher = SearcherGrid({'alpha' : [0.25, 0.5, 0.75]}, runs = 1)\n\n# Let's now import a function which will submit a job for our model. The script_path specifies the path to the script that contains the model we want to train. The template_path specifies the path to the template script that we want to use to specify the job, and cargs is a list of constant arguments we want to pass to the script for each run.\n# We set slog to None as we want to run all jobs, even if they have already been run before.\nfrom slune import sbatchit\nscript_path = 'model.py'\ntemplate_path = 'template.sh'\nsbatchit(script_path, template_path, grid_searcher, cargs=[], slog=None)\n
\n
\n\n

Now we've submitted our jobs, we will wait for them to finish \ud83d\udd5b\ud83d\udd50\ud83d\udd51\ud83d\udd52\ud83d\udd53\ud83d\udd54\ud83d\udd55\ud83d\udd56\ud83d\udd57\ud83d\udd58\ud83d\udd59\ud83d\udd5a\ud83d\udd5b. Now that they are finished, we can read the results!

\n\n
\n
from slune import get_csv_slog\ncsv_slog = get_csv_slog(params = None)\nparams, value = csv_slog.read(params = [], metric_name = 'mse', select_by = 'min')\nprint(f'Best hyperparameters: {params}')\nprint(f'Their MSE: {value}')\n
\n
\n\n

Amazing! \ud83e\udd73 We have successfully used slune to train our model. I hope this gives you a good flavour of how you can use slune and how easy it is to use!

\n\n

Please check out the examples folder for notebooks detailing in more depth some potential ways you can use slune. The docs are not yet up and running \ud83d\ude22 but they are coming soon!

\n\n

Roadmap

\n\n
  • Make package user friendly:
      • Go through automation settings.
      • Code of conduct.
      • Contributing guidelines.
      • Add to pypi.

Still in early stages! First thing on the horizon is better integration with SLURM:

  • Set-up notifications for job completion, failure, etc.
  • Auto job naming, job output naming and job output location saving.
  • Auto save logged results when finishing a job.
  • Automatically re-submit failed jobs.
  • Tools for monitoring and cancelling jobs.

Then it will be looking at adding more savers, loggers and searchers! For example integration with tensorboard, saving to one csv file (as opposed to a hierarchy of csv files in different directories) and different search methods like random search and cross validation. It would perhaps also be beneficial to be able to interface with other languages like R and Julia. Finally, more helper functions!
\n\n

However, I am trying to keep this package as bloatless as possible to make it easy for you to tweak and configure to your individual needs. It's written in a simple and compartmentalized manner for this reason. You can of course use the helper functions and let slune handle everything under the hood, but you can also very quickly and easily write your own classes to work with other savers, loggers and searchers to do as you please.

\n\n

Installation

\n\n

To install the latest version use:

\n\n
\n
pip install slune-lib\n
\n
\n\n

To install the latest dev version use (CURRENTLY RECOMMENDED):

\n\n
\n
# With https\npip install "git+https://github.com/h-aze/slune.git#egg=slune-lib"\n
\n
\n\n

Class Design

\n\n

Here we will outline the different kinds of classes that are used in slune and how they interact with each other. There are 3 types:

\n\n
    \n
  • 'Searcher' classes - used to define and traverse a search space.
  • \n
  • 'Logger' classes - used to create and read log files.
  • \n
  • 'Saver' classes - used to save logs to files and read logs from files.
  • \n
\n\n

The base module is where the base classes for each of these types are defined. The base classes are:

\n\n
    \n
  • BaseSearcher
  • \n
  • BaseLogger
  • \n
  • BaseSaver
  • \n
\n\n

To create a new searcher, logger or saver, you must inherit from the appropriate base class and implement the required methods. The required methods will have the '@abc.abstractmethod' decorator above them and will throw errors if they are not implemented. The compulsory methods allow for well-defined interactions between the different classes and should allow for any combination of searcher, logger and saver to be used together.

\n\n

Please read the docs for the base classes to see which methods must be implemented and how.

\n"}, {"fullname": "src.slune", "modulename": "src.slune", "kind": "module", "doc": "

\n"}, {"fullname": "src.slune.base", "modulename": "src.slune.base", "kind": "module", "doc": "

\n"}, {"fullname": "src.slune.base.BaseSearcher", "modulename": "src.slune.base", "qualname": "BaseSearcher", "kind": "class", "doc": "

Base class for all Searchers.

\n\n

This must be subclassed to create different Searcher classes.\nPlease name your searcher class Searcher.\nOutlines a protocol for creating a search space and creating configurations from it.\nMethods document what they should do once implemented.

\n"}, {"fullname": "src.slune.base.BaseSearcher.__init__", "modulename": "src.slune.base", "qualname": "BaseSearcher.__init__", "kind": "function", "doc": "

Initialises the searcher.

\n", "signature": "(*args, **kwargs)"}, {"fullname": "src.slune.base.BaseSearcher.next_tune", "modulename": "src.slune.base", "qualname": "BaseSearcher.next_tune", "kind": "function", "doc": "

Returns the next configuration to try.

\n", "signature": "(self, *args, **kwargs):", "funcdef": "def"}, {"fullname": "src.slune.base.BaseSearcher.check_existing_runs", "modulename": "src.slune.base", "qualname": "BaseSearcher.check_existing_runs", "kind": "function", "doc": "

Used to tell searcher to check if there are existing runs in storage.

\n\n

If there are existing runs, the searcher should skip them \nbased on the number of runs we would like for each job.\nThis may require a 'runs' attribute to be set in the searcher.\nIt will probably also require access to a Saver object,\nso we can use its saving protocol to check if there are existing runs.\nIn this case it is advised that this function takes a Saver object as an argument,\nand that the searcher is initialized with a 'runs' attribute.

\n", "signature": "(self, *args, **kwargs):", "funcdef": "def"}, {"fullname": "src.slune.base.BaseLogger", "modulename": "src.slune.base", "qualname": "BaseLogger", "kind": "class", "doc": "

Base class for all Loggers.

\n\n

This must be subclassed to implement different Logger classes.\nPlease name your logger class Logger.\nOutlines a protocol for logging metrics and reading from the logs.\nMethods document what they should do once implemented.

\n"}, {"fullname": "src.slune.base.BaseLogger.__init__", "modulename": "src.slune.base", "qualname": "BaseLogger.__init__", "kind": "function", "doc": "

Initialises the logger.

\n", "signature": "(*args, **kwargs)"}, {"fullname": "src.slune.base.BaseLogger.log", "modulename": "src.slune.base", "qualname": "BaseLogger.log", "kind": "function", "doc": "

Logs the metric/s for the current hyperparameter configuration.

\n\n

Should store metrics in some way so we can later save it using a Saver.

\n", "signature": "(self, *args, **kwargs):", "funcdef": "def"}, {"fullname": "src.slune.base.BaseLogger.read_log", "modulename": "src.slune.base", "qualname": "BaseLogger.read_log", "kind": "function", "doc": "

Returns value of a metric from the log based on a selection criteria.

\n", "signature": "(self, *args, **kwargs):", "funcdef": "def"}, {"fullname": "src.slune.base.BaseSaver", "modulename": "src.slune.base", "qualname": "BaseSaver", "kind": "class", "doc": "

Base class for all savers.

\n\n

This must be subclassed to implement different Saver classes.\nPlease name your saver class Saver.\nOutlines a protocol for saving and reading results to/from storage.\nMethods document what they should do once implemented.

\n"}, {"fullname": "src.slune.base.BaseSaver.__init__", "modulename": "src.slune.base", "qualname": "BaseSaver.__init__", "kind": "function", "doc": "

Initialises the saver.

\n\n

Assigns the logger instance to self.logger and makes its methods accessible through self.log and self.read_log.

\n\n
Arguments:
\n\n
    \n
  • - logger_instance (BaseLogger): Instance of a logger class that inherits from BaseLogger.
  • \n
\n", "signature": "(logger_instance: src.slune.base.BaseLogger, *args, **kwargs)"}, {"fullname": "src.slune.base.BaseSaver.logger", "modulename": "src.slune.base", "qualname": "BaseSaver.logger", "kind": "variable", "doc": "

\n"}, {"fullname": "src.slune.base.BaseSaver.log", "modulename": "src.slune.base", "qualname": "BaseSaver.log", "kind": "variable", "doc": "

\n"}, {"fullname": "src.slune.base.BaseSaver.read_log", "modulename": "src.slune.base", "qualname": "BaseSaver.read_log", "kind": "variable", "doc": "

\n"}, {"fullname": "src.slune.base.BaseSaver.save_collated", "modulename": "src.slune.base", "qualname": "BaseSaver.save_collated", "kind": "function", "doc": "

Saves the current results in logger to storage.

\n", "signature": "(self, *args, **kwargs):", "funcdef": "def"}, {"fullname": "src.slune.base.BaseSaver.read", "modulename": "src.slune.base", "qualname": "BaseSaver.read", "kind": "function", "doc": "

Reads results from storage.

\n", "signature": "(self, *args, **kwargs):", "funcdef": "def"}, {"fullname": "src.slune.base.BaseSaver.exists", "modulename": "src.slune.base", "qualname": "BaseSaver.exists", "kind": "function", "doc": "

Checks if results already exist in storage.

\n\n

Should return an integer indicating the number of runs that exist in storage for the given parameters.

\n", "signature": "(self, *args, **kwargs):", "funcdef": "def"}, {"fullname": "src.slune.loggers", "modulename": "src.slune.loggers", "kind": "module", "doc": "

\n"}, {"fullname": "src.slune.loggers.default", "modulename": "src.slune.loggers.default", "kind": "module", "doc": "

\n"}, {"fullname": "src.slune.loggers.default.LoggerDefault", "modulename": "src.slune.loggers.default", "qualname": "LoggerDefault", "kind": "class", "doc": "

Logs metric/s in a data frame.

\n\n

Stores the metric/s in a data frame that we can later save in storage.\nLogs by creating data frame out of the metrics and then appending it to the current results data frame.

\n\n
Attributes:
\n\n
    \n
  • - results (pd.DataFrame): Data frame containing all the metrics logged so far.\nEach row stores all the metrics that were given in a call to the 'log' method,\neach column title is a metric name.\nThe first column is always the time stamp at which 'log' is called.
  • \n
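As a quick illustration of the behaviour described above (a hypothetical session; it assumes the installed package exposes the module tree shown in these docs and that pandas is available):

```python
from slune.loggers.default import LoggerDefault

logger = LoggerDefault()
logger.log({'mse': 0.42, 'mae': 0.31})  # stored as one row, timestamp in the first column
logger.log({'mse': 0.40})               # appended as a second row
print(logger.results)                   # pd.DataFrame of everything logged so far
```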
\n", "bases": "slune.base.BaseLogger"}, {"fullname": "src.slune.loggers.default.LoggerDefault.__init__", "modulename": "src.slune.loggers.default", "qualname": "LoggerDefault.__init__", "kind": "function", "doc": "

Initialises the logger.

\n", "signature": "(*args, **kwargs)"}, {"fullname": "src.slune.loggers.default.LoggerDefault.results", "modulename": "src.slune.loggers.default", "qualname": "LoggerDefault.results", "kind": "variable", "doc": "

\n"}, {"fullname": "src.slune.loggers.default.LoggerDefault.log", "modulename": "src.slune.loggers.default", "qualname": "LoggerDefault.log", "kind": "function", "doc": "

Logs the metric/s given.

\n\n

Stores them in a data frame that we can later save in storage.\nAll metrics provided will be saved as a row in the results data frame,\nthe first column is always the time stamp at which log is called.

\n\n
Arguments:
\n\n
    \n
  • - metrics (dict): Metrics to be logged, keys are metric names and values are metric values.\nEach metric should only have one value! So please log as soon as you get a metric.
  • \n
\n", "signature": "(self, metrics: dict):", "funcdef": "def"}, {"fullname": "src.slune.loggers.default.LoggerDefault.read_log", "modulename": "src.slune.loggers.default", "qualname": "LoggerDefault.read_log", "kind": "function", "doc": "

Reads log and returns value according to select_by.

\n\n

Reads the values for given metric for given log and chooses metric value to return based on select_by.

\n\n
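A minimal sketch of the select_by behaviour described above (illustrative only, not the library's actual implementation):

```python
import pandas as pd

def select_value(df: pd.DataFrame, metric_name: str, select_by: str = 'max') -> float:
    # Pick the 'best' logged value of one metric column.
    if select_by == 'min':
        return float(df[metric_name].min())
    return float(df[metric_name].max())
```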
Arguments:
\n\n
    \n
  • - data_frame (pd.DataFrame): Data frame containing the metric to be read.
  • \n
  • - metric_name (str): Name of the metric to be read.
  • \n
  • - select_by (str, optional): How to select the 'best' metric, currently can select by 'min' or 'max'.
  • \n
\n\n
Returns:
\n\n
\n
    \n
  • value (float): Minimum or maximum value of the metric.
  • \n
\n
\n\n

TODO: \n - Add more options for select_by.\n - Should be able to return other types than float?

\n", "signature": "(\tself,\tdata_frame: pandas.core.frame.DataFrame,\tmetric_name: str,\tselect_by: str = 'max') -> float:", "funcdef": "def"}, {"fullname": "src.slune.savers", "modulename": "src.slune.savers", "kind": "module", "doc": "

\n"}, {"fullname": "src.slune.savers.csv", "modulename": "src.slune.savers.csv", "kind": "module", "doc": "

\n"}, {"fullname": "src.slune.savers.csv.SaverCsv", "modulename": "src.slune.savers.csv", "qualname": "SaverCsv", "kind": "class", "doc": "

Saves the results of each run in a CSV file in hierarchy of directories.

\n\n

Each directory is named after a parameter - value pair in the form \"--parameter_name=value\".\nThe paths to csv files then define the configuration under which the results were obtained,\nfor example if we only have one parameter \"learning_rate\" with value 0.01 used to obtain the results,\nto save those results we would create a directory named \"--learning_rate=0.01\" and save the results in a csv file in that directory.

\n\n

If we have multiple parameters, for example \"learning_rate\" with value 0.01 and \"batch_size\" with value 32,\nwe would create a directory named \"--learning_rate=0.01\" with a subdirectory named \"--batch_size=32\",\nand save the results in a csv file in that subdirectory.

\n\n

We use this structure to then read the results from the csv files by searching for the directory that matches the parameters we want,\nand then reading the csv file in that directory.

\n\n

The order in which we create the directories is determined by the order in which the parameters are given,\nso if we are given [\"--learning_rate=0.01\", \"--batch_size=32\"] we would create the directories in the following order:\n\"--learning_rate=0.01/--batch_size=32\".

\n\n

The directory structure generated will also depend on existing directories in the root directory:\nif there are existing directories in the root directory that match some subset of the parameters given,\nwe will create the directory tree from the deepest matching directory.

\n\n

For example if we only have the following path in the root directory:\n\"--learning_rate=0.01/--batch_size=32\"\nand we are given the parameters [\"--learning_rate=0.01\", \"--batch_size=32\", \"--num_epochs=10\"],\nwe will create the path:\n\"--learning_rate=0.01/--batch_size=32/--num_epochs=10\".\nOn the other hand, if we are given the parameters [\"--learning_rate=0.02\", \"--num_epochs=10\", \"--batch_size=32\"],\nwe will create the path:\n\"--learning_rate=0.02/--batch_size=32/--num_epochs=10\".

\n\n

Handles parallel runs trying to create the same directories by waiting a random time (under 1 second) before creating the directory.\nThis should work pretty well in practice; however, it may occasionally fail depending on the number of jobs launched at the same time.

\n\n
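The layout described above can be pictured with a short sketch (illustrative only; the directory names follow the "--parameter_name=value" convention and the default root './tuning_results' from the docs below):

```python
import os

params = ['--learning_rate=0.01', '--batch_size=32']
path = os.path.join('./tuning_results', *params)  # one directory per parameter
# -> './tuning_results/--learning_rate=0.01/--batch_size=32'
# The run's results csv (e.g. results_0.csv) then lives in that deepest directory.
```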
Attributes:
\n\n
    \n
  • - root_dir (str): Path to the root directory where we will store the csv files.
  • \n
  • - current_path (str): Path to the csv file where we will store the results for the current run.
  • \n
\n", "bases": "slune.base.BaseSaver"}, {"fullname": "src.slune.savers.csv.SaverCsv.__init__", "modulename": "src.slune.savers.csv", "qualname": "SaverCsv.__init__", "kind": "function", "doc": "

Initialises the csv saver.

\n\n
Arguments:
\n\n
    \n
  • - logger_instance (BaseLogger): Instance of a logger class that inherits from BaseLogger.
  • \n
  • - params (list, optional): List of strings containing the parameters used, in form [\"--parameter_name=parameter_value\", ...], default is None.\nIf None, we will create a path using the parameters given in the log.
  • \n
  • - root_dir (str, optional): Path to the root directory where we will store the csv files, default is './tuning_results'.
  • \n
\n", "signature": "(\tlogger_instance: slune.base.BaseLogger,\tparams: List[str] = None,\troot_dir: Optional[str] = './tuning_results')"}, {"fullname": "src.slune.savers.csv.SaverCsv.root_dir", "modulename": "src.slune.savers.csv", "qualname": "SaverCsv.root_dir", "kind": "variable", "doc": "

\n"}, {"fullname": "src.slune.savers.csv.SaverCsv.strip_params", "modulename": "src.slune.savers.csv", "qualname": "SaverCsv.strip_params", "kind": "function", "doc": "

Strips the parameter values.

\n\n

Strips the parameter values from the list of parameters given,\nie. [\"--parameter_name=parameter_value\", ...] -> [\"--parameter_name=\", ...]

\n\n

Also gets rid of blank spaces.

\n\n
Arguments:
\n\n
    \n
  • - params (list of str): List of strings containing the parameters used, in form [\"--parameter_name=parameter_value\", ...].
  • \n
\n\n
Returns:
\n\n
\n
    \n
  • stripped_params (list of str): List of strings containing the parameters used, in form [\"--parameter_name=\", ...].
  • \n
\n
\n", "signature": "(self, params: List[str]) -> List[str]:", "funcdef": "def"}, {"fullname": "src.slune.savers.csv.SaverCsv.get_match", "modulename": "src.slune.savers.csv", "qualname": "SaverCsv.get_match", "kind": "function", "doc": "

Searches the root directory for a path that matches the parameters given.

\n\n

If only partial matches are found, returns the deepest matching directory with the missing parameters appended.\nBy deepest we mean the directory with the most parameters matching.\nIf no matches are found, creates a path using the parameters.\nCreates the path using parameters in the order they are given, \nie. [\"--learning_rate=0.01\", \"--batch_size=32\"] -> \"--learning_rate=0.01/--batch_size=32\".

\n\n

If we find a partial match, we add the missing parameters to the end of the path,\nie. if we have the path \"--learning_rate=0.01\" in the root \nand are given the parameters [\"--learning_rate=0.01\", \"--batch_size=32\"],\nwe will create the path \"--learning_rate=0.01/--batch_size=32\".

\n\n
Arguments:
\n\n
    \n
  • - params (list of str): List of strings containing the arguments used, in form [\"--argument_name=argument_value\", ...].
  • \n
\n\n
Returns:
\n\n
\n
    \n
  • match (str): Path to the directory that matches the parameters given.
  • \n
\n
\n", "signature": "(self, params: List[str]) -> str:", "funcdef": "def"}, {"fullname": "src.slune.savers.csv.SaverCsv.get_path", "modulename": "src.slune.savers.csv", "qualname": "SaverCsv.get_path", "kind": "function", "doc": "

Creates a path using the parameters.

\n\n

Does this by first checking for existing paths in the root directory that match the parameters given.

\n\n

See get_match for how we create the path.\nOnce we have the path, we check if there is already a csv file with results at that path;\nif there is, we increment the number in the name of the results file that we will use.

\n\n

For example if we get back the path \"--learning_rate=0.01/--batch_size=32\",\nand there exists a csv file named \"results_0.csv\" in the final directory,\nwe will name our csv file \"results_1.csv\".

\n\n
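The numbering scheme described above amounts to something like this sketch (not the actual implementation):

```python
import os

def next_results_path(dir_path: str) -> str:
    # Find the first unused results_<n>.csv name in dir_path.
    n = 0
    while os.path.exists(os.path.join(dir_path, f'results_{n}.csv')):
        n += 1
    return os.path.join(dir_path, f'results_{n}.csv')
```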
Arguments:
\n\n
    \n
  • - params (list of str): List of strings containing the arguments used, in form [\"--argument_name=argument_value\", ...].
  • \n
\n\n
Returns:
\n\n
\n
    \n
  • csv_file_path (str): Path to the csv file where we will store the results for the current run.
  • \n
\n
\n", "signature": "(self, params: List[str]) -> str:", "funcdef": "def"}, {"fullname": "src.slune.savers.csv.SaverCsv.save_collated_from_results", "modulename": "src.slune.savers.csv", "qualname": "SaverCsv.save_collated_from_results", "kind": "function", "doc": "

Saves results to csv file.

\n\n

If the csv file already exists, \nwe append the collated results from the logger to the end of the csv file.\nIf the csv file does not exist,\nwe create it and save the results to it.

\n\n
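One common way to implement the append-or-create behaviour described above with pandas (a sketch under those assumptions, not necessarily slune's code):

```python
import os
import pandas as pd

def append_or_create(results: pd.DataFrame, csv_path: str) -> None:
    # Append to an existing csv, or create it (with a header) if absent.
    exists = os.path.exists(csv_path)
    results.to_csv(csv_path, mode='a' if exists else 'w', header=not exists, index=False)
```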
Arguments:
\n\n
    \n
  • - results (pd.DataFrame): Data frame containing the results to be saved.
  • \n
\n\n

TODO: \n - Could be making too many assumptions about the format in which we get the results from the logger,\n should be able to work with any logger.\n We should only be assuming that we are saving results to a csv file.

\n", "signature": "(self, results: pandas.core.frame.DataFrame):", "funcdef": "def"}, {"fullname": "src.slune.savers.csv.SaverCsv.save_collated", "modulename": "src.slune.savers.csv", "qualname": "SaverCsv.save_collated", "kind": "function", "doc": "

Saves results to csv file.

\n", "signature": "(self):", "funcdef": "def"}, {"fullname": "src.slune.savers.csv.SaverCsv.read", "modulename": "src.slune.savers.csv", "qualname": "SaverCsv.read", "kind": "function", "doc": "

Finds the min/max value of a metric from all csv files in the root directory that match the parameters given.

\n\n
Arguments:
\n\n
    \n
  • - params (list of str): Contains the parameters used, in form [\"--parameter_name=parameter_value\", ...].
  • \n
  • - metric_name (string): Name of the metric to be read.
  • \n
  • - select_by (string, optional): How to select the 'best' value for the metric from a log file, currently can select by 'min' or 'max'.
  • \n
  • - avg (bool, optional): Whether to average the metric over all runs, default is True.
  • \n
\n\n
Returns:
\n\n
\n
    \n
  • best_params (list of str): Contains the arguments used to get the 'best' value of the metric (determined by select_by).
  • \n
  • best_value (float): Best value of the metric (determined by select_by).
  • \n
\n
\n", "signature": "(\tself,\tparams: List[str],\tmetric_name: str,\tselect_by: str = 'max',\tavg: bool = True) -> (typing.List[str], <class 'float'>):", "funcdef": "def"}, {"fullname": "src.slune.savers.csv.SaverCsv.exists", "modulename": "src.slune.savers.csv", "qualname": "SaverCsv.exists", "kind": "function", "doc": "

Checks if results already exist in storage.

\n\n
Arguments:
\n\n
    \n
  • - params (list of str): Contains the parameters used, in form [\"--parameter_name=parameter_value\", ...].
  • \n
\n\n
Returns:
\n\n
\n
    \n
  • num_runs (int): Number of runs that exist in storage for the given parameters.
  • \n
\n
\n", "signature": "(self, params: List[str]) -> int:", "funcdef": "def"}, {"fullname": "src.slune.savers.csv.SaverCsv.get_current_path", "modulename": "src.slune.savers.csv", "qualname": "SaverCsv.get_current_path", "kind": "function", "doc": "

Getter function for the current_path attribute.

\n\n
Returns:
\n\n
\n
    \n
  • current_path (str): Path to the csv file where we will store the results for the current run.
  • \n
\n
\n", "signature": "(self) -> str:", "funcdef": "def"}, {"fullname": "src.slune.searchers", "modulename": "src.slune.searchers", "kind": "module", "doc": "

\n"}, {"fullname": "src.slune.searchers.grid", "modulename": "src.slune.searchers.grid", "kind": "module", "doc": "

\n"}, {"fullname": "src.slune.searchers.grid.SearcherGrid", "modulename": "src.slune.searchers.grid", "qualname": "SearcherGrid", "kind": "class", "doc": "

Searcher for grid search.

\n\n

Given a dictionary of parameters and values to try, creates a grid of all possible configurations,\nand returns them one by one for each call to next_tune.

\n\n
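The grid described above is a cartesian product of the value lists. A sketch of the idea (slune's own get_grid uses recursion, per its docstring below):

```python
from itertools import product

def make_grid(configs: dict) -> list:
    # All combinations of values, one dict per configuration.
    keys = list(configs)
    return [dict(zip(keys, values)) for values in product(*configs.values())]

make_grid({'--alpha': [0.25, 0.5], '--batch_size': [32, 64]})
# -> [{'--alpha': 0.25, '--batch_size': 32}, {'--alpha': 0.25, '--batch_size': 64}, ...]
```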
Attributes:
\n\n
    \n
  • - configs (dict): Parameters and values to create grid from.\nStructure of dictionary should be: { \"--parameter_name\" : [Value_1, Value_2, ...], ... }
  • \n
  • - runs (int): Controls the search based on the number of runs we want for each config.\nif runs > 0 -> run each config 'runs' times.\nif runs = 0 -> run each config once even if it already exists.\nThis behavior is modified if we use check_existing_runs; see that method's description.
  • \n
  • - grid (list of dict): List of dictionaries, each containing one combination of argument values.
  • \n
  • - grid_index (int): Index of the current configuration in the grid.
  • \n
  • - saver_exists (function): Pointer to the saver's exists method, used to check if there are existing runs.
  • \n
\n", "bases": "slune.base.BaseSearcher"}, {"fullname": "src.slune.searchers.grid.SearcherGrid.__init__", "modulename": "src.slune.searchers.grid", "qualname": "SearcherGrid.__init__", "kind": "function", "doc": "

Initialises the searcher.

\n\n
Arguments:
\n\n
    \n
  • - configs (dict): Dictionary of parameters and values to try.\nStructure of dictionary should be: { \"--parameter_name\" : [Value_1, Value_2, ...], ... }
  • \n
  • - runs (int, optional): Controls the search based on the number of runs we want for each config.\nif runs > 0 -> run each config 'runs' times.\nif runs = 0 -> run each config once even if it already exists.\nThis behavior is modified if we use check_existing_runs; see that method's description.
  • \n
\n", "signature": "(configs: dict, runs: int = 0)"}, {"fullname": "src.slune.searchers.grid.SearcherGrid.runs", "modulename": "src.slune.searchers.grid", "qualname": "SearcherGrid.runs", "kind": "variable", "doc": "

\n"}, {"fullname": "src.slune.searchers.grid.SearcherGrid.configs", "modulename": "src.slune.searchers.grid", "qualname": "SearcherGrid.configs", "kind": "variable", "doc": "

\n"}, {"fullname": "src.slune.searchers.grid.SearcherGrid.grid", "modulename": "src.slune.searchers.grid", "qualname": "SearcherGrid.grid", "kind": "variable", "doc": "

\n"}, {"fullname": "src.slune.searchers.grid.SearcherGrid.grid_index", "modulename": "src.slune.searchers.grid", "qualname": "SearcherGrid.grid_index", "kind": "variable", "doc": "

\n"}, {"fullname": "src.slune.searchers.grid.SearcherGrid.saver_exists", "modulename": "src.slune.searchers.grid", "qualname": "SearcherGrid.saver_exists", "kind": "variable", "doc": "

\n"}, {"fullname": "src.slune.searchers.grid.SearcherGrid.get_grid", "modulename": "src.slune.searchers.grid", "qualname": "SearcherGrid.get_grid", "kind": "function", "doc": "

Creates search grid.

\n\n

Generates all possible combinations of values for each argument in the given dictionary using recursion.

\n\n
Arguments:
\n\n
    \n
  • - param_dict (dict): A dictionary where keys are argument names and values are lists of values.
  • \n
\n\n
Returns:
\n\n
\n
    \n
  • all_combinations (list): A list of dictionaries, each containing one combination of argument values.
  • \n
\n
\n", "signature": "(self, param_dict: dict) -> List:", "funcdef": "def"}, {"fullname": "src.slune.searchers.grid.SearcherGrid.check_existing_runs", "modulename": "src.slune.searchers.grid", "qualname": "SearcherGrid.check_existing_runs", "kind": "function", "doc": "

We save a pointer to the saver's exists method to check if there are existing runs.

\n\n
If there are n existing runs:
\n\n
\n

n < runs -> run the remaining runs\n n >= runs -> skip all runs

\n
\n\n
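The rule above reduces to launching only the shortfall; as a tiny sketch:

```python
def runs_to_launch(existing_runs: int, desired_runs: int) -> int:
    # n < runs -> run the remaining runs; n >= runs -> skip all runs.
    return max(desired_runs - existing_runs, 0)
```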
Arguments:
\n\n
    \n
  • - saver (BaseSaver): Saver object whose exists method is used to check if there are existing runs.
  • \n
\n", "signature": "(self, saver: slune.base.BaseSaver):", "funcdef": "def"}, {"fullname": "src.slune.searchers.grid.SearcherGrid.skip_existing_runs", "modulename": "src.slune.searchers.grid", "qualname": "SearcherGrid.skip_existing_runs", "kind": "function", "doc": "

Skips runs if they are in storage already.

\n\n

Will check if there are existing runs for the current configuration,\nif there are existing runs we tally them up \nand skip configs or runs of a config based on the number of runs we want for each config.

\n\n
Arguments:
\n\n
    \n
  • - grid_index (int): Index of the current configuration in the grid.
  • \n
\n\n
Returns:
\n\n
\n
    \n
  • grid_index (int): Index of the next configuration in the grid.
  • \n
  • run_index (int): Index of the next run for the current configuration.
  • \n
\n
\n", "signature": "(self, grid_index: int) -> Tuple[int, int]:", "funcdef": "def"}, {"fullname": "src.slune.searchers.grid.SearcherGrid.next_tune", "modulename": "src.slune.searchers.grid", "qualname": "SearcherGrid.next_tune", "kind": "function", "doc": "

Returns the next configuration to try.

\n\n

Will skip existing runs if check_existing_runs has been called.\nFor more information on how this works check the methods descriptions for check_existing_runs and skip_existing_runs.\nWill raise an error if we have reached the end of the grid.\nTo iterate through all configurations, use a for loop like so: \n for config in searcher: ...

\n\n
Returns:
\n\n
\n
    \n
  • next_config (dict): The next configuration to try.
  • \n
\n
\n", "signature": "(self) -> dict:", "funcdef": "def"}, {"fullname": "src.slune.slune", "modulename": "src.slune.slune", "kind": "module", "doc": "

\n"}, {"fullname": "src.slune.slune.submit_job", "modulename": "src.slune.slune", "qualname": "submit_job", "kind": "function", "doc": "

Submits a job using the specified Bash script.

\n\n
Arguments:
\n\n
    \n
  • - sh_path (string): Path to the Bash script to be run.
  • \n
  • - args (list of str): List of strings containing the arguments to be passed to the Bash script.
  • \n
\n", "signature": "(sh_path: str, args: List[str]):", "funcdef": "def"}, {"fullname": "src.slune.slune.sbatchit", "modulename": "src.slune.slune", "qualname": "sbatchit", "kind": "function", "doc": "

Submits jobs based on arguments given by searcher.

\n\n

For each job runs the script stored at script_path with selected parameter values given by searcher\nand the arguments given by cargs.

\n\n

Uses the sbatch script with path sbatch_path to submit each job to the cluster.

\n\n

If given a Saver object, uses it to check if there are existing runs for each job and skips them,\nbased on the number of runs we would like for each job (which is stored in the saver).

\n\n
Arguments:
\n\n
    \n
  • - script_path (str): Path to the script (of the model) to be run for each job.
  • \n
  • - sbatch_path (str): Path to the sbatch script that will be used to submit each job.\nExamples of sbatch scripts can be found in the templates folder.
  • \n
  • - searcher (Searcher): Searcher object used to retrieve changing arguments for each job.
  • \n
  • - cargs (list, optional): Contains arguments to be passed to the script for every job.
  • \n
  • - saver (Saver, optional): Saver object used if we want to check for existing runs so we don't rerun them.\nSimply omit the Saver object if you want to rerun all jobs.
  • \n
\n", "signature": "(\tscript_path: str,\tsbatch_path: str,\tsearcher: slune.base.BaseSearcher,\tcargs: Optional[List] = [],\tsaver: Optional[slune.base.BaseSaver] = None):", "funcdef": "def"}, {"fullname": "src.slune.slune.lsargs", "modulename": "src.slune.slune", "qualname": "lsargs", "kind": "function", "doc": "

Returns the script name and a list of the arguments passed to the script.

\n", "signature": "() -> (<class 'str'>, typing.List[str]):", "funcdef": "def"}, {"fullname": "src.slune.slune.garg", "modulename": "src.slune.slune", "qualname": "garg", "kind": "function", "doc": "

Finds the argument/s with name arg_names in the list of arguments args and returns their value/s.

\n\n
Arguments:
\n\n
    \n
  • - args (list of str): List of strings containing the arguments to be searched.
  • \n
  • - arg_names (str or list of str): String or list of strings containing the names of the arguments to be searched for.
  • \n
\n\n
Returns:
\n\n
\n
    \n
  • arg_value (str or list of str): String or list of strings containing the values of the arguments found.
  • \n
\n
\n", "signature": "(\targs: List[str],\targ_names: Union[str, List[str]]) -> Union[str, List[str]]:", "funcdef": "def"}, {"fullname": "src.slune.slune.get_csv_slog", "modulename": "src.slune.slune", "qualname": "get_csv_slog", "kind": "function", "doc": "

Returns a SaverCsv object with the given parameters and root directory.

\n\n
Arguments:
\n\n
    \n
  • - params (dict, optional): Dictionary of parameters to be passed to the SaverCsv object, default is None.
  • \n
  • - root_dir (str, optional): Path to the root directory to be used by the SaverCsv object, default is 'slune_results'.
  • \n
\n\n
Returns:
\n\n
\n
    \n
  • SaverCsv (Saver): Saver object with the given parameters and root directory.\n Initialized with a LoggerDefault object as its logger.
  • \n
\n
\n", "signature": "(\tparams: Optional[dict] = None,\troot_dir: Optional[str] = 'slune_results') -> slune.base.BaseSaver:", "funcdef": "def"}, {"fullname": "src.slune.utils", "modulename": "src.slune.utils", "kind": "module", "doc": "

\n"}, {"fullname": "src.slune.utils.find_directory_path", "modulename": "src.slune.utils", "qualname": "find_directory_path", "kind": "function", "doc": "

Searches the root directory for a path of directories that matches the strings given in any order.\nIf only a partial match is found, returns the deepest matching path.\nIf no matches are found returns root_directory.\nReturns a stripped matching path of directories, ie. where we convert '--string=value' to '--string='.

\n\n
Arguments:
\n\n
    \n
  • - strings (list of str): List of strings to be matched in any order. Each string in list must be in the form '--string='.
  • \n
  • - root_directory (string, optional): Path to the root directory to be searched, default is current working directory.
  • \n
\n\n
Returns:
\n\n
\n
    \n
  • max_depth (int): Depth of the deepest matching path.
  • \n
  • max_path (string): Path of the deepest matching path.
  • \n
\n
\n", "signature": "(\tstrings: List[str],\troot_directory: Optional[str] = '.') -> Tuple[int, str]:", "funcdef": "def"}, {"fullname": "src.slune.utils.get_numeric_equiv", "modulename": "src.slune.utils", "qualname": "get_numeric_equiv", "kind": "function", "doc": "

Replaces directories in path with existing directories with the same numerical value.

\n\n
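For example, '--learning_rate=0.1' and '--learning_rate=0.100' should resolve to the same directory. A sketch of that equivalence check (illustrative; it assumes the value part parses as a float):

```python
def same_numeric_dir(a: str, b: str) -> bool:
    # '--key=0.1' and '--key=0.100' are numerically equivalent.
    key_a, _, val_a = a.partition('=')
    key_b, _, val_b = b.partition('=')
    return key_a == key_b and float(val_a) == float(val_b)
```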
Arguments:
\n\n
    \n
  • - og_path (str): Path we want to check against existing paths, must be a subdirectory of root_directory and each directory must have form '--string=value'.
  • \n
  • - root_directory (str, optional): Path to the root directory to be searched, default is current working directory.
  • \n
\n\n
Returns:
\n\n
\n
    \n
  • equiv (str): Path with values changed to match existing directories if values are numerically equivalent, with root directory at beginning.
  • \n
\n
\n", "signature": "(og_path: str, root_directory: Optional[str] = '.') -> str:", "funcdef": "def"}, {"fullname": "src.slune.utils.dict_to_strings", "modulename": "src.slune.utils", "qualname": "dict_to_strings", "kind": "function", "doc": "

Converts a dictionary into a list of strings in the form of '--key=value'.

\n\n
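A hypothetical call is shown below; whether the function adds the '--' prefix itself is not stated here, so the keys are shown already prefixed, as in the SearcherGrid configs above:

```python
from slune.utils import dict_to_strings

dict_to_strings({'--learning_rate': 0.01, '--batch_size': 32})
# -> ['--learning_rate=0.01', '--batch_size=32']
```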
Arguments:
\n\n
    \n
  • - d (dict): Dictionary to be converted.
  • \n
\n\n
Returns:
\n\n
\n
    \n
  • out (list of str): List of strings in the form of '--key=value'.
  • \n
\n
\n", "signature": "(d: dict) -> List[str]:", "funcdef": "def"}, {"fullname": "src.slune.utils.find_csv_files", "modulename": "src.slune.utils", "qualname": "find_csv_files", "kind": "function", "doc": "

Recursively finds all csv files in all subdirectories of the root directory and returns their paths.

\n\n
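A minimal sketch of the recursive search described above (os.walk gives the same effect; not the library's actual code):

```python
import os
from typing import List

def find_csv_files_sketch(root_directory: str = '.') -> List[str]:
    # Walk every subdirectory and collect paths ending in .csv.
    return [os.path.join(dirpath, name)
            for dirpath, _, filenames in os.walk(root_directory)
            for name in filenames if name.endswith('.csv')]
```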
Arguments:
\n\n
    \n
  • - root_directory (str, optional): Path to the root directory to be searched, default is current working directory.
  • \n
\n\n
Returns:
\n\n
\n
    \n
  • csv_files (list of str): List of strings containing the paths to all csv files found.
  • \n
\n
\n", "signature": "(root_directory: Optional[str] = '.') -> List[str]:", "funcdef": "def"}, {"fullname": "src.slune.utils.get_all_paths", "modulename": "src.slune.utils", "qualname": "get_all_paths", "kind": "function", "doc": "

Finds all possible paths of csv files that have a directory matching each of the parameters given.

\n\n

Finds all paths of csv files in all subdirectories of the root directory that have a directory in their path matching one of each of all the parameters given.

\n\n
Arguments:
\n\n
    \n
  • - dirs (list of str): List of directory names we want returned paths to have in their path.
  • \n
  • - root_directory (str, optional): Path to the root directory to be searched, default is current working directory.
  • \n
\n\n
Returns:
\n\n
\n
    \n
  • matches (list of str): List of strings containing the paths to all csv files found.
  • \n
\n
\n", "signature": "(dirs: List[str], root_directory: Optional[str] = '.') -> List[str]:", "funcdef": "def"}]; + + // mirrored in build-search-index.js (part 1) + // Also split on html tags. this is a cheap heuristic, but good enough. + elasticlunr.tokenizer.setSeperator(/[\s\-.;&_'"=,()]+|<[^>]*>/); + + let searchIndex; + if (docs._isPrebuiltIndex) { + console.info("using precompiled search index"); + searchIndex = elasticlunr.Index.load(docs); + } else { + console.time("building search index"); + // mirrored in build-search-index.js (part 2) + searchIndex = elasticlunr(function () { + this.pipeline.remove(elasticlunr.stemmer); + this.pipeline.remove(elasticlunr.stopWordFilter); + this.addField("qualname"); + this.addField("fullname"); + this.addField("annotation"); + this.addField("default_value"); + this.addField("signature"); + this.addField("bases"); + this.addField("doc"); + this.setRef("fullname"); + }); + for (let doc of docs) { + searchIndex.addDoc(doc); + } + console.timeEnd("building search index"); + } + + return (term) => searchIndex.search(term, { + fields: { + qualname: {boost: 4}, + fullname: {boost: 2}, + annotation: {boost: 2}, + default_value: {boost: 2}, + signature: {boost: 2}, + bases: {boost: 2}, + doc: {boost: 1}, + }, + expand: true + }); +})(); \ No newline at end of file diff --git a/docs/.html/src.html b/docs/.html/src.html new file mode 100644 index 0000000..81d7a74 --- /dev/null +++ b/docs/.html/src.html @@ -0,0 +1,432 @@ + + + + + + + src API documentation + + + + + + + + + +
+
+

+src

+ +

PyPI - Version +license +badge

+ +

badge +badge +badge

+ +

slune (= slurm + tune!)

+ +

A super simplistic python package for performing hyperparameter tuning (or more generally launching jobs and saving results) on a cluster using SLURM. Takes advantage of the fact that lots of jobs (including hyperparameter tuning) are embarrassingly parallel! With slune you can divide your compute into lots of separately scheduled jobs meaning that each small job can get running on your cluster more quickly, speeding up your workflow! Often significantly!

+ +

Slune is super-easy to use! We have helper functions which can execute everything you need done for you, letting you speed up your work without wasting time.

+ +

Slune is barebones by design. This means that you can easily write code to integrate with slune if you want to do something a bit different! You can also work out what each function is doing pretty easily.

+ +

Slune is flexible. In designing this package I've tried to make as few assumptions as possible, meaning that it can be used for lots of stuff outside hyperparameter tuning (and within it!). For example, you can get slune to give you paths for where to save things, submit lots of jobs in parallel for any sort of script and do grid search. And there's more to come!

+ +

Usage

+ +

Let's go through a quick example of how we can use slune ... first let's define a model that we want to train:

+ +
+
# Simple Regularized Linear Regression without using external libraries
+
+# Function to compute the mean of a list
+def mean(values):
+    return sum(values) / float(len(values))
+
+# Function to compute the covariance between two lists
+def covariance(x, mean_x, y, mean_y):
+    covar = 0.0
+    for i in range(len(x)):
+        covar += (x[i] - mean_x) * (y[i] - mean_y)
+    return covar
+
+# Function to compute the variance of a list
+def variance(values, mean):
+    return sum((x - mean) ** 2 for x in values)
+
+# Function to compute coefficients for a simple regularized linear regression
+def coefficients_regularized(x, y, alpha):
+    mean_x, mean_y = mean(x), mean(y)
+    var_x = variance(x, mean_x)
+    covar = covariance(x, mean_x, y, mean_y)
+    b1 = (covar + alpha * var_x) / (var_x + alpha)
+    b0 = mean_y - b1 * mean_x
+    return b0, b1
+
+# Function to make predictions with a simple regularized linear regression model
+def linear_regression_regularized(train_X, train_y, test_X, alpha):
+    b0, b1 = coefficients_regularized(train_X, train_y, alpha)
+    predictions = [b0 + b1 * x for x in test_X]
+    return predictions
+
+# ------------------
+# The above is code for a simple regularized linear regression model that we want to train.
+# Now let's fit the model and use slune to save how well our model performs!
+# ------------------
+
+if __name__ == "__main__":
+    # First let's load in the value for the regularization parameter alpha that has been passed to this script from the command line. We will use the slune helper function lsargs to do this. 
+    # lsargs returns a tuple of the python path and a list of arguments passed to the script. We can then use this to get the alpha value.
+    from slune import lsargs
+    python_path, args = lsargs()
+    alpha = float(args[0])
+
+    # Mock training dataset, function is y = 1 + 1 * x
+    X = [1, 2, 3, 4, 5]
+    y = [2, 3, 4, 5, 6]
+
+    # Mock test dataset
+    test_X = [6, 7, 8]
+    test_y = [7, 8, 9]
+    test_predictions = linear_regression_regularized(X, y, test_X, alpha)
+
+    # First let's load in a function that we can use to get a saver object that uses the default method of logging (we call this object a slog = saver + logger). The saving will be coordinated by a csv saver object which saves and reads results from csv files stored in a hierarchy of directories.
+    from slune import get_csv_slog
+    csv_slog = get_csv_slog(params = args)
+
+    # Let's now calculate the mean squared error of our predictions and log it!
+    mse = mean([(test_y[i] - test_predictions[i])**2 for i in range(len(test_y))])  # a list, not a generator, since mean() calls len()
+    csv_slog.log({'mse': mse})
+
+    # Let's now save our logged results!
+    csv_slog.save_collated()
+
+
+ +

Now let's write some code that will submit some jobs to train our model using different hyperparameters!!

+ +
+
# Let's now load in a function that will coordinate our search! We're going to do a grid search.
+# SearcherGrid is the class we can use to coordinate a grid search. We pass it a dictionary of hyperparameters and the values we want to try for each hyperparameter. We also pass it the number of runs we want to do for each combination of hyperparameters.
+from slune.searchers import SearcherGrid
+grid_searcher = SearcherGrid({'alpha' : [0.25, 0.5, 0.75]}, runs = 1)
+
+# Let's now import a function which will submit a job for our model, the script_path specifies the path to the script that contains the model we want to train. The template_path specifies the path to the template script that we want to specify the job with, cargs is a list of constant arguments we want to pass to the script for each tuning. 
+# We set slog to None as we don't want to skip jobs that we have already run before.
+from slune import sbatchit
+script_path = 'model.py'
+template_path = 'template.sh'
+sbatchit(script_path, template_path, grid_searcher, cargs=[], slog=None)
+
+
+ +

Now that we've submitted our jobs we wait for them to finish 🕛🕐🕑🕒🕓🕔🕕🕖🕗🕘🕙🕚🕛. Once they're finished we can read the results!

+ +
+
from slune import get_csv_slog
+csv_slog = get_csv_slog(params = None)
+params, value = csv_slog.read(params = [], metric_name = 'mse', select_by ='min')
+print(f'Best hyperparameters: {params}')
+print(f'Their MSE: {value}')
+
+
+ +

Amazing! 🥳 We have successfully used slune to train our model. I hope this gives you a good flavour of how you can use slune and how easy it is to use!

+ +

Please check out the examples folder for notebooks detailing in more depth some potential ways you can use slune. The docs are not yet up and running 😢 but they are coming soon!

+ +

Roadmap

+ +
    • Make package user friendly:
        • Go through automation settings.
        • Code of conduct.
        • Contributing guidelines.
        • Add to pypi.

Still in early stages! First thing on the horizon is better integration with SLURM:

    • Set-up notifications for job completion, failure, etc.
    • Auto job naming, job output naming and job output location saving.
    • Auto save logged results when finishing a job.
    • Automatically re-submit failed jobs.
    • Tools for monitoring and cancelling jobs.

Then it will be looking at adding more savers, loggers and searchers! For example integration with tensorboard, saving to one csv file (as opposed to a hierarchy of csv files in different directories) and different search methods like random search and cross validation. It would perhaps also be beneficial to be able to interface with other languages like R and Julia. Finally, more helper functions!

However, I am trying to keep this package as bloatless as possible to make it easy for you to tweak and configure to your individual needs. It's written in a simple and compartmentalized manner for this reason. You can of course use the helper functions and let slune handle everything under the hood, but you can also very quickly and easily write your own classes to work with other savers, loggers and searchers to do as you please.

+ +

Installation

+ +

To install the latest version use:

+ +
+
pip install slune-lib
+
+
+ +

To install the latest dev version use (CURRENTLY RECOMMENDED):

+ +
+
# With https
+pip install "git+https://github.com/h-aze/slune.git#egg=slune-lib"
+
+
+ +

Class Design

+ +

Here we will outline the different kinds of classes that are used in slune and how they interact with each other. There are 3 types:

+ +
    +
  • 'Searcher' classes - these are the classes that are used to define and traverse a search space.
  • +
  • 'Logger' classes - these are the classes that are used to create and read log files.
  • +
  • 'Saver' classes - these are the classes that are used to save logs to files and read logs from files.
  • +
+ +

The base module is where the base classes for each of these types are defined. The base classes are:

+ +
    +
  • BaseSearcher
  • +
  • BaseLogger
  • +
  • BaseSaver
  • +
+ +

To create a new searcher, logger or saver, you must inherit from the appropriate base class and implement the required methods. The required methods will have the '@abc.abstractmethod' decorator above them and will throw errors if they are not implemented. The compulsory methods allow for well-defined interactions between the different classes and should allow for any combination of searcher, logger and saver to be used together.

+ +

Please read the docs for the base classes to see what methods are required to be implemented and how they should be implemented.
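As a concrete illustration, here is a minimal sketch of a custom logger written against the BaseLogger protocol (a sketch only: the class name and its in-memory, list-of-dicts storage are illustrative and not part of slune):

# A minimal sketch of a custom logger, assuming only the BaseLogger
# protocol described above; 'LoggerInMemory' and its storage format are
# illustrative, not part of slune.
from slune.base import BaseLogger

class LoggerInMemory(BaseLogger):
    """ Keeps logged metrics in a plain list of dicts. """

    def __init__(self):
        self.results = []

    def log(self, metrics: dict):
        # Store a copy so later mutation by the caller can't change the log
        self.results.append(dict(metrics))

    def read_log(self, results: list, metric_name: str, select_by: str = 'max'):
        # Return the best value logged for the given metric
        values = [r[metric_name] for r in results if metric_name in r]
        return max(values) if select_by == 'max' else min(values)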

+
+ + + + + +
1"""
+2.. include:: ../README.md
+3.. include:: ../CLASSDESIGN.md
+4"""
+
+ + +
+
+ + \ No newline at end of file diff --git a/docs/.html/src/slune.html b/docs/.html/src/slune.html new file mode 100644 index 0000000..43ca94f --- /dev/null +++ b/docs/.html/src/slune.html @@ -0,0 +1,264 @@ + + + + + + + src.slune API documentation + + + + + + + + + +
+
+

+src.slune

+ + + + + + +
 1# from .slune import submit_job, sbatchit
+ 2# __all__ = ['slune', 'base', 'utils', 'loggers', 'savers', 'searchers' ]
+ 3
+ 4from .searchers import *
+ 5from .savers import *
+ 6from .loggers import *
+ 7from .slune import submit_job, sbatchit, lsargs, garg, get_csv_slog
+ 8from . import base, utils
+ 9
+10# __all__ = ['submit_job', 'sbatchit', 'lsargs', 'garg', 'get_csv_slog',
+11        #    'base', 'utils', 'default', 'grid', 'csv']
+12
+13import importlib.metadata
+14__version__ = importlib.metadata.version("slune-lib")
+
+ + +
+
+ + \ No newline at end of file diff --git a/docs/.html/src/slune/base.html b/docs/.html/src/slune/base.html new file mode 100644 index 0000000..43a847e --- /dev/null +++ b/docs/.html/src/slune/base.html @@ -0,0 +1,982 @@ + + + + + + + src.slune.base API documentation + + + + + + + + + +
+
+

+src.slune.base

+ + + + + + +
  1import abc 
+  2
+  3class BaseSearcher(metaclass=abc.ABCMeta):
+  4    """ Base class for all Searchers. 
+  5    
+  6    This must be subclassed to create different Searcher classes.
+  7    Please name your searcher class Searcher<SearcherName>.
+  8    Outlines a protocol for creating a search space and creating configurations from it.
+  9    Methods document what they should do once implemented. 
+ 10
+ 11    """
+ 12    @abc.abstractmethod
+ 13    def __init__(self, *args, **kwargs):
+ 14        """ Initialises the searcher. """
+ 15
+ 16        pass
+ 17    
+ 18    @abc.abstractmethod
+ 19    def __len__(self, *args, **kwargs):
+ 20        """ Returns the number of configurations defined by the search space of the searcher. """
+ 21
+ 22        pass
+ 23    
+ 24    @abc.abstractmethod
+ 25    def next_tune(self, *args, **kwargs):
+ 26        """ Returns the next configuration to try. """
+ 27
+ 28        pass
+ 29
+ 30    @abc.abstractmethod
+ 31    def check_existing_runs(self, *args, **kwargs):
+ 32        """ Used to tell searcher to check if there are existing runs in storage.
+ 33
+ 34        If there are existing runs, the searcher should skip them 
+ 35        based on the number of runs we would like for each job.
+ 36        This may require a 'runs' attribute to be set in the searcher.
+ 37        It will probably also require access to a Saver object,
+ 38        so we can use its saving protocol to check if there are existing runs.
+ 39        In this case it is advised that this function takes a Saver object as an argument,
+ 40        and that the searcher is initialized with a 'runs' attribute.
+ 41
+ 42        """
+ 43
+ 44        pass
+ 45
+ 46    def __iter__(self):
+ 47        """ Makes the searcher iterable, so we can use it in a for loop.
+ 48        
+ 49        Feel free to override this method if needed.
+ 50
+ 51        """
+ 52
+ 53        return self
+ 54    
+ 55    def __next__(self):
+ 56        """ Makes the searcher iterable, so we can use it in a for loop.
+ 57
+ 58        Feel free to override this method if needed.
+ 59
+ 60        """
+ 61
+ 62        try:
+ 63            return self.next_tune()
+ 64        except Exception:  # treat any error from next_tune as exhaustion of the search space
+ 65            raise StopIteration
+ 66
+ 67class BaseLogger(metaclass=abc.ABCMeta):
+ 68    """ Base class for all Loggers. 
+ 69    
+ 70    This must be subclassed to implement different Logger classes.
+ 71    Please name your logger class Logger<LoggerName>.
+ 72    Outlines a protocol for logging metrics and reading from the logs.
+ 73    Methods document what they should do once implemented. 
+ 74
+ 75    """
+ 76    @abc.abstractmethod
+ 77    def __init__(self, *args, **kwargs):
+ 78        """ Initialises the logger. """
+ 79
+ 80        pass
+ 81    
+ 82    @abc.abstractmethod
+ 83    def log(self, *args, **kwargs):
+ 84        """ Logs the metric/s for the current hyperparameter configuration.
+ 85
+ 86        Should store metrics in some way so we can later save it using a Saver.
+ 87
+ 88        """
+ 89
+ 90        pass
+ 91    
+ 92    @abc.abstractmethod
+ 93    def read_log(self, *args, **kwargs):
+ 94        """ Returns value of a metric from the log based on a selection criteria. """
+ 95
+ 96        pass
+ 97
+ 98class BaseSaver(metaclass=abc.ABCMeta):
+ 99    """ Base class for all savers. 
+100    
+101    This must be subclassed to implement different Saver classes.
+102    Please name your saver class Saver<SaverName>.
+103    Outlines a protocol for saving and reading results to/from storage.
+104    Methods document what they should do once implemented. 
+105
+106    """
+107
+108    @abc.abstractmethod
+109    def __init__(self, logger_instance: BaseLogger, *args, **kwargs):
+110        """ Initialises the saver.
+111
+112        Assigns the logger instance to self.logger and makes its methods accessible through self.log and self.read_log.
+113
+114        Args:
+115            - logger_instance (BaseLogger): Instance of a logger class that inherits from BaseLogger.
+116        
+117        """
+118
+119        # Given a class that inherits from BaseLogger we make it accessible through self.logger and make its methods accessible through self.log and self.read_log
+120        self.logger = logger_instance
+121        self.log = self.logger.log
+122        self.read_log = self.logger.read_log
+123
+124    @abc.abstractmethod
+125    def save_collated(self, *args, **kwargs):
+126        """ Saves the current results in logger to storage. """
+127
+128        pass
+129    
+130    @abc.abstractmethod
+131    def read(self, *args, **kwargs):
+132        """ Reads results from storage. """
+133
+134        pass
+135
+136    @abc.abstractmethod
+137    def exists(self, *args, **kwargs):
+138        """ Checks if results already exist in storage.
+139         
+140        Should return an integer indicating the number of runs that exist in storage for the given parameters. 
+141
+142        """
+143
+144        pass
+
+ + +
+
+ +
+ + class + BaseSearcher: + + + +
+ +
 4class BaseSearcher(metaclass=abc.ABCMeta):
+ 5    """ Base class for all Searchers. 
+ 6    
+ 7    This must be subclassed to create different Searcher classes.
+ 8    Please name your searcher class Searcher<SearcherName>.
+ 9    Outlines a protocol for creating a search space and creating configurations from it.
+10    Methods document what they should do once implemented. 
+11
+12    """
+13    @abc.abstractmethod
+14    def __init__(self, *args, **kwargs):
+15        """ Initialises the searcher. """
+16
+17        pass
+18    
+19    @abc.abstractmethod
+20    def __len__(self, *args, **kwargs):
+21        """ Returns the number of configurations defined by the search space of the searcher. """
+22
+23        pass
+24    
+25    @abc.abstractmethod
+26    def next_tune(self, *args, **kwargs):
+27        """ Returns the next configuration to try. """
+28
+29        pass
+30
+31    @abc.abstractmethod
+32    def check_existing_runs(self, *args, **kwargs):
+33        """ Used to tell searcher to check if there are existing runs in storage.
+34
+35        If there are existing runs, the searcher should skip them 
+36        based on the number of runs we would like for each job.
+37        This may require a 'runs' attribute to be set in the searcher.
+38        It will probably also require access to a Saver object,
+39        so we can use its saving protocol to check if there are existing runs.
+40        In this case it is advised that this function takes a Saver object as an argument,
+41        and that the searcher is initialized with a 'runs' attribute.
+42
+43        """
+44
+45        pass
+46
+47    def __iter__(self):
+48        """ Makes the searcher iterable, so we can use it in a for loop.
+49        
+50        Feel free to override this method if needed.
+51
+52        """
+53
+54        return self
+55    
+56    def __next__(self):
+57        """ Makes the searcher iterable, so we can use it in a for loop.
+58
+59        Feel free to override this method if needed.
+60
+61        """
+62
+63        try:
+64            return self.next_tune()
+65        except Exception:  # treat any error from next_tune as exhaustion of the search space
+66            raise StopIteration
+
+ + +

Base class for all Searchers.

+ +

This must be subclassed to create different Searcher classes. +Please name your searcher class Searcher<SearcherName>. +Outlines a protocol for creating a search space and creating configurations from it. +Methods document what they should do once implemented.

+
+ + +
+ +
+
@abc.abstractmethod
+ + BaseSearcher(*args, **kwargs) + + + +
+ +
13    @abc.abstractmethod
+14    def __init__(self, *args, **kwargs):
+15        """ Initialises the searcher. """
+16
+17        pass
+
+ + +

Initialises the searcher.

+
+ + +
+
+ +
+
@abc.abstractmethod
+ + def + next_tune(self, *args, **kwargs): + + + +
+ +
25    @abc.abstractmethod
+26    def next_tune(self, *args, **kwargs):
+27        """ Returns the next configuration to try. """
+28
+29        pass
+
+ + +

Returns the next configuration to try.

+
+ + +
+
+ +
+
@abc.abstractmethod
+ + def + check_existing_runs(self, *args, **kwargs): + + + +
+ +
31    @abc.abstractmethod
+32    def check_existing_runs(self, *args, **kwargs):
+33        """ Used to tell searcher to check if there are existing runs in storage.
+34
+35        If there are existing runs, the searcher should skip them 
+36        based on the number of runs we would like for each job.
+37        This may require a 'runs' attribute to be set in the searcher.
+38        It will probably also require access to a Saver object,
+39        so we can use its saving protocol to check if there are existing runs.
+40        In this case it is advised that this function takes a Saver object as an argument,
+41        and that the searcher is initialized with a 'runs' attribute.
+42
+43        """
+44
+45        pass
+
+ + +

Used to tell searcher to check if there are existing runs in storage.

+ +

If there are existing runs, the searcher should skip them +based on the number of runs we would like for each job. +This may require a 'runs' attribute to be set in the searcher. +It will probably also require access to a Saver object, +so we can use its saving protocol to check if there are existing runs. +In this case it is advised that this function takes a Saver object as an argument, +and that the searcher is initialized with a 'runs' attribute.

+
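For example, a minimal sketch of this pattern (the searcher is assumed to have been initialised with a 'runs' attribute; everything beyond the protocol above is illustrative):

# Sketch of check_existing_runs for a hypothetical searcher. Assumes the
# searcher has a 'runs' attribute and receives a Saver whose 'exists'
# method returns the number of stored runs for a list of parameters.
def check_existing_runs(self, saver):
    # Keep the saver around so the search loop can query storage
    self.saver = saver

def _has_enough_runs(self, params):
    # Skip configurations that already have >= self.runs results saved
    return self.saver.exists(params) >= self.runs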
+ + +
+
+
+ +
+ + class + BaseLogger: + + + +
+ +
68class BaseLogger(metaclass=abc.ABCMeta):
+69    """ Base class for all Loggers. 
+70    
+71    This must be subclassed to implement different Logger classes.
+72    Please name your logger class Logger<LoggerName>.
+73    Outlines a protocol for logging metrics and reading from the logs.
+74    Methods document what they should do once implemented. 
+75
+76    """
+77    @abc.abstractmethod
+78    def __init__(self, *args, **kwargs):
+79        """ Initialises the logger. """
+80
+81        pass
+82    
+83    @abc.abstractmethod
+84    def log(self, *args, **kwargs):
+85        """ Logs the metric/s for the current hyperparameter configuration.
+86
+87        Should store metrics in some way so we can later save it using a Saver.
+88
+89        """
+90
+91        pass
+92    
+93    @abc.abstractmethod
+94    def read_log(self, *args, **kwargs):
+95        """ Returns value of a metric from the log based on a selection criteria. """
+96
+97        pass
+
+ + +

Base class for all Loggers.

+ +

This must be subclassed to implement different Logger classes. +Please name your logger class Logger<LoggerName>. +Outlines a protocol for logging metrics and reading from the logs. +Methods document what they should do once implemented.

+
+ + +
+ +
+
@abc.abstractmethod
+ + BaseLogger(*args, **kwargs) + + + +
+ +
77    @abc.abstractmethod
+78    def __init__(self, *args, **kwargs):
+79        """ Initialises the logger. """
+80
+81        pass
+
+ + +

Initialises the logger.

+
+ + +
+
+ +
+
@abc.abstractmethod
+ + def + log(self, *args, **kwargs): + + + +
+ +
83    @abc.abstractmethod
+84    def log(self, *args, **kwargs):
+85        """ Logs the metric/s for the current hyperparameter configuration.
+86
+87        Should store metrics in some way so we can later save it using a Saver.
+88
+89        """
+90
+91        pass
+
+ + +

Logs the metric/s for the current hyperparameter configuration.

+ +

Should store metrics in some way so we can later save it using a Saver.

+
+ + +
+
+ +
+
@abc.abstractmethod
+ + def + read_log(self, *args, **kwargs): + + + +
+ +
93    @abc.abstractmethod
+94    def read_log(self, *args, **kwargs):
+95        """ Returns value of a metric from the log based on a selection criteria. """
+96
+97        pass
+
+ + +

Returns the value of a metric from the log based on a selection criterion.

+
+ + +
+
+
+ +
+ + class + BaseSaver: + + + +
+ +
 99class BaseSaver(metaclass=abc.ABCMeta):
+100    """ Base class for all savers. 
+101    
+102    This must be subclassed to implement different Saver classes.
+103    Please name your saver class Saver<SaverName>.
+104    Outlines a protocol for saving and reading results to/from storage.
+105    Methods document what they should do once implemented. 
+106
+107    """
+108
+109    @abc.abstractmethod
+110    def __init__(self, logger_instance: BaseLogger, *args, **kwargs):
+111        """ Initialises the saver.
+112
+113        Assigns the logger instance to self.logger and makes its methods accessible through self.log and self.read_log.
+114
+115        Args:
+116            - logger_instance (BaseLogger): Instance of a logger class that inherits from BaseLogger.
+117        
+118        """
+119
+120        # Given a class that inherits from BaseLogger we make it accessible through self.logger and make its methods accessible through self.log and self.read_log
+121        self.logger = logger_instance
+122        self.log = self.logger.log
+123        self.read_log = self.logger.read_log
+124
+125    @abc.abstractmethod
+126    def save_collated(self, *args, **kwargs):
+127        """ Saves the current results in logger to storage. """
+128
+129        pass
+130    
+131    @abc.abstractmethod
+132    def read(self, *args, **kwargs):
+133        """ Reads results from storage. """
+134
+135        pass
+136
+137    @abc.abstractmethod
+138    def exists(self, *args, **kwargs):
+139        """ Checks if results already exist in storage.
+140         
+141        Should return an integer indicating the number of runs that exist in storage for the given parameters. 
+142
+143        """
+144
+145        pass
+
+ + +

Base class for all savers.

+ +

This must be subclassed to implement different Saver classes. +Please name your saver class Saver<SaverName>. +Outlines a protocol for saving and reading results to/from storage. +Methods document what they should do once implemented.

+
+ + +
+ +
+
@abc.abstractmethod
+ + BaseSaver(logger_instance: BaseLogger, *args, **kwargs) + + + +
+ +
109    @abc.abstractmethod
+110    def __init__(self, logger_instance: BaseLogger, *args, **kwargs):
+111        """ Initialises the saver.
+112
+113        Assigns the logger instance to self.logger and makes its methods accessible through self.log and self.read_log.
+114
+115        Args:
+116            - logger_instance (BaseLogger): Instance of a logger class that inherits from BaseLogger.
+117        
+118        """
+119
+120        # Given a class that inherits from BaseLogger we make it accessible through self.logger and make its methods accessible through self.log and self.read_log
+121        self.logger = logger_instance
+122        self.log = self.logger.log
+123        self.read_log = self.logger.read_log
+
+ + +

Initialises the saver.

+ +

Assigns the logger instance to self.logger and makes its methods accessible through self.log and self.read_log.

+ +
Arguments:
+ +
    +
  • - logger_instance (BaseLogger): Instance of a logger class that inherits from BaseLogger.
  • +
+
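In practice this means a concrete saver doubles as a front end for its logger; a small sketch using the concrete classes shipped with slune:

# After BaseSaver.__init__ runs, the logger's methods are exposed on the
# saver itself, so logging can go through the saver directly.
from slune.loggers import LoggerDefault
from slune.savers import SaverCsv

saver = SaverCsv(LoggerDefault(), params=['--alpha=0.5'])
saver.log({'mse': 0.3})      # forwards to saver.logger.log
print(saver.logger.results)  # one row, with a 'time_stamp' column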
+ + +
+
+
+ logger + + +
+ + + + +
+
+
+ log + + +
+ + + + +
+
+
+ read_log + + +
+ + + + +
+
+ +
+
@abc.abstractmethod
+ + def + save_collated(self, *args, **kwargs): + + + +
+ +
125    @abc.abstractmethod
+126    def save_collated(self, *args, **kwargs):
+127        """ Saves the current results in logger to storage. """
+128
+129        pass
+
+ + +

Saves the current results in logger to storage.

+
+ + +
+
+ +
+
@abc.abstractmethod
+ + def + read(self, *args, **kwargs): + + + +
+ +
131    @abc.abstractmethod
+132    def read(self, *args, **kwargs):
+133        """ Reads results from storage. """
+134
+135        pass
+
+ + +

Reads results from storage.

+
+ + +
+
+ +
+
@abc.abstractmethod
+ + def + exists(self, *args, **kwargs): + + + +
+ +
137    @abc.abstractmethod
+138    def exists(self, *args, **kwargs):
+139        """ Checks if results already exist in storage.
+140         
+141        Should return an integer indicating the number of runs that exist in storage for the given parameters. 
+142
+143        """
+144
+145        pass
+
+ + +

Checks if results already exist in storage.

+ +

Should return an integer indicating the number of runs that exist in storage for the given parameters.

+
+ + +
+
+
+ + \ No newline at end of file diff --git a/docs/.html/src/slune/loggers.html b/docs/.html/src/slune/loggers.html new file mode 100644 index 0000000..455b6c7 --- /dev/null +++ b/docs/.html/src/slune/loggers.html @@ -0,0 +1,244 @@ + + + + + + + src.slune.loggers API documentation + + + + + + + + + +
+
+

+src.slune.loggers

+ + + + + + +
1from .default import LoggerDefault
+2
+3# __all__ = ['LoggerDefault']
+
+ + +
+
+ + \ No newline at end of file diff --git a/docs/.html/src/slune/loggers/default.html b/docs/.html/src/slune/loggers/default.html new file mode 100644 index 0000000..30be80e --- /dev/null +++ b/docs/.html/src/slune/loggers/default.html @@ -0,0 +1,608 @@ + + + + + + + src.slune.loggers.default API documentation + + + + + + + + + +
+
+

+src.slune.loggers.default

+ + + + + + +
 1import pandas as pd
+ 2from slune.base import BaseLogger
+ 3
+ 4class LoggerDefault(BaseLogger):
+ 5    """ Logs metric/s in a data frame.
+ 6    
+ 7    Stores the metric/s in a data frame that we can later save in storage.
+ 8    Logs by creating a data frame out of the metrics and then appending it to the current results data frame.
+ 9
+10    Attributes:
+11        - results (pd.DataFrame): Data frame containing all the metrics logged so far.
+12            Each row stores all the metrics that were given in a call to the 'log' method,
+13            each column title is a metric name.
+14            The first column is always the time stamp at which 'log' is called.
+15
+16    """
+17    
+18    def __init__(self, *args, **kwargs):
+19        """ Initialises the logger. """
+20
+21        super(LoggerDefault, self).__init__(*args, **kwargs)
+22        # Raise warning if any arguments are given
+23        if args or kwargs:
+24            raise Warning(f"Arguments {args} and keyword arguments {kwargs} are not supported")
+25        # Initialise results data frame
+26        self.results = pd.DataFrame()
+27    
+28    def log(self, metrics: dict):
+29        """ Logs the metric/s given.
+30
+31        Stores them in a data frame that we can later save in storage.
+32        All metrics provided will be saved as a row in the results data frame,
+33        the first column is always the time stamp at which log is called.
+34
+35        Args:
+36            - metrics (dict): Metrics to be logged, keys are metric names and values are metric values.
+37                Each metric should only have one value! So please log as soon as you get a metric.
+38
+39        """
+40
+41        # Get current time stamp
+42        time_stamp = pd.Timestamp.now()
+43        # Add time stamp to metrics dictionary
+44        metrics['time_stamp'] = time_stamp
+45        # Convert metrics dictionary to a dataframe
+46        metrics_df = pd.DataFrame(metrics, index=[0])
+47        # Append metrics dataframe to results dataframe
+48        self.results = pd.concat([self.results, metrics_df], ignore_index=True)
+49    
+50    def read_log(self, data_frame: pd.DataFrame, metric_name: str, select_by: str ='max') -> float:
+51        """ Reads log and returns value according to select_by.
+52
+ 53        Reads the values for the given metric from the given log and chooses which metric value to return based on select_by.
+54
+55        Args:
+56            - data_frame (pd.DataFrame): Data frame containing the metric to be read.
+57            - metric_name (str): Name of the metric to be read.
+58            - select_by (str, optional): How to select the 'best' metric, currently can select by 'min' or 'max'.
+59
+60        Returns:
+61            - value (float): Minimum or maximum value of the metric.
+62
+63        TODO: 
+64            - Add more options for select_by.
+65            - Should be able to return other types than float?
+66
+67        """ 
+68
+69        # Get the metric column
+70        metric_col = data_frame[metric_name]
+71        # Get the index of the minimum or maximum value
+72        if select_by == 'max':
+73            index = metric_col.idxmax()
+74        elif select_by == 'min':
+75            index = metric_col.idxmin()
+76        else:
+77            raise ValueError(f"select_by must be 'min' or 'max', got {select_by}")
+78        # Get the value of the metric
+79        value = metric_col.iloc[index]
+80        return value
+
+ + +
+
+ +
+ + class + LoggerDefault(slune.base.BaseLogger): + + + +
+ +
 5class LoggerDefault(BaseLogger):
+ 6    """ Logs metric/s in a data frame.
+ 7    
+ 8    Stores the metric/s in a data frame that we can later save in storage.
+ 9    Logs by creating a data frame out of the metrics and then appending it to the current results data frame.
+10
+11    Attributes:
+12        - results (pd.DataFrame): Data frame containing all the metrics logged so far.
+13            Each row stores all the metrics that were given in a call to the 'log' method,
+14            each column title is a metric name.
+15            The first column is always the time stamp at which 'log' is called.
+16
+17    """
+18    
+19    def __init__(self, *args, **kwargs):
+20        """ Initialises the logger. """
+21
+22        super(LoggerDefault, self).__init__(*args, **kwargs)
+23        # Raise warning if any arguments are given
+24        if args or kwargs:
+25            raise Warning(f"Arguments {args} and keyword arguments {kwargs} are not supported")
+26        # Initialise results data frame
+27        self.results = pd.DataFrame()
+28    
+29    def log(self, metrics: dict):
+30        """ Logs the metric/s given.
+31
+32        Stores them in a data frame that we can later save in storage.
+33        All metrics provided will be saved as a row in the results data frame,
+34        the first column is always the time stamp at which log is called.
+35
+36        Args:
+37            - metrics (dict): Metrics to be logged, keys are metric names and values are metric values.
+38                Each metric should only have one value! So please log as soon as you get a metric.
+39
+40        """
+41
+42        # Get current time stamp
+43        time_stamp = pd.Timestamp.now()
+44        # Add time stamp to metrics dictionary
+45        metrics['time_stamp'] = time_stamp
+46        # Convert metrics dictionary to a dataframe
+47        metrics_df = pd.DataFrame(metrics, index=[0])
+48        # Append metrics dataframe to results dataframe
+49        self.results = pd.concat([self.results, metrics_df], ignore_index=True)
+50    
+51    def read_log(self, data_frame: pd.DataFrame, metric_name: str, select_by: str ='max') -> float:
+52        """ Reads log and returns value according to select_by.
+53
+54        Reads the values for the given metric from the given log and chooses which metric value to return based on select_by.
+55
+56        Args:
+57            - data_frame (pd.DataFrame): Data frame containing the metric to be read.
+58            - metric_name (str): Name of the metric to be read.
+59            - select_by (str, optional): How to select the 'best' metric, currently can select by 'min' or 'max'.
+60
+61        Returns:
+62            - value (float): Minimum or maximum value of the metric.
+63
+64        TODO: 
+65            - Add more options for select_by.
+66            - Should be able to return other types than float?
+67
+68        """ 
+69
+70        # Get the metric column
+71        metric_col = data_frame[metric_name]
+72        # Get the index of the minimum or maximum value
+73        if select_by == 'max':
+74            index = metric_col.idxmax()
+75        elif select_by == 'min':
+76            index = metric_col.idxmin()
+77        else:
+78            raise ValueError(f"select_by must be 'min' or 'max', got {select_by}")
+79        # Get the value of the metric
+80        value = metric_col.iloc[index]
+81        return value
+
+ + +

Logs metric/s in a data frame.

+ +

Stores the metric/s in a data frame that we can later save in storage. +Logs by creating a data frame out of the metrics and then appending it to the current results data frame.

+ +
Attributes:
+ +
    +
  • - results (pd.DataFrame): Data frame containing all the metrics logged so far. +Each row stores all the metrics that were given in a call to the 'log' method, +each column title is a metric name. +The first column is always the time stamp at which 'log' is called.
  • +
+
+ + +
+ +
+ + LoggerDefault(*args, **kwargs) + + + +
+ +
19    def __init__(self, *args, **kwargs):
+20        """ Initialises the logger. """
+21
+22        super(LoggerDefault, self).__init__(*args, **kwargs)
+23        # Raise warning if any arguments are given
+24        if args or kwargs:
+25            raise Warning(f"Arguments {args} and keyword arguments {kwargs} are not supported")
+26        # Initialise results data frame
+27        self.results = pd.DataFrame()
+
+ + +

Initialises the logger.

+
+ + +
+
+
+ results + + +
+ + + + +
+
+ +
+ + def + log(self, metrics: dict): + + + +
+ +
29    def log(self, metrics: dict):
+30        """ Logs the metric/s given.
+31
+32        Stores them in a data frame that we can later save in storage.
+33        All metrics provided will be saved as a row in the results data frame,
+34        the first column is always the time stamp at which log is called.
+35
+36        Args:
+37            - metrics (dict): Metrics to be logged, keys are metric names and values are metric values.
+38                Each metric should only have one value! So please log as soon as you get a metric.
+39
+40        """
+41
+42        # Get current time stamp
+43        time_stamp = pd.Timestamp.now()
+44        # Add time stamp to metrics dictionary
+45        metrics['time_stamp'] = time_stamp
+46        # Convert metrics dictionary to a dataframe
+47        metrics_df = pd.DataFrame(metrics, index=[0])
+48        # Append metrics dataframe to results dataframe
+49        self.results = pd.concat([self.results, metrics_df], ignore_index=True)
+
+ + +

Logs the metric/s given.

+ +

Stores them in a data frame that we can later save in storage. +All metrics provided will be saved as a row in the results data frame, +the first column is always the time stamp at which log is called.

+ +
Arguments:
+ +
    +
  • - metrics (dict): Metrics to be logged, keys are metric names and values are metric values. +Each metric should only have one value! So please log as soon as you get a metric.
  • +
+
+ + +
+
+ +
+ + def + read_log( self, data_frame: pandas.core.frame.DataFrame, metric_name: str, select_by: str = 'max') -> float: + + + +
+ +
51    def read_log(self, data_frame: pd.DataFrame, metric_name: str, select_by: str ='max') -> float:
+52        """ Reads log and returns value according to select_by.
+53
+54        Reads the values for the given metric from the given log and chooses which metric value to return based on select_by.
+55
+56        Args:
+57            - data_frame (pd.DataFrame): Data frame containing the metric to be read.
+58            - metric_name (str): Name of the metric to be read.
+59            - select_by (str, optional): How to select the 'best' metric, currently can select by 'min' or 'max'.
+60
+61        Returns:
+62            - value (float): Minimum or maximum value of the metric.
+63
+64        TODO: 
+65            - Add more options for select_by.
+66            - Should be able to return other types than float?
+67
+68        """ 
+69
+70        # Get the metric column
+71        metric_col = data_frame[metric_name]
+72        # Get the index of the minimum or maximum value
+73        if select_by == 'max':
+74            index = metric_col.idxmax()
+75        elif select_by == 'min':
+76            index = metric_col.idxmin()
+77        else:
+78            raise ValueError(f"select_by must be 'min' or 'max', got {select_by}")
+79        # Get the value of the metric
+80        value = metric_col.iloc[index]
+81        return value
+
+ + +

Reads log and returns value according to select_by.

+ +

Reads the values for the given metric from the given log and chooses which metric value to return based on select_by.

+ +
Arguments:
+ +
    +
  • - data_frame (pd.DataFrame): Data frame containing the metric to be read.
  • +
  • - metric_name (str): Name of the metric to be read.
  • +
  • - select_by (str, optional): How to select the 'best' metric, currently can select by 'min' or 'max'.
  • +
+ +
Returns:
+ +
+
    +
  • value (float): Minimum or maximum value of the metric.
  • +
+
+ +

TODO: + - Add more options for select_by. + - Should be able to return other types than float?

+
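A short usage sketch tying log and read_log together (metric values are made up):

from slune.loggers import LoggerDefault

logger = LoggerDefault()
logger.log({'mse': 0.42})
logger.log({'mse': 0.13})
# 'results' holds one row per call to log, with a 'time_stamp' column first
best = logger.read_log(logger.results, 'mse', select_by='min')
print(best)  # 0.13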
+ + +
+
+
+ + \ No newline at end of file diff --git a/docs/.html/src/slune/savers.html b/docs/.html/src/slune/savers.html new file mode 100644 index 0000000..6f8b8fc --- /dev/null +++ b/docs/.html/src/slune/savers.html @@ -0,0 +1,244 @@ + + + + + + + src.slune.savers API documentation + + + + + + + + + +
+
+

+src.slune.savers

+ + + + + + +
1from .csv import SaverCsv
+2
+3# __all__ = ['SaverCsv']
+
+ + +
+
+ + \ No newline at end of file diff --git a/docs/.html/src/slune/savers/csv.html b/docs/.html/src/slune/savers/csv.html new file mode 100644 index 0000000..4393de5 --- /dev/null +++ b/docs/.html/src/slune/savers/csv.html @@ -0,0 +1,1465 @@ + + + + + + + src.slune.savers.csv API documentation + + + + + + + + + +
+
+

+src.slune.savers.csv

+ + + + + + +
  1from typing import List, Optional, Tuple
+  2import os 
+  3import pandas as pd
+  4from slune.utils import find_directory_path, get_all_paths, get_numeric_equiv
+  5from slune.base import BaseSaver, BaseLogger
+  6import random
+  7import time
+  8
+  9class SaverCsv(BaseSaver):
+ 10    """ Saves the results of each run in a CSV file in hierarchy of directories.
+ 11     
+ 12    Each directory is named after a parameter - value pair in the form "--parameter_name=value".
+ 13    The paths to csv files then define the configuration under which the results were obtained,
+ 14    for example if we only have one parameter "learning_rate" with value 0.01 used to obtain the results,
+ 15    to save those results we would create a directory named "--learning_rate=0.01" and save the results in a csv file in that directory.
+ 16
+ 17    If we have multiple parameters, for example "learning_rate" with value 0.01 and "batch_size" with value 32,
+ 18    we would create a directory named "--learning_rate=0.01" with a subdirectory named "--batch_size=32",
+ 19    and save the results in a csv file in that subdirectory.
+ 20
+ 21    We use this structure to then read the results from the csv files by searching for the directory that matches the parameters we want,
+ 22    and then reading the csv file in that directory.
+ 23
+ 24    The order in which we create the directories is determined by the order in which the parameters are given,
+ 25    so if we are given ["--learning_rate=0.01", "--batch_size=32"] we would create the directories in the following order:
+ 26    "--learning_rate=0.01/--batch_size=32".
+ 27
+ 28    The directory structure generated will also depend on existing directories in the root directory,
+ 29    if there are existing directories in the root directory that match some subset of the parameters given,
+ 30    we will create the directory tree from the deepest matching directory.
+ 31
+ 32    For example if we only have the following path in the root directory:
+ 33    "--learning_rate=0.01/--batch_size=32"
+ 34    and we are given the parameters ["--learning_rate=0.01", "--batch_size=32", "--num_epochs=10"],
+ 35    we will create the path:
+ 36    "--learning_rate=0.01/--batch_size=32/--num_epochs=10".
+ 37    on the other hand if we are given the parameters ["--learning_rate=0.02", "--num_epochs=10", "--batch_size=32"],
+ 38    we will create the path:
+ 39    "--learning_rate=0.02/--batch_size=32/--num_epochs=10".
+ 40
+ 41    Handles parallel runs trying to create the same directories by waiting a random time (under 1 second) before creating the directory.
+ 42    Should work pretty well in practice; however, it may occasionally fail depending on the number of jobs launched at the same time. 
+ 43
+ 44    Attributes:
+ 45        - root_dir (str): Path to the root directory where we will store the csv files.
+ 46        - current_path (str): Path to the csv file where we will store the results for the current run.
+ 47
+ 48    """
+ 49
+ 50    def __init__(self, logger_instance: BaseLogger, params: List[str] = None, root_dir: Optional[str] = os.path.join('.', 'tuning_results')):
+ 51        """ Initialises the csv saver. 
+ 52
+ 53        Args:
+ 54            - logger_instance (BaseLogger): Instance of a logger class that inherits from BaseLogger.
+ 55            - params (list, optional): List of strings containing the parameters used, in form ["--parameter_name=parameter_value", ...], default is None.
+ 56                If None, we will create a path using the parameters given in the log.
+ 57            - root_dir (str, optional): Path to the root directory where we will store the csv files, default is './tuning_results'.
+ 58        
+ 59        """
+ 60
+ 61        super(SaverCsv, self).__init__(logger_instance)
+ 62        self.root_dir = root_dir
+ 63        if params is not None:
+ 64            self.current_path = self.get_path(params)
+ 65    
+ 66    def strip_params(self, params: List[str]) -> List[str]:
+ 67        """ Strips the parameter values.
+ 68
+ 69        Strips the parameter values from the list of parameters given,
+ 70        ie. ["--parameter_name=parameter_value", ...] -> ["--parameter_name=", ...]
+ 71
+ 72        Also gets rid of blank spaces.
+ 73
+ 74        Args:
+ 75            - params (list of str): List of strings containing the parameters used, in form ["--parameter_name=parameter_value", ...].
+ 76
+ 77        Returns:
+ 78            - stripped_params (list of str): List of strings containing the parameters used, in form ["--parameter_name=", ...].
+ 79
+ 80        """
+ 81
+ 82        stripped_params = [p.split('=')[0].strip() for p in params]
+ 83        return stripped_params
+ 84
+ 85    def get_match(self, params: List[str]) -> str:
+ 86        """ Searches the root directory for a path that matches the parameters given.
+ 87
+ 88        If only partial matches are found, returns the deepest matching directory with the missing parameters appended.
+ 89        By deepest we mean the directory with the most parameters matching.
+ 90        If no matches are found creates a path using the parameters.
+ 91        Creates path using parameters in the order they are given, 
+ 92        ie. ["--learning_rate=0.01", "--batch_size=32"] -> "--learning_rate=0.01/--batch_size=32".
+ 93
+ 94        If we find a partial match, we add the missing parameters to the end of the path,
+ 95        ie. if we have the path "--learning_rate=0.01" in the root 
+ 96        and are given the parameters ["--learning_rate=0.01", "--batch_size=32"],
+ 97        we will create the path "--learning_rate=0.01/--batch_size=32".
+ 98
+ 99        Args:
+100            - params (list of str): List of strings containing the arguments used, in form ["--argument_name=argument_value", ...].
+101
+102        Returns:
+103            - match (str): Path to the directory that matches the parameters given.
+104
+105        """
+106
+107        # First check if there is a directory with path matching some subset of the arguments
+108        stripped_params = [p.split('=')[0].strip() +'=' for p in params] # Strip the params of whitespace and everything after the '='
+109        if len(set(stripped_params)) != len(stripped_params):
+110            raise ValueError(f"Duplicate parameters found in {stripped_params}")
+111        match = find_directory_path(stripped_params, root_directory=self.root_dir)
+112        # Add on missing parameters
+113        if match == self.root_dir:
+114            match = os.path.join(*stripped_params)
+115        else:
+116            missing_params = [p for p in stripped_params if p not in match]
+117            if missing_params != []:
+118                match = [match] + missing_params
+119                match = os.path.join(*match)
+120        # Take the root directory out of the match
+121        match = match.replace(self.root_dir, '')
+122        if match.startswith(os.path.sep):
+123            match = match[1:]
+124        # Now we add back in the values we stripped out
+125        match = match.split(os.path.sep)
+126        match = [[p for p in params if m in p][0] for m in match]
+127        # Check if there is an existing path with the same numerical values, if so use that instead
+128        match = get_numeric_equiv(os.path.join(*match), root_directory=self.root_dir)
+129        return match
+130
+131    def get_path(self, params: List[str]) -> str:
+132        """ Creates a path using the parameters.
+133        
+134        Does this by first checking for existing paths in the root directory that match the parameters given.
+135
+136        Check get_match for how we create the path, 
+137        once we have the path we check if there is already a csv file with results in that path,
+138        if there is we increment the number of the results file name that we will use.
+139
+140        For example if we get back the path "--learning_rate=0.01/--batch_size=32",
+141        and there exists a csv file named "results_0.csv" in the final directory,
+142        we will name our csv file "results_1.csv".
+143
+144        Args:
+145            - params (list of str): List of strings containing the arguments used, in form ["--argument_name=argument_value", ...].
+146
+147        Returns:
+148            - csv_file_path (str): Path to the csv file where we will store the results for the current run.
+149
+150        """
+151
+152        # Check if root directory exists, if not create it
+153        if not os.path.exists(self.root_dir):
+154            time.sleep(random.random()) # Wait a random amount of time under 1 second to avoid multiple processes creating the same directory
+155            os.makedirs(self.root_dir)
+156        # Get path of directory where we should store our csv of results
+157        dir_path = self.get_match(params)
+158        # Check if directory exists, if not create it
+159        if not os.path.exists(dir_path):
+160            csv_file_number = 0
+161        # If it does exist, check if there is already a csv file with results,
+162        # if there is find the name of the last csv file and increment the number
+163        else:
+164            csv_files = [f for f in os.listdir(dir_path) if f.endswith('.csv')]
+165            if len(csv_files) > 0:
+166                # Check that every csv file starts with "results_"
+167                if not all(f.startswith('results_') for f in csv_files):
+168                    raise ValueError('Found csv file in directory that doesn\'t start with "results_"')
+169                # Use a numeric max: a plain string max would rank "results_9" above "results_10"
+170                csv_file_number = max(int(f.split('_')[1][:-4]) for f in csv_files) + 1
+171            else:
+172                csv_file_number = 0
+173        # Create path name for a new csv file where we can later store results
+174        csv_file_path = os.path.join(dir_path, f'results_{csv_file_number}.csv')
+175        return csv_file_path
+176
+177    def save_collated_from_results(self, results: pd.DataFrame):
+178        """ Saves results to csv file.
+179        
+180        If the csv file already exists, 
+181        we append the collated results from the logger to the end of the csv file.
+182        If the csv file does not exist,
+183        we create it and save the results to it.
+184
+185        Args:
+186            - results (pd.DataFrame): Data frame containing the results to be saved.
+187
+188        TODO: 
+189            - Could be making too many assumptions about the format in which we get the results from the logger,
+190            should be able to work with any logger.
+191            We should only be assuming that we are saving results to a csv file. 
+192
+193        """
+194
+195        # If path does not exist, create it
+196        # Remove the csv file name from the path
+197        dir_path = self.current_path.split(os.path.sep)[:-1]
+198        dir_path = os.path.join(*dir_path)
+199        if not os.path.exists(dir_path):
+200            time.sleep(random.random()) # Wait a random amount of time under 1 second to avoid multiple processes creating the same directory
+201            os.makedirs(dir_path)
+202        # If csv file already exists, append results to the end
+203        if os.path.exists(self.current_path):
+204            results = pd.concat([pd.read_csv(self.current_path), results])
+205            results.to_csv(self.current_path, mode='w', index=False)
+206        # If csv file does not exist, create it
+207        else:
+208            results.to_csv(self.current_path, index=False)
+209
+210    def save_collated(self):
+211        """ Saves results to csv file. """
+212
+213        self.save_collated_from_results(self.logger.results)
+214        
+215    def read(self, params: List[str], metric_name: str, select_by: str = 'max', avg: bool = True) -> Tuple[List[str], float]:
+216        """ Finds the min/max value of a metric from all csv files in the root directory that match the parameters given.
+217
+218        Args:
+219            - params (list of str): Contains the parameters used, in form ["--parameter_name=parameter_value", ...].
+220            - metric_name (string): Name of the metric to be read.
+221            - select_by (string, optional): How to select the 'best' value for the metric from a log file, currently can select by 'min' or 'max'.
+222            - avg (bool, optional): Whether to average the metric over all runs, default is True.
+223
+224        Returns:
+225            - best_params (list of str): Contains the arguments used to get the 'best' value of the metric (determined by select_by).
+226            - best_value (float): Best value of the metric (determined by select_by).
+227
+228        """
+229
+230        #  Get all paths that match the parameters given
+231        paths = get_all_paths(params, root_directory=self.root_dir)
+232        if paths == []:
+233            raise ValueError(f"No paths found matching {params}")
+234        # Read the metric from each path
+235        values = {}
+236        # Do averaging for different runs of same params if avg is True, otherwise just read the metric from each path
+237        if avg:
+238            paths_same_params = set([os.path.join(*p.split(os.path.sep)[:-1]) for p in paths])
+239            for path in paths_same_params:
+240                runs = get_all_paths(path.split(os.path.sep), root_directory=self.root_dir)
+241                cumsum = 0
+242                for r in runs:
+243                    df = pd.read_csv(r)
+244                    cumsum += self.read_log(df, metric_name, select_by)
+245                avg_of_runs = cumsum / len(runs)
+246                values[path] = avg_of_runs
+247        else:
+248            for path in paths:
+249                df = pd.read_csv(path)
+250                values[os.path.join(*path.split(os.path.sep)[:-1])] = self.read_log(df, metric_name, select_by)
+251        # Get the key of the min/max value
+252        if select_by == 'min':
+253            best_params = min(values, key=values.get)
+254        elif select_by == 'max':
+255            best_params = max(values, key=values.get)
+256        else:
+257            raise ValueError(f"select_by must be 'min' or 'max', got {select_by}")
+258        # Find the best value of the metric from the key
+259        best_value = values[best_params]
+260        # Format the path into a list of arguments
+261        best_params = best_params.replace(self.root_dir, '')
+262        if best_params.startswith(os.path.sep):
+263            best_params = best_params[1:]
+264        best_params = best_params.split(os.path.sep)
+265        return best_params, best_value       
+266
+267    def exists(self, params: List[str]) -> int:
+268        """ Checks if results already exist in storage.
+269
+270        Args:
+271            - params (list of str): Contains the parameters used, in form ["--parameter_name=parameter_value", ...].
+272
+273        Returns:
+274            - num_runs (int): Number of runs that exist in storage for the given parameters.
+275
+276        """
+277
+278        #  Get all paths that match the parameters given
+279        paths = get_all_paths(params, root_directory=self.root_dir)
+280        return len(paths)
+281
+282    def get_current_path(self) -> str:
+283        """ Getter function for the current_path attribute. 
+284        
+285        Returns:
+286            - current_path (str): Path to the csv file where we will store the results for the current run.
+287        
+288        """
+289
+290        return self.current_path
+
+ + +
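A short end-to-end sketch of SaverCsv in use (parameter names and metric values are made up; root_dir is left at its './tuning_results' default):

from slune.loggers import LoggerDefault
from slune.savers import SaverCsv

saver = SaverCsv(LoggerDefault(),
                 params=['--learning_rate=0.01', '--batch_size=32'])
saver.log({'mse': 0.2})  # delegated to the LoggerDefault instance
saver.save_collated()    # writes e.g. .../--learning_rate=0.01/--batch_size=32/results_0.csv
# An empty params list searches every run under the root directory
params, value = saver.read(params=[], metric_name='mse', select_by='min')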
+
+ +
+ + class + SaverCsv(slune.base.BaseSaver): + + + +
+ +
 10class SaverCsv(BaseSaver):
+ 11    """ Saves the results of each run in a CSV file in hierarchy of directories.
+ 12     
+ 13    Each directory is named after a parameter - value pair in the form "--parameter_name=value".
+ 14    The paths to csv files then define the configuration under which the results were obtained,
+ 15    for example if we only have one parameter "learning_rate" with value 0.01 used to obtain the results,
+ 16    to save those results we would create a directory named "--learning_rate=0.01" and save the results in a csv file in that directory.
+ 17
+ 18    If we have multiple parameters, for example "learning_rate" with value 0.01 and "batch_size" with value 32,
+ 19    we would create a directory named "--learning_rate=0.01" with a subdirectory named "--batch_size=32",
+ 20    and save the results in a csv file in that subdirectory.
+ 21
+ 22    We use this structure to then read the results from the csv files by searching for the directory that matches the parameters we want,
+ 23    and then reading the csv file in that directory.
+ 24
+ 25    The order in which we create the directories is determined by the order in which the parameters are given,
+ 26    so if we are given ["--learning_rate=0.01", "--batch_size=32"] we would create the directories in the following order:
+ 27    "--learning_rate=0.01/--batch_size=32".
+ 28
+ 29    The directory structure generated will also depend on existing directories in the root directory,
+ 30    if there are existing directories in the root directory that match some subset of the parameters given,
+ 31    we will create the directory tree from the deepest matching directory.
+ 32
+ 33    For example if we only have the following path in the root directory:
+ 34    "--learning_rate=0.01/--batch_size=32"
+ 35    and we are given the parameters ["--learning_rate=0.01", "--batch_size=32", "--num_epochs=10"],
+ 36    we will create the path:
+ 37    "--learning_rate=0.01/--batch_size=32/--num_epochs=10".
+ 38    on the other hand if we are given the parameters ["--learning_rate=0.02", "--num_epochs=10", "--batch_size=32"],
+ 39    we will create the path:
+ 40    "--learning_rate=0.02/--batch_size=32/--num_epochs=10".
+ 41
+ 42    Handles parallel runs trying to create the same directories by waiting a random time (under 1 second) before creating the directory.
+ 43    Should work pretty well in practice; however, it may occasionally fail depending on the number of jobs launched at the same time. 
+ 44
+ 45    Attributes:
+ 46        - root_dir (str): Path to the root directory where we will store the csv files.
+ 47        - current_path (str): Path to the csv file where we will store the results for the current run.
+ 48
+ 49    """
+ 50
+ 51    def __init__(self, logger_instance: BaseLogger, params: List[str] = None, root_dir: Optional[str] = os.path.join('.', 'tuning_results')):
+ 52        """ Initialises the csv saver. 
+ 53
+ 54        Args:
+ 55            - logger_instance (BaseLogger): Instance of a logger class that inherits from BaseLogger.
+ 56            - params (list, optional): List of strings containing the parameters used, in form ["--parameter_name=parameter_value", ...], default is None.
+ 57                If None, we will create a path using the parameters given in the log.
+ 58            - root_dir (str, optional): Path to the root directory where we will store the csv files, default is './tuning_results'.
+ 59        
+ 60        """
+ 61
+ 62        super().__init__(logger_instance)
+ 63        self.root_dir = root_dir
+ 64        if params is not None:
+ 65            self.current_path = self.get_path(params)
+ 66    
+ 67    def strip_params(self, params: List[str]) -> List[str]:
+ 68        """ Strips the parameter values.
+ 69
+ 70        Strips the parameter values from the list of parameters given,
+ 71        e.g. ["--parameter_name=parameter_value", ...] -> ["--parameter_name", ...].
+ 72
+ 73        Also strips surrounding whitespace.
+ 74
+ 75        Args:
+ 76            - params (list of str): List of strings containing the parameters used, in form ["--parameter_name=parameter_value", ...].
+ 77
+ 78        Returns:
+ 79            - stripped_params (list of str): List of strings containing the parameters used, in form ["--parameter_name", ...].
+ 80
+ 81        """
+ 82
+ 83        stripped_params = [p.split('=')[0].strip() for p in params]
+ 84        return stripped_params
+ 85
+ 86    def get_match(self, params: List[str]) -> str:
+ 87        """ Searches the root directory for a path that matches the parameters given.
+ 88
+ 89        If only partial matches are found, returns the deepest matching directory with the missing parameters appended.
+ 90        By deepest we mean the directory with the most parameters matching.
+ 91        If no matches are found creates a path using the parameters.
+ 92        Creates the path using the parameters in the order they are given,
+ 93        e.g. ["--learning_rate=0.01", "--batch_size=32"] -> "--learning_rate=0.01/--batch_size=32".
+ 94
+ 95        If we find a partial match, we add the missing parameters to the end of the path,
+ 96        e.g. if we have the path "--learning_rate=0.01" in the root
+ 97        and are given the parameters ["--learning_rate=0.01", "--batch_size=32"],
+ 98        we will create the path "--learning_rate=0.01/--batch_size=32".
+ 99
+100        Args:
+101            - params (list of str): List of strings containing the arguments used, in form ["--argument_name=argument_value", ...].
+102
+103        Returns:
+104            - match (str): Path to the directory that matches the parameters given.
+105
+106        """
+107
+108        # First check if there is a directory with path matching some subset of the arguments
+109        stripped_params = [p.split('=')[0].strip() +'=' for p in params] # Strip the params of whitespace and everything after the '='
+110        if len(set(stripped_params)) != len(stripped_params):
+111            raise ValueError(f"Duplicate parameters found in {stripped_params}")
+112        match = find_directory_path(stripped_params, root_directory=self.root_dir)
+113        # Add on missing parameters
+114        if match == self.root_dir:
+115            match = os.path.join(*stripped_params)
+116        else:
+117            missing_params = [p for p in stripped_params if p not in match]
+118            if missing_params:
+119                match = [match] + missing_params
+120                match = os.path.join(*match)
+121        # Take the root directory out of the match
+122        match = match.replace(self.root_dir, '')
+123        if match.startswith(os.path.sep):
+124            match = match[1:]
+125        # Now we add back in the values we stripped out
+126        match = match.split(os.path.sep)
+127        match = [[p for p in params if m in p][0] for m in match]
+128        # Check if there is an existing path with the same numerical values, if so use that instead
+129        match = get_numeric_equiv(os.path.join(*match), root_directory=self.root_dir)
+130        return match
+131
+132    def get_path(self, params: List[str]) -> str:
+133        """ Creates a path using the parameters.
+134        
+135        Does this by first checking for existing paths in the root directory that match the parameters given.
+136
+137        Check get_match for how we create the path, 
+138        once we have the path we check if there is already a csv file with results in that path,
+139        if there is we increment the number of the results file name that we will use.
+140
+141        For example if we get back the path "--learning_rate=0.01/--batch_size=32",
+142        and there exists a csv file named "results_0.csv" in the final directory,
+143        we will name our csv file "results_1.csv".
+144
+145        Args:
+146            - params (list of str): List of strings containing the arguments used, in form ["--argument_name=argument_value", ...].
+147
+148        Returns:
+149            - csv_file_path (str): Path to the csv file where we will store the results for the current run.
+150
+151        """
+152
+153        # Check if root directory exists, if not create it
+154        if not os.path.exists(self.root_dir):
+155            time.sleep(random.random()) # Wait a random amount of time under 1 second to avoid multiple processes creating the same directory
+156            os.makedirs(self.root_dir)
+157        # Get path of directory where we should store our csv of results
+158        dir_path = self.get_match(params)
+159        # Check if directory exists, if not create it
+160        if not os.path.exists(dir_path):
+161            csv_file_number = 0
+162        # If it does exist, check if there is already a csv file with results,
+163        # if there is find the name of the last csv file and increment the number
+164        else:
+165            csv_files = [f for f in os.listdir(dir_path) if f.endswith('.csv')]
+166            if len(csv_files) > 0:
+167                # Check that every csv file follows the "results_<number>.csv" naming scheme
+168                if not all(f.startswith('results_') for f in csv_files):
+169                    raise ValueError('Found csv file in directory that doesn\'t start with "results_"')
+170                # Compare run numbers numerically so "results_10.csv" sorts after "results_9.csv"
+171                csv_file_number = max(int(f.split('_')[1][:-4]) for f in csv_files) + 1
+172            else:
+173                csv_file_number = 0
+174        # Create path name for a new csv file where we can later store results
+175        csv_file_path = os.path.join(dir_path, f'results_{csv_file_number}.csv')
+176        return csv_file_path
+177
+178    def save_collated_from_results(self, results: pd.DataFrame):
+179        """ Saves results to csv file.
+180        
+181        If the csv file already exists, 
+182        we append the collated results from the logger to the end of the csv file.
+183        If the csv file does not exist,
+184        we create it and save the results to it.
+185
+186        Args:
+187            - results (pd.DataFrame): Data frame containing the results to be saved.
+188
+189        TODO: 
+190            - Could be making too many assumptions about the format in which we get the results from the logger;
+191            this should work with any logger.
+192            We should only be assuming that we are saving results to a csv file.
+193
+194        """
+195
+196        # If path does not exist, create it
+197        # Remove the csv file name from the path
+198        # (os.path.dirname also handles absolute paths, unlike splitting on os.path.sep)
+199        dir_path = os.path.dirname(self.current_path)
+200        if not os.path.exists(dir_path):
+201            time.sleep(random.random()) # Wait a random amount of time under 1 second to avoid multiple processes creating the same directory
+202            os.makedirs(dir_path)
+203        # If csv file already exists, append results to the end
+204        if os.path.exists(self.current_path):
+205            results = pd.concat([pd.read_csv(self.current_path), results])
+206            results.to_csv(self.current_path, mode='w', index=False)
+207        # If csv file does not exist, create it
+208        else:
+209            results.to_csv(self.current_path, index=False)
+210
+211    def save_collated(self):
+212        """ Saves results to csv file. """
+213
+214        self.save_collated_from_results(self.logger.results)
+215        
+216    def read(self, params: List[str], metric_name: str, select_by: str = 'max', avg: bool = True) -> (List[str], float):
+217        """ Finds the min/max value of a metric from all csv files in the root directory that match the parameters given.
+218
+219        Args:
+220            - params (list of str): Contains the parameters used, in form ["--parameter_name=parameter_value", ...].
+221            - metric_name (string): Name of the metric to be read.
+222            - select_by (string, optional): How to select the 'best' value for the metric from a log file, currently can select by 'min' or 'max'.
+223            - avg (bool, optional): Whether to average the metric over all runs, default is True.
+224
+225        Returns:
+226            - best_params (list of str): Contains the arguments used to get the 'best' value of the metric (determined by select_by).
+227            - best_value (float): Best value of the metric (determined by select_by).
+228
+229        """
+230
+231        #  Get all paths that match the parameters given
+232        paths = get_all_paths(params, root_directory=self.root_dir)
+233        if not paths:
+234            raise ValueError(f"No paths found matching {params}")
+235        # Read the metric from each path
+236        values = {}
+237        # Do averaging for different runs of same params if avg is True, otherwise just read the metric from each path
+238        if avg:
+239            paths_same_params = set([os.path.join(*p.split(os.path.sep)[:-1]) for p in paths])
+240            for path in paths_same_params:
+241                runs = get_all_paths(path.split(os.path.sep), root_directory=self.root_dir)
+242                cumsum = 0
+243                for r in runs:
+244                    df = pd.read_csv(r)
+245                    cumsum += self.read_log(df, metric_name, select_by)
+246                avg_of_runs = cumsum / len(runs)
+247                values[path] = avg_of_runs
+248        else:
+249            for path in paths:
+250                df = pd.read_csv(path)
+251                values[os.path.join(*path.split(os.path.sep)[:-1])] = self.read_log(df, metric_name, select_by)
+252        # Get the key of the min/max value
+253        if select_by == 'min':
+254            best_params = min(values, key=values.get)
+255        elif select_by == 'max':
+256            best_params = max(values, key=values.get)
+257        else:
+258            raise ValueError(f"select_by must be 'min' or 'max', got {select_by}")
+259        # Find the best value of the metric from the key
+260        best_value = values[best_params]
+261        # Format the path into a list of arguments
+262        best_params = best_params.replace(self.root_dir, '')
+263        if best_params.startswith(os.path.sep):
+264            best_params = best_params[1:]
+265        best_params = best_params.split(os.path.sep)
+266        return best_params, best_value       
+267
+268    def exists(self, params: List[str]) -> int:
+269        """ Checks if results already exist in storage.
+270
+271        Args:
+272            - params (list of str): Contains the parameters used, in form ["--parameter_name=parameter_value", ...].
+273
+274        Returns:
+275            - num_runs (int): Number of runs that exist in storage for the given parameters.
+276
+277        """
+278
+279        #  Get all paths that match the parameters given
+280        paths = get_all_paths(params, root_directory=self.root_dir)
+281        return len(paths)
+282
+283    def get_current_path(self) -> str:
+284        """ Getter function for the current_path attribute. 
+285        
+286        Returns:
+287            - current_path (str): Path to the csv file where we will store the results for the current run.
+288        
+289        """
+290
+291        return self.current_path
+
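To make the path construction above concrete, here is a hedged usage sketch. It assumes only the classes shown in this diff, that LoggerDefault.log accepts a dict of metric values, and the metric name "loss" is illustrative.

```python
from slune.loggers.default import LoggerDefault
from slune.savers.csv import SaverCsv

# Results for this configuration end up under something like
# ./tuning_results/--learning_rate=0.01/--batch_size=32/results_0.csv
saver = SaverCsv(LoggerDefault(), params=["--learning_rate=0.01", "--batch_size=32"])
saver.log({"loss": 0.42})  # log() is inherited from BaseSaver and forwards to the logger
saver.save_collated()      # writes the collated results to the csv for this run
print(saver.get_current_path())
```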
+SaverCsv(logger_instance: slune.base.BaseLogger, params: Optional[List[str]] = None, root_dir: Optional[str] = './tuning_results')
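Continuing the sketch above, the constructor can also point somewhere other than the default root; "my_results" is an illustrative value.

```python
saver = SaverCsv(LoggerDefault(), params=["--learning_rate=0.01"], root_dir="my_results")
# Per the docstring, with params=None the path is instead derived later
# from the parameters found in the log.
```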
+def strip_params(self, params: List[str]) -> List[str]:
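Reusing the saver constructed above, a doctest-style sketch of the stripping: the values and any surrounding whitespace are dropped.

```python
saver.strip_params(["--learning_rate=0.01 ", "--batch_size=32"])
# -> ["--learning_rate", "--batch_size"]
```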
+def get_match(self, params: List[str]) -> str:
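A sketch of the deepest-match behaviour described in the listing; the on-disk tree is assumed.

```python
# Assume --learning_rate=0.01/--batch_size=32 already exists under saver.root_dir.
match = saver.get_match(["--learning_rate=0.01", "--batch_size=32", "--num_epochs=10"])
# The existing directories are reused and the missing parameter is appended, so match
# ends in --learning_rate=0.01/--batch_size=32/--num_epochs=10.
```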
+def get_path(self, params: List[str]) -> str:
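How the run-file numbering plays out, again reusing the saver from above:

```python
path = saver.get_path(["--learning_rate=0.01", "--batch_size=32"])
# First run in a fresh directory -> .../results_0.csv; if results_0.csv and
# results_1.csv already exist for this configuration -> .../results_2.csv.
```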
+def save_collated_from_results(self, results: pandas.core.frame.DataFrame):
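A sketch of the create-or-append semantics with a hand-made frame; the column names are illustrative.

```python
import pandas as pd

results = pd.DataFrame({"epoch": [0, 1], "loss": [0.9, 0.5]})
saver.save_collated_from_results(results)  # creates the csv, or appends if it already exists
```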
+def save_collated(self):
+def read(self, params: List[str], metric_name: str, select_by: str = 'max', avg: bool = True) -> (List[str], float):
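Querying the stored results, under the same assumed directory tree; "loss" is an illustrative metric name.

```python
best_params, best_value = saver.read(
    params=["--batch_size=32"],  # fix batch size, compare everything saved under it
    metric_name="loss",
    select_by="min",             # lowest logged loss wins
    avg=True,                    # average repeated runs of the same configuration
)
```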
+def exists(self, params: List[str]) -> int:
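Note that exists returns a count of matching runs rather than a boolean:

```python
num_runs = saver.exists(["--learning_rate=0.01", "--batch_size=32"])
if num_runs >= 3:
    print("already have 3 runs for this configuration, skipping")
```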
+def get_current_path(self) -> str:
Inherited Members:
+    slune.base.BaseSaver: logger, log, read_log
+ + \ No newline at end of file diff --git a/docs/.html/src/slune/searchers.html b/docs/.html/src/slune/searchers.html new file mode 100644 index 0000000..95b7faf --- /dev/null +++ b/docs/.html/src/slune/searchers.html @@ -0,0 +1,244 @@ + + + + + + + src.slune.searchers API documentation + + + + + + + + + +
+src.slune.searchers
1from .grid import SearcherGrid
+2
+3# __all__ = ['SearcherGrid']
+
+ + \ No newline at end of file diff --git a/docs/.html/src/slune/searchers/grid.html b/docs/.html/src/slune/searchers/grid.html new file mode 100644 index 0000000..f7b4d7d --- /dev/null +++ b/docs/.html/src/slune/searchers/grid.html @@ -0,0 +1,982 @@ + + + + + + + src.slune.searchers.grid API documentation + + + + + + + + + +
+src.slune.searchers.grid
  1from typing import List, Tuple
+  2from slune.base import BaseSearcher, BaseSaver
+  3from slune.utils import dict_to_strings
+  4
+  5class SearcherGrid(BaseSearcher):
+  6    """ Searcher for grid search.
+  7    
+  8    Given a dictionary of parameters and values to try, creates a grid of all possible configurations,
+  9    and returns them one by one for each call to next_tune.
+ 10
+ 11    Attributes:
+ 12        - configs (dict): Parameters and values to create grid from.
+ 13            Structure of dictionary should be: { "--parameter_name" : [Value_1, Value_2, ...], ... }
+ 14        - runs (int): Controls the search based on the number of runs we want for each config.
+ 15            if runs > 0 -> run each config 'runs' times.
+ 16            if runs = 0 -> run each config once even if it already exists.
+ 17            This behavior is modified if check_existing_runs is used; see that method's description.
+ 18        - grid (list of dict): List of dictionaries, each containing one combination of argument values.
+ 19        - grid_index (int): Index of the current configuration in the grid.
+ 20        - saver_exists (function): Pointer to the saver's exists method, used to check if there are existing runs.
+ 21
+ 22    """
+ 23
+ 24    def __init__(self, configs: dict, runs: int = 0):
+ 25        """ Initialises the searcher.
+ 26
+ 27        Args:
+ 28            - configs (dict): Dictionary of parameters and values to try.
+ 29                Structure of dictionary should be: { "--parameter_name" : [Value_1, Value_2, ...], ... }
+ 30            - runs (int, optional): Controls the search based on the number of runs we want for each config.
+ 31                if runs > 0 -> run each config 'runs' times.
+ 32                if runs = 0 -> run each config once even if it already exists.
+ 33                This behavior is modified if check_existing_runs is used; see that method's description.
+ 34
+ 35        """
+ 36
+ 37        super().__init__()
+ 38        self.runs = runs
+ 39        self.configs = configs
+ 40        self.grid = self.get_grid(configs)
+ 41        self.grid_index = None
+ 42        self.saver_exists = None
+ 43
+ 44    def __len__(self):
+ 45        """ Returns the number of configurations defined by search space. 
+ 46        
+ 47        This may not be accurate if we want to (use) check_existing_runs,
+ 48        as we may skip configurations, 
+ 49        see methods description.
+ 50
+ 51        Returns:
+ 52            - num_runs (int): Number of runs defined by the search space.
+ 53
+ 54        """
+ 55        # runs = 0 still means each config is run once, so count at least one run per config
+ 56        return len(self.grid) * max(self.runs, 1)
+ 57
+ 58    def get_grid(self, param_dict: dict) -> List:
+ 59        """ Creates search grid.
+ 60        
+ 61        Generates all possible combinations of values for each argument in the given dictionary using recursion.
+ 62
+ 63        Args:
+ 64            - param_dict (dict): A dictionary where keys are argument names and values are lists of values.
+ 65
+ 66        Returns:
+ 67            - all_combinations (list): A list of dictionaries, each containing one combination of argument values.
+ 68        
+ 69        """
+ 70
+ 71        # Helper function to recursively generate combinations
+ 72        def generate_combinations(param_names, current_combination, all_combinations):
+ 73            if not param_names:
+ 74                # If there are no more parameters to combine, add the current combination to the result
+ 75                all_combinations.append(dict(current_combination))
+ 76                return
+ 77
+ 78            param_name = param_names[0]
+ 79            param_values = param_dict[param_name]
+ 80
+ 81            for value in param_values:
+ 82                current_combination[param_name] = value
+ 83                # Recursively generate combinations for the remaining parameters
+ 84                generate_combinations(param_names[1:], current_combination, all_combinations)
+ 85
+ 86        # Start with an empty combination and generate all combinations
+ 87        all_combinations = []
+ 88        generate_combinations(list(param_dict.keys()), {}, all_combinations)
+ 89
+ 90        return all_combinations
+ 91
+ 92    def check_existing_runs(self, saver: BaseSaver):
+ 93        """ We save a pointer to the savers exists method to check if there are existing runs.
+ 94
+ 95        If there are n existing runs:
+ 96            n < runs -> run the remaining runs
+ 97            n >= runs -> skip all runs
+ 98        
+ 99        Args:
+100            - saver (BaseSaver): Saver instance whose exists method is used to check for existing runs.
+101
+102        """
+103
+104        if self.runs != 0:
+105            self.saver_exists = saver.exists
+106        else:
+107            raise ValueError("Won't check for existing runs if runs = 0; set runs > 0.")
+108    
+109    def skip_existing_runs(self, grid_index: int) -> Tuple[int, int]:
+110        """ Skips runs if they are in storage already.
+111        
+112        Will check if there are existing runs for the current configuration,
+113        if there are existing runs we tally them up 
+114        and skip configs or runs of a config based on the number of runs we want for each config.
+115
+116        Args:
+117            - grid_index (int): Index of the current configuration in the grid.
+118
+119        Returns:
+120            - grid_index (int): Index of the next configuration in the grid.
+121            - run_index (int): Index of the next run for the current configuration.
+122        """
+123        # Check for the end of the grid before indexing so we raise a helpful error
+124        if grid_index == len(self.grid):
+125            raise IndexError('Reached end of grid, no more configurations to try.')
+126        if self.saver_exists is not None:
+127            # Check if there are existing runs, if so skip them
+128            existing_runs = self.saver_exists(dict_to_strings(self.grid[grid_index]))
+129            if self.runs - existing_runs > 0:
+130                run_index = existing_runs
+131                return grid_index, run_index
+132            # All runs for this config already exist, move on to the next config
+133            grid_index += 1
+134            return self.skip_existing_runs(grid_index)
+135        else:
+136            return grid_index, 0
+137
+138    def next_tune(self) -> List[str]:
+139        """ Returns the next configuration to try.
+140
+141        Will skip existing runs if check_existing_runs has been called.
+142        For more information on how this works check the methods descriptions for check_existing_runs and skip_existing_runs.
+143        Will raise an error if we have reached the end of the grid.
+144        To iterate through all configurations, use a for loop like so: 
+145            for config in searcher: ...
+146            
+147        Returns:
+148            - next_config (list of str): The next configuration to try, in the form ["--parameter_name=value", ...].
+149        """
+150        # If this is the first call to next_tune, set grid_index to 0
+151        if self.grid_index is None:
+152            self.grid_index = 0
+153            self.grid_index, self.run_index = self.skip_existing_runs(self.grid_index)
+154        elif self.run_index < self.runs - 1:
+155            self.run_index += 1
+156        else:
+157            self.grid_index += 1
+158            self.grid_index, self.run_index = self.skip_existing_runs(self.grid_index)
+159        # If we have reached the end of the grid, raise an error
+160        if self.grid_index == len(self.grid):
+161            raise IndexError('Reached end of grid, no more configurations to try.')
+162        # Return the next configuration to try
+163        next_config = dict_to_strings(self.grid[self.grid_index])
+164        return next_config
+
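A hedged sketch of driving the class above; the parameter names and values are illustrative.

```python
from slune.searchers.grid import SearcherGrid

searcher = SearcherGrid({"--learning_rate": [0.01, 0.1], "--batch_size": [32]}, runs=2)
len(searcher)         # 4: two configurations, two runs each
searcher.next_tune()  # e.g. ["--learning_rate=0.01", "--batch_size=32"]
```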
+class SearcherGrid(slune.base.BaseSearcher):
+SearcherGrid(configs: dict, runs: int = 0)
+def get_grid(self, param_dict: dict) -> List:
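The recursive expansion in the listing above behaves like a Cartesian product; a doctest-style sketch:

```python
searcher.get_grid({"--a": [1, 2], "--b": ["x"]})
# -> [{'--a': 1, '--b': 'x'}, {'--a': 2, '--b': 'x'}]
```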
+def check_existing_runs(self, saver: slune.base.BaseSaver):
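Wiring a saver in so that finished work is skipped; SaverCsv and LoggerDefault are the classes from earlier in this diff.

```python
from slune.loggers.default import LoggerDefault
from slune.savers.csv import SaverCsv

searcher = SearcherGrid({"--learning_rate": [0.01, 0.1]}, runs=3)
searcher.check_existing_runs(SaverCsv(LoggerDefault()))
# next_tune() will now resume partially finished configs and skip completed ones
```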
+def skip_existing_runs(self, grid_index: int) -> Tuple[int, int]:
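A worked trace of the skipping logic, assuming the wiring above with runs=3:

```python
# If 2 of the 3 wanted runs of config 0 are already in storage, resume at run index 2:
searcher.skip_existing_runs(0)  # -> (0, 2)
# If all 3 already exist, config 0 is skipped and config 1 is checked next.
```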
110    def skip_existing_runs(self, grid_index: int) -> Tuple[int, int]:
+111        """ Skips runs if they are in storage already.
+112        
+113        Will check if there are existing runs for the current configuration,
+114        if there are existing runs we tally them up 
+115        and skip configs or runs of a config based on the number of runs we want for each config.
+116
+117        Args:
+118            - grid_index (int): Index of the current configuration in the grid.
+119
+120        Returns:
+121            - grid_index (int): Index of the next configuration in the grid.
+122            - run_index (int): Index of the next run for the current configuration.
+123        """
+124        if self.saver_exists != None:
+125            # Check if there are existing runs, if so skip them
+126            existing_runs = self.saver_exists(dict_to_strings(self.grid[grid_index]))
+127            if self.runs - existing_runs > 0:
+128                run_index = existing_runs
+129                return grid_index, run_index
+130            else:
+131                grid_index += 1
+132                run_index = 0
+133                return self.skip_existing_runs(grid_index)
+134        else:
+135            if grid_index == len(self.grid):
+136                raise IndexError('Reached end of grid, no more configurations to try.')
+137            return grid_index, 0
+
Skips runs if they are in storage already.

Will check if there are existing runs for the current configuration; if there are, we tally them up and skip configs or runs of a config based on the number of runs we want for each config.

Arguments:
  - grid_index (int): Index of the current configuration in the grid.

Returns:
  - grid_index (int): Index of the next configuration in the grid.
  - run_index (int): Index of the next run for the current configuration.
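A hypothetical trace of the tallying logic, with invented numbers:

    # Illustrative trace only; the values are made up.
    runs = 3             # desired runs per configuration
    existing_runs = 2    # what saver_exists reports for the current config
    grid_index, run_index = 0, 0
    if runs - existing_runs > 0:
        run_index = existing_runs    # resume at the first missing run
    else:
        grid_index += 1              # config already complete, move on
        run_index = 0
    print(grid_index, run_index)     # -> 0 2: resume the third run of config 0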
def next_tune(self) -> dict:
139    def next_tune(self) -> dict:
+140        """ Returns the next configuration to try.
+141
+142        Will skip existing runs if check_existing_runs has been called.
+143        For more information on how this works check the methods descriptions for check_existing_runs and skip_existing_runs.
+144        Will raise an error if we have reached the end of the grid.
+145        To iterate through all configurations, use a for loop like so: 
+146            for config in searcher: ...
+147            
+148        Returns:
+149            - next_config (dict): The next configuration to try.
+150        """
+151        # If this is the first call to next_tune, set grid_index to 0
+152        if self.grid_index is None:
+153            self.grid_index = 0
+154            self.grid_index, self.run_index = self.skip_existing_runs(self.grid_index)
+155        elif self.run_index < self.runs - 1:
+156            self.run_index += 1
+157        else:
+158            self.grid_index += 1
+159            self.grid_index, self.run_index = self.skip_existing_runs(self.grid_index)
+160        # If we have reached the end of the grid, raise an error
+161        if self.grid_index == len(self.grid):
+162            raise IndexError('Reached end of grid, no more configurations to try.')
+163        # Return the next configuration to try
+164        next_config = dict_to_strings(self.grid[self.grid_index])
+165        return next_config
+
Returns the next configuration to try.

Will skip existing runs if check_existing_runs has been called.
For more information on how this works, check the method descriptions for check_existing_runs and skip_existing_runs.
Will raise an error if we have reached the end of the grid.
To iterate through all configurations, use a for loop like so:
    for config in searcher: ...

Returns:
  - next_config (dict): The next configuration to try.
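An iteration sketch (the constructor arguments are assumed). Note that, per the source above, each yielded config comes from dict_to_strings, i.e. a list of '--key=value' strings:

    from slune.searchers.grid import SearcherGrid

    # Assumed constructor signature, for illustration only.
    searcher = SearcherGrid({'--lr': [0.01, 0.1], '--epochs': [10]}, runs=1)
    for config in searcher:
        print(config)  # e.g. ['--lr=0.01', '--epochs=10']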
\ No newline at end of file
diff --git a/docs/.html/src/slune/slune.html b/docs/.html/src/slune/slune.html
new file mode 100644
index 0000000..668107c
--- /dev/null
+++ b/docs/.html/src/slune/slune.html
@@ -0,0 +1,614 @@
+src.slune.slune API documentation
+src.slune.slune
  1from typing import List, Optional, Union
+  2from slune.base import BaseSearcher, BaseSaver
+  3import subprocess
+  4import sys
+  5from slune.savers.csv import SaverCsv
+  6from slune.loggers.default import LoggerDefault
+  7
+  8def submit_job(sh_path: str, args: List[str]):
+  9    """ Submits a job using specified Bash script
+ 10
+ 11    Args:
+ 12        - sh_path (string): Path to the Bash script to be run.
+ 13
+ 14        - args (list of str): List of strings containing the arguments to be passed to the Bash script.
+ 15    
+ 16    """
+ 17
+ 18    try:
+ 19        # Run the Bash script using subprocess
+ 20        command = [sh_path] + args
+ 21        subprocess.run(['sbatch'] + command, check=True)
+ 22    except subprocess.CalledProcessError as e:
+ 23        print(f"Error running sbatch: {e}")
+ 24
+ 25def sbatchit(script_path: str, sbatch_path: str, searcher: BaseSearcher, cargs: Optional[List]=[], saver: Optional[BaseSaver]=None):
+ 26    """ Submits jobs based on arguments given by searcher.
+ 27
+ 28    For each job runs the script stored at script_path with selected parameter values given by searcher
+ 29    and the arguments given by cargs.
+ 30
+ 31    Uses the sbatch script with path sbatch_path to submit each job to the cluster. 
+ 32
+ 33    If given a Saver object, uses it to check if there are existing runs for each job and skips them,
+ 34    based on the number of runs we would like for each job (which is stored in the saver).
+ 35
+ 36    Args:
+ 37        - script_path (str): Path to the script (of the model) to be run for each job.
+ 38
+ 39        - sbatch_path (str): Path to the sbatch script that will be used to submit each job.
+ 40            Examples of sbatch scripts can be found in the templates folder.
+ 41
+ 42        - searcher (Searcher): Searcher object used to retrieve changing arguments for each job.
+ 43
+ 44        - cargs (list, optional): Contains arguments to be passed to the script for every job.
+ 45
+ 46        - saver (Saver, optional): Saver object used if we want to check if there are existing runs so we don't rerun.
+ 47            Can simply not give a Saver object if you want to rerun all jobs.
+ 48
+ 49    """
+ 50
+ 51    if saver != None:
+ 52        searcher.check_existing_runs(saver)
+ 53    # Create sbatch script for each job
+ 54    for args in searcher:
+ 55        # Submit job
+ 56        submit_job(sbatch_path, [script_path] + cargs + args)
+ 57
+ 58def lsargs() -> (str, List[str]):
+ 59    """ Returns the script name and a list of the arguments passed to the script."""
+ 60    args = sys.argv
+ 61    return args[0], args[1:]
+ 62
+ 63def garg(args: List[str], arg_names: Union[str, List[str]]) -> Union[str, List[str]]:
+ 64    """ Finds the argument/s with name arg_names in the list of arguments args_ls and returns its value/s.
+ 65    
+ 66    Args:
+ 67        - args (list of str): List of strings containing the arguments to be searched.
+ 68
+ 69        - arg_names (str or list of str): String or list of strings containing the names of the arguments to be searched for.       
+ 70
+ 71    Returns:
+ 72        - arg_value (str or list of str): String or list of strings containing the values of the arguments found.
+ 73
+ 74    """
+ 75
+ 76    def single_garg(arg_name):
+ 77        # Check if arg_name is a string
+ 78        if type(arg_name) != str:
+ 79            raise TypeError(f"arg_name must be a string, got {type(arg_name)}")
+ 80        # Find index of argument
+ 81        arg_index = [i for i, arg in enumerate(args) if arg_name in arg]
+ 82        # Return value error if argument not found
+ 83        if not arg_index:
+ 84            raise ValueError(f"Argument {arg_name} not found in arguments {args}")
+ 85        # Return value of argument
+ 86        if len(arg_index) > 1:
+ 87            raise ValueError(f"Multiple arguments with name {arg_name} found in arguments {args}")
+ 88        return args[arg_index[0]].split("=")[1]
+ 89    if type(arg_names) == list:
+ 90        return [single_garg(arg_name) for arg_name in arg_names]
+ 91    else:
+ 92        return single_garg(arg_names)
+ 93
+ 94def get_csv_slog(params: Optional[dict]= None, root_dir: Optional[str]='slune_results') -> BaseSaver:
+ 95    """ Returns a SaverCsv object with the given parameters and root directory.
+ 96
+ 97    Args:
+ 98        - params (dict, optional): Dictionary of parameters to be passed to the SaverCsv object, default is None.
+ 99
+100        - root_dir (str, optional): Path to the root directory to be used by the SaverCsv object, default is 'slune_results'.
+101
+102    Returns:
+103        - SaverCsv (Saver): Saver object with the given parameters and root directory.
+104            Initialized with a LoggerDefault object as its logger.
+105    
+106    """
+107
+108    return SaverCsv(LoggerDefault(), params = params, root_dir=root_dir)
+
def submit_job(sh_path: str, args: List[str]):
 9def submit_job(sh_path: str, args: List[str]):
+10    """ Submits a job using specified Bash script
+11
+12    Args:
+13        - sh_path (string): Path to the Bash script to be run.
+14
+15        - args (list of str): List of strings containing the arguments to be passed to the Bash script.
+16    
+17    """
+18
+19    try:
+20        # Run the Bash script using subprocess
+21        command = [sh_path] + args
+22        subprocess.run(['sbatch'] + command, check=True)
+23    except subprocess.CalledProcessError as e:
+24        print(f"Error running sbatch: {e}")
+
Submits a job using the specified Bash script.

Arguments:
  - sh_path (string): Path to the Bash script to be run.
  - args (list of str): List of strings containing the arguments to be passed to the Bash script.
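A minimal sketch with a placeholder script path; per the source above, this runs `sbatch run.sh --lr=0.01` under the hood:

    from slune import submit_job

    # 'run.sh' is a placeholder sbatch script, not part of this diff.
    submit_job('run.sh', ['--lr=0.01'])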
def sbatchit(script_path: str, sbatch_path: str, searcher: slune.base.BaseSearcher, cargs: Optional[List] = [], saver: Optional[slune.base.BaseSaver] = None):
26def sbatchit(script_path: str, sbatch_path: str, searcher: BaseSearcher, cargs: Optional[List]=[], saver: Optional[BaseSaver]=None):
+27    """ Submits jobs based on arguments given by searcher.
+28
+29    For each job runs the script stored at script_path with selected parameter values given by searcher
+30    and the arguments given by cargs.
+31
+32    Uses the sbatch script with path sbatch_path to submit each job to the cluster. 
+33
+34    If given a Saver object, uses it to check if there are existing runs for each job and skips them,
+35    based on the number of runs we would like for each job (which is stored in the saver).
+36
+37    Args:
+38        - script_path (str): Path to the script (of the model) to be run for each job.
+39
+40        - sbatch_path (str): Path to the sbatch script that will be used to submit each job.
+41            Examples of sbatch scripts can be found in the templates folder.
+42
+43        - searcher (Searcher): Searcher object used to retrieve changing arguments for each job.
+44
+45        - cargs (list, optional): Contains arguments to be passed to the script for every job.
+46
+47        - saver (Saver, optional): Saver object used if we want to check if there are existing runs so we don't rerun.
+48            Can simply not give a Saver object if you want to rerun all jobs.
+49
+50    """
+51
+52    if saver != None:
+53        searcher.check_existing_runs(saver)
+54    # Create sbatch script for each job
+55    for args in searcher:
+56        # Submit job
+57        submit_job(sbatch_path, [script_path] + cargs + args)
+
Submits jobs based on arguments given by searcher.

For each job, runs the script stored at script_path with parameter values selected by the searcher and the arguments given by cargs.

Uses the sbatch script at sbatch_path to submit each job to the cluster.

If given a Saver object, uses it to check if there are existing runs for each job and skips them, based on the number of runs we would like for each job (which is stored in the saver).

Arguments:
  - script_path (str): Path to the script (of the model) to be run for each job.
  - sbatch_path (str): Path to the sbatch script that will be used to submit each job. Examples of sbatch scripts can be found in the templates folder.
  - searcher (Searcher): Searcher object used to retrieve changing arguments for each job.
  - cargs (list, optional): Arguments to be passed to the script for every job.
  - saver (Saver, optional): Saver object used to check for existing runs so we don't rerun them. Simply omit the Saver object if you want to rerun all jobs.
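A usage sketch; the paths, the search space, and the SearcherGrid signature are assumptions:

    from slune import sbatchit, get_csv_slog
    from slune.searchers.grid import SearcherGrid

    searcher = SearcherGrid({'--lr': [0.01, 0.1]}, runs=1)  # assumed constructor
    sbatchit(
        'model.py',                 # placeholder model script
        'template.sh',              # placeholder sbatch template
        searcher,
        cargs=['--dataset=mnist'],  # fixed args for every job, illustrative
        saver=get_csv_slog(),       # omit to rerun everything
    )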
def lsargs() -> (str, List[str]):
59def lsargs() -> (str, List[str]):
+60    """ Returns the script name and a list of the arguments passed to the script."""
+61    args = sys.argv
+62    return args[0], args[1:]
+
Returns the script name and a list of the arguments passed to the script.
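Inside the submitted script, recovering the arguments is a one-liner (the example values are invented):

    from slune import lsargs

    script_name, args = lsargs()  # e.g. ('model.py', ['--lr=0.01', '--dataset=mnist'])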
def garg(args: List[str], arg_names: Union[str, List[str]]) -> Union[str, List[str]]:
64def garg(args: List[str], arg_names: Union[str, List[str]]) -> Union[str, List[str]]:
+65    """ Finds the argument/s with name arg_names in the list of arguments args_ls and returns its value/s.
+66    
+67    Args:
+68        - args (list of str): List of strings containing the arguments to be searched.
+69
+70        - arg_names (str or list of str): String or list of strings containing the names of the arguments to be searched for.       
+71
+72    Returns:
+73        - arg_value (str or list of str): String or list of strings containing the values of the arguments found.
+74
+75    """
+76
+77    def single_garg(arg_name):
+78        # Check if arg_name is a string
+79        if type(arg_name) != str:
+80            raise TypeError(f"arg_name must be a string, got {type(arg_name)}")
+81        # Find index of argument
+82        arg_index = [i for i, arg in enumerate(args) if arg_name in arg]
+83        # Return value error if argument not found
+84        if not arg_index:
+85            raise ValueError(f"Argument {arg_name} not found in arguments {args}")
+86        # Return value of argument
+87        if len(arg_index) > 1:
+88            raise ValueError(f"Multiple arguments with name {arg_name} found in arguments {args}")
+89        return args[arg_index[0]].split("=")[1]
+90    if type(arg_names) == list:
+91        return [single_garg(arg_name) for arg_name in arg_names]
+92    else:
+93        return single_garg(arg_names)
+
Finds the argument(s) with name arg_names in the list of arguments args and returns its value(s).

Arguments:
  - args (list of str): List of strings containing the arguments to be searched.
  - arg_names (str or list of str): String or list of strings containing the names of the arguments to be searched for.

Returns:
  - arg_value (str or list of str): String or list of strings containing the values of the arguments found.
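A sketch combining lsargs and garg (the argument names are invented):

    from slune import lsargs, garg

    script_name, args = lsargs()                   # args e.g. ['--lr=0.01', '--epochs=10']
    lr = garg(args, '--lr')                        # single value, e.g. '0.01'
    lr, epochs = garg(args, ['--lr', '--epochs'])  # several values at once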
def get_csv_slog(params: Optional[dict] = None, root_dir: Optional[str] = 'slune_results') -> slune.base.BaseSaver:
 95def get_csv_slog(params: Optional[dict]= None, root_dir: Optional[str]='slune_results') -> BaseSaver:
+ 96    """ Returns a SaverCsv object with the given parameters and root directory.
+ 97
+ 98    Args:
+ 99        - params (dict, optional): Dictionary of parameters to be passed to the SaverCsv object, default is None.
+100
+101        - root_dir (str, optional): Path to the root directory to be used by the SaverCsv object, default is 'slune_results'.
+102
+103    Returns:
+104        - SaverCsv (Saver): Saver object with the given parameters and root directory.
+105            Initialized with a LoggerDefault object as its logger.
+106    
+107    """
+108
+109    return SaverCsv(LoggerDefault(), params = params, root_dir=root_dir)
+
Returns a SaverCsv object with the given parameters and root directory.

Arguments:
  - params (dict, optional): Dictionary of parameters to be passed to the SaverCsv object, default is None.
  - root_dir (str, optional): Path to the root directory to be used by the SaverCsv object, default is 'slune_results'.

Returns:
  - SaverCsv (Saver): Saver object with the given parameters and root directory, initialized with a LoggerDefault object as its logger.
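A construction sketch; how the returned saver is then used for logging is not shown in this diff, so only the wiring is illustrated:

    from slune import get_csv_slog

    slog = get_csv_slog(params=None, root_dir='slune_results')
    # e.g. pass slog as the saver argument to sbatchit so existing runs are skipped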
\ No newline at end of file
diff --git a/docs/.html/src/slune/utils.html b/docs/.html/src/slune/utils.html
new file mode 100644
index 0000000..5a28ba6
--- /dev/null
+++ b/docs/.html/src/slune/utils.html
@@ -0,0 +1,721 @@
+src.slune.utils API documentation
+src.slune.utils
  1import os
+  2from typing import List, Optional, Tuple
+  3
+  4def find_directory_path(strings: List[str], root_directory: Optional[str]='.') -> Tuple[int, str]:
+  5    """ Searches the root directory for a path of directories that matches the strings given in any order.
+  6    If only a partial match is found, returns the deepest matching path.
+  7    If no matches are found returns root_directory.
+  8    Returns a stripped matching path of directories, ie. where we convert '--string=value' to '--string='.
+  9
+ 10    Args:
+ 11        - strings (list of str): List of strings to be matched in any order. Each string in list must be in the form '--string='.
+ 12        - root_directory (string, optional): Path to the root directory to be searched, default is current working directory.
+ 13    
+ 14    Returns:
+ 15        - max_depth (int): Depth of the deepest matching path.
+ 16        - max_path (string): Path of the deepest matching path.
+ 17    
+ 18    """
+ 19
+ 20    def _find_directory_path(curr_strings, curr_root, depth, max_depth, max_path):
+ 21        dir_list = [entry.name for entry in os.scandir(curr_root) if entry.is_dir()]
+ 22        stripped_dir_list = [d.split('=')[0].strip() +"=" for d in dir_list]
+ 23        stripped_dir_list = list(set(stripped_dir_list))
+ 24        for string in curr_strings:
+ 25            if string in stripped_dir_list:
+ 26                dir_list = [d for d in dir_list if d.startswith(string)]
+ 27                for d in dir_list:
+ 28                    new_depth, new_path = _find_directory_path([s for s in curr_strings if s != string], os.path.join(curr_root, d), depth + 1, max_depth, max_path)
+ 29                    if new_depth > max_depth:
+ 30                        max_depth, max_path = new_depth, new_path
+ 31        if depth > max_depth:
+ 32            max_depth, max_path = depth, curr_root
+ 33        return max_depth, max_path
+ 34
+ 35    max_depth, max_path = _find_directory_path(strings, root_directory, 0, -1, '')
+ 36    if max_depth > 0:
+ 37        max_path = max_path[len(root_directory):]
+ 38        dirs = max_path[1:].split(os.path.sep)
+ 39        dirs = [d.split('=')[0].strip() +"=" for d in dirs]
+ 40        max_path = os.path.join(*dirs)
+ 41        max_path = os.path.join(root_directory, max_path)
+ 42    return max_path
+ 43
+ 44def get_numeric_equiv(og_path: str, root_directory: Optional[str]='.') -> str:
+ 45    """ Replaces directories in path with existing directories with the same numerical value.
+ 46
+ 47    Args:
+ 48        - og_path (str): Path we want to check against existing paths, must be a subdirectory of root_directory and each directory must have form '--string=value'.
+ 49        - root_directory (str, optional): Path to the root directory to be searched, default is current working directory.
+ 50    
+ 51    Returns:
+ 52        - equiv (str): Path with values changed to match existing directories if values are numerically equivalent, with root directory at beginning.
+ 53
+ 54    """
+ 55
+ 56    def is_numeric(s):
+ 57        try:
+ 58            float(s)
+ 59            return True
+ 60        except ValueError:
+ 61            return False
+ 62
+ 63    dirs = og_path.split(os.path.sep)
+ 64    equiv = root_directory
+ 65    for d in dirs:
+ 66        next_dir = os.path.join(equiv, d)
+ 67        if os.path.exists(next_dir):
+ 68            equiv = next_dir
+ 69        else:
+ 70            # If the directory doesn't exist, check if there's a directory with the same numerical value
+ 71            dir_value = d.split('=')[1]
+ 72            if is_numeric(dir_value):
+ 73                dir_value = float(dir_value)
+ 74                if os.path.exists(equiv):
+ 75                    existing_dirs = [entry.name for entry in os.scandir(equiv) if entry.is_dir()]
+ 76                    for existing_dir in existing_dirs:
+ 77                        existing_dir_value = existing_dir.split('=')[1]
+ 78                        if is_numeric(existing_dir_value) and float(existing_dir_value) == dir_value:
+ 79                            equiv = os.path.join(equiv, existing_dir)
+ 80                            break
+ 81                    # If there is no directory with the same numerical value 
+ 82                    # we just keep the directory as is and move on to the next one
+ 83                    else:
+ 84                        equiv = next_dir
+ 85                else:
+ 86                    # If the directory doesn't exist we just keep the directory as is and move on to the next one
+ 87                    equiv = next_dir
+ 88            # Otherwise we just keep the directory as is and move on to the next one
+ 89            else:
+ 90                equiv = next_dir
+ 91    return equiv
+ 92
+ 93def dict_to_strings(d: dict) -> List[str]:
+ 94    """ Converts a dictionary into a list of strings in the form of '--key=value'.
+ 95
+ 96    Args:
+ 97        - d (dict): Dictionary to be converted.
+ 98
+ 99    Returns:
+100        - out (list of str): List of strings in the form of '--key=value'.
+101
+102    """
+103
+104    out = []
+105    for key, value in d.items():
+106        if key.startswith('--'):
+107            out.append('{}={}'.format(key, value))
+108        else:
+109            out.append('--{}={}'.format(key, value))
+110    return out
+111
+112def find_csv_files(root_directory: Optional[str]='.') -> List[str]:
+113    """ Recursively finds all csv files in all subdirectories of the root directory and returns their paths.
+114
+115    Args:
+116        - root_directory (str, optional): Path to the root directory to be searched, default is current working directory.
+117
+118    Returns:
+119        - csv_files (list of str): List of strings containing the paths to all csv files found.
+120
+121    """
+122    csv_files = []
+123    for root, dirs, files in os.walk(root_directory):
+124        for file in files:
+125            if file.endswith('.csv'):
+126                csv_files.append(os.path.join(root, file))
+127    return csv_files
+128
+129def get_all_paths(dirs: List[str], root_directory: Optional[str]='.') -> List[str]:
+130    """ Find all possible paths of csv files that have directory matching one of each of all the parameters given.
+131    
+132    Finds all paths of csv files in all subdirectories of the root directory that have a directory in their path matching one of each of all the parameters given.
+133
+134    Args:
+135        - dirs (list of str): List of directory names we want returned paths to have in their path.
+136        - root_directory (str, optional): Path to the root directory to be searched, default is current working directory.
+137
+138    Returns:
+139        - matches (list of str): List of strings containing the paths to all csv files found.
+140
+141    """
+142
+143    all_csv = find_csv_files(root_directory)
+144    matches = []
+145    for csv in all_csv:
+146        path = csv.split(os.path.sep)
+147        if all([p in path for p in dirs]):
+148            matches.append(csv)
+149    return matches
+
def find_directory_path(strings: List[str], root_directory: Optional[str] = '.') -> Tuple[int, str]:
 5def find_directory_path(strings: List[str], root_directory: Optional[str]='.') -> Tuple[int, str]:
+ 6    """ Searches the root directory for a path of directories that matches the strings given in any order.
+ 7    If only a partial match is found, returns the deepest matching path.
+ 8    If no matches are found returns root_directory.
+ 9    Returns a stripped matching path of directories, ie. where we convert '--string=value' to '--string='.
+10
+11    Args:
+12        - strings (list of str): List of strings to be matched in any order. Each string in list must be in the form '--string='.
+13        - root_directory (string, optional): Path to the root directory to be searched, default is current working directory.
+14    
+15    Returns:
+16        - max_depth (int): Depth of the deepest matching path.
+17        - max_path (string): Path of the deepest matching path.
+18    
+19    """
+20
+21    def _find_directory_path(curr_strings, curr_root, depth, max_depth, max_path):
+22        dir_list = [entry.name for entry in os.scandir(curr_root) if entry.is_dir()]
+23        stripped_dir_list = [d.split('=')[0].strip() +"=" for d in dir_list]
+24        stripped_dir_list = list(set(stripped_dir_list))
+25        for string in curr_strings:
+26            if string in stripped_dir_list:
+27                dir_list = [d for d in dir_list if d.startswith(string)]
+28                for d in dir_list:
+29                    new_depth, new_path = _find_directory_path([s for s in curr_strings if s != string], os.path.join(curr_root, d), depth + 1, max_depth, max_path)
+30                    if new_depth > max_depth:
+31                        max_depth, max_path = new_depth, new_path
+32        if depth > max_depth:
+33            max_depth, max_path = depth, curr_root
+34        return max_depth, max_path
+35
+36    max_depth, max_path = _find_directory_path(strings, root_directory, 0, -1, '')
+37    if max_depth > 0:
+38        max_path = max_path[len(root_directory):]
+39        dirs = max_path[1:].split(os.path.sep)
+40        dirs = [d.split('=')[0].strip() +"=" for d in dirs]
+41        max_path = os.path.join(*dirs)
+42        max_path = os.path.join(root_directory, max_path)
+43    return max_path
+
Searches the root directory for a path of directories that matches the strings given, in any order.
If only a partial match is found, returns the deepest matching path.
If no matches are found, returns root_directory.
Returns a stripped matching path of directories, i.e. where we convert '--string=value' to '--string='.

Arguments:
  - strings (list of str): List of strings to be matched in any order. Each string in the list must be of the form '--string='.
  - root_directory (string, optional): Path to the root directory to be searched, default is the current working directory.

Returns:
  - max_path (string): Path of the deepest matching path. (Note: despite the Tuple[int, str] annotation, the source above returns only max_path.)
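A sketch against an invented results layout, where runs were stored as nested '--key=value' directories:

    from slune.utils import find_directory_path

    # Strings must be in stripped '--key=' form, per the docstring above.
    path = find_directory_path(['--lr=', '--batch_size='], root_directory='slune_results')
    # Deepest stripped match, e.g. 'slune_results/--lr=/--batch_size='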
def get_numeric_equiv(og_path: str, root_directory: Optional[str] = '.') -> str:
45def get_numeric_equiv(og_path: str, root_directory: Optional[str]='.') -> str:
+46    """ Replaces directories in path with existing directories with the same numerical value.
+47
+48    Args:
+49        - og_path (str): Path we want to check against existing paths, must be a subdirectory of root_directory and each directory must have form '--string=value'.
+50        - root_directory (str, optional): Path to the root directory to be searched, default is current working directory.
+51    
+52    Returns:
+53        - equiv (str): Path with values changed to match existing directories if values are numerically equivalent, with root directory at beginning.
+54
+55    """
+56
+57    def is_numeric(s):
+58        try:
+59            float(s)
+60            return True
+61        except ValueError:
+62            return False
+63
+64    dirs = og_path.split(os.path.sep)
+65    equiv = root_directory
+66    for d in dirs:
+67        next_dir = os.path.join(equiv, d)
+68        if os.path.exists(next_dir):
+69            equiv = next_dir
+70        else:
+71            # If the directory doesn't exist, check if there's a directory with the same numerical value
+72            dir_value = d.split('=')[1]
+73            if is_numeric(dir_value):
+74                dir_value = float(dir_value)
+75                if os.path.exists(equiv):
+76                    existing_dirs = [entry.name for entry in os.scandir(equiv) if entry.is_dir()]
+77                    for existing_dir in existing_dirs:
+78                        existing_dir_value = existing_dir.split('=')[1]
+79                        if is_numeric(existing_dir_value) and float(existing_dir_value) == dir_value:
+80                            equiv = os.path.join(equiv, existing_dir)
+81                            break
+82                    # If there is no directory with the same numerical value 
+83                    # we just keep the directory as is and move on to the next one
+84                    else:
+85                        equiv = next_dir
+86                else:
+87                    # If the directory doesn't exist we just keep the directory as is and move on to the next one
+88                    equiv = next_dir
+89            # Otherwise we just keep the directory as is and move on to the next one
+90            else:
+91                equiv = next_dir
+92    return equiv
+
Replaces directories in a path with existing directories that have the same numerical value.

Arguments:
  - og_path (str): Path we want to check against existing paths; must be a subdirectory of root_directory, and each directory must have the form '--string=value'.
  - root_directory (str, optional): Path to the root directory to be searched, default is the current working directory.

Returns:
  - equiv (str): Path with values changed to match existing directories where values are numerically equivalent, with the root directory at the beginning.
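A sketch with invented directories: if 'slune_results/--lr=0.1' already exists on disk, a numerically identical path written as '--lr=0.10' resolves to it.

    import os
    from slune.utils import get_numeric_equiv

    equiv = get_numeric_equiv(os.path.join('--lr=0.10', '--seed=1'), root_directory='slune_results')
    # e.g. 'slune_results/--lr=0.1/--seed=1', assuming the lr directory exists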
def dict_to_strings(d: dict) -> List[str]:
 94def dict_to_strings(d: dict) -> List[str]:
+ 95    """ Converts a dictionary into a list of strings in the form of '--key=value'.
+ 96
+ 97    Args:
+ 98        - d (dict): Dictionary to be converted.
+ 99
+100    Returns:
+101        - out (list of str): List of strings in the form of '--key=value'.
+102
+103    """
+104
+105    out = []
+106    for key, value in d.items():
+107        if key.startswith('--'):
+108            out.append('{}={}'.format(key, value))
+109        else:
+110            out.append('--{}={}'.format(key, value))
+111    return out
+
Converts a dictionary into a list of strings of the form '--key=value'.

Arguments:
  - d (dict): Dictionary to be converted.

Returns:
  - out (list of str): List of strings of the form '--key=value'.
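For example (per the source above, keys gain a '--' prefix only if they don't already have one):

    from slune.utils import dict_to_strings

    dict_to_strings({'--lr': 0.01, 'batch_size': 32})
    # -> ['--lr=0.01', '--batch_size=32']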
def find_csv_files(root_directory: Optional[str] = '.') -> List[str]:
113def find_csv_files(root_directory: Optional[str]='.') -> List[str]:
+114    """ Recursively finds all csv files in all subdirectories of the root directory and returns their paths.
+115
+116    Args:
+117        - root_directory (str, optional): Path to the root directory to be searched, default is current working directory.
+118
+119    Returns:
+120        - csv_files (list of str): List of strings containing the paths to all csv files found.
+121
+122    """
+123    csv_files = []
+124    for root, dirs, files in os.walk(root_directory):
+125        for file in files:
+126            if file.endswith('.csv'):
+127                csv_files.append(os.path.join(root, file))
+128    return csv_files
+
Recursively finds all csv files in all subdirectories of the root directory and returns their paths.

Arguments:
  - root_directory (str, optional): Path to the root directory to be searched, default is the current working directory.

Returns:
  - csv_files (list of str): List of strings containing the paths to all csv files found.
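For example (the result paths are invented):

    from slune.utils import find_csv_files

    csv_files = find_csv_files('slune_results')
    # e.g. ['slune_results/--lr=0.01/results.csv', ...]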
def get_all_paths(dirs: List[str], root_directory: Optional[str] = '.') -> List[str]:
130def get_all_paths(dirs: List[str], root_directory: Optional[str]='.') -> List[str]:
+131    """ Find all possible paths of csv files that have directory matching one of each of all the parameters given.
+132    
+133    Finds all paths of csv files in all subdirectories of the root directory that have a directory in their path matching one of each of all the parameters given.
+134
+135    Args:
+136        - dirs (list of str): List of directory names we want returned paths to have in their path.
+137        - root_directory (str, optional): Path to the root directory to be searched, default is current working directory.
+138
+139    Returns:
+140        - matches (list of str): List of strings containing the paths to all csv files found.
+141
+142    """
+143
+144    all_csv = find_csv_files(root_directory)
+145    matches = []
+146    for csv in all_csv:
+147        path = csv.split(os.path.sep)
+148        if all([p in path for p in dirs]):
+149            matches.append(csv)
+150    return matches
+
Finds all paths of csv files that have a directory matching each of the given parameters.

Finds all paths of csv files in all subdirectories of the root directory that have a directory in their path matching one of each of all the parameters given.

Arguments:
  - dirs (list of str): List of directory names we want returned paths to have in their path.
  - root_directory (str, optional): Path to the root directory to be searched, default is the current working directory.

Returns:
  - matches (list of str): List of strings containing the paths to all csv files found.
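For example (the directory names are invented):

    from slune.utils import get_all_paths

    matches = get_all_paths(['--lr=0.01', '--batch_size=32'], root_directory='slune_results')
    # Only csv paths whose directories include both entries are returned.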
\ No newline at end of file
diff --git a/src/__init__.py b/src/__init__.py
index b1cfe26..08bfb69 100644
--- a/src/__init__.py
+++ b/src/__init__.py
@@ -1 +1,4 @@
-# Empty
\ No newline at end of file
+"""
+.. include:: ../README.md
+.. include:: ../CLASSDESIGN.md
+"""
\ No newline at end of file
diff --git a/src/slune/__init__.py b/src/slune/__init__.py
index 494b924..36709c2 100644
--- a/src/slune/__init__.py
+++ b/src/slune/__init__.py
@@ -1,11 +1,14 @@
 # from .slune import submit_job, sbatchit
 # __all__ = ['slune', 'base', 'utils', 'loggers', 'savers', 'searchers' ]
-from .searchers import grid
-from .savers import csv
-from .loggers import default
+from .searchers import *
+from .savers import *
+from .loggers import *
 from .slune import submit_job, sbatchit, lsargs, garg, get_csv_slog
 from . import base, utils
-__all__ = ['submit_job', 'sbatchit', 'lsargs', 'garg', 'get_csv_slog',
-           'base', 'utils', 'default', 'grid', 'csv']
\ No newline at end of file
+# __all__ = ['submit_job', 'sbatchit', 'lsargs', 'garg', 'get_csv_slog',
+    # 'base', 'utils', 'default', 'grid', 'csv']
+
+import importlib.metadata
+__version__ = importlib.metadata.version("slune-lib")
\ No newline at end of file
diff --git a/src/slune/loggers/__init__.py b/src/slune/loggers/__init__.py
index bbfbd67..00ce26a 100644
--- a/src/slune/loggers/__init__.py
+++ b/src/slune/loggers/__init__.py
@@ -1,3 +1,3 @@
 from .default import LoggerDefault
 
-__all__ = ['LoggerDefault']
\ No newline at end of file
+# __all__ = ['LoggerDefault']
\ No newline at end of file
diff --git a/src/slune/savers/__init__.py b/src/slune/savers/__init__.py
index 09c1bd6..4863312 100644
--- a/src/slune/savers/__init__.py
+++ b/src/slune/savers/__init__.py
@@ -1,3 +1,3 @@
 from .csv import SaverCsv
 
-__all__ = ['SaverCsv']
\ No newline at end of file
+# __all__ = ['SaverCsv']
\ No newline at end of file
diff --git a/src/slune/searchers/__init__.py b/src/slune/searchers/__init__.py
index 91a9141..e084d00 100644
--- a/src/slune/searchers/__init__.py
+++ b/src/slune/searchers/__init__.py
@@ -1,3 +1,3 @@
 from .grid import SearcherGrid
 
-__all__ = ['SearcherGrid']
\ No newline at end of file
+# __all__ = ['SearcherGrid']
\ No newline at end of file
diff --git a/src/slune/slune.py b/src/slune/slune.py
index ad8be97..c3ef5f1 100644
--- a/src/slune/slune.py
+++ b/src/slune/slune.py
@@ -1,5 +1,5 @@
 from typing import List, Optional, Union
-from slune.base import Searcher, Saver
+from slune.base import BaseSearcher, BaseSaver
 import subprocess
 import sys
 from slune.savers.csv import SaverCsv
@@ -22,7 +22,7 @@ def submit_job(sh_path: str, args: List[str]):
     except subprocess.CalledProcessError as e:
         print(f"Error running sbatch: {e}")
 
-def sbatchit(script_path: str, sbatch_path: str, searcher: Searcher, cargs: Optional[List]=[], saver: Optional[Saver]=None):
+def sbatchit(script_path: str, sbatch_path: str, searcher: BaseSearcher, cargs: Optional[List]=[], saver: Optional[BaseSaver]=None):
     """ Submits jobs based on arguments given by searcher.
 
     For each job runs the script stored at script_path with selected parameter values given by searcher
@@ -91,7 +91,7 @@ def single_garg(arg_name):
     else:
         return single_garg(arg_names)
 
-def get_csv_slog(params: Optional[dict]= None, root_dir: Optional[str]='slune_results') -> Saver:
+def get_csv_slog(params: Optional[dict]= None, root_dir: Optional[str]='slune_results') -> BaseSaver:
     """ Returns a SaverCsv object with the given parameters and root directory.
 
     Args: