diff --git a/.github/workflows/publish-docs.yaml b/.github/workflows/publish-docs.yaml deleted file mode 100644 index b72ad15b6..000000000 --- a/.github/workflows/publish-docs.yaml +++ /dev/null @@ -1,16 +0,0 @@ -name: docs - -on: - push: - branches: [ master ] - paths: - - 'README.md' - -jobs: - docs: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@master - - uses: tool3/docsify-action@master - with: - github_token: ${{ secrets.DOCS_TOKEN }} diff --git a/README.md b/README.md index 74a0029af..c77db1ec8 100644 --- a/README.md +++ b/README.md @@ -125,19 +125,6 @@ You can lint or check the deployment with the flags —dry-run —debug. Make sure to assign the chart to the right namespace with —namespace yournamespace (when deploying to the default namespace this can be omitted.) -### 5. Setup a user - -You will need to create a user to login into Studio. Click the login button in the lower left corner, and click register. By default, Keycloak is configured not to require email verification, but this can be changed by logging into the Keycloak admin console and updating the STACKn realm login settings. - -To access the admin page of Studio, you will need to create a Django user with admin rights. First find the pod name to the Studio deployment: -```bash -$ kubectl get pods -n yournamespace -``` -and get the pod id that correspond to the studio pod running. Replace `pod-Id` in the command below. -```bash -$ kubectl exec -it pod-Id python manage.py createsuperuser -``` - ### Additional - Upgrading STACKn Similar to how you install a chart you may also upgrade a chart. @@ -175,4 +162,4 @@ STACKn is used in various places, examples include [SciLifeLab Data Center](http THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ## License -> See [LICENSE](LICENCE.md) for details. +> See [LICENSE](LICENSE) for details. diff --git a/cli/scaleout/auth.py b/cli/scaleout/auth.py index 12ee80cb9..a5f1b75d7 100644 --- a/cli/scaleout/auth.py +++ b/cli/scaleout/auth.py @@ -142,21 +142,29 @@ def get_token(client_id='studio-api', realm='STACKn', secure=True): else: print('Failed to authenticate with token, please login again.') print(res.text) - access_token = login() + access_token = login(deployment=stackn_config['active'], keycloak_host=token_config['keycloak_url'], studio_host=token_config['studio_url'], secure=secure) return access_token, token_config -def login(client_id='studio-api', realm='STACKn', deployment=[], keycloak_host=[], studio_host=[], secure=True): +def login(client_id='studio-api', realm='STACKn', deployment=[], keycloak_host=[], studio_host=[], username=[], secure=True): """ Login to Studio services. 
""" if not deployment: deployment = input('Name: ') - if not keycloak_host: - keycloak_host = input('Keycloak host: ') if not studio_host: studio_host = input('Studio host: ') - username = input('Username: ') + + url = "{}/api/settings".format(studio_host) + r = requests.get(url) + if (r.status_code >= 200 or r.status_code <= 299): + studio_settings = json.loads(r.content)["data"] + keycloak_host = next(item for item in studio_settings if item["name"] == "keycloak_host")["value"] + + if not keycloak_host: + keycloak_host = input('Keycloak host: ') + if not username: + username = input('Username: ') password = getpass() access_token, refresh_token, public_key = keycloak_user_auth(username, password, keycloak_host, secure=secure) # dirname = base64.urlsafe_b64encode(host.encode("utf-8")).decode("utf-8") diff --git a/cli/scaleout/cli/__init__.py b/cli/scaleout/cli/__init__.py index 4bee1754a..ed05fd670 100644 --- a/cli/scaleout/cli/__init__.py +++ b/cli/scaleout/cli/__init__.py @@ -4,7 +4,7 @@ from .create_cmd import create_cmd from .get_cmd import get_cmd from .delete_cmd import delete_cmd -from .stackn_cmd import setup_cmd, status_cmd, predict_cmd +from .stackn_cmd import setup_cmd, status_cmd, predict_cmd, train_cmd, test_cmd from .set_cmd import set_cmd from .update_cmd import update_cmd from .init_cmd import init_cmd \ No newline at end of file diff --git a/cli/scaleout/cli/create_cmd.py b/cli/scaleout/cli/create_cmd.py index 1fc4c1dcb..79bec7f20 100644 --- a/cli/scaleout/cli/create_cmd.py +++ b/cli/scaleout/cli/create_cmd.py @@ -63,11 +63,26 @@ def create_project_cmd(ctx, name, description='', repository=''): @create_cmd.command('lab') @click.option('-f', '--flavor', required=True) @click.option('-e', '--environment', required=True) +@click.option('-v', '--volumes', required=False, default=[]) @click.pass_context -def create_session(ctx, flavor, environment): +def create_session(ctx, flavor, environment, volumes): client = ctx.obj['CLIENT'] - 
client.create_session(flavor_slug=flavor, environment_slug=environment) + client.create_session(flavor_slug=flavor, environment_slug=environment, volumes=volumes) +@create_cmd.command('volume') +@click.option('-s', '--size', required=True) +@click.option('-n', '--name', required=True) +@click.pass_context +def create_volume(ctx, size, name): + client = ctx.obj['CLIENT'] + client.create_volume(name=name, size=size) + +@create_cmd.command('job') +@click.option('-c', '--config', required=True) +@click.pass_context +def create_job(ctx, config): + client = ctx.obj['CLIENT'] + client.create_job(config) # Create dataset @@ -85,4 +100,4 @@ def create_dataset(ctx, name, directory=[], filenames=[], release_type='minor', filenames, directory, description=description, - bucket=bucket) \ No newline at end of file + bucket=bucket) diff --git a/cli/scaleout/cli/default-project.tar.gz b/cli/scaleout/cli/default-project.tar.gz index e17990879..e661467e0 100644 Binary files a/cli/scaleout/cli/default-project.tar.gz and b/cli/scaleout/cli/default-project.tar.gz differ diff --git a/cli/scaleout/cli/delete_cmd.py b/cli/scaleout/cli/delete_cmd.py index e1c640bff..bcd2c6c81 100644 --- a/cli/scaleout/cli/delete_cmd.py +++ b/cli/scaleout/cli/delete_cmd.py @@ -44,6 +44,14 @@ def delete_dataset_cmd(ctx, name, version=None): client = ctx.obj['CLIENT'] client.delete_dataset(name, version) +@delete_cmd.command('volume') +@click.option('-n', '--name', required=True) +@click.pass_context +def delete_volume_cmd(ctx, name): + """ Delete a volume """ + client = ctx.obj['CLIENT'] + client.delete_volume(name) + # @delete_cmd.command('deployments') # @click.pass_context # def delete_deployment_cmd(ctx): diff --git a/cli/scaleout/cli/get_cmd.py b/cli/scaleout/cli/get_cmd.py index 34a84956a..dcf024f82 100644 --- a/cli/scaleout/cli/get_cmd.py +++ b/cli/scaleout/cli/get_cmd.py @@ -1,8 +1,8 @@ +import json import click from .main import main import requests -from scaleout.studioclient import StudioClient 
-from .helpers import create_table +from .helpers import create_table, PrettyTable @click.option('--daemon', is_flag=True, @@ -17,6 +17,32 @@ def get_cmd(ctx, daemon): if daemon: print('{} NYI should run as daemon...'.format(__file__)) +@get_cmd.command('settings') +@click.pass_context +def get_settings_cmd(ctx): + """ + List STACKn settings needed to set up the CLI client. + """ + studio_host = input("Studio host: ") + url = "{}/api/settings".format(studio_host) + try: + r = requests.get(url) + studio_settings = json.loads(r.content)["data"] + + names = ['Setting', 'Value'] + keys = ['name', 'value'] + x = PrettyTable() + x.field_names = names + for item in studio_settings: + row = [item[k] for k in keys] + x.add_row(row) + print(x) + except Exception as e: + print("Couldn't get studio settings.") + print("Returned status code: {}".format(r.status_code)) + print("Reason: {}".format(r.reason)) + print("Error: {}".format(e)) + @get_cmd.command('models') @click.pass_context def get_models_cmd(ctx): @@ -44,6 +70,7 @@ def get_deploymentdefinitions_cmd(ctx): @get_cmd.command('projects') @click.pass_context def get_projects_cmd(ctx): + """ List all projects. 
""" names = ["Name","Created", "Last updated"] keys = ["name", "created_at", "updated_at"] create_table(ctx, "projects", names, keys) @@ -56,6 +83,22 @@ def lab_list_all_cmd(ctx): keys = ["name", "flavor_slug", "environment_slug", "status", "created_at"] create_table(ctx, "labs", names, keys) +@get_cmd.command('volumes') +@click.pass_context +def get_volumes_cmd(ctx): + """ List all volumes """ + names = ["Name","Size", "Created by","Created"] + keys = ['name', 'size', 'created_by', 'created_on'] + create_table(ctx, 'volumes', names, keys) + +@get_cmd.command('jobs') +@click.pass_context +def get_jobs_cmd(ctx): + """ List all jobs """ + names = ["User","command", "Environment","Schedule"] + keys = ['username', 'command', 'environment', 'schedule'] + create_table(ctx, 'jobs', names, keys) + @get_cmd.command('members') @click.pass_context def members_list_cmd(ctx): diff --git a/cli/scaleout/cli/helpers.py b/cli/scaleout/cli/helpers.py index bc87c75b6..a1521770c 100644 --- a/cli/scaleout/cli/helpers.py +++ b/cli/scaleout/cli/helpers.py @@ -1,5 +1,7 @@ import sys from prettytable import PrettyTable +import click +import uuid def prompt(question, default="yes"): @@ -45,4 +47,47 @@ def _print_table(resource, names, keys): def create_table(ctx, resource, names, keys): client = ctx.obj['CLIENT'] objects = client.create_list(resource) - _print_table(objects, names, keys) \ No newline at end of file + _print_table(objects, names, keys) + +def search_for_model(ctx, resource, name): + client = ctx.obj['CLIENT'] + objects = client.create_list(resource) + model_exists = False + for item in objects: + if item['name'] == name: + model_exists = True + return model_exists + +def new_id(run_id): + new_id = input("A log object with ID = {} already exists in 'src/models/tracking' directory. 
\n".format(run_id) \ + + "Please provide a unique ID for the current run or press enter to use a randomly generated ID: ") + if new_id: + confirmed = False + question = "Do you want to assign this training run with the ID '{}'?".format(new_id) + while not confirmed: + confirmed = prompt(question) + if confirmed: + return new_id + else: + new_id = input("Assign a new unique ID or press enter to assign a random ID: ") + print(new_id) + if not new_id: + break + new_id = str(uuid.uuid1().hex) + return new_id + +class Determinant(click.Option): + def __init__(self, *args, **kwargs): + self.determinant = kwargs.pop('determinant') + assert self.determinant, "'determinant' parameter required" + super(Determinant, self).__init__(*args, **kwargs) + + def handle_parse_result(self, ctx, opts, args): + unallowed_present = self.name in opts + determinant_present = self.determinant in opts + if determinant_present: + if unallowed_present: + raise click.UsageError("Illegal usage: Cannot pass a value for '{}' together with '{}' when running 'stackn train'".format(self.name, self.determinant)) + else: + self.prompt = None + return super(Determinant, self).handle_parse_result(ctx, opts, args) \ No newline at end of file diff --git a/cli/scaleout/cli/stackn_cmd.py b/cli/scaleout/cli/stackn_cmd.py index f454e1dba..bae435e77 100644 --- a/cli/scaleout/cli/stackn_cmd.py +++ b/cli/scaleout/cli/stackn_cmd.py @@ -2,7 +2,13 @@ from .main import main import requests from scaleout.auth import login, get_stackn_config, get_remote_config, get_token -from .helpers import create_table +from .helpers import create_table, search_for_model, new_id, Determinant +import os +import random +import string +import json +import uuid +from scaleout.details import get_run_details # @click.option('--daemon', # is_flag=True, @@ -57,3 +63,45 @@ def predict_cmd(ctx, model, version, inp): # res = requests.post(url, # headers={"Authorization": "Token "+token}, # json = inp) + + +# ------------------- Question 
--------------------- +# Is it a good idea to make it possible to pass --log-off as an argument if the user does not want to log the run to Studio? +# In that case, the model name and code version is not possible to pass to the stackn train command and train.py will run without logging. +# Not sure if this is a good idea +# -------------------------------------------------- +@main.command('train') +@click.option('--log-off', flag_value='log-off', default=False) +@click.option('-m', '--model', prompt=True, cls=Determinant, determinant='log_off') +@click.option('-i', '--run-id', required=False, default=str(uuid.uuid1().hex)) +@click.option('-f', '--training-file', required=False, default="src/models/train.py") +@click.option('-v', '--version', prompt=True, cls=Determinant, determinant='log_off') +@click.pass_context +def train_cmd(ctx, log_off, model, run_id, training_file, version): + """ Train a model and log metadata """ + + if os.path.isfile('src/models/tracking/metadata/{}.pkl'.format(run_id)): # Only checks locally. Should we check if there exists a log on Studio with the same ID as well? + run_id = new_id(run_id) + print("Preparing to start training session with '{}' as unique ID.".format(run_id)) + if os.path.isfile(training_file): + if log_off: + import subprocess + subprocess.run(['python', training_file, run_id]) + else: + model_exists = search_for_model(ctx, "models", model) + if model_exists: + client = ctx.obj['CLIENT'] + client.train(model, run_id, training_file, version) + else: + print("The model '{}' does not exist in the active project and cannot be trained.".format(model)) + else: + current_dir = os.getcwd() + print("Could not start a training session. 
Check that you have initialized a model "\ + + "in '{}' and that the file '{}' exists.".format(current_dir, training_file)) + + + +@main.command('test') +@click.pass_context +def test_cmd(ctx): + get_run_details('12') \ No newline at end of file diff --git a/cli/scaleout/details.py b/cli/scaleout/details.py new file mode 100644 index 000000000..1c38451fb --- /dev/null +++ b/cli/scaleout/details.py @@ -0,0 +1,120 @@ +import platform +import psutil +import logging +import json +import os +import sys +from scaleout.cli.helpers import prompt + +def get_system_details(info): + try: + info['Platform'] = platform.system() + #info['Platform version'] = platform.version() + info['Architecture'] = platform.machine() + info['Processor'] = platform.processor() + info['RAM'] = str(round(psutil.virtual_memory().total / (1024.0 **3))) + " GB" + info['Python version'] = platform.python_version() + json_prep = json.dumps(info) + return json.loads(json_prep) + except Exception as e: + print("Failed to retrieve details about your system.") + logging.exception(e) + + +def get_cpu_details(info): + try: + info['Physical cores'] = psutil.cpu_count(logical=False) + info['Total cores'] = psutil.cpu_count(logical=True) + for i, percentage in enumerate(psutil.cpu_percent(percpu=True, interval=1)): + info[f'Core {i}'] = f'{percentage}%' + info['Total CPU usage'] = f'{psutil.cpu_percent()}%' + json_prep = json.dumps(info) + return json.loads(json_prep) + except Exception as e: + print("Failed to retrieve details about the CPU of your machine.") + logging.exception(e) + + +# Function that pauses the run until the user either commits changed files in the repo, or tells the program to contnue training with uncommitted files +# ---------------- Question ------------------- +# Should all files be committed before training or is it enough if the user commits some files in the repo? 
+# --------------------------------------------- +def commit_helper(repo, exit_message): # This function needs to be tested and modified. Might note even be necessary to have this function + print('WARNING: Uncommitted files exist in the current Git repository. Training the model with uncommitted files '\ + + 'should be avoided for major experiments since this will negatively impact code versioning. To increase future ' \ + + 'reproducibility of your experiment, please consider committing all files before training the model.\n') + valid = ["1", "2"] + while True: + answer = input("What do you want to do? \n" \ + + " 1) Continue training the model without committing my files (Not recommended). \n"\ + + " 2) Put the training session on hold to commit my files (Highly recommended). \n"\ + + "Choose an option [1 or 2]: ") + if answer in valid: + break + else: + print("\nPlease respond with '1' or '2'. \n") + if answer == "1": + print("\nThe training session will continue with uncommitted files in the repo. This might affect the reproducibility of your experiment.") + question = "Are you sure you want to continue?" + confirmed = prompt(question) + if confirmed: + return False + else: + sys.exit(exit_message.format("commit your files")) + else: + # The user wants to commit files before continuing model training. + # We could let the user add and commit files here with a subprocess operation? E.g. subprocess.run("git add .", check=True, shell=True) + answer = input("\nA good choice! After you commit your files, press enter to continue training the model "\ + + "(or abort the current training session by pressing arbitrary key): ") + if answer: + sys.exit(exit_message.format("commit your files")) + else: # Would be good to check here whether the files have been committed successfully. Maybe the user does not want to commit all files? 
+ print("Perfect, your files have been committed and the training session will continue.") + #while True: + # if not repo.is_dirty(): + + # break + # else: + return True + + +def get_git_details(code_version): + exit_message = "Aborting this training session. Please {} before running 'stackn train' again." + try: + import git + except ImportError: + print('Failed to import Git') + return None + try: + # current_repo = git.Repo(os.getcwd()) # Which one of these should we use? Needs testing + current_repo = git.Repo(search_parent_directories=True) + is_committed = True + if current_repo.is_dirty(): # This should be true if uncommitted files exist + is_committed = commit_helper(current_repo, exit_message) + latest_commit = current_repo.head.object.hexsha + print("Code version {} will be tied to the Git commit hash '{}'.".format(code_version, latest_commit)) + if not is_committed: + print("Since uncommitted files exist in the current repo, it will be noted in the training log that the code " \ + + "used to train the model in this run does not correspond to the recorded commit hash. " \ + + "This is done mainly for the purpose of appropriate code versioning and future reproducibility.") + except (git.InvalidGitRepositoryError, ValueError): + latest_commit = "No recent Git commit to log" + if git.InvalidGitRepositoryError: + print('WARNING: Failed to extract Git repo. Check to see if you are currently working in a Git repository.') + question = "Do you want to continue training the model anyways (not recommended)?" + confirmed = prompt(question) + if confirmed: + current_repo = "No Git repository to log" + else: + sys.exit(exit_message.format('enter an active Git repo')) + elif ValueError and not committed_files: + print("WARNING: Failed to extract latest Git commit hash. No commits seem to have been made yet and you have chosen not to commit them. 
" \ + + "The training session will continue.") + return (current_repo, latest_commit) + + +def get_run_details(code_version): + system_details = get_system_details({}) + cpu_details = get_cpu_details({}) + git_details = get_git_details(code_version) + return system_details, cpu_details, git_details diff --git a/cli/scaleout/studioclient.py b/cli/scaleout/studioclient.py index 8619a9731..8e2d1647d 100644 --- a/cli/scaleout/studioclient.py +++ b/cli/scaleout/studioclient.py @@ -6,6 +6,8 @@ import json import uuid from urllib.parse import urljoin +from datetime import datetime +from .details import get_run_details def _check_status(r,error_msg="Failed"): if (r.status_code < 200 or r.status_code > 299): @@ -72,9 +74,13 @@ def __init__(self, config=None): def get_endpoints(self): self.endpoints = dict() self.endpoints['models'] = self.api_url+'/projects/{}/models' + self.endpoints['modellogs'] = self.api_url+'/projects/{}/modellogs' + self.endpoints['metadata'] = self.api_url+'/projects/{}/metadata' self.endpoints['labs'] = self.api_url + '/projects/{}/labs' self.endpoints['members'] = self.api_url+'/projects/{}/members' self.endpoints['dataset'] = self.api_url+'/projects/{}/dataset' + self.endpoints['volumes'] = self.api_url+'/projects/{}/volumes/' + self.endpoints['jobs'] = self.api_url+'/projects/{}/jobs/' self.reports_api = self.api_url+'/reports' self.endpoints['projects'] = self.api_url+'/projects/' self.generators_api = self.api_url+'/generators' #endpoints['generators'] @@ -258,6 +264,76 @@ def get_members(self): return [] return members + ### Jobs API ### + def get_jobs(self, data={}): + url = self.endpoints['jobs'].format(self.project['id']) + try: + r = requests.get(url, headers=self.auth_headers, params=data, verify=self.secure_mode) + jobs = json.loads(r.content) + return jobs + except Exception as err: + print('Failed to list jobs.') + print('Status code: {}'.format(r.status_code)) + print('Message: {}'.format(r.text)) + print('Error: {}'.format(err)) + 
return [] + + def create_job(self, config): + settings_file = open(config, 'r') + job_config = json.loads(settings_file.read()) + url = self.endpoints['jobs'].format(self.project['id']) + try: + r = requests.post(url, headers=self.auth_headers, json=job_config, verify=self.secure_mode) + except Exception as err: + print('Failed to list jobs.') + print('Status code: {}'.format(r.status_code)) + print('Message: {}'.format(r.text)) + print('Error: {}'.format(err)) + return [] + + ### Volumes API ### + + def get_volumes(self, data={}): + url = self.endpoints['volumes'].format(self.project['id']) + try: + r = requests.get(url, headers=self.auth_headers, params=data, verify=self.secure_mode) + volumes = json.loads(r.content) + return volumes + except Exception as err: + print('Failed to list volumes.') + print('Status code: {}'.format(r.status_code)) + print('Message: {}'.format(r.text)) + print('Error: {}'.format(err)) + return [] + + + def create_volume(self, size, name): + url = self.endpoints['volumes'].format(self.project['id']) + data = {'name': name, 'size': size} + r = requests.post(url, headers=self.auth_headers, json=data, verify=self.secure_mode) + if r: + print('Created volume: {}'.format(name)) + else: + print('Failed to create volume.') + print('Status code: {}'.format(r.status_code)) + print(r.text) + + def delete_volume(self, name): + try: + volume = self.get_volumes({"name": name, "project_slug": self.project['slug']})[0] + except: + print('Volume {} not found.'.format(name)) + return + url = self.endpoints['volumes'].format(self.project['id'])+str(volume['id']) + r = requests.delete(url, headers=self.auth_headers, verify=self.secure_mode) + if r: + print('Deleted volume: {}'.format(volume['name'])) + else: + print('Failed to delete volume.') + print('Status code: {}'.format(r.status_code)) + print(r.text) + + ### Datasets API ### def delete_dataset(self, name, version): @@ -294,7 +370,8 @@ def create_dataset(self, name, release_type, filenames, 
directory=[], descriptio "release_type": release_type, "filenames": filenames, "description": description, - "bucket": bucket + "bucket": bucket, + "url": url } print(payload) r = requests.post(url, json=payload, headers=self.auth_headers, verify=self.secure_mode) @@ -328,7 +405,7 @@ def list_datasets(self): except NoSuchBucket as e: print("The datasets repository has not been initialized yet.", e) return objs - + ### Models API ### @@ -454,6 +531,18 @@ def create_list(self, resource): return dataset else: return [] + if resource == 'volumes': + if self.found_project: + volumes = self.get_volumes() + return volumes + else: + return [] + if resource == 'jobs': + if self.found_project: + jobs = self.get_jobs() + return jobs + else: + return [] url = self.endpoints[resource] @@ -584,9 +673,9 @@ def remove_members(self, users): print('Reason: {}'.format(r.reason)) break - def create_session(self, flavor_slug, environment_slug): + def create_session(self, flavor_slug, environment_slug, volumes=[]): url = self.endpoints['labs'].format(self.project['id']) + '/' - data = {'flavor': flavor_slug, 'environment': environment_slug} + data = {'flavor': flavor_slug, 'environment': environment_slug, 'extraVols': volumes} r = requests.post(url, headers=self.auth_headers, json=data, verify=self.secure_mode) @@ -599,6 +688,109 @@ def create_session(self, flavor_slug, environment_slug): print('Status code: {}'.format(r.status_code)) print('Reason: {} - {}'.format(r.reason, r.text)) + """ + def log_to_db(self, data_to_log): + try: + from pymongo import MongoClient + except ImportError: + print('Failed to import MongoClient') + return None + myclient = MongoClient("localhost:27017", username = 'root', password = 'tvJdjZm6PG') + db = myclient["test"] + Collection = db["testCollection"] + if isinstance(data_to_log, list): + Collection.insert_many(data_to_log) + else: + Collection.insert_one(data_to_log) + """ + + + def retrieve_metadata(self, model, run_id): + """ Retrieve metadata logged 
during model training """ + + md_file = 'src/models/tracking/metadata/{}.pkl'.format(run_id) + if os.path.isfile(md_file): + print('Retrieving metadata for current training session for storage in Studio...') + try: + import pickle + with open(md_file, 'rb') as metadata_file: + metadata_json = pickle.load(metadata_file) + print("Metadata was retrieved successfully from local file.") + repo = self.get_repository() + repo.bucket = 'metadata' + if not 'model' in metadata_json: + metadata_json['model'] = '' + if not 'params' in metadata_json: + metadata_json['params'] = {} + if not 'metrics' in metadata_json: + metadata_json['metrics'] = {} + + metadata = {"run_id": run_id, + "trained_model": model, + "model_details": metadata_json["model"], + "parameters": metadata_json["params"], + "metrics": metadata_json["metrics"] + } + url = self.endpoints['metadata'].format(self.project['id'])+'/' + r = requests.post(url, json=metadata, headers=self.auth_headers, verify=self.secure_mode) + if not _check_status(r, error_msg="Failed to create metadata log in Studio for run with ID '{}'".format(run_id)): + return + print("Created metadata log in Studio for run with ID '{}'".format(run_id)) + except Exception as e: # Should catch more specific error here + print("Error") + print(e) + return + else: + print("No metadata available for current training session.") + return + + + def run_training_file(self, model, training_file, run_id): + """ Run training file and return date and time for training, and execution time """ + + start_time = datetime.now() + training = subprocess.run(['python3', training_file, run_id]) + end_time = datetime.now() + execution_time = str(end_time - start_time) + start_time = start_time.strftime("%Y/%m/%d, %H:%M:%S") + if training.returncode != 0: + training_status = 'FA' + print("Training of the model was not executed properly.") + else: + training_status = 'DO' + self.retrieve_metadata(model, run_id) + return (start_time, execution_time, training_status) + + 
+ def train(self, model, run_id, training_file, code_version): + """ Train a model and log corresponding data in Studio. """ + + system_details, cpu_details, git_details = get_run_details(code_version) + print('Running training script...') + training_output = self.run_training_file(model, training_file, run_id) # Change output of run_training_file + repo = self.get_repository() + repo.bucket = 'training' + + training_data = {"run_id": run_id, + "trained_model": model, + "training_started_at": training_output[0], + "execution_time": training_output[1], + "code_version": code_version, + "current_git_repo": str(git_details[0]), + "latest_git_commit": git_details[1], + "system_details": system_details, + "cpu_details": cpu_details, + "training_status": training_output[2]} + url = self.endpoints['modellogs'].format(self.project['id'])+'/' + print(git_details) + r = requests.post(url, json=training_data, headers=self.auth_headers, verify=self.secure_mode) + if not _check_status(r, error_msg="Failed to create training session log in Studio for {}".format(model)): + return False + else: + print("Created training log for {}".format(model)) + return True + + def predict(self, model, inp, version=None): if version: params = {'name': model, 'version': version} @@ -628,5 +820,4 @@ def predict(self, model, inp, version=None): data = client._get_repository_conf() print("Minio settings: ", data) - print(client.token) - + print(client.token) \ No newline at end of file diff --git a/cli/scaleout/trackingclient.py b/cli/scaleout/trackingclient.py new file mode 100644 index 000000000..788b413e7 --- /dev/null +++ b/cli/scaleout/trackingclient.py @@ -0,0 +1,47 @@ +import sys +import os +import uuid +import pickle + +class TrackingClient(): + def __init__(self, run_id=None): + self.id = run_id + self.params = {} + self.metrics = {} + self.model = {} + + def log_params(self, params): + self.params = { + **self.params, + **params + } + + def log_metrics(self, metrics): + self.metrics = { + 
**self.metrics, + **metrics + } + + def log_model(self, model_type, model): + self.model = { + **self.model, + **{'type': model_type, 'fitted_model': model} + } + + def save_tracking(self): + try: + self.id = sys.argv[1] + except IndexError: + self.id = input("WARNING: To save the tracked metadata, please assign a unique ID for this training run: ") + md_file = 'src/models/tracking/metadata/{}.pkl'.format(self.id) + metadata = {} + for attr, value in self.__dict__.items(): + if attr != 'id' and value: + metadata[attr] = value + print("Tracking completed. The following metadata was tracked during training: {} \n".format(list(metadata.keys())) \ + + "Metadata will be saved in 'src/models/tracking/metadata' as '{}.pkl'.".format(self.id)) + try: + with open(md_file, 'wb') as metadata_file: + pickle.dump(metadata, metadata_file, pickle.HIGHEST_PROTOCOL) + except Exception as e: # Should catch more specific error here + print("Error") diff --git a/cli/setup.py b/cli/setup.py index 2053fbee5..cbf8b385f 100644 --- a/cli/setup.py +++ b/cli/setup.py @@ -25,7 +25,8 @@ "six>=1.14.0", "python-slugify", "prettytable", - "pyjwt" + "pyjwt", + "psutil" ], license="Copyright Scaleout Systems AB. 
See license for details", zip_safe=False, diff --git a/components/chart-controller/controller/controller.py b/components/chart-controller/controller/controller.py index 231c4bf5a..425a5c968 100644 --- a/components/chart-controller/controller/controller.py +++ b/components/chart-controller/controller/controller.py @@ -28,71 +28,42 @@ def __init__(self, cwd): pass def deploy(self, options, action='install'): - # extras = '' - """ - try: - minio = ' --set service.minio=' + str(options['minio_port']) - extras = extras + minio - except KeyError as e: - print("could not get minioport!") - try: - controller = ' --set service.controller=' + str(options['controller_port']) - extras = extras + controller - except KeyError as e: - print("could not get controllerport") - pass - try: - user = ' --set alliance.user=' + str(options['user']) - extras = extras + user - except KeyError as e: - print("could not get user") - pass - try: - project = ' --set alliance.project=' + str(options['project']) - extras = extras + project - except KeyError as e: - print("could not get project") - pass - try: - apiUrl = ' --set alliance.apiUrl=' + str(options['api_url']) - extras = extras + apiUrl - except KeyError as e: - print("could not get apiUrl") - pass - """ - - # for key in options: - # print(key) - # print(options[key]) - # extras = extras + ' --set {}={}'.format(key, options[key]) - volume_root = "/" if "TELEPRESENCE_ROOT" in os.environ: volume_root = os.environ["TELEPRESENCE_ROOT"] kubeconfig = os.path.join(volume_root, 'root/.kube/config') - if 'DEBUG' in os.environ and os.environ['DEBUG'] == 'true': + if not 'chart' in options: + print('Chart option not specified.') + return json.dumps({'status':'failed', 'reason':'Option chart not set.'}) chart = 'charts/scaleout/'+options['chart'] else: refresh_charts(self.branch) fname = self.branch.replace('/', '-') chart = 'charts-{}/scaleout/{}'.format(fname, options['chart']) + if not 'release' in options: + print('Release option not 
specified.') + return json.dumps({'status':'failed', 'reason':'Option release not set.'}) + args = ['helm', action, '--kubeconfig', kubeconfig, options['release'], chart] - # tmp_file_name = uuid.uuid1().hex+'.yaml' - # tmp_file = open(tmp_file_name, 'w') - # yaml.dump(options, tmp_file, allow_unicode=True) - # tmp_file.close() - # args.append('-f') - # args.append(tmp_file_name) + for key in options: - args.append('--set') - # args.append('{}={}'.format(key, options[key])) - args.append(key+"="+options[key].replace(',', '\,')) + try: + args.append('--set') + # If list, don't escape , + if options[key][0] == '{' and options[key][-1] == '}': + args.append(key+"="+options[key]) + # And if not list, we should escape , + else: + args.append(key+"="+options[key].replace(',', '\,')) + except: + print('Failed to process input arguments.') + return json.dumps({"status": "failed", 'reason':'Failed to process input arguments.'}) print(args) status = subprocess.run(args, cwd=self.cwd) - # os.remove(tmp_file_name) + print(status) return json.dumps({'helm': {'command': args, 'cwd': str(self.cwd), 'status': str(status)}}) def delete(self, options): diff --git a/components/studio/api/APIpermissions.py b/components/studio/api/APIpermissions.py index 8964978cd..7cd4fb495 100644 --- a/components/studio/api/APIpermissions.py +++ b/components/studio/api/APIpermissions.py @@ -1,7 +1,7 @@ from rest_framework.permissions import BasePermission from django.http import QueryDict -from .serializers import Model, MLModelSerializer, Report, ReportSerializer, \ - ReportGenerator, ReportGeneratorSerializer, Project, ProjectSerializer, \ +from .serializers import Model, MLModelSerializer, ModelLog, ModelLogSerializer, Metadata, MetadataSerializer, \ + Report, ReportSerializer, ReportGenerator, ReportGeneratorSerializer, Project, ProjectSerializer, \ DeploymentInstance, DeploymentInstanceSerializer, DeploymentDefinition, \ DeploymentDefinitionSerializer import modules.keycloak_lib as keylib diff 
--git a/components/studio/api/public_views.py b/components/studio/api/public_views.py new file mode 100644 index 000000000..500550c77 --- /dev/null +++ b/components/studio/api/public_views.py @@ -0,0 +1,24 @@ +from django.conf import settings +from django.http import JsonResponse + + +def get_studio_settings(request): + """ + This view should return a list of settings + needed to set up the CLI client. + """ + studio_settings = [] + + studio_url = { + "name": "studio_host", + "value": settings.STUDIO_HOST + } + kc_url = { + "name": "keycloak_host", + "value": settings.KC_URL + } + + studio_settings.append(studio_url) + studio_settings.append(kc_url) + + return JsonResponse({'data': studio_settings}) diff --git a/components/studio/api/serializers.py b/components/studio/api/serializers.py index 71b75c216..793f52634 100644 --- a/components/studio/api/serializers.py +++ b/components/studio/api/serializers.py @@ -1,10 +1,11 @@ from rest_framework.serializers import ModelSerializer -from models.models import Model +from models.models import Model, ModelLog, Metadata from reports.models import Report, ReportGenerator -from projects.models import Project +from projects.models import Project, Volume from deployments.models import DeploymentInstance, DeploymentDefinition from datasets.models import Dataset, FileModel +from experiments.models import Experiment from labs.models import Session from django.contrib.auth.models import User class MLModelSerializer(ModelSerializer): @@ -13,6 +14,22 @@ class Meta: fields = ( 'id', 'uid', 'name', 'description', 'resource', 'url', 'uploaded_at', 'project', 'status', 'version') + +class ModelLogSerializer(ModelSerializer): + class Meta: + model = ModelLog + fields = ( + 'id', 'run_id', 'trained_model', 'project', 'training_started_at', 'execution_time', 'code_version', + 'current_git_repo', 'latest_git_commit', 'system_details', 'cpu_details', 'training_status') + + +class MetadataSerializer(ModelSerializer): + class Meta: + model = 
Metadata + fields = ( + 'id', 'run_id', 'trained_model', 'project', 'model_details', 'parameters', 'metrics') + + class DeploymentDefinitionSerializer(ModelSerializer): class Meta: model = DeploymentDefinition @@ -64,9 +81,20 @@ class DatasetSerializer(ModelSerializer): class Meta: model = Dataset fields = ['id', 'name', 'version', 'release_type', 'description', - 'bucket', 'project_slug', 'files', 'created_by', 'created_on'] + 'bucket', 'project_slug', 'files', 'created_by', 'created_on', 'datasheet'] + class FileModelSerializer(ModelSerializer): class Meta: model = FileModel fields = ['id', 'name', 'bucket'] + +class VolumeSerializer(ModelSerializer): + class Meta: + model = Volume + fields = ['id', 'name', 'slug', 'size', 'settings', 'created_by', 'created_on', 'updated_on'] + +class ExperimentSerializer(ModelSerializer): + class Meta: + model = Experiment + fields = ['id', 'username', 'command', 'environment', 'project', 'schedule', 'created_at', 'uploaded_at'] \ No newline at end of file diff --git a/components/studio/api/urls.py b/components/studio/api/urls.py index 25f582876..1142597b3 100644 --- a/components/studio/api/urls.py +++ b/components/studio/api/urls.py @@ -1,8 +1,9 @@ from django.conf.urls import include from django.urls import path import rest_framework.routers as drfrouters -from .views import ModelList, ReportList, ReportGeneratorList, ProjectList, DeploymentInstanceList, \ - DeploymentDefinitionList, LabsList, MembersList, DatasetList +from .views import ModelList, ModelLogList, MetadataList, ReportList, ReportGeneratorList, ProjectList, DeploymentInstanceList, \ + DeploymentDefinitionList, LabsList, MembersList, DatasetList, VolumeList, JobsList +from .public_views import get_studio_settings from rest_framework.authtoken.views import obtain_auth_token from rest_framework_nested import routers @@ -21,6 +22,10 @@ models_router.register(r'labs', LabsList, base_name='lab') models_router.register(r'members', MembersList, base_name='members') 
models_router.register(r'dataset', DatasetList, base_name='dataset') +models_router.register(r'volumes', VolumeList, base_name='volumes') +models_router.register(r'modellogs', ModelLogList, base_name='modellog') +models_router.register(r'metadata', MetadataList, base_name='metadata') +models_router.register(r'jobs', JobsList, base_name='jobs') router.register(r'deploymentInstances', DeploymentInstanceList, base_name='deploymentInstance') router.register(r'deploymentDefinitions', DeploymentDefinitionList, base_name='deploymentDefinition') @@ -30,4 +35,5 @@ path('', include(router.urls)), path('', include(models_router.urls)), path('api-token-auth', obtain_auth_token, name='api_token_auth'), + path('settings', get_studio_settings) ] diff --git a/components/studio/api/views.py b/components/studio/api/views.py index 7417fcb99..401e7ece2 100644 --- a/components/studio/api/views.py +++ b/components/studio/api/views.py @@ -14,12 +14,14 @@ from django.contrib.auth.models import User from django.conf import settings import modules.keycloak_lib as kc +from projects.models import Environment -from .serializers import Model, MLModelSerializer, Report, ReportSerializer, \ - ReportGenerator, ReportGeneratorSerializer, Project, ProjectSerializer, \ +from .serializers import Model, MLModelSerializer, ModelLog, ModelLogSerializer, Metadata, MetadataSerializer, \ + Report, ReportSerializer, ReportGenerator, ReportGeneratorSerializer, Project, ProjectSerializer, \ DeploymentInstance, DeploymentInstanceSerializer, DeploymentDefinition, \ DeploymentDefinitionSerializer, Session, LabSessionSerializer, UserSerializer, \ - DatasetSerializer, FileModelSerializer, Dataset, FileModel + DatasetSerializer, FileModelSerializer, Dataset, FileModel, Volume, VolumeSerializer, \ + ExperimentSerializer, Experiment class ModelList(GenericViewSet, CreateModelMixin, RetrieveModelMixin, UpdateModelMixin, ListModelMixin): permission_classes = (IsAuthenticated, ProjectPermission,) @@ -59,6 +61,66 @@ def 
create(self, request, *args, **kwargs): return HttpResponse('ok', 200) +class ModelLogList(GenericViewSet, CreateModelMixin, RetrieveModelMixin, UpdateModelMixin, ListModelMixin): + permission_classes = (IsAuthenticated, ProjectPermission,) + serializer_class = ModelLogSerializer + filter_backends = [DjangoFilterBackend] + #filterset_fields = ['id','name', 'version'] + + # Not sure if this kind of function is needed for ModelLog? + def get_queryset(self): + + return ModelLog.objects.filter(project__pk=self.kwargs['project_pk']) + + def create(self, request, *args, **kwargs): + project = Project.objects.get(id=self.kwargs['project_pk']) + + try: + run_id = request.data['run_id'] + trained_model = request.data['trained_model'] + training_started_at = request.data['training_started_at'] + execution_time = request.data['execution_time'] + code_version = request.data['code_version'] + current_git_repo = request.data['current_git_repo'] + latest_git_commit = request.data['latest_git_commit'] + system_details = request.data['system_details'] + cpu_details = request.data['cpu_details'] + training_status = request.data['training_status'] + except: + return HttpResponse('Failed to create training session log.', 400) + + new_log = ModelLog(run_id=run_id, trained_model=trained_model, project=project.name, training_started_at=training_started_at, execution_time=execution_time, + code_version=code_version, current_git_repo=current_git_repo, latest_git_commit=latest_git_commit, + system_details=system_details, cpu_details=cpu_details, training_status=training_status, ) + new_log.save() + return HttpResponse('ok', 200) + + +class MetadataList(GenericViewSet, CreateModelMixin, RetrieveModelMixin, UpdateModelMixin, ListModelMixin): + permission_classes = (IsAuthenticated, ProjectPermission,) + serializer_class = MetadataSerializer + filter_backends = [DjangoFilterBackend] + #filterset_fields = ['id','name', 'version'] + + def create(self, request, *args, **kwargs): + project = 
Project.objects.get(id=self.kwargs['project_pk']) + + try: + run_id = request.data['run_id'] + trained_model = request.data['trained_model'] + model_details = request.data['model_details'] + parameters = request.data['parameters'] + metrics = request.data['metrics'] + except: + return HttpResponse('Failed to create metadata log.', 400) + + new_md = Metadata(run_id=run_id, trained_model=trained_model, project=project.name, + model_details=model_details, parameters=parameters, metrics=metrics, ) + new_md.save() + return HttpResponse('ok', 200) + + + class LabsList(GenericViewSet, CreateModelMixin, RetrieveModelMixin, UpdateModelMixin, ListModelMixin): permission_classes = (IsAuthenticated, ProjectPermission,) serializer_class = LabSessionSerializer @@ -87,6 +149,9 @@ def create(self, request, *args, **kwargs): lab_session = Session(id=uid, name=name, flavor_slug=flavor_slug, environment_slug=environment_slug, project=project, lab_session_owner=request.user) + lab_session.extraVols = [] + if 'extraVols' in request.data: + lab_session.extraVols = request.data['extraVols'] lab_session.save() return HttpResponse('Ok.', 200) @@ -174,7 +239,7 @@ def build_instance(self, request): except: return HttpResponse('Deployment environment {} not found.'.format(environment), status=404) - instance = DeploymentInstance(model=mod, deployment=dep, created_by=request.user) + instance = DeploymentInstance(model=mod, deployment=dep, created_by=request.user.username) instance.params = request.data['deploy_config'] # TODO: Verify that the user is allowed to set the parameters in deploy_config. 
# This whole endpoint needs to be refactored: @@ -313,6 +378,69 @@ def destroy(self, request, *args, **kwargs): return HttpResponse('Cannot remove owner of project.', status=400) return HttpResponse('Failed to remove user.', status=400) +class JobsList(generics.ListAPIView, GenericViewSet, CreateModelMixin, RetrieveModelMixin, UpdateModelMixin, + ListModelMixin): + permission_classes = (IsAuthenticated, ProjectPermission, ) + serializer_class = ExperimentSerializer + filter_backends = [DjangoFilterBackend] + filterset_fields = ['id', 'username', 'project'] + def get_queryset(self): + jobs = Experiment.objects.filter(project__pk=self.kwargs['project_pk']) + return jobs + + def create(self, request, *args, **kwargs): + try: + project = Project.objects.get(id=self.kwargs['project_pk']) + environment = Environment.objects.get(name=request.data['environment']) + job = Experiment(username=request.user.username, + command=request.data['command'], + environment=environment, + project=project, + schedule=request.data['schedule']) + job.options = request.data + job.save() + except Exception as err: + print(err) + return HttpResponse('Failed to create job.', 400) + return HttpResponse('ok', 200) + +class VolumeList(generics.ListAPIView, GenericViewSet, CreateModelMixin, RetrieveModelMixin, UpdateModelMixin, + ListModelMixin): + permission_classes = (IsAuthenticated, ProjectPermission, ) + serializer_class = VolumeSerializer + filter_backends = [DjangoFilterBackend] + filterset_fields = ['name', 'slug', 'created_by'] + def get_queryset(self): + project = Project.objects.get(id=self.kwargs['project_pk']) + volumes = Volume.objects.filter(project_slug=project.slug) + return volumes + + def create(self, request, *args, **kwargs): + try: + project = Project.objects.get(id=self.kwargs['project_pk']) + name = request.data['name'] + size = request.data['size'] + proj_slug = project.slug + created_by = request.user.username + volume = Volume(name=name, size=size, 
created_by=created_by, project_slug=proj_slug) + volume.save() + except Exception as err: + print(err) + return HttpResponse('Failed to create volume.', 400) + return HttpResponse('ok', 200) + + def destroy(self, request, *args, **kwargs): + project = Project.objects.get(id=self.kwargs['project_pk']) + volume = Volume.objects.get(pk=self.kwargs['pk'], project_slug=project.slug) + try: + volume.helmchart.delete() + print('OK') + return HttpResponse('ok', 200) + except Exception as err: + print('Failed') + print(err) + return HttpResponse('Failed to delete volume', 400) + class DatasetList(generics.ListAPIView, GenericViewSet, CreateModelMixin, RetrieveModelMixin, UpdateModelMixin, ListModelMixin): permission_classes = (IsAuthenticated, ProjectPermission, ) @@ -402,4 +530,3 @@ def create_project(self, request): if success: project.save() return HttpResponse('Ok', status=200) - diff --git a/components/studio/datasets/datasheet_questions.py b/components/studio/datasets/datasheet_questions.py new file mode 100644 index 000000000..2c68fd84b --- /dev/null +++ b/components/studio/datasets/datasheet_questions.py @@ -0,0 +1,4 @@ +q = [ + "What do the instances that comprise the dataset represent (e.g., documents, photos, people, countries)? Are there multiple types of instances (e.g., movies, users, and ratings; people and interactions between them; nodes and edges)? Please provide a description.", + "Who created the dataset (e.g., which team, research group) and on behalf of which entity (e.g., company, institution, organization)?"
+] \ No newline at end of file diff --git a/components/studio/datasets/forms.py b/components/studio/datasets/forms.py new file mode 100644 index 000000000..978efe602 --- /dev/null +++ b/components/studio/datasets/forms.py @@ -0,0 +1,6 @@ +from django import forms +from .datasheet_questions import q + +class DatasheetForm(forms.Form): + q1 = forms.CharField(label=q[0], max_length=100, widget=forms.Textarea({})) + q2 = forms.CharField(label=q[1], max_length=100, widget=forms.Textarea({})) \ No newline at end of file diff --git a/components/studio/datasets/migrations/0002_auto_20201029_1914.py b/components/studio/datasets/migrations/0002_auto_20201029_1914.py new file mode 100644 index 000000000..2f797f217 --- /dev/null +++ b/components/studio/datasets/migrations/0002_auto_20201029_1914.py @@ -0,0 +1,24 @@ +# Generated by Django 2.2.13 on 2020-10-29 19:14 + +from django.db import migrations +import django.db.models.manager + + +class Migration(migrations.Migration): + + dependencies = [ + ('datasets', '0001_initial'), + ] + + operations = [ + migrations.AlterModelManagers( + name='dataset', + managers=[ + ('objects_version', django.db.models.manager.Manager()), + ], + ), + migrations.AlterUniqueTogether( + name='dataset', + unique_together={('name', 'version', 'project_slug')}, + ), + ] diff --git a/components/studio/datasets/migrations/0003_dataset_datsheet.py b/components/studio/datasets/migrations/0003_dataset_datsheet.py new file mode 100644 index 000000000..ac3255258 --- /dev/null +++ b/components/studio/datasets/migrations/0003_dataset_datsheet.py @@ -0,0 +1,18 @@ +# Generated by Django 2.2.13 on 2020-11-25 15:42 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('datasets', '0002_auto_20201029_1914'), + ] + + operations = [ + migrations.AddField( + model_name='dataset', + name='datsheet', + field=models.FileField(default=None, upload_to='datasheets/'), + ), + ] diff --git 
a/components/studio/datasets/migrations/0004_auto_20201125_1543.py b/components/studio/datasets/migrations/0004_auto_20201125_1543.py new file mode 100644 index 000000000..9944cc5b6 --- /dev/null +++ b/components/studio/datasets/migrations/0004_auto_20201125_1543.py @@ -0,0 +1,18 @@ +# Generated by Django 2.2.13 on 2020-11-25 15:43 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('datasets', '0003_dataset_datsheet'), + ] + + operations = [ + migrations.RenameField( + model_name='dataset', + old_name='datsheet', + new_name='datasheet', + ), + ] diff --git a/components/studio/datasets/models.py b/components/studio/datasets/models.py index f76040999..0f41e27f6 100644 --- a/components/studio/datasets/models.py +++ b/components/studio/datasets/models.py @@ -46,6 +46,7 @@ class Dataset(models.Model): files = models.ManyToManyField(FileModel, blank=True) created_by = models.CharField(max_length=255) # Username created_on = models.DateTimeField(auto_now_add=True) + datasheet = models.FileField(upload_to='datasheets/', default=None) class Meta: unique_together = ('name', 'version', 'project_slug') diff --git a/components/studio/datasets/templates/dataset_datasheet.html b/components/studio/datasets/templates/dataset_datasheet.html new file mode 100644 index 000000000..0c389da91 --- /dev/null +++ b/components/studio/datasets/templates/dataset_datasheet.html @@ -0,0 +1,42 @@ +{% extends 'baseproject.html' %} +{% block content %} +
+ +
+

Datasheet

+
+
+

A datasheet documents a dataset&#39;s contents, provenance, composition and intended use, helping others decide whether the dataset is appropriate for their task

+
+ +
+ + {% csrf_token %} + {{ form }} + +
+
+{% endblock %} \ No newline at end of file diff --git a/components/studio/datasets/templates/dataset_page.html b/components/studio/datasets/templates/dataset_page.html index 3909ba416..87782d15f 100644 --- a/components/studio/datasets/templates/dataset_page.html +++ b/components/studio/datasets/templates/dataset_page.html @@ -35,7 +35,8 @@

Datasets

Location - Name + Name + Datasheet Size Last Modified @@ -53,6 +54,8 @@

Datasets

{{ entry.location }} {{ entry.name }} + {{ entry.datasheet }} {{ entry.size }} MB {{ entry.modified }} {% endif %} diff --git a/components/studio/datasets/urls.py b/components/studio/datasets/urls.py index 4bd9e48a0..9d512aa8b 100644 --- a/components/studio/datasets/urls.py +++ b/components/studio/datasets/urls.py @@ -6,4 +6,5 @@ urlpatterns = [ path('', views.page, name='page'), path('/', views.path_page, name='path_page'), + path('/datasheet', views.datasheet, name='datasheet'), ] diff --git a/components/studio/datasets/views.py b/components/studio/datasets/views.py index cb6c04a5a..f8ee22dd7 100644 --- a/components/studio/datasets/views.py +++ b/components/studio/datasets/views.py @@ -1,12 +1,14 @@ from django.contrib.auth.decorators import login_required from django.shortcuts import render +from django.http import HttpResponseRedirect from projects.models import Project from studio.minio import MinioRepository, ResponseError from django.conf import settings as sett from projects.helpers import get_minio_keys +from .forms import DatasheetForm -@login_required(login_url='/accounts/login') +@login_required def page(request, user, project, page_index): template = 'dataset_page.html' project = Project.objects.filter(slug=project).first() @@ -26,6 +28,7 @@ def page(request, user, project, page_index): datasets.append({'is_dir': obj.is_dir, # remove '/' after the directory name 'name': obj.object_name[:-1] if obj.is_dir else obj.object_name, + 'datasheet': 'datasheet', 'size': round(obj.size / 1000000, 2), 'location': 'minio', 'modified': obj.last_modified}) @@ -47,7 +50,7 @@ def page(request, user, project, page_index): return render(request, template, locals()) -@login_required(login_url='/accounts/login') +@login_required def path_page(request, user, project, path_name, page_index): template = 'dataset_path_page.html' project = Project.objects.filter(slug=project).first() @@ -81,3 +84,82 @@ def path_page(request, user, project, path_name, page_index): next_page = 
page_index if page_index == pages[-1] else page_index + 1 return render(request, template, locals()) + +#@login_required +#def datasheet(request, user, project): + #template = 'dataset_datasheet.html' + """ + project = Project.objects.filter(slug=project).first() + url_domain = sett.DOMAIN + + minio_keys = get_minio_keys(project) + decrypted_key = minio_keys['project_key'] + decrypted_secret = minio_keys['project_secret'] + """ + + #return render(request, template, locals()) + +@login_required +def datasheet(request, user, project, page_index): + template = 'dataset_datasheet.html' + project = Project.objects.filter(slug=project).first() + url_domain = sett.DOMAIN + + minio_keys = get_minio_keys(project) + decrypted_key = minio_keys['project_key'] + decrypted_secret = minio_keys['project_secret'] + + submitbutton = request.POST.get("submit") + + datasheet_info = [] + + form = DatasheetForm(request.POST or None) + if form.is_valid(): + datasheet_info.append(form.cleaned_data.get("q1")) + datasheet_info.append(form.cleaned_data.get("q2")) + print(datasheet_info) + + datasets = [] + try: + minio_repository = MinioRepository('{}-minio:9000'.format(project.slug), decrypted_key, + decrypted_secret) + + objects = minio_repository.client.list_objects_v2('dataset') + for obj in objects: + datasets.append({'is_dir': obj.is_dir, + # remove '/' after the directory name + 'name': obj.object_name[:-1] if obj.is_dir else obj.object_name, + 'datasheet': 'datasheet', + 'size': round(obj.size / 1000000, 2), + 'location': 'minio', + 'modified': obj.last_modified}) + except ResponseError as err: + print(err) + + previous_page = 1 + next_page = 1 + if len(datasets) > 0: + import math + # allow 10 rows per page in the table + pages = list(map(lambda x: x + 1, range(math.ceil(len(datasets) / 10)))) + + datasets = datasets[page_index * 10 - 10:page_index * 10] + + previous_page = page_index if page_index == 1 else page_index - 1 + next_page = page_index if page_index == pages[-1] else 
page_index + 1 + + return render(request, template, locals()) + + +""" +def datasheet(request, user, project, page_index): + template = 'dataset_datasheet.html' + if request.method == 'POST': + form = DatasheetForm(request.POST) + if form.is_valid(): + return HttpResponseRedirect('/submitted') + else: + form = DatasheetForm() + + return render(request, template, {'form': form}) +""" \ No newline at end of file diff --git a/components/studio/deployments/migrations/0005_auto_20201029_2008.py b/components/studio/deployments/migrations/0005_auto_20201029_2008.py new file mode 100644 index 000000000..720fbeb46 --- /dev/null +++ b/components/studio/deployments/migrations/0005_auto_20201029_2008.py @@ -0,0 +1,18 @@ +# Generated by Django 2.2.13 on 2020-10-29 20:08 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('deployments', '0004_auto_20201021_1037'), + ] + + operations = [ + migrations.AlterField( + model_name='deploymentinstance', + name='created_by', + field=models.CharField(max_length=512), + ), + ] diff --git a/components/studio/deployments/models.py b/components/studio/deployments/models.py index 0a35c1b3a..d549dc8ff 100644 --- a/components/studio/deployments/models.py +++ b/components/studio/deployments/models.py @@ -1,16 +1,14 @@ from django.db import models -from django.contrib.auth.models import User from django.db.models.signals import pre_delete, pre_save from django.dispatch import receiver from django.conf import settings from django.utils.text import slugify -import yaml -import json from projects.helpers import get_minio_keys import os import requests import modules.keycloak_lib as keylib + class HelmResource(models.Model): name = models.CharField(max_length=512, unique=True) namespace = models.CharField(max_length=512) @@ -21,25 +19,30 @@ class HelmResource(models.Model): created = models.DateTimeField(auto_now_add=True) updated = models.DateTimeField(auto_now_add=True) + def __str__(self): 
+ return "{}".format(self.name) + + @receiver(pre_save, sender=HelmResource, dispatch_uid='helmresource_pre_save_signal') def pre_save_helmresource(sender, instance, using, **kwargs): update = HelmResource.objects.filter(name=instance.name) action = 'deploy' if update: action = 'upgrade' - url = settings.CHART_CONTROLLER_URL + '/'+action + url = settings.CHART_CONTROLLER_URL + '/' + action print(instance.params) retval = requests.get(url, instance.params) if retval: - print('Resource: '+instance.name) - print('Action: '+action) + print('Resource: ' + instance.name) + print('Action: ' + action) instance.status = 'OK' else: - print('Failed to deploy resource: '+instance.name) + print('Failed to deploy resource: ' + instance.name) print('Reason: {}'.format(retval.text)) print('Status code: {}'.format(retval.status_code)) instance.status = 'Failed' + @receiver(pre_delete, sender=HelmResource, dispatch_uid='helmresource_pre_delete_signal') def pre_delete_helmresource(sender, instance, using, **kwargs): print('Deleting helm resource.') @@ -48,12 +51,12 @@ def pre_delete_helmresource(sender, instance, using, **kwargs): url = settings.CHART_CONTROLLER_URL + '/delete' retval = requests.get(url, parameters) if retval: - print('Deleted resource: '+instance.name) + print('Deleted resource: ' + instance.name) else: - print('Failed to delete resource: '+instance.name) + print('Failed to delete resource: ' + instance.name) -class DeploymentDefinition(models.Model): +class DeploymentDefinition(models.Model): PRIVATE = 'PR' PUBLIC = 'PU' ACCESS = [ @@ -76,9 +79,8 @@ class DeploymentDefinition(models.Model): def __str__(self): return "{}".format(self.name) -class DeploymentInstance(models.Model): - +class DeploymentInstance(models.Model): PRIVATE = 'PR' LIMITED = 'LI' PUBLIC = 'PU' @@ -96,16 +98,16 @@ class DeploymentInstance(models.Model): path = models.CharField(max_length=512) release = models.CharField(max_length=512) helmchart = models.OneToOneField('deployments.HelmResource', 
on_delete=models.CASCADE) - created_by = models.ForeignKey(User, on_delete=models.DO_NOTHING) + created_by = models.CharField(max_length=512) created_at = models.DateTimeField(auto_now_add=True) uploaded_at = models.DateTimeField(auto_now=True) def __str__(self): return "{}:{}".format(self.model.name, self.model.version) + @receiver(pre_delete, sender=DeploymentInstance, dispatch_uid='deployment_pre_delete_signal') def pre_delete_deployment(sender, instance, using, **kwargs): - model = instance.model model.status = 'CR' model.save() @@ -115,11 +117,12 @@ def pre_delete_deployment(sender, instance, using, **kwargs): # Clean up in Keycloak print('Cleaning up in Keycloak...') kc = keylib.keycloak_init() - keylib.keycloak_delete_client(kc, instance.release) - scope_id = keylib.keycloak_get_client_scope_id(kc, instance.release+'-scope') + keylib.keycloak_delete_client(kc, instance.release) + scope_id = keylib.keycloak_get_client_scope_id(kc, instance.release + '-scope') keylib.keycloak_delete_client_scope(kc, scope_id) print('Done.') + @receiver(pre_save, sender=DeploymentInstance, dispatch_uid='deployment_pre_save_signal') def pre_save_deployment(sender, instance, using, **kwargs): model = instance.model @@ -129,17 +132,17 @@ def pre_save_deployment(sender, instance, using, **kwargs): model_file = model.uid model_bucket = 'models' - + deployment_name = slugify(model.name) deployment_version = slugify(model.version) deployment_endpoint = '{}-{}.{}'.format(model.name, model.version, - settings.DOMAIN) - + settings.DOMAIN) + deployment_endpoint = settings.DOMAIN deployment_path = '/{}/serve/{}/{}/'.format(model.project.slug, - slugify(model.name), - slugify(model.version)) + slugify(model.name), + slugify(model.version)) instance.endpoint = deployment_endpoint instance.path = deployment_path @@ -156,32 +159,28 @@ def pre_save_deployment(sender, instance, using, **kwargs): minio_access_key = decrypted_key minio_secret_key = decrypted_secret - minio_host = 
project_slug+'-minio:9000' + minio_host = project_slug + '-minio:9000' global_domain = settings.DOMAIN - HOST = settings.DOMAIN - RELEASE_NAME = slugify(str(project_slug)+'-'+str(deployment_name)+'-'+str(deployment_version)) + RELEASE_NAME = slugify(str(project_slug) + '-' + str(deployment_name) + '-' + str(deployment_version)) burl = os.path.join('https://', HOST) eurl = os.path.join(deployment_endpoint, deployment_path) - URL = burl+eurl - - - instance.appname =instance.model.project.slug+'-'+slugify(instance.model.name)+'-'+slugify(instance.model.version) - + URL = burl + eurl + + instance.appname = instance.model.project.slug + '-' + slugify(instance.model.name) + '-' + slugify( + instance.model.version) + # Create Keycloak client corresponding to this deployment - print(URL) - print(RELEASE_NAME) - print(instance.created_by.username) - client_id, client_secret = keylib.keycloak_setup_base_client(URL, RELEASE_NAME, instance.created_by.username, ['owner'], ['owner']) - + client_id, client_secret = keylib.keycloak_setup_base_client(URL, RELEASE_NAME, instance.created_by, ['owner'], + ['owner']) + skip_tls = 0 if not settings.OIDC_VERIFY_SSL: skip_tls = 1 print("WARNING: Skipping TLS verify.") - # Default is that access is private. rules = """resources: - uri: /* @@ -194,12 +193,26 @@ def pre_save_deployment(sender, instance, using, **kwargs): if 'access' in instance.params: print(instance.params['access']) if instance.params['access'] == 'public': - # No rule means open to anyone. 
print("Public endpoint") access_rules = {"gatekeeper.rules": "public"} del instance.params['access'] - print(instance.params) + envstr = [] + envparams = [] + + if 'environment' in instance.params: + envvars = instance.params['environment'] + envstr = "" + for envvar in envvars: + envstr += """ +- name: {} + value: {} + """.format(envvar['name'], envvar['value']) + print(envstr) + del instance.params['environment'] + + if envstr: + envparams = {"extraEnv": envstr} parameters = {'release': RELEASE_NAME, 'chart': 'deploy', @@ -223,26 +236,42 @@ def pre_save_deployment(sender, instance, using, **kwargs): 'gatekeeper.client_id': client_id, 'gatekeeper.auth_endpoint': settings.OIDC_OP_REALM_AUTH, 'gatekeeper.skip_tls': str(skip_tls)} + param_success = True + try: + parameters.update(instance.params) + parameters.update(access_rules) + if envparams: + parameters.update(envparams) + + if 'minio.buckets' in instance.params: + bucket_param = '{' + buckets = instance.params['minio.buckets'] + for bucket in buckets: + bucket_param += bucket + ',' + bucket_param = bucket_param[0:-1] + '}' + del instance.params['minio.buckets'] + parameters.update({"minio.buckets": bucket_param}) + except: + print("Failed to update parameters in deployment.models") + param_success = False - parameters.update(instance.params) - parameters.update(access_rules) print('creating chart') helmchart = HelmResource(name=RELEASE_NAME, namespace='Default', chart='deploy', params=parameters, - username=instance.created_by.username) + username=instance.created_by) helmchart.save() instance.helmchart = helmchart - if helmchart.status == 'Failed': + if helmchart.status == 'Failed' or (not param_success): # If fail, clean up in Keycloak kc = keylib.keycloak_init() - keylib.keycloak_delete_client(kc, RELEASE_NAME) - scope_id = keylib.keycloak_get_client_scope_id(kc, RELEASE_NAME+'-scope') + keylib.keycloak_delete_client(kc, RELEASE_NAME) + scope_id = keylib.keycloak_get_client_scope_id(kc, RELEASE_NAME + 
'-scope') keylib.keycloak_delete_client_scope(kc, scope_id) raise Exception('Failed to launch deploy job.') else: instance.release = RELEASE_NAME model.status = 'DP' - model.save() \ No newline at end of file + model.save() diff --git a/components/studio/deployments/views.py b/components/studio/deployments/views.py index d62ebe419..f61cd2859 100644 --- a/components/studio/deployments/views.py +++ b/components/studio/deployments/views.py @@ -13,7 +13,7 @@ import modules.keycloak_lib as keylib from monitor.helpers import pod_up, get_count_over_time -@login_required(login_url='/accounts/login') +@login_required def predict(request, id, project): template = 'deploy/predict.html' is_authorized = False @@ -55,7 +55,7 @@ def predict(request, id, project): return render(request, template, locals()) -@login_required(login_url='/accounts/login') +@login_required def deploy(request, id): model = Model.objects.get(id=id) print(request.user) @@ -68,7 +68,7 @@ def deploy(request, id): return HttpResponseRedirect(reverse('models:list', kwargs={'user':request.user, 'project':model.project.slug})) -@login_required(login_url='/accounts/login') +@login_required def serve_settings(request, id, project): # model = Model.objects.get(id=id) # print(request.user) @@ -113,7 +113,7 @@ def serve_settings(request, id, project): -@login_required(login_url='/accounts/login') +@login_required def undeploy(request, id): model = Model.objects.get(id=id) instance = DeploymentInstance.objects.get(model=model) @@ -130,7 +130,7 @@ def index(request): return render(request, temp, locals()) -@login_required(login_url='/accounts/login') +@login_required def deployment_index(request, user, project): temp = 'deploy/list.html' @@ -155,7 +155,7 @@ def deployment_index(request, user, project): return render(request, temp, locals()) -@login_required(login_url='/accounts/login') +@login_required def deployment_edit(request, user, project, id=None): temp = 'deploy/edit.html' from projects.models import Project 
@@ -181,7 +181,7 @@ def deployment_edit(request, user, project, id=None): return render(request, temp, locals()) -@login_required(login_url='/accounts/login') +@login_required def deployment_definition_index(request): temp = 'deploy/definition/list.html' @@ -192,7 +192,7 @@ def deployment_definition_index(request): -@login_required(login_url='/accounts/login') +@login_required def deployment_definition_add(request): temp = 'deploy/definition/add.html' @@ -209,7 +209,7 @@ def deployment_definition_add(request): return render(request, temp, locals()) -@login_required(login_url='/accounts/login') +@login_required def deployment_definition_edit(request, id=None): temp = 'deploy/definition/edit.html' diff --git a/components/studio/experiments/models.py b/components/studio/experiments/models.py index 9086b7e1e..8ef7b62f0 100644 --- a/components/studio/experiments/models.py +++ b/components/studio/experiments/models.py @@ -1,7 +1,6 @@ from django.db import models from django.db.models.signals import pre_delete, pre_save from django.conf import settings -from api.serializers import ProjectSerializer from rest_framework.renderers import JSONRenderer from deployments.models import HelmResource from django.dispatch import receiver @@ -29,9 +28,10 @@ def pre_save_experiments(sender, instance, using, **kwargs): job_id = uuid.uuid1().hex[0:5] release_name = '{}-{}-{}'.format(instance.project.slug, 'cronjob', job_id) is_cron = 1 - if instance.schedule == "None": + if instance.schedule == "None" or instance.schedule == "": is_cron = 0 + from api.serializers import ProjectSerializer settings_file = ProjectSerializer(instance.project) settings_file = JSONRenderer().render(settings_file.data) @@ -49,17 +49,23 @@ def pre_save_experiments(sender, instance, using, **kwargs): "namespace": settings.NAMESPACE, "project.slug": instance.project.slug, "image": instance.environment.image, - "command": '["/bin/bash", "-c", "'+instance.command+'"]', #str(instance.command.split(' ')), + 
"command": '["/bin/bash", "-c", "'+instance.command+'"]', "iscron": str(is_cron), "cronjob.schedule": instance.schedule, "cronjob.port": "8786", "resources.limits.cpu": "500m", "resources.limits.memory": "1Gi", "resources.requests.cpu": "100m", - "resources.requests.memory": "256Gi", + "resources.requests.memory": "256Mi", + "resources.gpu.enabled": "false", "settings_file": settings_file, "user_settings_file": user_config_file, } + if hasattr(instance, 'options'): + del instance.options['command'] + del instance.options['environment'] + del instance.options['schedule'] + parameters.update(instance.options) helmchart = HelmResource(name=release_name, namespace='Default', chart='cronjob', diff --git a/components/studio/experiments/views.py b/components/studio/experiments/views.py index 2faf2a8f8..fc73b9c0a 100644 --- a/components/studio/experiments/views.py +++ b/components/studio/experiments/views.py @@ -16,7 +16,7 @@ logger = logging.getLogger(__name__) -@login_required(login_url='/accounts/login') +@login_required def index(request, user, project): print('User: {}'.format(user)) user_permissions = get_permissions(request, project) @@ -36,7 +36,7 @@ def index(request, user, project): -@login_required(login_url='/accounts/login') +@login_required def run(request, user, project): user_permissions = get_permissions(request, project) if not user_permissions['create']: @@ -70,7 +70,7 @@ def run(request, user, project): return render(request, temp, locals()) -@login_required(login_url='/accounts/login') +@login_required def details(request, user, project, id): user_permissions = get_permissions(request, project) if not user_permissions['view']: @@ -104,7 +104,7 @@ def details(request, user, project, id): return render(request, temp, locals()) -@login_required(login_url='/accounts/login') +@login_required def delete(request, user, project, id): user_permissions = get_permissions(request, project) if not user_permissions['delete']: diff --git 
a/components/studio/labs/models.py b/components/studio/labs/models.py index 4dca894f2..5963e8d09 100644 --- a/components/studio/labs/models.py +++ b/components/studio/labs/models.py @@ -15,7 +15,7 @@ from django.utils.text import slugify from deployments.models import HelmResource from projects.models import Environment, Flavor -from projects.models import Project, ProjectLog +from projects.models import Project, ProjectLog, Volume from modules import keycloak_lib as keylib from rest_framework.serializers import ModelSerializer @@ -120,6 +120,32 @@ def pre_save_labs(sender, instance, using, **kwargs): if not settings.OIDC_VERIFY_SSL: skip_tls = 1 print("WARNING: Skipping TLS verify.") + + volume_param = [] + if hasattr(instance, 'extraVols') and instance.extraVols: + vols = instance.extraVols.split(',') + extraVolumes = "" + extraVolumeMounts = "" + i = 1 + for vol in vols: + volobject = Volume.objects.get(name=vol, project_slug=instance.project.slug) + if volobject: + print(volobject) + volume_name = 'extravol'+str(i) + extraVolumes += """ +- name: {} + persistentVolumeClaim: + claimName: {} + """.format(volume_name, volobject.slug) + + extraVolumeMounts += """ +- name: {} + mountPath: /home/jovyan/{} + """.format(volume_name, vol) + i = i+1 + if i>1: + volume_param = {"extraVolumes": extraVolumes, "extraVolumeMounts": extraVolumeMounts} + print(volume_param) parameters = {'release': RELEASE_NAME, 'chart': 'lab', 'global.domain': settings.DOMAIN, @@ -131,7 +157,8 @@ def pre_save_labs(sender, instance, using, **kwargs): 'gatekeeper.auth_endpoint': settings.OIDC_OP_REALM_AUTH, 'gatekeeper.skip_tls': str(skip_tls) } - + if volume_param: + parameters.update(volume_param) ingress_secret_name = 'prod-ingress' try: ingress_secret_name = settings.LABS['ingress']['secretName'] diff --git a/components/studio/labs/views.py b/components/studio/labs/views.py index 652bf1903..2e8ee31c8 100644 --- a/components/studio/labs/views.py +++ b/components/studio/labs/views.py @@ -14,7 
+14,7 @@ import json import yaml -@login_required(login_url='/accounts/login') +@login_required def index(request, user, project): template = 'labs/index.html' project = Project.objects.filter(Q(slug=project), Q(owner=request.user) | Q(authorized=request.user)).first() @@ -26,7 +26,7 @@ def index(request, user, project): return render(request, template, locals()) -@login_required(login_url='/accounts/login') +@login_required def run(request, user, project): project = Project.objects.filter(Q(slug=project), Q(owner=request.user) | Q(authorized=request.user)).first() @@ -48,7 +48,7 @@ def run(request, user, project): reverse('labs:index', kwargs={'user': request.user, 'project': str(project.slug)})) -@login_required(login_url='/accounts/login') +@login_required def delete(request, user, project, id): project = Project.objects.filter(Q(slug=project), Q(owner=request.user) | Q(authorized=request.user)).first() session = Session.objects.filter(Q(id=id), Q(project=project), Q(lab_session_owner=request.user)).first() diff --git a/components/studio/models/admin.py b/components/studio/models/admin.py index f80d2b045..7d208d325 100644 --- a/components/studio/models/admin.py +++ b/components/studio/models/admin.py @@ -1,5 +1,7 @@ from django.contrib import admin -from .models import Model +from .models import Model, ModelLog, Metadata admin.site.register(Model) +admin.site.register(ModelLog) +admin.site.register(Metadata) diff --git a/components/studio/models/forms.py b/components/studio/models/forms.py index 3d7d66ab0..324c72a46 100644 --- a/components/studio/models/forms.py +++ b/components/studio/models/forms.py @@ -1,5 +1,5 @@ from django import forms -from .models import Model +from .models import Model, ModelLog, Metadata class ModelForm(forms.ModelForm): @@ -10,3 +10,17 @@ class Meta: 'uid': forms.HiddenInput(), 'project': forms.HiddenInput() } + +class ModelLogForm(forms.ModelForm): + class Meta: + model = ModelLog + fields = ( + 'run_id', 'trained_model', 'project', 
'training_started_at', 'execution_time', 'code_version', + 'current_git_repo', 'latest_git_commit', 'system_details', 'cpu_details', 'training_status') + +class Metadata(forms.ModelForm): + class Meta: + model = Metadata + fields = ( + 'run_id', 'trained_model', 'project', 'model_details', 'parameters', 'metrics') + diff --git a/components/studio/models/migrations/0002_metadata_modellog.py b/components/studio/models/migrations/0002_metadata_modellog.py new file mode 100644 index 000000000..49440ba65 --- /dev/null +++ b/components/studio/models/migrations/0002_metadata_modellog.py @@ -0,0 +1,48 @@ +# Generated by Django 2.2.13 on 2020-11-25 15:42 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('models', '0001_initial'), + ] + + operations = [ + migrations.CreateModel( + name='ModelLog', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('run_id', models.CharField(max_length=32)), + ('trained_model', models.CharField(default='', max_length=32)), + ('project', models.CharField(default='', max_length=255)), + ('training_started_at', models.CharField(max_length=255)), + ('execution_time', models.CharField(default='', max_length=255)), + ('code_version', models.CharField(default='', max_length=255)), + ('current_git_repo', models.CharField(default='', max_length=255)), + ('latest_git_commit', models.CharField(default='', max_length=255)), + ('system_details', models.TextField(blank=True)), + ('cpu_details', models.TextField(blank=True)), + ('training_status', models.CharField(choices=[('ST', 'Started'), ('DO', 'Done'), ('FA', 'Failed')], default='ST', max_length=2)), + ], + options={ + 'unique_together': {('run_id', 'trained_model')}, + }, + ), + migrations.CreateModel( + name='Metadata', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('run_id', models.CharField(max_length=32)), 
+ ('trained_model', models.CharField(default='', max_length=32)), + ('project', models.CharField(default='', max_length=255)), + ('model_details', models.TextField(blank=True)), + ('parameters', models.TextField(blank=True)), + ('metrics', models.TextField(blank=True)), + ], + options={ + 'unique_together': {('run_id', 'trained_model')}, + }, + ), + ] diff --git a/components/studio/models/models.py b/components/studio/models/models.py index 12efee0be..c3104e5ec 100644 --- a/components/studio/models/models.py +++ b/components/studio/models/models.py @@ -89,6 +89,47 @@ class Meta: def __str__(self): return "{name}".format(name=self.name) +class ModelLog(models.Model): + STARTED = 'ST' + DONE = 'DO' + FAILED = 'FA' + STATUS = [ + (STARTED, 'Started'), + (DONE, 'Done'), + (FAILED, 'Failed'), + ] + run_id = models.CharField(max_length=32) + trained_model = models.CharField(max_length=32, default='') + #trained_model = models.ForeignKey( + # Model, + # on_delete=models.CASCADE + #) + project = models.CharField(max_length=255, default='') + training_started_at = models.CharField(max_length=255) + #training_started_at = models.DateTimeField(auto_now_add=True) + execution_time = models.CharField(max_length=255, default='') + code_version = models.CharField(max_length=255, default='') + current_git_repo = models.CharField(max_length=255, default='') + latest_git_commit = models.CharField(max_length=255, default='') + system_details = models.TextField(blank=True) + cpu_details = models.TextField(blank=True) + training_status = models.CharField(max_length=2, choices=STATUS, default=STARTED) + + class Meta: + unique_together = ('run_id', 'trained_model') + +class Metadata(models.Model): + run_id = models.CharField(max_length=32) + trained_model = models.CharField(max_length=32, default='') + project = models.CharField(max_length=255, default='') + model_details = models.TextField(blank=True) + parameters = models.TextField(blank=True) + metrics = models.TextField(blank=True) + 
+ class Meta: + unique_together = ('run_id', 'trained_model') + + @receiver(pre_save, sender=Model, dispatch_uid='model_pre_save_signal') def pre_save_model(sender, instance, using, **kwargs): # Load version backend @@ -112,7 +153,7 @@ def pre_save_model(sender, instance, using, **kwargs): raise Exception('Failed to create new release for model {}-{}, release type {}.'.format(instance.name, instance.version, release_type)) @receiver(pre_delete, sender=Model, dispatch_uid='model_pre_delete_signal') -def pre_delete_deployment(sender, instance, using, **kwargs): +def pre_delete_model(sender, instance, using, **kwargs): # Model is saved in bucket 'model' with filename 'instance.uid' minio_url = '{}-minio.{}'.format(instance.project.slug, settings.DOMAIN) minio_keys = get_minio_keys(instance.project) diff --git a/components/studio/models/templates/models_details.html b/components/studio/models/templates/models_details.html index 4c3e6cf7d..c779abff1 100644 --- a/components/studio/models/templates/models_details.html +++ b/components/studio/models/templates/models_details.html @@ -1,4 +1,5 @@ {% extends 'baseproject.html' %} +{% load staticfiles %} {% block content %}
@@ -7,19 +8,27 @@

{{ model.name }}

@@ -80,7 +89,7 @@

{{ model.name }}

Visibility
+ style="border-top: 1px solid #dee2e6; padding-top: 20px;">
@@ -111,24 +120,25 @@

{{ model.name }}

{% if deployments %} - - - - - - + + + + + + - {% for d in deployments %} - - - - - - - {% endfor %} + {% for d in deployments %} + + + + + + + {% endfor %}
IdNameEndpointCreated
IdNameEndpointCreated
{{ d.pk }}{{ d.model.name }}:{{ d.model.version }} - {{ d.endpoint }}{{ d.path }}/{{ d.deployment.path_predict }} - {{ d.created_at }}
{{ d.pk }}{{ d.model.name }}:{{ d.model.version }} + {{ d.endpoint }}{{ d.path }}/{{ d.deployment.path_predict }} + {{ d.created_at }}
{% else %} @@ -142,7 +152,7 @@

{{ model.name }}

{% csrf_token %} + + {% for metric in metrics %} + + {% endfor %} + +
+
+
+ +
+
+

Summary

+
+
+
+

Highest value:

+

For run with ID:

+

Lowest value:

+

For run with ID:

+
+
+ +
+
+ + +
{% if not readme %}
@@ -208,4 +410,4 @@

-{% endblock %} +{% endblock %} \ No newline at end of file diff --git a/components/studio/models/views.py b/components/studio/models/views.py index 9328f82bb..9554fb65d 100644 --- a/components/studio/models/views.py +++ b/components/studio/models/views.py @@ -1,10 +1,10 @@ import uuid from django.shortcuts import render -from django.http import HttpResponseRedirect +from django.http import HttpResponseRedirect, JsonResponse from django.urls import reverse from projects.models import Project, ProjectLog from reports.models import Report, ReportGenerator -from .models import Model +from .models import Model, ModelLog, Metadata from .forms import ModelForm from reports.forms import GenerateReportForm from django.contrib.auth.decorators import login_required @@ -12,8 +12,10 @@ import logging from reports.helpers import populate_report_by_id, get_download_link import markdown +import ast +from collections import defaultdict - +new_data = defaultdict(list) logger = logging.getLogger(__name__) @@ -23,19 +25,22 @@ def index(request): return render(request, 'models_cards.html', locals()) -@login_required(login_url='/accounts/login') +@login_required def list(request, user, project): template = 'models_list.html' project = Project.objects.filter(slug=project).first() models = Model.objects.filter(project=project) + + # model_logs = ModelLog.objects.all() + # TODO: Filter by project and access. 
deployments = DeploymentDefinition.objects.all() return render(request, template, locals()) -@login_required(login_url='/accounts/login') +@login_required def create(request, user, project): template = 'models_upload.html' @@ -64,7 +69,7 @@ def create(request, user, project): return render(request, template, locals()) -@login_required(login_url='/accounts/login') +@login_required def change_access(request, user, project, id): model = Model.objects.filter(pk=id).first() previous = model.get_access_display() @@ -84,7 +89,7 @@ def change_access(request, user, project, id): reverse('models:details', kwargs={'user': user, 'project': project, 'id': id})) -@login_required(login_url='/accounts/login') +@login_required def details(request, user, project, id): project = Project.objects.filter(slug=project).first() model = Model.objects.filter(id=id).first() @@ -140,6 +145,26 @@ def details(request, user, project, id): else: form = GenerateReportForm() + log_objects = ModelLog.objects.filter(project=project.name, trained_model=model) + model_logs = [] + for log in log_objects: + model_logs.append({ + 'id': log.id, + 'trained_model': log.trained_model, + 'training_status': log.training_status, + 'training_started_at': log.training_started_at, + 'execution_time': log.execution_time, + 'code_version': log.code_version, + 'current_git_repo': log.current_git_repo, + 'latest_git_commit': log.latest_git_commit, + 'system_details': ast.literal_eval(log.system_details), + 'cpu_details': ast.literal_eval(log.cpu_details) + }) + + md_objects = Metadata.objects.filter(project=project.name, trained_model=model) + if md_objects: + metrics = get_chart_data(md_objects) + filename = None readme = None import requests as r @@ -158,6 +183,42 @@ def details(request, user, project, id): return render(request, 'models_details.html', locals()) +def get_chart_data(md_objects): + new_data.clear() + metrics_pre = [] + metrics = [] + for md_item in md_objects: + metrics_pre.append({ + 'run_id': 
md_item.run_id, + 'metrics': ast.literal_eval(md_item.metrics), + 'parameters': ast.literal_eval(md_item.parameters) + }) + for m in metrics_pre: + for key, value in m["metrics"].items(): + new_data[key].append([m["run_id"], value, m["parameters"]]) + for key, value in new_data.items(): + data = [] + labels = [] + params = [] + run_id = [] + run_counter = 0 + for item in value: + run_counter += 1 + labels.append("Run {}".format(run_counter)) + run_id.append(item[0]) + data.append(item[1]) + params.append(item[2]) + metrics.append({ + "metric": key, + "details": { + "run_id": run_id, + "labels": labels, + "data": data, + "params": params + } + }) + return metrics + def details_public(request, id): model = Model.objects.filter(pk=id).first() @@ -192,7 +253,7 @@ def details_public(request, id): return render(request, 'models_details_public.html', locals()) -@login_required(login_url='/accounts/login') +@login_required def delete(request, user, project, id): template = 'model_confirm_delete.html' diff --git a/components/studio/monitor/views.py b/components/studio/monitor/views.py index 24d065e41..197f9917e 100644 --- a/components/studio/monitor/views.py +++ b/components/studio/monitor/views.py @@ -48,7 +48,7 @@ def get_cpu_mem(resources, project_slug, resource_type): res_list.append((res_owner, res_flavor, res_cpu_limit, res_cpu_request, res_mem_limit, res_mem_request, res_id, res_model, res_version)) return res_list -@login_required(login_url='/accounts/login') +@login_required def overview(request, user, project): is_authorized = True user_permissions = get_permissions(request, project, sett.MONITOR_PERM) diff --git a/components/studio/projects/migrations/0003_volume.py b/components/studio/projects/migrations/0003_volume.py new file mode 100644 index 000000000..e63a46890 --- /dev/null +++ b/components/studio/projects/migrations/0003_volume.py @@ -0,0 +1,30 @@ +# Generated by Django 2.2.13 on 2020-12-02 16:22 + +from django.db import migrations, models +import 
django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('deployments', '0005_auto_20201029_2008'), + ('projects', '0002_remove_project_environment'), + ] + + operations = [ + migrations.CreateModel( + name='Volume', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('name', models.CharField(max_length=512)), + ('slug', models.CharField(blank=True, max_length=512, null=True)), + ('size', models.CharField(max_length=512)), + ('project_slug', models.CharField(max_length=512)), + ('created_by', models.CharField(max_length=512)), + ('settings', models.TextField(blank=True, null=True)), + ('updated_on', models.DateTimeField(auto_now=True)), + ('created_on', models.DateTimeField(auto_now_add=True)), + ('helmchart', models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, to='deployments.HelmResource')), + ], + ), + ] diff --git a/components/studio/projects/models.py b/components/studio/projects/models.py index 9e0005166..9276397cc 100644 --- a/components/studio/projects/models.py +++ b/components/studio/projects/models.py @@ -3,10 +3,48 @@ from django.db import models from django.contrib.auth.models import User from django.utils.text import slugify +from django.dispatch import receiver +from django.db.models.signals import pre_delete, pre_save +from django.conf import settings import string import random +from deployments.models import HelmResource +class Volume(models.Model): + name = models.CharField(max_length=512) + slug = models.CharField(max_length=512, blank=True, null=True) + size = models.CharField(max_length=512) + project_slug = models.CharField(max_length=512) + created_by = models.CharField(max_length=512) + helmchart = models.OneToOneField('deployments.HelmResource', on_delete=models.CASCADE) + settings = models.TextField(blank=True, null=True) + updated_on = models.DateTimeField(auto_now=True) + created_on = models.DateTimeField(auto_now_add=True) 
+ + def __str__(self): + return str(self.name) + +@receiver(pre_save, sender=Volume, dispatch_uid='volume_pre_save_signal') +def pre_save_volume(sender, instance, using, **kwargs): + instance.slug = slugify(instance.name+'-'+instance.project_slug) + user = instance.created_by + parameters = {'release': instance.slug, + 'chart': 'volume', + 'name': instance.slug, + 'accessModes': 'ReadWriteMany', + 'storageClass': settings.STORAGECLASS, + 'size': instance.size} + helmchart = HelmResource(name=instance.slug, + namespace='Default', + chart='volume', + params=parameters, + username=user) + helmchart.save() + instance.helmchart = helmchart + l = ProjectLog(project=Project.objects.get(slug=instance.project_slug), module='PR', headline='Volume', + description='A new volume {name} has been created'.format(name=instance.name)) + l.save() class Flavor(models.Model): name = models.CharField(max_length=512) slug = models.CharField(max_length=512) diff --git a/components/studio/projects/templates/index_projects.html b/components/studio/projects/templates/index_projects.html index ade9e8c65..a38932e14 100644 --- a/components/studio/projects/templates/index_projects.html +++ b/components/studio/projects/templates/index_projects.html @@ -8,7 +8,7 @@

Projects

{% if not request.user.is_authenticated %}

You need to be - logged in + logged in to be able to create a project.

{% endif %} diff --git a/components/studio/projects/views.py b/components/studio/projects/views.py index 6d30af00f..3302bbc68 100644 --- a/components/studio/projects/views.py +++ b/components/studio/projects/views.py @@ -36,7 +36,7 @@ def index(request): return render(request, template, locals()) -@login_required(login_url='/accounts/login') +@login_required def settings(request, user, project_slug): user_permissions = get_permissions(request, project_slug, sett.PROJECT_SETTINGS_PERM) print(user_permissions) @@ -68,7 +68,7 @@ def settings(request, user, project_slug): return render(request, template, locals()) -@login_required(login_url='/accounts/login') +@login_required def change_description(request, user, project_slug): project = Project.objects.filter(slug=project_slug).first() @@ -87,7 +87,7 @@ def change_description(request, user, project_slug): reverse('projects:settings', kwargs={'user': request.user, 'project_slug': project.slug})) -@login_required(login_url='/accounts/login') +@login_required def grant_access_to_project(request, user, project_slug): project = Project.objects.filter(slug=project_slug).first() @@ -122,7 +122,7 @@ def grant_access_to_project(request, user, project_slug): return HttpResponseRedirect( reverse('projects:settings', kwargs={'user': user, 'project_slug': project.slug})) -@login_required(login_url='/accounts/login') +@login_required def create(request): template = 'index_projects.html' @@ -173,7 +173,7 @@ def create(request): return render(request, template, locals()) -@login_required(login_url='/accounts/login') +@login_required def details(request, user, project_slug): is_authorized = kc.keycloak_verify_user_role(request, project_slug, ['member']) @@ -211,7 +211,7 @@ def details(request, user, project_slug): return render(request, template, locals()) -@login_required(login_url='/accounts/login') +@login_required def delete(request, user, project_slug): next_page = request.GET.get('next', '/projects/') @@ -236,7 +236,7 @@ def 
delete(request, user, project_slug): return HttpResponseRedirect(next_page, {'message': 'Deleted project successfully.'}) -@login_required(login_url='/accounts/login') +@login_required def publish_project(request, user, project_slug): owner = User.objects.filter(username=user).first() project = Project.objects.filter(owner=owner, slug=project_slug).first() @@ -277,7 +277,7 @@ def publish_project(request, user, project_slug): reverse('projects:settings', kwargs={'user': user, 'project_slug': project_slug})) -@login_required(login_url='/accounts/login') +@login_required def load_project_activity(request, user, project_slug): template = 'project_activity.html' diff --git a/components/studio/reports/views.py b/components/studio/reports/views.py index 37b917876..2fed78ff2 100644 --- a/components/studio/reports/views.py +++ b/components/studio/reports/views.py @@ -12,7 +12,7 @@ logger = logging.getLogger(__name__) -@login_required(login_url='/accounts/login') +@login_required def index(request, user, project): template = 'reports_list.html' @@ -22,7 +22,7 @@ def index(request, user, project): return render(request, template, locals()) -@login_required(login_url='/accounts/login') +@login_required def add(request, user, project): project = Project.objects.filter(slug=project).first() @@ -62,7 +62,7 @@ def add(request, user, project): return render(request, 'reports_add.html', locals()) -@login_required(login_url='/accounts/login') +@login_required def details(request, user, project, id): template = 'reports_details_generator.html' @@ -72,7 +72,7 @@ def details(request, user, project, id): return render(request, template, locals()) -@login_required(login_url='/accounts/login') +@login_required def visualize_report(request, user, project, id): template = 'reports_details.html' @@ -98,7 +98,7 @@ def visualize_report_public(request, id): return render(request, template, locals()) -@login_required(login_url='/accounts/login') +@login_required def delete_generator(request, 
user, project, id): project = Project.objects.filter(slug=project).first() report = ReportGenerator.objects.filter(id=id).first() @@ -120,7 +120,7 @@ def delete_generator(request, user, project, id): return render(request, 'report_confirm_delete.html', locals()) -@login_required(login_url='/accounts/login') +@login_required def delete_report(request, user, project, id): project = Project.objects.filter(slug=project).first() report = Report.objects.filter(id=id).first() diff --git a/components/studio/requirements.txt b/components/studio/requirements.txt index 81746145d..c37b1bd50 100644 --- a/components/studio/requirements.txt +++ b/components/studio/requirements.txt @@ -1,4 +1,4 @@ -minio +minio==6.0.2 certifi==2019.9.11 chardet==3.0.4 Django==2.2.13 diff --git a/components/studio/static/css/bootstrap-grid.css b/components/studio/static/css/bootstrap-grid.css index 68b84f842..99e0879c8 100644 --- a/components/studio/static/css/bootstrap-grid.css +++ b/components/studio/static/css/bootstrap-grid.css @@ -2337,6 +2337,11 @@ html { margin-left: auto !important; } +.qlabel { + width: 60%; + font-size: 250%; +} + @media (min-width: 576px) { .m-sm-0 { margin: 0 !important; diff --git a/components/studio/static/css/project.css b/components/studio/static/css/project.css index 9c4d11fdb..f29d7ee68 100644 --- a/components/studio/static/css/project.css +++ b/components/studio/static/css/project.css @@ -112,3 +112,10 @@ ul.timeline > li:before { margin-left: 15px; border-radius: 5px; } + +/* Datasheet questions */ + +.qlabels { + width: 60%; + font-size: 250%; +} diff --git a/components/studio/studio/urls.py b/components/studio/studio/urls.py index ae76cbaa1..e62eced91 100644 --- a/components/studio/studio/urls.py +++ b/components/studio/studio/urls.py @@ -15,14 +15,11 @@ """ from django.contrib import admin from django.urls import path, include -# from projects.views import auth from django.conf import settings from django.conf.urls.static import static urlpatterns = [ 
path('admin/', admin.site.urls), - path('accounts/', include('django.contrib.auth.urls')), - # path('auth/', auth, name='auth'), path('oidc/', include('mozilla_django_oidc.urls')), path('o/', include('oauth2_provider.urls', namespace='oauth2_provider')), path('', include('models.urls', namespace='models')), diff --git a/components/studio/templates/base.html b/components/studio/templates/base.html index bdeee105f..cf995e996 100644 --- a/components/studio/templates/base.html +++ b/components/studio/templates/base.html @@ -135,34 +135,26 @@ -->
  • - - + + + {% if request.user.is_authenticated %} - - - - {% endblock %} + {% block sidebar %} {% endblock %} {% block content %} {% endblock %} - diff --git a/components/studio/templates/registration/logged_out.html b/components/studio/templates/registration/logged_out.html deleted file mode 100644 index 00f108248..000000000 --- a/components/studio/templates/registration/logged_out.html +++ /dev/null @@ -1,17 +0,0 @@ -{% extends "base.html" %} - -{% block content %} -
    -
    -
    -

    Logged out

    - -

    Login again to get access to the Scaleout Platform.

    - -

    - Click here to sign in again. -

    -
    -
    -
    -{% endblock %} diff --git a/components/studio/templates/registration/login.html b/components/studio/templates/registration/login.html deleted file mode 100644 index 50189a236..000000000 --- a/components/studio/templates/registration/login.html +++ /dev/null @@ -1,91 +0,0 @@ -{% extends "base.html" %} -{% load static %} - -{% block extra_head %} - -{% endblock %} - -{% block content %} -
    -
    -
    -

    Sign in

    - {% csrf_token %} - - - {% if form.errors %} -

    Your username and password didn't match. Please try again.

    - {% endif %} - - {% if next %} - {% if user.is_authenticated %} -

    Your account doesn't have access to this page. To proceed, - please login with an account that has access.

    - {% else %} -

    Please login to see this page.

    - {% endif %} - {% endif %} - - - - - - - - -
    - - - - -
    -
    -
    -{% endblock %} diff --git a/components/studio/templates/registration/password_reset_complete.html b/components/studio/templates/registration/password_reset_complete.html deleted file mode 100644 index dacb0f6ba..000000000 --- a/components/studio/templates/registration/password_reset_complete.html +++ /dev/null @@ -1,9 +0,0 @@ -{% extends "base.html" %} - -{% block content %} -
    -

    - Your password has been set. You may go ahead and sign in now. -

    -
    -{% endblock %} diff --git a/components/studio/templates/registration/password_reset_confirm.html b/components/studio/templates/registration/password_reset_confirm.html deleted file mode 100644 index 157722037..000000000 --- a/components/studio/templates/registration/password_reset_confirm.html +++ /dev/null @@ -1,35 +0,0 @@ -{% extends "base.html" %} - -{% block content %} -
    - {% if validlink %} -

    Change password

    -
    - {% csrf_token %} - -
      -
    • Your password can't be too similar to your other personal information.
    • -
    • Your password must contain at least 8 characters.
    • -
    • Your password can't be a commonly used password.
    • -
    • Your password can't be entirely numeric.
    • -
    -

    -
    - - - -
    -
    - - -
    - -
    - {% else %} -

    - The password reset link was invalid, possibly because it has already been used. - Please request a new password reset. -

    - {% endif %} -
    -{% endblock %} diff --git a/components/studio/templates/registration/password_reset_done.html b/components/studio/templates/registration/password_reset_done.html deleted file mode 100644 index 508dc1829..000000000 --- a/components/studio/templates/registration/password_reset_done.html +++ /dev/null @@ -1,14 +0,0 @@ -{% extends "base.html" %} - -{% block content %} -
    -

    - We've emailed you instructions for setting your password, if an account exists with the email you entered. - You should receive them shortly. -

    -

    - If you don't receive an email, please make sure you've entered the address you registered with, - and check your spam folder. -

    -
    -{% endblock %} diff --git a/components/studio/templates/registration/password_reset_email.html b/components/studio/templates/registration/password_reset_email.html deleted file mode 100644 index 68190f451..000000000 --- a/components/studio/templates/registration/password_reset_email.html +++ /dev/null @@ -1,12 +0,0 @@ -{% autoescape off %} -To initiate the password reset process for your {{ user.get_username }} STACKn Account, -click the link below: - -{{ protocol }}://{{ domain }}{% url 'password_reset_confirm' uidb64=uid token=token %} - -If clicking the link above doesn't work, please copy and paste the URL in a new browser -window instead. - -Sincerely, -The STACKn Team -{% endautoescape %} diff --git a/components/studio/templates/registration/password_reset_form.html b/components/studio/templates/registration/password_reset_form.html deleted file mode 100644 index 5c64d4479..000000000 --- a/components/studio/templates/registration/password_reset_form.html +++ /dev/null @@ -1,25 +0,0 @@ -{% extends "base.html" %} - -{% block content %} -
    -
    -
    -

    - Forgot password -

    -
    - {% csrf_token %} - -
    - - - We'll send instructions about how to change - your password to your e-mail address. -
    - -
    -
    -
    -
    -{% endblock %} diff --git a/components/studio/templates/registration/password_reset_subject.txt b/components/studio/templates/registration/password_reset_subject.txt deleted file mode 100644 index aaf87f995..000000000 --- a/components/studio/templates/registration/password_reset_subject.txt +++ /dev/null @@ -1 +0,0 @@ -STACKn password reset \ No newline at end of file diff --git a/components/studio/workflows/views.py b/components/studio/workflows/views.py index c5622c04c..cdd585147 100644 --- a/components/studio/workflows/views.py +++ b/components/studio/workflows/views.py @@ -8,7 +8,7 @@ from django.urls import reverse -@login_required(login_url='/accounts/login') +@login_required def workflows_index(request, user, project): temp = 'workflow/list.html' @@ -19,7 +19,7 @@ def workflows_index(request, user, project): return render(request, temp, locals()) -@login_required(login_url='/accounts/login') +@login_required def workflows_run(request, user, project): temp = 'workflow/add.html' project = Project.objects.filter(slug=project).first() @@ -40,7 +40,7 @@ def workflows_run(request, user, project): return render(request, temp, locals()) -@login_required(login_url='/accounts/login') +@login_required def workflows_details(request, user, project, id=None): temp = 'workflow/details.html' @@ -61,7 +61,7 @@ def workflows_details(request, user, project, id=None): return render(request, temp, locals()) -@login_required(login_url='/accounts/login') +@login_required def workflows_definition_index(request): temp = 'definition/list.html' @@ -70,7 +70,7 @@ def workflows_definition_index(request): return render(request, temp, locals()) -@login_required(login_url='/accounts/login') +@login_required def workflows_definition_add(request): temp = 'definition/add.html' @@ -87,7 +87,7 @@ def workflows_definition_add(request): return render(request, temp, locals()) -@login_required(login_url='/accounts/login') +@login_required def workflows_definition_edit(request, id=None): temp = 
'definition/edit.html' diff --git a/docs/INSTALL.md b/docs/INSTALL.md index eee807661..6d378b450 100644 --- a/docs/INSTALL.md +++ b/docs/INSTALL.md @@ -109,15 +109,15 @@ - Installing Helm: ``` -sudo snap install helm –classic +sudo snap install helm --classic ``` - Installing kubectl: ``` -sudo snap install kubectl –classic +sudo snap install kubectl --classic ``` - Installing microk8s: ``` -sudo snap install microk8s –classic +sudo snap install microk8s --classic ``` - Start microk8s: ``` diff --git a/docs/README.md b/docs/README.md index 3f0a05828..8e4fad6ef 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,7 +1,4 @@ -What is STACKn
    -Why use STACKn
    -Core features
    -Setup +![alt text](https://thumb.tildacdn.com/tild3162-6435-4365-a230-656137616436/-/resize/560x/-/format/webp/stacknlogo3.png) # What is STACKn? @@ -17,13 +14,13 @@ STACKn is cloud-native and can be deployed on any solution that implements the K # Core features ## Custom Resource management -- Ability to lifecycle control resources. STACKn provides model, dataset, files and project lifecycle management, including user management. +Ability to lifecycle control resources. STACKn provides model, dataset, files and project lifecycle management, including user management. ## Model Management -- Ability to track models from cradle to grave with version control, inference auto scaling and control as well as audit trails and scheduled rollouts and/or decommissions. +Ability to track models from cradle to grave with version control, inference auto scaling and control as well as audit trails and scheduled rollouts and/or decommissions. ## Platform support -- Deploy anywhere where there is a Kubernetes compliant API. +Deploy anywhere where there is a Kubernetes compliant API. ## Integration and customization - The STACKn front end is composed of modules on a plugin architecture. The versatility enables composeability and extendability of multiple services together for consumption by the end user. @@ -35,6 +32,7 @@ STACKn is a composition of multiple required components. The overview can give y For additional details please see the technical documentation. # Setup + ## Getting started This guide lets you quickly get started with STACKn. @@ -43,7 +41,7 @@ This guide lets you quickly get started with STACKn. 3. Download charts 4. Install STACKn 5. Setup a user - +6. Create a project ### 1. 
Check prerequisites @@ -62,6 +60,7 @@ To configure STACKn you must know your storage class name and storage provisione - Setup a desired namespace (or default) - Setup a service account (or rolebind to admin) Ensure your service account user has a rolebinding to administrator permissions for deployment. + ```bash cat < See [LICENSE](LICENCE.md) +> See [LICENSE](https://github.com/scaleoutsystems/stackn/blob/master/LICENSE) diff --git a/docs/_navbar.md b/docs/_navbar.md new file mode 100644 index 000000000..a34641b61 --- /dev/null +++ b/docs/_navbar.md @@ -0,0 +1,7 @@ + + +* [Home](/) +* [Installation](/INSTALL) +* [Tutorials](/tutorial) +* [Dev](/dev) +* [Release Notes](/releasenotes) diff --git a/docs/_sidebar.md b/docs/_sidebar.md deleted file mode 100644 index f0bf6491e..000000000 --- a/docs/_sidebar.md +++ /dev/null @@ -1,5 +0,0 @@ - - -* [Home](/) -* [Installation](INSTALL.md) -* [Getting started](guide.md) diff --git a/docs/dev.md b/docs/dev.md index 4e80c8ffa..b3913be3a 100644 --- a/docs/dev.md +++ b/docs/dev.md @@ -2,7 +2,7 @@ ## Deployment -For deployment of STACKn, see [[...]]... +For deployment of STACKn, see the [installation guide](/INSTALL). ## Github diff --git a/docs/guide.md b/docs/guide.md deleted file mode 100644 index 5155ff1ff..000000000 --- a/docs/guide.md +++ /dev/null @@ -1,78 +0,0 @@ -# Getting started - -## Create a Project -To get up and running with STACKn, start by creating a new Project -following the steps below: - -1. Click `Projects` in the left side menu. -2. Type in a name and description for your Project. -3. If you have an existing GitHub repository that you want to use as a base -for your Project, include a URL to it in the `Import from...` field. This will import the repository in your Project file area. - -## Project Overview -Once you have created a Project, you will see another side menu that gives you -access to the different components of STACKn. 
On the `Overview` page, you will see -a README.md file that serves as an introduction to the project. It's content is taken from a README file in the root of your working directory. If no such file is present, you will see a __Getting Started__ -guide similar to this one. - -## Create a new Lab Session -Lab Sessions let you spawn Jupyter Labs instances backed by resources of a given flavor. Labs are the hub for experimentation in your Projects. To set one up, follow the steps below: - -1. Go to `Labs` from the side menu to the left. -2. Choose an `Image` and a `Flavor` for you Lab Session. -3. Simply press `Submit`. - -You will see a list of your Lab Sessions below the submit form. - -![Lab Sessions](https://github.com/scaleoutsystems/stackn/tree/master/docs/images/labs.png) - -## Datasets -When you create a Project, you automatically get an S3-compatible object storage (MinIO) for your datasets, -reports, models etc. You can see what is available in your `datasets` bucket -directly from STACKn on the `Datasets` page. - -On top of the page, you find a link to your MinIO instance together with the login -credentials. Once you are logged in, you can upload files and manage your buckets, but -do not delete or rename the already existing buckets since they fill specific functions. - -![Datasets](https://github.com/scaleoutsystems/stackn/tree/master/docs/images/datasets.png) - -## Models -You can see a list of your machine learning models on the `Models` page. From there, -you can also deploy models or delete the ones that are not needed anymore. - -## Metrics -Within the `Metrics` page, you can see a list of all your configurations for measuring -a model's performance. For example, classification reports. - -To add new Metrics, click `Add new` in the top right corner of the screen. 
- -![Add Metrics](https://github.com/scaleoutsystems/stackn/tree/master/docs/images/metrics.png) - -To be able to configure this, you need to have a file implementing the algorithm for -measuring the performance of the model. We call this a `generator file`. You might want -to set up a way to visualize this performance. For example, a pyplot for a classification -report. We call this a `visualizer file`. These two files and any other metrics-related files -need to be placed under a folder called `reports` in your Lab Session. In this way, you will -get access to all the related files within your working directory when executing the generation -and visualization algorithms. Once the files are stored in the correct place, you will see -them in the drop-down menus in the submit form. - -## Settings -The `Settings` page contains all the information about your Project and its components. Some -of the things you can do there are: - -- Change your Project's description -- Find link to your MinIO instance and login keys -- Download a configuration file for your Project which is required when working with -STACKn CLI -- Transfer ownership of your Project to another user -- Delete permanently your Project - -## Next steps -Now that you are familiar with the base functionality of STACKn, a good next step is to work through the example Projects available here: - -* [Classification of hand-written digits (MNIST)](https://github.com/scaleoutsystems/digits-example-project) -* [Classfication of malignant blood cells (AML)](https://github.com/scaleoutsystems/aml-example-project) - -These examples will teach you how to build a ML-model from scratch and how to serve it for private or public use. 
diff --git a/docs/index.html b/docs/index.html index 3523b7967..233a3c367 100644 --- a/docs/index.html +++ b/docs/index.html @@ -13,7 +13,9 @@ diff --git a/docs/releasenotes.md b/docs/releasenotes.md new file mode 100644 index 000000000..4e22fcc83 --- /dev/null +++ b/docs/releasenotes.md @@ -0,0 +1,82 @@ +# Release Notes + +## v0.3.0 + +### New functionality + +- You can now set environment variables in a model deployment +- New CLI command `stackn get settings` for listing all the settings needed to set up the CLI client. +The keycloak host is now set automatically after providing the studio host. +- Added a chart to the models module in STACKn to show how model metrics have changed over time and runs (draft) +- Create and delete project volumes with the CLI + +```bash +stackn create volume -s 10Mi -n example-vol +stackn get volumes +stackn delete volume -n example-vol +stackn create lab -f medium -e jupyter-minimal -v example-vol1,example-vol2 +``` + +- A possibility to specify which buckets (arbitrary number) from the Minio instance should be mounted in the deployed container +- A possibility to create jobs from command line + +``` +stackn create job -c job.json +stackn get jobs +``` + +### Other + +- Freshened up and new optimized STACKn documentation +- STACKn is now using ONLY Keycloak for authentication + +## v0.2.0 + +### New functionality + +- Introduced ownership for Lab Sessions +- Asynchronous allocation of Project resources with Celery tasks +- Added API permission classes +- Added Cron Jobs under Experiments +- Introduced Project Activity log +- Added resource limits for deployments +- Introduced basic monitoring overview +- Defined default project structure through + +```bash +stackn init +``` + +- Added new commands in the CLI client + +```bash +stackn project add members ... + +stackn --version + +stackn lab create -flavor ... -environment ... + +stackn lab list all + +stackn create/get/delete dataset ... 
+``` + +- STACKn local deploy (instructions available for macOS and Linux) + +### Bug fixes + +- Only the Project owner can grant access to it +- Cleaned up obsolete k8s jobs from the kubernetes cluster +- Optimized Django migrations management + + +## v0.1.0 + +- Experiments view now working. +- Default versioning for models vX.Y.Z (can be configured by user). +- Model deployments integrated with Keycloak. +- CLI reworked. Now possible to login and manage all projects, no need for project-specific configuration files. +- Add members to projects via UI. +- Manual scaling of model deployments. +- Basic monitoring of model deployments. +- Many bug fixes. diff --git a/docs/serving.md b/docs/serving.md deleted file mode 100644 index f9c770f8e..000000000 --- a/docs/serving.md +++ /dev/null @@ -1,43 +0,0 @@ -# Minimal Model Deployment - -If you haven't already installed the STACKn CLI, you can install it with pip: -``` -pip install git+https://@github.com/scaleoutsystems/stackn@develop#subdirectory=cli -``` - - -Create a project: -``` -stackn create project -n demo -``` -Create a directory for your model: -``` -mkdir demo-model -cd demo-model -``` -Initialize the model with -``` -stackn init -``` -Create the model and deploy it: -``` -stackn create model -n test-model -r minor -stackn create deployment -m test-model -d default-python -``` -It will take a minute for the model to deploy. 
Once it is ready, you can run a prediction: -``` -stackn predict -m test-model -v v0.1.0 -i '{"pred":"test"}' -``` -Alternatively you can create a lab session: -``` -stackn create lab -f large -e default -``` -and then you can call the model endpoint from inside a notebook: -``` -from scaleout.auth import get_token -import requests -url = 'https://studio.scilifelab.stackn.dev/demo-cbn/serve/demo-model/v010/predict/' -token, config = get_token() -res = requests.post(url, headers={"Authorization": "Token "+token}, json={"pred":"test"}) -res.json() -``` \ No newline at end of file diff --git a/docs/mnist-example.md b/docs/tutorial.md similarity index 57% rename from docs/mnist-example.md rename to docs/tutorial.md index 79d7940f6..ec1d0dc4e 100644 --- a/docs/mnist-example.md +++ b/docs/tutorial.md @@ -1,19 +1,104 @@ -# MNIST Example +# Getting started + +## Create a Project +To get up and running with STACKn, start by creating a new Project +following the steps below: + +1. Click `Projects` in the left side menu. +2. Type in a name and description for your Project. +3. If you have an existing GitHub repository that you want to use as a base +for your Project, include a URL to it in the `Import from...` field. This will import the repository in your Project file area. + +## Project Overview +Once you have created a Project, you will see another side menu that gives you +access to the different components of STACKn. On the `Overview` page, you will see +a README.md file that serves as an introduction to the project. It's content is taken from a README file in the root of your working directory. If no such file is present, you will see a __Getting Started__ +guide similar to this one. + +## Create a new Lab Session +Lab Sessions let you spawn Jupyter Labs instances backed by resources of a given flavor. Labs are the hub for experimentation in your Projects. To set one up, follow the steps below: + +1. Go to `Labs` from the side menu to the left. +2. 
Choose an `Image` and a `Flavor` for your Lab Session. +3. Simply press `Submit`. + +You will see a list of your Lab Sessions below the submit form. + +![Lab Sessions](images/labs.png) + +## Datasets +When you create a Project, you automatically get an S3-compatible object storage (MinIO) for your datasets, +reports, models etc. You can see what is available in your `datasets` bucket +directly from STACKn on the `Datasets` page. + +On top of the page, you find a link to your MinIO instance together with the login +credentials. Once you are logged in, you can upload files and manage your buckets, but +do not delete or rename the already existing buckets since they fill specific functions. + +![Datasets](images/datasets.png) + +## Models +You can see a list of your machine learning models on the `Models` page. From there, +you can also deploy models or delete the ones that are not needed anymore. + +## Metrics +Within the `Metrics` page, you can see a list of all your configurations for measuring +a model's performance. For example, classification reports. + +To add new Metrics, click `Add new` in the top right corner of the screen. + +![Add Metrics](images/metrics.png) + +To be able to configure this, you need to have a file implementing the algorithm for +measuring the performance of the model. We call this a `generator file`. You might want +to set up a way to visualize this performance. For example, a pyplot for a classification +report. We call this a `visualizer file`. These two files and any other metrics-related files +need to be placed under a folder called `reports` in your Lab Session. In this way, you will +get access to all the related files within your working directory when executing the generation +and visualization algorithms. Once the files are stored in the correct place, you will see +them in the drop-down menus in the submit form. + +## Settings +The `Settings` page contains all the information about your Project and its components. 
Some +of the things you can do there are: + +- Change your Project's description +- Find link to your MinIO instance and login keys +- Download a configuration file for your Project which is required when working with +STACKn CLI +- Transfer ownership of your Project to another user +- Permanently delete your Project + +## Next steps +Now that you are familiar with the base functionality of STACKn, a good next step is to work through the example Projects available here: + +* [Classification of hand-written digits (MNIST)](https://github.com/scaleoutsystems/digits-example-project) +* [Classification of malignant blood cells (AML)](https://github.com/scaleoutsystems/aml-example-project) + +These examples will teach you how to build an ML-model from scratch and how to serve it for private or public use. + +# Deploy a Model Create a directory and initialize an empty project structure: + ``` mkdir MNIST && cd MNIST stackn init ``` + ## Data + We keep the raw data in ``data/raw``: + ``` wget http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz -P data/raw/ wget http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz -P data/raw/ wget http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz -P data/raw/ wget http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz -P data/raw/ ``` + Unzip the raw data: + ``` gunzip data/raw/t*-ubyte.gz ``` @@ -21,6 +106,7 @@ The data is not in any standard image format, so we will need to process it before using it for training a model. The processed data goes in ``data/processed``. For this we will use the packages ``idx2numpy``, ``numpy``. Add them to ``requirements.txt``. 
We will now create a script for preprocessing the data, ``src/data/load_convert.py``: + ``` import idx2numpy import numpy as np @@ -40,10 +126,13 @@ pickle.dump((imgs_test, labels_test), file_test) file_train.close() file_test.close() ``` + ``` python src/data/load_convert.py ``` + Next, we create a script ``src/data/reshape_normalize.py`` that takes the interim data and reshapes and normalizes it to its final format, stored in ``data/processed``: + ``` import pickle @@ -70,10 +159,13 @@ file_test = open('data/processed/mnist_test.pkl', 'wb') pickle.dump((imgs_train, labels_train), file_train) pickle.dump((imgs_test, labels_test), file_test) ``` + ``` python src/data/reshape_normalize.py ``` - ## Train + +## Train + Now that we have the processed data, we can build and train a model. We will use ``Keras`` to build the model, which requires Tensorflow, so add ``keras`` and ``tensorflow`` to ``requirements.txt``. The script that trains the model is saved in ``src/models/train.py``. The trained model is saved in ``models/`` @@ -127,13 +219,15 @@ f.write(model_json) f.close() model.save_weights('models/model_weights.h5') ``` + ``` python src/models/train.py ``` - ## Deploy + To use the trained model, we need to implement ``model_load`` and ``model_predict`` in ``src/models/predict.py``: + ``` import tensorflow as tf import numpy as np @@ -157,7 +251,9 @@ def model_predict(inp, model=[]): pred = model.predict(np.array(inp.img)) return json.dumps({"prediction": pred.tolist()}) ``` + Note that we access the input as ``inp.img``. The input is defined in ``src/models/input_type.py``: + ``` from pydantic import BaseModel from typing import List @@ -168,23 +264,31 @@ class PredType(BaseModel): # Default pred: str. Can be accessed in predict as inp.pred. img: List[List[List[List[float]]]] ``` -Here, the input is a four-dimensional list of floats. You can read more about Pydantic here: https://pydantic-docs.helpmanual.io/ +Here, the input is a four-dimensional list of floats. 
You can read more about Pydantic here: https://pydantic-docs.helpmanual.io/ You create a new model with the CLI: + ``` stackn create model -n mnist -r minor ``` + and then you deploy it in the ``default-python`` environment: + ``` stackn create deployment -m mnist -d default-python ``` + List your new deployment: + ``` stackn get deployments ``` + ## Predict + You can call the endpoint: + ``` import pickle @@ -202,7 +306,9 @@ url = 'your endpoint' res = requests.post(url, headers={"Authorization": "Token "+token}, json={"img": img_inp}, verify=False) print(res.json()) ``` + You can also test your ``predict`` function locally (add to ``src/models/predict.py``) + ``` if __name__ == "__main__": model = model_load() @@ -217,4 +323,65 @@ if __name__ == "__main__": inp = PredType(img=img_inp) res = model_predict(inp, model) print(res) -``` \ No newline at end of file +``` + +# CLI + +## Minimal Model Deployment + +If you haven't already installed the STACKn CLI, you can install it with pip: + +``` +pip install git+https://@github.com/scaleoutsystems/stackn@develop#subdirectory=cli +``` + +- Create a project + +``` +stackn create project -n demo +``` + +- Create a directory for your model + +``` +mkdir demo-model +cd demo-model +``` + +- Initialize the model with + +``` +stackn init +``` + +- Create the model and deploy it + +``` +stackn create model -n test-model -r minor +stackn create deployment -m test-model -d default-python +``` + +It will take a minute for the model to deploy. 
+ +- Once it is ready, you can run a prediction + +``` +stackn predict -m test-model -v v0.1.0 -i '{"pred":"test"}' +``` + +- Alternatively you can create a lab session + +``` +stackn create lab -f large -e default +``` + +and then you can call the model endpoint from inside a notebook + +``` +from scaleout.auth import get_token +import requests +url = 'https://studio.scilifelab.stackn.dev/demo-cbn/serve/demo-model/v010/predict/' +token, config = get_token() +res = requests.post(url, headers={"Authorization": "Token "+token}, json={"pred":"test"}) +res.json() +```