From 64ea22b7f04266a0e3a6e0de7e44435d3c5594cd Mon Sep 17 00:00:00 2001 From: SkafteNicki Date: Wed, 27 Sep 2023 08:28:19 +0200 Subject: [PATCH] fix spelling mistakes --- .pre-commit-config.yaml | 6 ++++ projects.md | 2 +- pyproject.toml | 2 +- reports/README.md | 2 +- reports/report.py | 6 ++-- s10_extra/exercise_files/fashion_trainer.py | 2 +- s10_extra/high_performance_clusters.md | 12 ++++---- s10_extra/hyperparameters.md | 4 +-- s10_extra/kubernetes.md | 2 +- s10_extra/onnx.md | 2 +- s1_development_environment/command_line.md | 2 +- s1_development_environment/conda.md | 2 +- .../deep_learning_software.md | 4 +-- s1_development_environment/editor.md | 8 +++--- .../exercise_files/1_Tensors_in_PyTorch.ipynb | 2 +- .../2_Neural_Networks_in_PyTorch.ipynb | 4 +-- .../5_Inference_and_Validation.ipynb | 2 +- .../6_Saving_and_Loading_Models.ipynb | 2 +- .../exercise_files/fc_model.py | 2 +- .../code_structure.md | 4 +-- s2_organisation_and_version_control/dvc.md | 4 +-- s2_organisation_and_version_control/git.md | 8 +++--- .../good_coding_practice.md | 6 ++-- s3_reproducibility/config_files.md | 2 +- s3_reproducibility/docker.md | 6 ++-- s4_debugging_and_logging/boilerplate.md | 14 +++++----- s4_debugging_and_logging/debugging.md | 2 +- s4_debugging_and_logging/logging.md | 4 +-- s5_continuous_integration/README.md | 8 +++--- s5_continuous_integration/auto_docker.md | 4 +-- s5_continuous_integration/cml.md | 2 +- s5_continuous_integration/github_actions.md | 8 +++--- s5_continuous_integration/pre_commit.md | 2 +- s5_continuous_integration/unittesting.md | 6 ++-- s6_the_cloud/using_the_cloud.md | 4 +-- s7_deployment/apis.md | 28 +++++++++---------- s7_deployment/cloud_deployment.md | 2 +- s7_deployment/local_deployment.md | 4 +-- s8_monitoring/data_drifting.md | 2 +- s8_monitoring/monitoring.md | 2 +- s9_scalable_applications/data_loading.md | 4 +-- s9_scalable_applications/inference.md | 12 ++++---- timeplan.md | 6 ++-- 43 files changed, 109 insertions(+), 103 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index bc4f23b17..623a0414a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -20,3 +20,9 @@ repos: hooks: - id: black name: Format code + + - repo: https://github.com/codespell-project/codespell + rev: v2.2.5 + hooks: + - id: codespell + additional_dependencies: [tomli] diff --git a/projects.md b/projects.md index 365384add..d9674846f 100644 --- a/projects.md +++ b/projects.md @@ -21,7 +21,7 @@ all the awesome packages that exist to extend the functionality of Pytorch. For choose between one of three such frameworks which will serve as the basis of your project. The three frameworks are: * [PyTorch Image Models](https://github.com/rwightman/pytorch-image-models). PyTorch Image Models (also known as TIMM) - is the absolutly most used computer vision package (maybe except for `torchvision`). It contains models, scripts and + is the absolutely most used computer vision package (maybe except for `torchvision`). It contains models, scripts and pre trained for a lot of state-of-the-art image models within computer vision. * [Transformers](https://github.com/huggingface/transformers). 
The Transformers repository from the Huggingface group diff --git a/pyproject.toml b/pyproject.toml index 879b71573..6f1261dea 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,4 +27,4 @@ line-length = 120 exclude = "(.eggs|.git|.hg|.mypy_cache|.venv|_build|buck-out|build|dist)" [tool.codespell] -skip = "*.pdf" +skip = "*.pdf,*.ipynb" diff --git a/reports/README.md b/reports/README.md index 8ab477788..20d672a3c 100644 --- a/reports/README.md +++ b/reports/README.md @@ -144,7 +144,7 @@ end of the project. > > Example: > *We used ... for managing our dependencies. The list of dependencies was auto-generated using ... . To get a* -> *complete copy of our development enviroment, one would have to run the following commands* +> *complete copy of our development environment, one would have to run the following commands* > > Answer: diff --git a/reports/report.py b/reports/report.py index f90b2f87c..73ace1add 100644 --- a/reports/report.py +++ b/reports/report.py @@ -53,7 +53,7 @@ def check(): answers.append(per_question[-1]) answers = answers[1:] # remove first section - answers = [ans.strip("\n") for ans in answers] + answers = [answer.strip("\n") for answer in answers] def no_constraints(answer, index): pass @@ -124,8 +124,8 @@ def multi_constrains(answer, index, constrains): if len(answers) != 27: raise ValueError("Number of answers are different from the expected 27. Have you filled out every field?") - for i, (ans, const) in enumerate(zip(answers, question_constrains), start=1): - const(ans, i) + for i, (answer, const) in enumerate(zip(answers, question_constrains), start=1): + const(answer, i) if __name__ == "__main__": diff --git a/s10_extra/exercise_files/fashion_trainer.py b/s10_extra/exercise_files/fashion_trainer.py index 97bef6295..4b4cc92e7 100644 --- a/s10_extra/exercise_files/fashion_trainer.py +++ b/s10_extra/exercise_files/fashion_trainer.py @@ -102,7 +102,7 @@ def train_and_test(): for epoch in range(num_epochs): for batch_idx, (images, labels) in enumerate(train_loader): - # Transfering images and labels to GPU if available + # Transferring images and labels to GPU if available images, labels = images.to(device), labels.to(device) # Forward pass diff --git a/s10_extra/high_performance_clusters.md b/s10_extra/high_performance_clusters.md index 0da858788..66ca6e549 100644 --- a/s10_extra/high_performance_clusters.md +++ b/s10_extra/high_performance_clusters.md @@ -27,7 +27,7 @@ Tier, the larger applications it is possible to run. ## Cluster architectures In very general terms, cluster can come as two different kind of systems: supercomputers and LSF -(Load Sharing Facility). A supercomputer (as shown below) is organized into different modules, that are seperated by +(Load Sharing Facility). A supercomputer (as shown below) is organized into different modules, that are separated by network link. When you login to a supercomputer you will meet the front end which contains all the software needed to run computations. When you submit a job it will get sent to the backend modules which in most cases includes: general compute modules (CPU), acceleration modules (GPU), a memory module (RAM) and finally a storage module (HDD). Depending @@ -36,7 +36,7 @@ important but in physics simulation the general compute module / storage model i
![Image](../figures/meluxina_overview.png){ width="800" } -
Overview of the Meluxina supercomputer thats part of EuroHPC. +
Overview of the Meluxina supercomputer that's part of EuroHPC. Image credit
@@ -48,7 +48,7 @@ better to run on a LSF system if you are only requesting resources that can be h is better to run on a supercomputer if you have a resource intensive application that requires many devices to communicate with each others. -Regardless of cluster architechtures, on the software side of HPC, the most important part is whats called the +Regardless of cluster architectures, on the software side of HPC, the most important part is what's called the *HPC scheduler*. Without a HPC scheduler an HPC cluster would just be a bunch of servers with different jobs interfering with each other. The problem is when you have a large collection of resources and a large collection of users, you cannot rely on the users just running their applications without interfering with each other. A HPC scheduler @@ -113,7 +113,7 @@ of cluster. For the purpose of this exercise we are going to see how we can run using this [requirements file](https://github.com/SkafteNicki/dtu_mlops/tree/main/s10_extra/exercise_files/image_classifier_requirements.txt). -3. Thats all the setup needed. You would need to go through the creating of environment and installation of requirements +3. That's all the setup needed. You would need to go through the creating of environment and installation of requirements whenever you start a new project (no need for reinstalling conda). For the next step we need to look at how to submit jobs on the cluster. We are now ready to submit the our first job to the cluster: @@ -135,7 +135,7 @@ of cluster. For the purpose of this exercise we are going to see how we can run bsub < jobscript.sh ``` - You can check the status of your script by running the `bstat` command. Hopefully, the job should go trough + You can check the status of your script by running the `bstat` command. Hopefully, the job should go through really quickly. Take a look at the output file, it should be called something like `gpu_*.out`. Also take a look at the `gpu_*.err` file. Does both files look as they should? @@ -173,7 +173,7 @@ of cluster. For the purpose of this exercise we are going to see how we can run --trainer.accelerator 'gpu' --trainer.devices 1 --trainer.max_epochs 5 ``` - which will run the image classifier script (change it if you are runnning something else). + which will run the image classifier script (change it if you are running something else). 3. Finally submit the job: diff --git a/s10_extra/hyperparameters.md b/s10_extra/hyperparameters.md index 3d2884f20..c70e5f328 100644 --- a/s10_extra/hyperparameters.md +++ b/s10_extra/hyperparameters.md @@ -115,7 +115,7 @@ rest to a "recommended value". 4. If implemented correctly the number of hyperparameter combinations should be at least 1000, meaning that we not only need baysian optimization but probably also need pruning to succeed. Checkout the page for - [build-in pruners](https://optuna.readthedocs.io/en/stable/reference/pruners.html) in Optuna. Implement + [built-in pruners](https://optuna.readthedocs.io/en/stable/reference/pruners.html) in Optuna. Implement pruning in the script. I recommend using either the `MedianPruner` or the `ProcentilePruner`. 5. Re-run the study using pruning with a large number of trials (`n_trials>50`) @@ -182,6 +182,6 @@ rest to a "recommended value". 6. Finally, make sure that you can access the results -Thats all on how to do hyperparameter optimization in a scalable way. If you feel like it you can try to apply these +That's all on how to do hyperparameter optimization in a scalable way. 
If you feel like it you can try to apply these techniques on the ongoing corrupted MNIST example, where you are free to choose what hyperparameters that you want to use. diff --git a/s10_extra/kubernetes.md b/s10_extra/kubernetes.md index db1bcd73c..591e80388 100644 --- a/s10_extra/kubernetes.md +++ b/s10_extra/kubernetes.md @@ -7,7 +7,7 @@ !!! danger Module is still under development -## Kubernetes architechture +## Kubernetes architecture
![Image](../figures/components_of_kubernetes.png){ width="800" } diff --git a/s10_extra/onnx.md b/s10_extra/onnx.md index 7cc9a34d5..ac98e95fe 100644 --- a/s10_extra/onnx.md +++ b/s10_extra/onnx.md @@ -15,7 +15,7 @@ that datapoint. At a high-level, model predictions depends on three things: * The codebase that implements the models prediction method * The model weights which contains an actual instance of the model -* Code dependencies nessesary for running the codebase. +* Code dependencies necessary for running the codebase. We have already in module [M9 on Docker](../s3_reproducibility/docker.md) touch on how to take care of all these things. Containers makes it easy to link a codebase, model weights and code dependencies into a single object. diff --git a/s1_development_environment/command_line.md b/s1_development_environment/command_line.md index 0af55fb39..d365a12d6 100644 --- a/s1_development_environment/command_line.md +++ b/s1_development_environment/command_line.md @@ -35,7 +35,7 @@ As already stated, it is essentially just a big text interface to interact with when trying to execute a command, there are several parts to it: 1. The **prompt** is the part where you type your commands. It usually contains the name of the current directory you - are in, followed by some kind of sign: `$`, `>`, `:` are the usual onces. It can also contain other information, + are in, followed by some kind of sign: `$`, `>`, `:` are the usual ones. It can also contain other information, such as in the case of the above image it is also showing the current `conda` environment. 2. The **command** is the actual command you want to execute. For example, `ls` or `cd` 3. The **options** are additional arguments that you can pass to the command. For example, `ls -l` or `cd ..`. diff --git a/s1_development_environment/conda.md b/s1_development_environment/conda.md index 7008752ce..77f6c698d 100644 --- a/s1_development_environment/conda.md +++ b/s1_development_environment/conda.md @@ -1,6 +1,6 @@ ![Logo](../figures/icons/conda.png){ align=right width="130"} -# Conda and virtual enviroments +# Conda and virtual environments --- diff --git a/s1_development_environment/deep_learning_software.md b/s1_development_environment/deep_learning_software.md index 4f8e08635..c962a0338 100644 --- a/s1_development_environment/deep_learning_software.md +++ b/s1_development_environment/deep_learning_software.md @@ -153,7 +153,7 @@ corrupted version of regular mnist. Your overall task is the following: > **Implement a mnist neural network that achieves at least 85 % accuracy on the test set.** Before any training can start, you should identify what corruption that we have applied to the mnist dataset to -create the corrupted version. This should give you a clue about what network architechture to use. +create the corrupted version. This should give you a clue about what network architecture to use. One key point of this course is trying to stay organized. Spending time now organizing your code, will save time in the future as you start to add more and more features. As subgoals, please fulfill the following exercises @@ -177,7 +177,7 @@ To start you off, a very barebone version of each script is provided in the `fin implemented some logic, especially to make sure you can easily run different subcommands in for step 4. If you are interested in how this is done you can checkout this optional module on defining [command line interfaces (CLI)](../s10_extra/cli.md). 
We additionally also provide an `requirements.py` with
+suggestion to what packages are necessary to complete the exercise.
 \
 
 As documentation that your model is actually working, when running in the `train` command the script needs to
diff --git a/s1_development_environment/editor.md b/s1_development_environment/editor.md
index 4ee03eddb..e5c68c94a 100644
--- a/s1_development_environment/editor.md
+++ b/s1_development_environment/editor.md
@@ -40,7 +40,7 @@ The main components of VS code are:
 * The side bar: The side bar has different functionality depending on what extension that you have open. In most
   cases, the side bar will just contain the file explorer.
 
-* The editor: This where you code is. VS code supports a number of layouts in the editor (one column, two column ect.).
+* The editor: This is where your code is. VS code supports a number of layouts in the editor (one column, two column etc.).
   You can make a custom layout by dragging a file to where you want the layout to split.
 
 * The panel: The panel contains a terminal for you to interact with. This can quickly be used to try out code by
@@ -77,10 +77,10 @@ following exercises are just to get you started but you can find many more tutor
   which indicates that you are using the stock python installation, instead of the one you have created using
   `conda`. Click it and change the python environment to the one you actually want to use.
 
-3. One of the most useful tools in VSCode is the ability to navigate a hole project using the build-in
+3. One of the most useful tools in VSCode is the ability to navigate a whole project using the built-in
   `Explorer`. To really take advantage of the VS code you need to make sure what you are working on is a project.
   Create a folder called `hello` (somewhere on your laptop) and open it in VScode (Click `File` in the menu and then
-  select `Open Folder`). You should end up with a completly clean workspace (as shown below). Click the `New file`
+  select `Open Folder`). You should end up with a completely clean workspace (as shown below). Click the `New file`
   button and create a file called `hello.py`.
 
<div
@@ -102,7 +102,7 @@ following exercises are just to get you started but you can find many more tutor
   * Select some code and right click, choosing to run in a interactive window (where you can interact with the
     results like in a jupyter notebook)
 
-Thats, the basic of using VScode. We recommend highly that you revisit
+That's the basics of using VScode. We highly recommend that you revisit
 [this tutorial](https://code.visualstudio.com/docs/python/python-tutorial) during the course when we get to topics
 such as debugging and version control which VScode can help with.
diff --git a/s1_development_environment/exercise_files/1_Tensors_in_PyTorch.ipynb b/s1_development_environment/exercise_files/1_Tensors_in_PyTorch.ipynb
index a0cea03bf..12feb0bb5 100644
--- a/s1_development_environment/exercise_files/1_Tensors_in_PyTorch.ipynb
+++ b/s1_development_environment/exercise_files/1_Tensors_in_PyTorch.ipynb
@@ -154,7 +154,7 @@
 "* `weights.reshape(a, b)` will return a new tensor with the same data as `weights` with size `(a, b)` sometimes, and sometimes a clone, as in it copies the data to another part of memory.\n",
 "* `weights.resize_(a, b)` returns the same tensor with a different shape. However, if the new shape results in fewer elements than the original tensor, some elements will be removed from the tensor (but not from memory). If the new shape results in more elements than the original tensor, new elements will be uninitialized in memory. Here I should note that the underscore at the end of the method denotes that this method is performed **in-place**. Here is a great forum thread to [read more about in-place operations](https://discuss.pytorch.org/t/what-is-in-place-operation/16244) in PyTorch.\n",
 "* `weights.view(a, b)` will return a new tensor with the same data as `weights` with size `(a, b)`.\n",
-"* `torch.transpose(weights,0,1)` will return transposed weights tensor. This returns transposed version of inpjut tensor along dim 0 and dim 1. This is efficient since we do not specify to actual dimesions of weights.\n",
+"* `torch.transpose(weights,0,1)` will return a transposed weights tensor. This returns a transposed version of the input tensor along dim 0 and dim 1. This is efficient since we do not specify the actual dimensions of weights.\n",
 "\n",
 "I usually use `.view()`, but any of the three methods will work for this. So, now we can reshape `weights` to have five rows and one column with something like `weights.view(5, 1)`.\n",
 "\n",
diff --git a/s1_development_environment/exercise_files/2_Neural_Networks_in_PyTorch.ipynb b/s1_development_environment/exercise_files/2_Neural_Networks_in_PyTorch.ipynb
index 03daf66ab..d40489aa5 100644
--- a/s1_development_environment/exercise_files/2_Neural_Networks_in_PyTorch.ipynb
+++ b/s1_development_environment/exercise_files/2_Neural_Networks_in_PyTorch.ipynb
@@ -215,7 +215,7 @@
 "\n",
 "$$\n",
 "\\Large \\sigma(x_i) = \\cfrac{e^{x_i}}{\\sum_k^K{e^{x_k}}}\n",
 "$$\n",
 "\n",
-"What this does is squish each input $x_i$ between 0 and 1 and normalizes the values to give you a proper probability distribution where the probabilites sum up to one.\n",
+"What this does is squish each input $x_i$ between 0 and 1 and normalizes the values to give you a proper probability distribution where the probabilities sum up to one.\n",
 "\n",
 "> **Exercise:** Implement a function `softmax` that performs the softmax calculation and returns probability distributions for each example in the batch. Note that you'll need to pay attention to the shapes when doing this. If you have a tensor `a` with shape `(64, 10)` and a tensor `b` with shape `(64,)`, doing `a/b` will give you an error because PyTorch will try to do the division across the columns (called broadcasting) but you'll get a size mismatch. The way to think about this is for each of the 64 examples, you only want to divide by one value, the sum in the denominator. So you need `b` to have a shape of `(64, 1)`. This way PyTorch will divide the 10 values in each row of `a` by the one value in each row of `b`. Pay attention to how you take the sum as well. You'll need to define the `dim` keyword in `torch.sum`. Setting `dim=0` takes the sum across the rows while `dim=1` takes the sum across the columns."
 ]
@@ -240,7 +240,7 @@
 "def softmax(x):\n",
 "    ## TODO: Implement the softmax function here\n",
 "\n",
-"# Here, out should be the output of the network in the previous excercise with shape (64,10)\n",
+"# Here, out should be the output of the network in the previous exercise with shape (64,10)\n",
 "probabilities = softmax(out)\n",
 "\n",
 "# Does it have the right shape? Should be (64, 10)\n",
diff --git a/s1_development_environment/exercise_files/5_Inference_and_Validation.ipynb b/s1_development_environment/exercise_files/5_Inference_and_Validation.ipynb
index 30637784e..39641d18c 100644
--- a/s1_development_environment/exercise_files/5_Inference_and_Validation.ipynb
+++ b/s1_development_environment/exercise_files/5_Inference_and_Validation.ipynb
@@ -388,7 +388,7 @@
 "source": [
 "## Next Up!\n",
 "\n",
-"In the next part, I'll show you how to save your trained models. In general, you won't want to train a model everytime you need it. Instead, you'll train once, save it, then load the model when you want to train more or use if for inference."
+"In the next part, I'll show you how to save your trained models. In general, you won't want to train a model every time you need it. Instead, you'll train once, save it, then load the model when you want to train more or use it for inference."
 ]
 }
],
diff --git a/s1_development_environment/exercise_files/6_Saving_and_Loading_Models.ipynb b/s1_development_environment/exercise_files/6_Saving_and_Loading_Models.ipynb
index 3bf723641..2f7fb7195 100644
--- a/s1_development_environment/exercise_files/6_Saving_and_Loading_Models.ipynb
+++ b/s1_development_environment/exercise_files/6_Saving_and_Loading_Models.ipynb
@@ -206,7 +206,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"This means we need to rebuild the model exactly as it was when trained. Information about the model architecture needs to be saved in the checkpoint, along with the state dict. To do this, you build a dictionary with all the information you need to compeletely rebuild the model."
+"This means we need to rebuild the model exactly as it was when trained. Information about the model architecture needs to be saved in the checkpoint, along with the state dict. To do this, you build a dictionary with all the information you need to completely rebuild the model."
] }, { diff --git a/s1_development_environment/exercise_files/fc_model.py b/s1_development_environment/exercise_files/fc_model.py index 811a8f48b..962a4c05a 100644 --- a/s1_development_environment/exercise_files/fc_model.py +++ b/s1_development_environment/exercise_files/fc_model.py @@ -43,7 +43,7 @@ def validation(model, testloader, criterion): Arguments: model: torch network testloader: torch.utils.data.DataLoader, dataloader of test set - criterion: loss funtion + criterion: loss function """ accuracy = 0 test_loss = 0 diff --git a/s2_organisation_and_version_control/code_structure.md b/s2_organisation_and_version_control/code_structure.md index 9362ff660..d57ae512e 100644 --- a/s2_organisation_and_version_control/code_structure.md +++ b/s2_organisation_and_version_control/code_structure.md @@ -58,7 +58,7 @@ run a file I recommend always doing this from the root directory e.g. ```bash python src/data/make_dataset.py data/raw data/processed python src/models/train_model.py -ect... +etc... ``` in this way paths (for saving and loading files) are always relative to the root. @@ -157,7 +157,7 @@ in this way paths (for saving and loading files) are always relative to the root That ends the module on code structure and `cookiecutter`. We again want to stress the point that `cookiecutter` is just one template for organizing your code. What often happens in a team is that multiple templates are needed in -different stages of the development phase or for different product types because they share commen structure, while +different stages of the development phase or for different product types because they share common structure, while still having some specifics. Keeping templates up-to-date then becomes critical such that no team member is using an outdated template. If you ever end up in this situation, we highly recommend to checkout [cruft](https://github.com/cruft/cruft) that works alongside `cookiecutter` to not only make projects but update diff --git a/s2_organisation_and_version_control/dvc.md b/s2_organisation_and_version_control/dvc.md index b260b243d..9c3927e89 100644 --- a/s2_organisation_and_version_control/dvc.md +++ b/s2_organisation_and_version_control/dvc.md @@ -150,7 +150,7 @@ it contains excellent tutorials. than your standard code) to make sure that the two commands indeed downloads both your code and data. 10. Lets look about the process of updating our data. Remember the important aspect of version control is that we do not - need to store explicit files called `data_v1.pt`, `data_v2.pt` ect. but just have a single `data.pt` that where we + need to store explicit files called `data_v1.pt`, `data_v2.pt` etc. but just have a single `data.pt` that where we can always checkout earlier versions. Initially start by copying the data `data/corruptmnist_v2` folder from this repository to your MNIST code. This contains 3 extra datafiles with 15000 additional observations. Rerun your data pipeline so these gets incorporated into the files in your `processed` folder. @@ -175,7 +175,7 @@ it contains excellent tutorials. version control it, store it online and make it easy for other to download. Feel free to experiment with this using your own model checkpoints. -Thats all for today. With the combined power of `git` and `dvc` we should be able to version control everything in +That's all for today. With the combined power of `git` and `dvc` we should be able to version control everything in our development pipeline such that no changes are lost (assuming we commit regularly). 
It should be noted that `dvc` offers such more than just data version control, so if you want to deep dive into `dvc` we recommend their [pipeline](https://dvc.org/doc/user-guide/project-structure/pipelines-files) feature and how this can be used to setup
diff --git a/s2_organisation_and_version_control/git.md b/s2_organisation_and_version_control/git.md
index 741e84b7a..3ccda1da7 100644
--- a/s2_organisation_and_version_control/git.md
+++ b/s2_organisation_and_version_control/git.md
@@ -142,7 +142,7 @@ working together on the same project.
 I am continuously updating/changing some of the material during the course and I therefore recommend that you
 each day before the lecture do a `git pull` on your local copy
 
-4. Git may seem like a waste of time when solutions like dropbox, google drive ect exist, and it is
+4. Git may seem like a waste of time when solutions like dropbox, google drive etc exist, and it is
 not completely untrue when you are only one or two working on a project. However, these file management
 systems falls short when hundreds to thousands of people work together. For this exercise you will go through
 the steps of sending an open-source contribution:
@@ -164,7 +164,7 @@
 4. You are now ready to make changes to the repository. Try to find something to improve (any spelling
    mistakes?). When you have made the changes, do the standard git cycle: `add -> commit -> push`
 
-   5. Go online to the original repository and go the `Pull requests` tab. Find `compare` botton and
+   5. Go online to the original repository and go the `Pull requests` tab. Find `compare` button and
    choose the to compare the `master branch` of the original repo with the branch that you just created
    in your own repository. Check the diff on the page to make sure that it contains the changes you have
    made.
@@ -206,7 +206,7 @@
 >>>>>>> master
 ```
 
-   this should be interpret as: everything thats between `<<<<<<<` and `=======` are the changes made by your
+   this should be interpreted as: everything that's between `<<<<<<<` and `=======` are the changes made by your
   local commit and everything between `=======` and `>>>>>>>` are the changes you are trying to pull. To fix
   the merge conflict you simply have to make the code in the two "cells" work together. When you are done,
   remove the identifiers `<<<<<<<`, `=======` and `>>>>>>>`.
@@ -270,7 +270,7 @@
 That covers the basics of git to get you started. In the exercise folder you can
 with the most useful commands for future reference. Finally, we want to point out another awesome
 feature of Github: in browser editor. Sometimes you have a small edit that you want to make, but still would like to
 do this in a IDE/editor. Or you may be in the situation where you are working from another device than your usual
 developer machine. 
-Github has an build-in editor that can simply be enabled by changing any URL from
+Github has a built-in editor that can simply be enabled by changing any URL from
 
 ```bash
 https://github.com/username/repository
diff --git a/s2_organisation_and_version_control/good_coding_practice.md b/s2_organisation_and_version_control/good_coding_practice.md
index cc95769cf..3241a3250 100644
--- a/s2_organisation_and_version_control/good_coding_practice.md
+++ b/s2_organisation_and_version_control/good_coding_practice.md
@@ -11,7 +11,7 @@
 To understand what good coding practice is, it is important to understand what it is *not*:
 
 * Making sure your code run fast
-* Making sure that you use a specific coding paradigm (object orientated programming ect.)
+* Making sure that you use a specific coding paradigm (object orientated programming etc.)
 * Making sure to only use few dependencies
 
 Instead good coding practices really comes down to two topics: documentation and styling.
@@ -150,7 +150,7 @@ max-line-length = 100
 
 In addition to writing documentation and following a specific styling, in python we have a third way of improving the
 quality of our code: [through typing](https://docs.python.org/3/library/typing.html). Typing goes back to the earlier
-programming languages like `c`, `c++` ect. where [data types](https://www.scaler.com/topics/cpp/data-types-in-cpp/)
+programming languages like `c`, `c++` etc. where [data types](https://www.scaler.com/topics/cpp/data-types-in-cpp/)
 needed to be explicit stated for variables:
 
 ```cpp
@@ -198,7 +198,7 @@ different.
 
-Finally, since this is a very generic function it also works on `numpy` arrays ect. we can always default to the `Any`
+Finally, since this is a very generic function it also works on `numpy` arrays etc. we can always default to the `Any`
 type if we are not sure about all the specific types that a function can take
 
 ```python
diff --git a/s3_reproducibility/config_files.md b/s3_reproducibility/config_files.md
index 5697ceb34..677672bf1 100644
--- a/s3_reproducibility/config_files.md
+++ b/s3_reproducibility/config_files.md
@@ -46,7 +46,7 @@ an [argument parser](https://docs.python.org/3/library/argparse.html) e.g. run e
 python train.py --batch_size 256 --learning_rate 1e-4 --other_hp 12345
 ```
 
-This at least solves the problem with configurability. However, we again can end up with loosing experiments if we are
+This at least solves the problem with configurability. However, we again can end up with losing experiments if we are
 not careful.
 
 What we really want is some way to easy configure our experiments where the hyperparameters are
systematically saved
diff --git a/s3_reproducibility/docker.md b/s3_reproducibility/docker.md
index 20a81bdcb..165727635 100644
--- a/s3_reproducibility/docker.md
+++ b/s3_reproducibility/docker.md
@@ -22,7 +22,7 @@
 To really get reproducibility we need to also capture also system level componen...
 * software dependencies (other than python packages)
 
 Docker provides this kind of system-level reproducibility by creating isolated programs dependencies. In addition to
-docker providing reproducibility, one of the key features are also scaleability which is important when we later on
+docker providing reproducibility, one of the key features is also scalability which is important when we later on
 are going to discuss deployment. Because docker is system-level reproducible, it does not (conceptually) matter if we
 try to start our program on a single machine or a 1000 machines at once. 
@@ -229,7 +229,7 @@ beneficial for you to download.
 ??? warning "MAC M1/M2 users"
 
     There is a good chance that it docker build will not work out of the box for you, because M1/M2 chips use
-    another build architechture. Thus you need to specify the platform that you want to build for. This can be
+    another build architecture. Thus you need to specify the platform that you want to build for. This can be
     done by adding the following to your `FROM` statement:
 
    ```docker
@@ -335,7 +335,7 @@ beneficial for you to download.
    docker pull nvidia/cuda:11.0.3-base-ubuntu20.04
    ```
 
-    but it may differ based on what cuda vision you have. You can find all the different offical Nvidia images
+    but it may differ based on what cuda version you have. You can find all the different official Nvidia images
    [here](https://hub.docker.com/r/nvidia/cuda). After pulling the image, try running the `nvidia-smi` command
    inside a container based on the image you just pulled. It should look something like this:
diff --git a/s4_debugging_and_logging/boilerplate.md b/s4_debugging_and_logging/boilerplate.md
index f0be53b68..04c8b5d4b 100644
--- a/s4_debugging_and_logging/boilerplate.md
+++ b/s4_debugging_and_logging/boilerplate.md
@@ -78,9 +78,9 @@
 trainer = Trainer()
 traier.fit(model)
 ```
 
-Thats is essentially all that you need to specify in lightning to have a working model. The trainer object does not
+That's essentially all that you need to specify in lightning to have a working model. The trainer object does not
 have methods that you need to implement yourself, but it have a bunch of arguments that can be used to control how many
-epochs that you want to train, if you want to run on gpu ect. To get the training of our model to work we just need to
+epochs that you want to train, if you want to run on gpu etc. To get the training of our model to work we just need to
 specify how our data should be feed into the lighning framework.
 
 ### Data
@@ -102,7 +102,7 @@ all three assume that we are using `torch.utils.data.DataLoader` for the dataloa
        return DataLoader(...)
    ```
 
-2. Maybe even simplier, we can directly feed such dataloaders in the `fit` method of the `Trainer` object:
+2. Maybe even simpler, we can directly feed such dataloaders in the `fit` method of the `Trainer` object:
 
    ```python
    trainer.fit(model, train_dataloader, val_dataloader)
@@ -122,9 +122,9 @@
 use one of the
 [build in callbacks](https://pytorch-lightning.readthedocs.io/en/latest/extensions/callbacks.html#built-in-callbacks).
 Of particular interest are `ModelCheckpoint` and `EarlyStopping` callbacks:
 
-* The `ModelCheckpoint` makes sure to save checkpoints of you model. This is in pricipal not hard to do yourself, but
+* The `ModelCheckpoint` makes sure to save checkpoints of your model. This is in principle not hard to do yourself, but
   the `ModelCheckpoint` callback offers additional functionality by saving checkpoints only when some metric improves,
-  or only save the best `K` performing models ect.
+  or only save the best `K` performing models etc.
 
    ```python
    model = MyModel()
@@ -160,7 +160,7 @@
 lightning standard, such that we can take advantage of all the tricks the framew...
 implement our model in `lightning` to begin with, is that to truly understand why it is beneficially to use a
 high-level framework to do some of the heavy lifting you need to have gone through some of implementation troubles
 yourself.
 
-1. Convert your corrupted MNIST model into a `LightningModule`. You can either choose to completly override your old
+1. Convert your corrupted MNIST model into a `LightningModule`. You can either choose to completely override your old
   model or implement it in a new file. The bare minimum that you need to add while converting to get it working with
   the rest of lightning:
@@ -244,7 +244,7 @@
    Pytorch lightning. You can enable this by setting the
    [precision](https://pytorch-lightning.readthedocs.io/en/latest/common/trainer.html#precision)
    flag in the `Trainer`.
 
-10. (Optional) Lightning also have build-in support for profiling. Checkout how to do this using the
+10. (Optional) Lightning also has built-in support for profiling. Checkout how to do this using the
    [profiler](https://pytorch-lightning.readthedocs.io/en/latest/tuning/profiler.html)
    argument in the `Trainer`
    object.
diff --git a/s4_debugging_and_logging/debugging.md b/s4_debugging_and_logging/debugging.md
index cc450bfe3..2f762a742 100644
--- a/s4_debugging_and_logging/debugging.md
+++ b/s4_debugging_and_logging/debugging.md
@@ -7,7 +7,7 @@
 Debugging is very hard to teach and is one of the skills that just comes with experience. That said, there are good
 and bad ways to debug a program. We are all probably familiar with just inserting `print(...)` statements everywhere
 in our code. It is easy and can many times help narrow down where the problem happens. That said, this is not a great
-way of debugging when dealing with a very large codebase. You should therefore familiarize yourself with the build-in
+way of debugging when dealing with a very large codebase. You should therefore familiarize yourself with the built-in
 [python debugger](https://docs.python.org/3/library/pdb.html) as it may come in handy during the course.
<div
diff --git a/s4_debugging_and_logging/logging.md b/s4_debugging_and_logging/logging.md index 06e9e30e9..55e1a1b25 100644 --- a/s4_debugging_and_logging/logging.md +++ b/s4_debugging_and_logging/logging.md @@ -60,7 +60,7 @@ collaboration and sharing of results. logged. 6. Now log something else than scalar values. This could be a image, a histogram or a matplotlib figure. In all - cases the logging is still going to use `wandb.log` but you need extra calls to `wandb.Image` ect. depending + cases the logging is still going to use `wandb.log` but you need extra calls to `wandb.Image` etc. depending on what you choose to log. 7. Finally, lets create a report that you can share. Click the **Create report** button and include some of the @@ -104,7 +104,7 @@ collaboration and sharing of results. please take a look at the script being copied into the image and afterwards build the docker image. 3. When we want to run the image, what we need to do is including a environment variables that contains the API key - we generated. This will then autheticate the docker container with the wandb server: + we generated. This will then authenticate the docker container with the wandb server: ```bash docker run -e WANDB_API_KEY= wandb:latest diff --git a/s5_continuous_integration/README.md b/s5_continuous_integration/README.md index 502d227d6..9a5c9015f 100644 --- a/s5_continuous_integration/README.md +++ b/s5_continuous_integration/README.md @@ -32,7 +32,7 @@ to the end of the pipeline. This is where *continuous X* comes into play. The word *continuous* here refer to the fact that the pipeline should *continuously* be updated as we make code changes. You can also choose to think of this as *automatization* of processes. The *X* then covers that the process we need to go through to -automatize steps in the pipeline, depends on where we are in the pipeline e.g. the tools needed to +automate steps in the pipeline, depends on where we are in the pipeline e.g. the tools needed to do continuous integration is different from the tools need to do continuous delivery. In this session we are going to focus on *continuous integration (CI)*. As indicated in the image above, CI usually @@ -40,7 +40,7 @@ takes care of the first part of the developer pipeline that has to do with the c testing. In particular, in this module we are going to take a closer look at these questions: * How to write unittests for our applications -* How to automatize tests being run on code changes +* How to automate tests being run on code changes * How to secure we do not commit code that does not follow our code standards -* How we can automatize building of docker images -* How we can automatize training of our machine learning pipeline +* How we can automate building of docker images +* How we can automate training of our machine learning pipeline diff --git a/s5_continuous_integration/auto_docker.md b/s5_continuous_integration/auto_docker.md index 84dcf9d85..d8d5b6434 100644 --- a/s5_continuous_integration/auto_docker.md +++ b/s5_continuous_integration/auto_docker.md @@ -8,7 +8,7 @@ The Github Actions we learned about in [M16](github_actions.md) are an powerful than simply running our tests tests that we write for our application. In this module we are going to look at how we can use it for continuously building docker images. As you have already seen docker building can take a couple of minutes to build each time we do changes to our code base. For this reason we really just want to build a new image every time -we do a commit of our code. 
Thus, it should come as no surprise that we can also automatize the building process and +we do a commit of our code. Thus, it should come as no surprise that we can also automate the building process and furthermore we can take advantage of online compute power to parallelize the process. As discussed in the initial module on [docker](../s3_reproducibility/docker.md), @@ -76,7 +76,7 @@ not store our data in Github, we cannot copy it during the build process. 6. Upload the workflow to your github repository and check that it is being executed. If everything you should be able to see the the build docker image in your container repository in docker hub. -7. Make sure that you can execute `docker pull` locally to pull down the image that you just continuesly build +7. Make sure that you can execute `docker pull` locally to pull down the image that you just continuously build 8. (Optional) To test that the container works directly in github you can also try to include an additional step that actually runs the container. diff --git a/s5_continuous_integration/cml.md b/s5_continuous_integration/cml.md index b31f86096..60f4ab9ba 100644 --- a/s5_continuous_integration/cml.md +++ b/s5_continuous_integration/cml.md @@ -104,7 +104,7 @@ after the run is done. cml-send-comment report.md ``` - Nearly everything in the workflow file should look familar, except the last two lines. + Nearly everything in the workflow file should look familiar, except the last two lines. 3. Try pushing the workflow file to your github repository and make sure that it completes. If it does not, you may need to adjust the workflow file slightly. diff --git a/s5_continuous_integration/github_actions.md b/s5_continuous_integration/github_actions.md index 90c2b496c..7f3827de9 100644 --- a/s5_continuous_integration/github_actions.md +++ b/s5_continuous_integration/github_actions.md @@ -13,8 +13,8 @@ because * You need to run it often to make sure to catch bugs early on * If you want to have high code coverage of your code base, you will need many tests that takes a long time to run -For these reasons we want to automatize the testing, such that it done every time we push to our repository. If we -combine this with only pushing to branches and then only merging these branches whenever all automatized testing have +For these reasons we want to automate the testing, such that it done every time we push to our repository. If we +combine this with only pushing to branches and then only merging these branches whenever all automated testing have passed, our code should be fairly safe against unwanted bugs (assuming your tests are well covering your code). ## Github actions @@ -28,7 +28,7 @@ Lets take a look at how a github workflow file is organized: * Initially we start by giving the workflow a `name` * Next we specify on what events the workflow should be triggered. This includes both the action - (pull request, push ect) and on what branches is should activate + (pull request, push etc) and on what branches is should activate * Next we list the jobs that we want to do. Jobs are by default executed in parallel but can also be dependent on each other * In the `runs-on` we can specify which operation system we want the workflow to run on. We also @@ -124,7 +124,7 @@ Lets take a look at how a github workflow file is organized: 2. 
To your main/master branch add the following rules: - * Atleast one person needs to approve any PR + * At least one person needs to approve any PR * All your workflows has to pass * All conversations needs to be resolved diff --git a/s5_continuous_integration/pre_commit.md b/s5_continuous_integration/pre_commit.md index 7fc31e27b..552e11439 100644 --- a/s5_continuous_integration/pre_commit.md +++ b/s5_continuous_integration/pre_commit.md @@ -98,5 +98,5 @@ this will make sure that the file is automatically executed whenever we run `git 7. Finally, figure out how to disable `pre-commit` again. -That was all about how `pre-commit` can be used to automatize tasks. If you want to deep dive more into the topic you +That was all about how `pre-commit` can be used to automate tasks. If you want to deep dive more into the topic you can checkout this [page](https://pre-commit.com/#python) on how to define your own `pre-commit` hooks. diff --git a/s5_continuous_integration/unittesting.md b/s5_continuous_integration/unittesting.md index 41319f95b..5bea2602a 100644 --- a/s5_continuous_integration/unittesting.md +++ b/s5_continuous_integration/unittesting.md @@ -8,7 +8,7 @@ What often comes to mind for many developers, when discussing continuous integration (CI) is code testing. CI should secure that whenever a codebase is updated it is automatically tested such that if bugs have been -introduced in the codebase it will be catched early on. If you look at the +introduced in the codebase it will be caught early on. If you look at the [MLOps cyclepipeline](../figures/mlops-loop-en.jpg), CI is one of cornerstones of operations part. However, it should be notes that applying CI does not magically secure that your code does not break. CI is only as strong as the tests that are automatically executed. CI simply structures and automates this. @@ -34,7 +34,7 @@ important but we are not going to focus on it in this course. ## Pytest -Before we can begin to automatize testing of our code base we of course need to write the tests first. It is both a hard +Before we can begin to automate testing of our code base we of course need to write the tests first. It is both a hard and tedious task to do but arguable the most important aspects of CI. Python offers a couple of different libraries for writing tests. We are going to use `pytest`. @@ -234,5 +234,5 @@ features). Another open-source framework that you could choose to checkout is [hypothesis](https://github.com/HypothesisWorks/hypothesis) that can really help catch errors in corner cases of your code. In addition to writing unittests it is also highly recommended to test code that you include in your docstring belonging to your functions and modulus to make sure that any code there is in your documentation is also -correct. For such testing we can highly recommend using pythons build-in framework +correct. For such testing we can highly recommend using pythons built-in framework [doctest](https://docs.python.org/3/library/doctest.html). diff --git a/s6_the_cloud/using_the_cloud.md b/s6_the_cloud/using_the_cloud.md index c701cfca6..d9c969346 100644 --- a/s6_the_cloud/using_the_cloud.md +++ b/s6_the_cloud/using_the_cloud.md @@ -382,7 +382,7 @@ parts of our pipeline. docker push gcr.io//gcp_vm_tester ``` - confirm by going to the container registry in the cloud consol and check that the image has been correctly + confirm by going to the container registry in the cloud console and check that the image has been correctly pushed. 3. 
Lets then create a VM with that particular docker image. Instead of using `gcloud compute instances create` we @@ -403,7 +403,7 @@ parts of our pipeline. 3. We are now moving on to the final way to train our code, using `Vertex AI` service. - 1. Start by enabling it by searching for `Vertex AI` in the cloud consol and go to the service + 1. Start by enabling it by searching for `Vertex AI` in the cloud console and go to the service 2. The way we are going to use Vertex AI is to create custom jobs because we have already developed docker containers that contains everything to run our code. Thus the only command that we actually need to use is diff --git a/s7_deployment/apis.md b/s7_deployment/apis.md index ca1907c3f..620201975 100644 --- a/s7_deployment/apis.md +++ b/s7_deployment/apis.md @@ -7,7 +7,7 @@ !!! info "Core Module" Before we can get deployment of our models we need to understand concepts such as APIs and requests. The core reason -for this is that we need a new abstraction layer ontop of our applications that are not python specific. While Python +for this is that we need a new abstraction layer on top of our applications that are not python specific. While Python is the defacto language for machine learning, we cannot expect everybody else to use it and in particular we cannot expect network protocols (both locally and external) to be able to communicate with our python programs out of the box. For this reason we need to understand requests, in particular HTTP requests and how to create APIs that can interact @@ -16,7 +16,7 @@ with those requests. ## Requests When we are talking about requests, we are essentially talking about the communication method used in client-server -types of architechtures. As shown in the image below, in this architechture the client (user) is going to send +types of architectures. As shown in the image below, in this architecture the client (user) is going to send *requests* to a server (our machine learning application) and the server will give a *response*. For example the user may send a request of getting the class of a specific image, which our application will do and then send back the response in terms of a label. @@ -96,7 +96,7 @@ We are going to do a couple of exercises on sending requests using client to the server or vice-versa. Try looking at the `response.content` attribute. What is the type of this attribute? -5. You should hopefully observe that the `.content` atttribute is of type `bytes`. It is important to note that this is +5. You should hopefully observe that the `.content` attribute is of type `bytes`. It is important to note that this is the standard way of sending payloads to encode them into `byte` objects. To get a more human readable version of the response, we can convert to [JSON](https://www.json.org/json-en.html) format @@ -145,7 +145,7 @@ We are going to do a couple of exercises on sending requests using response = requests.post('https://httpbin.org/post', data = pload) ``` - Investigate the response (this is an artifical example, because we actually does not control the server). + Investigate the response (this is an artificial example, because we actually does not control the server). 9. Finally, we should also know that requests can be send directly from the command line using the `curl` command. Sometimes it is easier to send a request directly from the terminal and sometimes it is easier to do directly from @@ -165,7 +165,7 @@ We are going to do a couple of exercises on sending requests using 3. 
Try to redo some of the exercise yourself using `curl`. -That ends the intro session on `requests`. Do not worry if you are still not completly comfortable with sending +That ends the intro session on `requests`. Do not worry if you are still not completely comfortable with sending requests, we are going to return do how we do it in practise when we have actually created our own API. If you want to learn more about the `requests` package you can checkout [this tutorial](https://realpython.com/python-requests/) and if you want to see more example on how to use `curl` you can checkout @@ -173,7 +173,7 @@ if you want to see more example on how to use `curl` you can checkout ## Creating APIs -Requests are all about being on the client side of our client-server architechture. We are now going to move on to the +Requests are all about being on the client side of our client-server architecture. We are now going to move on to the server side where we will be learning about writing the APIs that requests can interact with. An application programming interface (API) is essentially the way of the developer (you) telling a user how to use the application that you have created. The API is an abstraction layer that allows the user to interact with our application in the way we want them @@ -191,9 +191,9 @@ and we could go on. However, there may be functionality that github is not inter they may therefore choose not to have endpoints for specific features. The particular kind of API we are going to work with is called REST API (or RESTful API). The REST API specify specific -contrains that a particular API needs to fullfill to be considered RESTful. You can read more about what the six +constraints that a particular API needs to fulfill to be considered RESTful. You can read more about what the six guiding principals behind REST API is [on this page](https://restfulapi.net/) but one of the most important to have in -mind is that the client-server architechture needs to be stateless. This means that whenever a request is send to the +mind is that the client-server architecture needs to be stateless. This means that whenever a request is send to the server it needs to be self-contained (all information included) and the server cannot rely on any previously stored information from previous requests. @@ -217,7 +217,7 @@ you can look through for help. pip install fastapi ``` - This contais the functions, modules, and variables we are going to need to define our interface. + This contains the functions, modules, and variables we are going to need to define our interface. 2. Additionally, also install `uvicorn` which is a package for defining low level server applications. @@ -283,7 +283,7 @@ you can look through for help. 1. Lets start by changing the root function to include a bit more info. In particular we are also interested in returning the status code so the end user can easily read that. Default status codes are included in the - [http](https://docs.python.org/3/library/http.html) build-in python package: + [http](https://docs.python.org/3/library/http.html) built-in python package: ```python from http import HTTPStatus @@ -321,7 +321,7 @@ you can look through for help. Add this API, reload and execute both a valid parameter and a non-valid parameter. - 3. In contrast to path parameters we have query parameters. In the requests exersices we saw an example of this + 3. In contrast to path parameters we have query parameters. 
In the requests exercises we saw an example of this where we were calling with the query `'q': 'requests+language:python'`. Any parameter in FastAPI that is not a path parameter, will be considered a query parameter: @@ -415,7 +415,7 @@ you can look through for help. ``` A couple of new things are going on here: we use the specialized `UploadFile` and `File` bodies in our input - definition. Addtionally, we added the `async`/`await` keywords. Figure out what everything does and try to run + definition. Additionally, we added the `async`/`await` keywords. Figure out what everything does and try to run the application (you can use any image file you like). 4. The above application actually does not do anything. Lets add [opencv](https://pypi.org/project/opencv-python/) @@ -452,7 +452,7 @@ you can look through for help. ``` returns a list of strings like `['a cat laying on a couch with a stuffed animal']` (try this yourself). Create a - FastAPI application that can do inference using this model e.g. it should take in an image, preferrably an optional + FastAPI application that can do inference using this model e.g. it should take in an image, preferably an optional `json` object for configuring some of the hyperparameters (like `max_length`) and should return a string containing the generated caption. @@ -490,7 +490,7 @@ you can look through for help. application. For the following you can take whatever previous FastAPI application as the base application for the container - 1. Start by creating a `requirement.txt` file for you application. You will atleast need `fastapi` and `uvicorn` in + 1. Start by creating a `requirement.txt` file for you application. You will at least need `fastapi` and `uvicorn` in the file and we as always recommend that you are specific about the version you want to use: ```txt diff --git a/s7_deployment/cloud_deployment.md b/s7_deployment/cloud_deployment.md index 400617495..11a0a78f9 100644 --- a/s7_deployment/cloud_deployment.md +++ b/s7_deployment/cloud_deployment.md @@ -161,7 +161,7 @@ service in GCP for deploying containers. afterwards check you container registry to check that you have successfully pushed the image. -2. Next go to `Cloud Run` in the cloud consol an enable the service +2. Next go to `Cloud Run` in the cloud console an enable the service 3. Click the `Create Service` button which should bring you to a page similar to the one below diff --git a/s7_deployment/local_deployment.md b/s7_deployment/local_deployment.md index b2fb0c466..c50e6e340 100644 --- a/s7_deployment/local_deployment.md +++ b/s7_deployment/local_deployment.md @@ -68,7 +68,7 @@ We are here going to look at `torch.jit.script` for compiling our code. Hint: use [torch.topk](https://pytorch.org/docs/stable/generated/torch.topk.html). -4. Finally, try benchmarking the non-scripted model against the scripted model. I recommend using the build-in +4. Finally, try benchmarking the non-scripted model against the scripted model. I recommend using the built-in benchmarker in Pytorch: `torch.utils.benchmark.Timer`, which you can read more about how to use [here](https://pytorch.org/tutorials/recipes/recipes/benchmark.html). Do you see a increase in performance of the scripted model compared to the non-scriptet model. If so, what is the percentage increase in efficiency? @@ -86,7 +86,7 @@ services for packaging and serving multiple Pytorch at the same time. Before we go into details of Torchmetrics, an important question is why we need such an abstraction on top of our -developed model. 
Why cant we just do: +developed model. Why can't we just do: ```bash python inference.py --my_model model_checkpoint.pt --new_datapoint img.png diff --git a/s8_monitoring/data_drifting.md b/s8_monitoring/data_drifting.md index 1c7070eda..08717e4cc 100644 --- a/s8_monitoring/data_drifting.md +++ b/s8_monitoring/data_drifting.md @@ -281,7 +281,7 @@ should actually respond to feature beginning to drift and when it is probably fi application what kind of rules that should be implemented. Additionally, the tools presented here are also in no way complete and are especially limited in one way: they are only considering the marginal distribution of data. Every analysis that we done have been on the distribution per feature (the marginal distribution), however as the image below -show it is possible for data to have drifted to another distribution with the marginal being approximatively the same. +show it is possible for data to have drifted to another distribution with the marginal being approximately the same.
diff --git a/s8_monitoring/monitoring.md b/s8_monitoring/monitoring.md
index 805ef4e28..e33e9e162 100644
--- a/s8_monitoring/monitoring.md
+++ b/s8_monitoring/monitoring.md
@@ -43,7 +43,7 @@ metric/telemetry.
 We commonly run into what is referred to as the
 [goldilocks problem](https://en.wikipedia.org/wiki/Goldilocks_principle) where we want just the *right amount* of
 alerts; however, it is more often the case that we either have

-* Too many alerts, such that they become irrelevant and the really important onces are overseen, often referred to as
+* Too many alerts, such that they become irrelevant and the really important ones are overlooked, often referred to as
 alert fatigue
 * Or alternatively, we have too few alerts, and problems that should have triggered an alert are not dealt with when
 they happen, which can have unforeseen consequences.

diff --git a/s9_scalable_applications/data_loading.md b/s9_scalable_applications/data_loading.md
index 140c3ec64..2b0df0ded 100644
--- a/s9_scalable_applications/data_loading.md
+++ b/s9_scalable_applications/data_loading.md
@@ -71,7 +71,7 @@
 dataset = MyDataset()
 dataloader = DataLoader(
     dataset,
     batch_size=8,
-    num_workers=4 # this is the number of threds we want to parallize workload over
+    num_workers=4 # this is the number of threads we want to parallelize the workload over
 )
 ```

@@ -155,7 +155,7 @@ datafiles (.jpg) at runtime.
 `-batches_to_check` flag). Also if you are not seeing an improvement, try increasing the batch size (since data
 loading is parallelized per batch).

-For certain machines like the Mac with M1 chipset it is nessesary to set the `multiprocessing_context` flag in the
+For certain machines like the Mac with M1 chipset it is necessary to set the `multiprocessing_context` flag in the
 dataloader to `"fork"`. This essentially tells the dataloader how the worker nodes should be created.

 6. Retry the experiment where you change the data augmentation to be more complex:
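The code block that follows this item in the course file is cut off by the diff; as a stand-in sketch, a more complex augmentation pipeline might look like this (the transform choices and magnitudes are illustrative, not the course's actual values):

```python
from torchvision import transforms

# A heavier augmentation pipeline: more transforms means more CPU work per
# sample, which is exactly the workload that `num_workers` parallelizes.
augmentation = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.1),
    transforms.RandomRotation(30),
    transforms.ToTensor(),
])
```

Rerunning the timing experiment with such a pipeline should make the benefit of multiple workers more visible, since each batch now costs more CPU time to prepare.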
diff --git a/s9_scalable_applications/inference.md b/s9_scalable_applications/inference.md
index 180908bed..ec39b17ac 100644
--- a/s9_scalable_applications/inference.md
+++ b/s9_scalable_applications/inference.md
@@ -41,8 +41,8 @@ architectures (conv) are more efficient than transformer (vit) for the same para
 As discussed in this
 [blogpost](https://devblog.pytorchlightning.ai/training-an-edge-optimized-speech-recognition-model-with-pytorch-lightning-a0a6a0c2a413)
-the largest increase in inference speed you will see (given some speficic hardware) is choosing an efficient model
-architechture. In the exercises below we are going to investigate the inference speed of different architechtures.
+the largest increase in inference speed you will see (given some specific hardware) comes from choosing an efficient
+model architecture. In the exercises below we are going to investigate the inference speed of different architectures.

 1. Start by checking out this
    [table](https://pytorch.org/vision/stable/models.html#table-of-all-available-classification-weights)
@@ -307,13 +307,13 @@ in sparse networks.

 ## Knowledge distillation

 Knowledge distillation is somewhat similar to pruning, in the sense that it tries to find a smaller model that can
-perform equally well as a large model, however it does so in a completly different way. Knowledge distillation is a
+perform equally well as a large model, however it does so in a completely different way. Knowledge distillation is a
 *model compression* technique that builds on the work of
 [Bucila et al.](https://www.cs.cornell.edu/~caruana/compression.kdd06.pdf) in which we try to distill/compress the
 knowledge of a large complex model (also called the teacher model) into a simpler model (also called the student
 model).

 The best known example of this is the [DistilBERT model](https://arxiv.org/abs/1910.01108). The DistilBERT model is a
-smaller version of the large natural-language procession model Bert, which achives 97% of the performance of Bert while
+smaller version of the large natural-language processing model Bert, which achieves 97% of the performance of Bert while
 only containing 40% of the weights and being 60% faster. You can see in the figure below how it is much smaller in size
 compared to other models developed at the same time.

@@ -323,7 +323,7 @@

 Knowledge distillation works by assuming we have a big teacher model that is already performing well and that we want
 to compress.
-By runnning our training set through our large model we get a softmax distribution for each and every training sample.
+By running our training set through our large model we get a softmax distribution for each and every training sample.
 The goal of the student is to both match the original labels of the training data and to match the softmax
 distribution of the teacher model. The intuition behind doing this is that the teacher model needs to be more complex
 to learn the complex inter-class relationships from just (one-hot) labels. The student on the other hand gets directly
 fed this softmax distribution, and therefore does not need
 the same capacity to learn the same as the teacher.
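The paragraph above describes the training recipe: the student is optimized against both the hard labels and the teacher's softened output distribution. A hedged sketch of such a combined loss follows; the temperature `T` and weight `alpha` are common defaults, not values prescribed by the course:

```python
import torch.nn.functional as F


def distillation_loss(student_logits, teacher_logits, labels, T=2.0, alpha=0.5):
    """Weighted sum of the hard-label loss and the soft-target (teacher) loss."""
    hard = F.cross_entropy(student_logits, labels)
    soft = F.kl_div(
        F.log_softmax(student_logits / T, dim=-1),  # student log-probabilities
        F.softmax(teacher_logits / T, dim=-1),      # softened teacher targets
        reduction="batchmean",
    ) * (T * T)  # undo the 1/T^2 gradient scaling introduced by the temperature
    return alpha * hard + (1 - alpha) * soft
```

During training the teacher's logits would be computed under `torch.no_grad()`, so only the student receives gradient updates.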
 ### ❔ Exercises

-Lets try implementing model distillation ourself. We are going to see if we can achive this on the
+Let's try implementing model distillation ourselves. We are going to see if we can achieve this on the
 [cifar10](https://www.cs.toronto.edu/~kriz/cifar.html) dataset. Do note that the exercise below can take quite a long
 time to finish because it involves training multiple networks and therefore involves some waiting.

diff --git a/timeplan.md b/timeplan.md
index fe5857c28..3e9c69848 100644
--- a/timeplan.md
+++ b/timeplan.md
@@ -4,13 +4,13 @@

 The course is organised into *exercise* days (2/3 of the course) and *project* days (1/3 of the course).

-*Exercise* days start at 9:00 in the morning with an lecture (15-30 min) that will give some context about atleast one
+*Exercise* days start at 9:00 in the morning with a lecture (15-30 min) that will give some context about at least one
 of the topics of that day. Additionally, the previous day's exercises may be touched upon briefly. The remainder of the
 day will be spent on solving exercises either individually or in small groups. For some people the exercises will be
 fast to do and for others they will take the whole day. We will provide help throughout the day. We will try to answer
 questions on slack but help will be prioritized for students physically on campus.

-*Project* days are intended for project work and you are therefore responsable for making an agreement with your group
+*Project* days are intended for project work and you are therefore responsible for making an agreement with your group
 when and where you are going to work. On the first project day there will be a lecture at 9:00 with project
 information. On other project days we may also start the day with an external lecture, which we highly recommend that
 you participate in. During each project day we will have office hours where you can ask questions about the project.

@@ -22,7 +22,7 @@ Legend: 📝 Slides, 🎥 Recording.

 ## Week 1

-In the first week you will be introduced to a number of development practises for organising and developing code,
+In the first week you will be introduced to a number of development practices for organizing and developing code,
 especially with a focus on making everything reproducible.

 Date | Day | Presentation topic | Frameworks | Format