diff --git a/.all-contributorsrc b/.all-contributorsrc index 6766eaee6..77db9c441 100644 --- a/.all-contributorsrc +++ b/.all-contributorsrc @@ -22,7 +22,7 @@ }, { "login": "Naeemkh", - "name": "naeemkh", + "name": "Naeem Khoshnevis", "avatar_url": "https://avatars.githubusercontent.com/Naeemkh", "profile": "https://github.com/Naeemkh", "contributions": [] @@ -36,14 +36,14 @@ }, { "login": "shanzehbatool", - "name": "Shanzeh Batool", + "name": "shanzehbatool", "avatar_url": "https://avatars.githubusercontent.com/shanzehbatool", "profile": "https://github.com/shanzehbatool", "contributions": [] }, { "login": "kai4avaya", - "name": "Kleinbard", + "name": "kai4avaya", "avatar_url": "https://avatars.githubusercontent.com/kai4avaya", "profile": "https://github.com/kai4avaya", "contributions": [] @@ -57,14 +57,14 @@ }, { "login": "eliasab16", - "name": "eliasab16", + "name": "Elias Nuwara", "avatar_url": "https://avatars.githubusercontent.com/eliasab16", "profile": "https://github.com/eliasab16", "contributions": [] }, { "login": "ishapira1", - "name": "ishapira", + "name": "Itai Shapira", "avatar_url": "https://avatars.githubusercontent.com/ishapira1", "profile": "https://github.com/ishapira1", "contributions": [] @@ -97,13 +97,6 @@ "profile": "https://github.com/jaysonzlin", "contributions": [] }, - { - "login": "18jeffreyma", - "name": "Jeffrey Ma", - "avatar_url": "https://avatars.githubusercontent.com/18jeffreyma", - "profile": "https://github.com/18jeffreyma", - "contributions": [] - }, { "login": "sophiacho1", "name": "Sophia Cho", @@ -111,6 +104,13 @@ "profile": "https://github.com/sophiacho1", "contributions": [] }, + { + "login": "18jeffreyma", + "name": "Jeffrey Ma", + "avatar_url": "https://avatars.githubusercontent.com/18jeffreyma", + "profile": "https://github.com/18jeffreyma", + "contributions": [] + }, { "login": "korneelf1", "name": "Korneel Van den Berghe", @@ -118,9 +118,16 @@ "profile": "https://github.com/korneelf1", "contributions": [] }, + { + "login": "jasonjabbour", + "name": "jasonjabbour", + "avatar_url": "https://avatars.githubusercontent.com/jasonjabbour", + "profile": "https://github.com/jasonjabbour", + "contributions": [] + }, { "login": "zishenwan", - "name": "Zishen", + "name": "Zishen Wan", "avatar_url": "https://avatars.githubusercontent.com/zishenwan", "profile": "https://github.com/zishenwan", "contributions": [] @@ -134,18 +141,11 @@ }, { "login": "andreamurillomtz", - "name": "Andrea Murillo", + "name": "Andrea", "avatar_url": "https://avatars.githubusercontent.com/andreamurillomtz", "profile": "https://github.com/andreamurillomtz", "contributions": [] }, - { - "login": "alxrod", - "name": "Alex Rodriguez", - "avatar_url": "https://avatars.githubusercontent.com/alxrod", - "profile": "https://github.com/alxrod", - "contributions": [] - }, { "login": "srivatsankrishnan", "name": "Srivatsan Krishnan", @@ -153,13 +153,27 @@ "profile": "https://github.com/srivatsankrishnan", "contributions": [] }, + { + "login": "alxrod", + "name": "Alex Rodriguez", + "avatar_url": "https://avatars.githubusercontent.com/alxrod", + "profile": "https://github.com/alxrod", + "contributions": [] + }, { "login": "DivyaAmirtharaj", - "name": "Divya", + "name": "Divya Amirtharaj", "avatar_url": "https://avatars.githubusercontent.com/DivyaAmirtharaj", "profile": "https://github.com/DivyaAmirtharaj", "contributions": [] }, + { + "login": "arnaumarin", + "name": "arnaumarin", + "avatar_url": "https://avatars.githubusercontent.com/arnaumarin", + "profile": "https://github.com/arnaumarin", 
+ "contributions": [] + }, { "login": "aptl26", "name": "Aghyad Deeb", @@ -167,6 +181,13 @@ "profile": "https://github.com/aptl26", "contributions": [] }, + { + "login": "ma3mool", + "name": "Abdulrahman Mahmoud", + "avatar_url": "https://avatars.githubusercontent.com/ma3mool", + "profile": "https://github.com/ma3mool", + "contributions": [] + }, { "login": "NaN", "name": "Aghyad Deeb", @@ -175,17 +196,10 @@ "contributions": [] }, { - "login": "arnaumarin", - "name": "arnaumarin", - "avatar_url": "https://avatars.githubusercontent.com/arnaumarin", - "profile": "https://github.com/arnaumarin", - "contributions": [] - }, - { - "login": "ma3mool", - "name": "Abdulrahman Mahmoud", - "avatar_url": "https://avatars.githubusercontent.com/ma3mool", - "profile": "https://github.com/ma3mool", + "login": "ELSuitorHarvard", + "name": "ELSuitorHarvard", + "avatar_url": "https://avatars.githubusercontent.com/ELSuitorHarvard", + "profile": "https://github.com/ELSuitorHarvard", "contributions": [] }, { @@ -195,6 +209,13 @@ "profile": "https://github.com/oishib", "contributions": [] }, + { + "login": "jared-ni", + "name": "Jared Ni", + "avatar_url": "https://avatars.githubusercontent.com/jared-ni", + "profile": "https://github.com/jared-ni", + "contributions": [] + }, { "login": "Ekhao", "name": "Emil Njor", @@ -210,17 +231,10 @@ "contributions": [] }, { - "login": "ELSuitorHarvard", - "name": "ELSuitorHarvard", - "avatar_url": "https://avatars.githubusercontent.com/ELSuitorHarvard", - "profile": "https://github.com/ELSuitorHarvard", - "contributions": [] - }, - { - "login": "jared-ni", - "name": "Jared Ni", - "avatar_url": "https://avatars.githubusercontent.com/jared-ni", - "profile": "https://github.com/jared-ni", + "login": "mmaz", + "name": "Mark Mazumder", + "avatar_url": "https://avatars.githubusercontent.com/mmaz", + "profile": "https://github.com/mmaz", "contributions": [] }, { @@ -231,26 +245,19 @@ "contributions": [] }, { - "login": "mmaz", - "name": "Mark Mazumder", - "avatar_url": "https://avatars.githubusercontent.com/mmaz", - "profile": "https://github.com/mmaz", + "login": "Sara-Khosravi", + "name": "Sara Khosravi", + "avatar_url": "https://avatars.githubusercontent.com/Sara-Khosravi", + "profile": "https://github.com/Sara-Khosravi", "contributions": [] }, { "login": "leo47007", - "name": "YU SHUN, HSIAO", + "name": "Yu-Shun Hsiao", "avatar_url": "https://avatars.githubusercontent.com/leo47007", "profile": "https://github.com/leo47007", "contributions": [] }, - { - "login": "Sara-Khosravi", - "name": "Sara Khosravi", - "avatar_url": "https://avatars.githubusercontent.com/Sara-Khosravi", - "profile": "https://github.com/Sara-Khosravi", - "contributions": [] - }, { "login": "BaeHenryS", "name": "Henry Bae", @@ -258,6 +265,13 @@ "profile": "https://github.com/BaeHenryS", "contributions": [] }, + { + "login": "eurashin", + "name": "eurashin", + "avatar_url": "https://avatars.githubusercontent.com/eurashin", + "profile": "https://github.com/eurashin", + "contributions": [] + }, { "login": "arbass22", "name": "Andrew Bass", @@ -265,6 +279,13 @@ "profile": "https://github.com/arbass22", "contributions": [] }, + { + "login": "ShvetankPrakash", + "name": "Shvetank Prakash", + "avatar_url": "https://avatars.githubusercontent.com/ShvetankPrakash", + "profile": "https://github.com/ShvetankPrakash", + "contributions": [] + }, { "login": "AditiR-42", "name": "Aditi Raju", @@ -273,10 +294,10 @@ "contributions": [] }, { - "login": "ShvetankPrakash", - "name": "Shvetank Prakash", - "avatar_url": 
"https://avatars.githubusercontent.com/ShvetankPrakash", - "profile": "https://github.com/ShvetankPrakash", + "login": "marcozennaro", + "name": "Marco Zennaro", + "avatar_url": "https://avatars.githubusercontent.com/marcozennaro", + "profile": "https://github.com/marcozennaro", "contributions": [] }, { @@ -286,13 +307,6 @@ "profile": "https://github.com/pongtr", "contributions": [] }, - { - "login": "eurashin", - "name": "eurashin", - "avatar_url": "https://avatars.githubusercontent.com/eurashin", - "profile": "https://github.com/eurashin", - "contributions": [] - }, { "login": "jzhou1318", "name": "Jennifer Zhou", @@ -301,10 +315,24 @@ "contributions": [] }, { - "login": "marcozennaro", - "name": "Marco Zennaro", - "avatar_url": "https://avatars.githubusercontent.com/marcozennaro", - "profile": "https://github.com/marcozennaro", + "login": "Gjain234", + "name": "Gauri Jain", + "avatar_url": "https://avatars.githubusercontent.com/Gjain234", + "profile": "https://github.com/Gjain234", + "contributions": [] + }, + { + "login": "Allen-Kuang", + "name": "Allen-Kuang", + "avatar_url": "https://avatars.githubusercontent.com/Allen-Kuang", + "profile": "https://github.com/Allen-Kuang", + "contributions": [] + }, + { + "login": "serco425", + "name": "Sercan Aygün", + "avatar_url": "https://avatars.githubusercontent.com/serco425", + "profile": "https://github.com/serco425", "contributions": [] }, { @@ -315,38 +343,31 @@ "contributions": [] }, { - "login": "Gjain234", - "name": "Gauri Jain", - "avatar_url": "https://avatars.githubusercontent.com/Gjain234", - "profile": "https://github.com/Gjain234", + "login": "gnodipac886", + "name": "gnodipac886", + "avatar_url": "https://avatars.githubusercontent.com/gnodipac886", + "profile": "https://github.com/gnodipac886", "contributions": [] }, { "login": "alex-oesterling", - "name": "Alex Oesterling", + "name": "alex-oesterling", "avatar_url": "https://avatars.githubusercontent.com/alex-oesterling", "profile": "https://github.com/alex-oesterling", "contributions": [] }, { - "login": "gnodipac886", - "name": "Eric D", - "avatar_url": "https://avatars.githubusercontent.com/gnodipac886", - "profile": "https://github.com/gnodipac886", - "contributions": [] - }, - { - "login": "Allen-Kuang", - "name": "Allen-Kuang", - "avatar_url": "https://avatars.githubusercontent.com/Allen-Kuang", - "profile": "https://github.com/Allen-Kuang", + "login": "NaN", + "name": "Annie Laurie Cook", + "avatar_url": "https://www.gravatar.com/avatar/7cd8d5dfd83071f23979019d97655dc5?d=identicon&s=100", + "profile": "https://github.com/harvard-edge/cs249r_book/graphs/contributors", "contributions": [] }, { - "login": "serco425", - "name": "Sercan Aygün", - "avatar_url": "https://avatars.githubusercontent.com/serco425", - "profile": "https://github.com/serco425", + "login": "NaN", + "name": "Yu-Shun Hsiao", + "avatar_url": "https://www.gravatar.com/avatar/242dbc711f7056b6a276763473fc88b8?d=identicon&s=100", + "profile": "https://github.com/harvard-edge/cs249r_book/graphs/contributors", "contributions": [] }, { @@ -358,15 +379,15 @@ }, { "login": "NaN", - "name": "Annie Laurie Cook", - "avatar_url": "https://www.gravatar.com/avatar/7cd8d5dfd83071f23979019d97655dc5?d=identicon&s=100", + "name": "Batur Arslan", + "avatar_url": "https://www.gravatar.com/avatar/35a8d9ffd03f05e79a2c6ce6206a56f2?d=identicon&s=100", "profile": "https://github.com/harvard-edge/cs249r_book/graphs/contributors", "contributions": [] }, { "login": "NaN", - "name": "Batur Arslan", - "avatar_url": 
"https://www.gravatar.com/avatar/35a8d9ffd03f05e79a2c6ce6206a56f2?d=identicon&s=100", + "name": "Sophia Cho", + "avatar_url": "https://www.gravatar.com/avatar/da4d11538053ecff01479b351d152d92?d=identicon&s=100", "profile": "https://github.com/harvard-edge/cs249r_book/graphs/contributors", "contributions": [] }, @@ -377,20 +398,6 @@ "profile": "https://github.com/eezike", "contributions": [] }, - { - "login": "ciyer64", - "name": "Curren Iyer", - "avatar_url": "https://avatars.githubusercontent.com/ciyer64", - "profile": "https://github.com/ciyer64", - "contributions": [] - }, - { - "login": "YangZhou1997", - "name": "Yang Zhou", - "avatar_url": "https://avatars.githubusercontent.com/YangZhou1997", - "profile": "https://github.com/YangZhou1997", - "contributions": [] - }, { "login": "abigailswallow", "name": "abigailswallow", @@ -398,6 +405,13 @@ "profile": "https://github.com/abigailswallow", "contributions": [] }, + { + "login": "ciyer64", + "name": "Curren Iyer", + "avatar_url": "https://avatars.githubusercontent.com/ciyer64", + "profile": "https://github.com/ciyer64", + "contributions": [] + }, { "login": "YLab-UChicago", "name": "yanjingl", @@ -406,10 +420,10 @@ "contributions": [] }, { - "login": "NaN", - "name": "Sophia Cho", - "avatar_url": "https://www.gravatar.com/avatar/da4d11538053ecff01479b351d152d92?d=identicon&s=100", - "profile": "https://github.com/harvard-edge/cs249r_book/graphs/contributors", + "login": "FinAminToastCrunch", + "name": "Fin Amin", + "avatar_url": "https://avatars.githubusercontent.com/FinAminToastCrunch", + "profile": "https://github.com/FinAminToastCrunch", "contributions": [] }, { @@ -420,10 +434,10 @@ "contributions": [] }, { - "login": "NaN", - "name": "Yu-Shun Hsiao", - "avatar_url": "https://www.gravatar.com/avatar/242dbc711f7056b6a276763473fc88b8?d=identicon&s=100", - "profile": "https://github.com/harvard-edge/cs249r_book/graphs/contributors", + "login": "YangZhou1997", + "name": "Yang Zhou", + "avatar_url": "https://avatars.githubusercontent.com/YangZhou1997", + "profile": "https://github.com/YangZhou1997", "contributions": [] }, { diff --git a/.github/workflows/contributors/update_contributors.py b/.github/workflows/contributors/update_contributors.py index 1847d4ab2..3d10845f2 100644 --- a/.github/workflows/contributors/update_contributors.py +++ b/.github/workflows/contributors/update_contributors.py @@ -1,10 +1,10 @@ -import hashlib -import json import os +import json import random +import hashlib -import pandas as pd import requests +import pandas as pd from absl import app from absl import logging @@ -87,6 +87,7 @@ def main(_): next_page = res.links.get("next", {}).get("url", None) last_page = res.links.get("last", {}).get("url", None) + usernames = set() commit_data = [] for node in data: commit_message = node.get("commit", {}).get("message", pd.NA) @@ -111,6 +112,14 @@ def main(_): elif committer_login_info: username = committer_login_info["login"] + try: + if username: + usernames.add(username) + except Exception as e: + logging.error(f"Error parsing username: {username}") + + + commit_data.append( { "commit_message": commit_message, @@ -119,6 +128,12 @@ def main(_): } ) commit_data_df = pd.DataFrame(commit_data) + + username_to_fullname = {} + for username in usernames: + user_data = get_user_data_from_username(username) + username_to_fullname[username] = user_data['user_full_name'] + co_authors_list = [get_co_authors_from_commit_message(row["commit_message"]) for index, row in commit_data_df.iterrows()] co_authors_df = 
pd.concat(co_authors_list, ignore_index=True) @@ -194,6 +209,11 @@ def main(_): lambda row: row['username'] if '@' in row['user_full_name'] else row[ 'user_full_name'], axis=1) + + commit_data_df['user_full_name'] = commit_data_df.apply( + lambda row: username_to_fullname[row['username']] if row['username'] in username_to_fullname else row['user_full_name'], axis=1 + ) + def generate_gravatar_url(name): random.seed(name) diff --git a/README.md b/README.md index ddad8d410..af5dbe35c 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,10 @@ -# MACHINE LEARNING SYSTEMS with TinyML +# MACHINE LEARNING SYSTEMS +*Principles and Practices of Engineering Artificially Intelligent Systems*

Cover Image

-

Contributors Badge Build Status Badge @@ -12,45 +12,73 @@ Markdown Linter Status Badge

-Welcome to the collaborative book repository for students of CS249r at Harvard! This repository -contains the source files of chapters and sections written by your peers. We're excited to see your contributions! - -## Contributing +

+ ⭐ Help Us Reach 1,000 GitHub Stars! ⭐
+ For every 25 stars, Arduino and SEEED will donate a Nicla Vision or XIAO ESP32S3 for AI education.
+ Your ⭐ makes a difference. Click below to support our mission!
+

-To contribute to the repository, please see the [contribution guidelines](contribute.md). +

+ + GitHub Stars + +

--- -## Website +## 📚 Explore the Book -The book's website is automatically constructed from the `gh-pages` branch. Once reviewed, changes to `main` are merged -into `gh-pages`. +- **[Read Online](https://mlsysbook.ai)**: View the most recent and fully rendered version of the book on our website. +- **[Download PDF](https://mlsysbook.ai/Machine-Learning-Systems.pdf)**: Get a downloadable PDF version of the entire book for offline reading. +- **[Staging Version](https://harvard-edge.github.io/cs249r_book_dev/)**: Check out the latest changes before they go live on the main `mlsysbook.ai` site. + +--- -You can view the book's website -at: [https://harvard-edge.github.io/cs249r_book/](https://harvard-edge.github.io/cs249r_book/) +## 🌟 Why Star the Repository? -The most recent changes are staged at [https://harvard-edge.github.io/cs249r_book_dev/](https://harvard-edge.github.io/cs249r_book_dev/) +Starring our repository on GitHub helps others discover this valuable resource and supports ongoing improvements. Plus, your star contributes to our donation drive for AI education hardware! --- -## Local Rendering +## 🤝 Contributing -You need to have `quarto` installed for local book rendering. Please follow the [Quarto installation instructions here](https://quarto.org/docs/download/). +We believe that the best learning and development happen when people come together to share knowledge and ideas. Whether you're a seasoned expert or just starting your journey in machine learning, your contributions can be valuable to the community. -Once that's done, the following command can be -used to produce the HTML pages: +### Why Contribute? +- **Share Your Expertise**: If you have experience or insights in a specific area of machine learning or TinyML, your contributions can help others learn and apply these concepts. +- **Learn and Grow**: Contributing to this project is a great way to deepen your understanding of machine learning systems. As you write, code, or review content, you'll reinforce your own knowledge and discover new aspects of the field. +- **Collaborate and Network**: Join a community of like-minded individuals passionate about advancing AI education. Collaborate with peers, receive feedback, and make connections that could lead to exciting opportunities. +- **Make an Impact**: Your contributions can directly influence how others understand and engage with machine learning. By improving and expanding the content, you're helping shape the education of future engineers and AI practitioners. -```bash -cd cs249r_book -quarto render -``` +### How to Get Started +Getting started is easy! Whether you're interested in writing a new chapter, fixing a bug, or suggesting an improvement, we welcome all forms of contribution. Here's how you can begin: -## Code of Conduct +1. **Explore the Repository**: Familiarize yourself with the structure and content of the book by browsing the repository. +2. **Check Out the Guidelines**: Review our [contribution guidelines](contribute.md) to understand how to contribute effectively. +3. **Choose Your Area**: Pick a topic or area you’re passionate about. It could be anything from writing a new section or improving existing content to helping with code snippets and examples. +4. **Submit Your Contribution**: Once you're ready, submit a pull request. We review all contributions and provide feedback to help you refine your work. -Please note that the cs249r project is released with a [Contributor Code of Conduct](CODE_OF_CONDUCT.md).
By contributing to this project, you agree to abide by its terms. +### Need Help? +If you're unsure where to start or have any questions, feel free to reach out through our [GitHub Discussions](#) or open an issue. We're here to support you throughout the process! + +--- + +## 🛠️ Local Rendering Instructions + +Want to build the book locally? Here's how: + +1. **Install Quarto**: Follow the [Quarto installation instructions](https://quarto.org/docs/download/). +2. **Render the Book**: + ```bash + cd cs249r_book + quarto render + ``` +--- ## Contributors +This project follows the [all-contributors](https://allcontributors.org) specification. Contributions of any kind are welcome! + @@ -59,95 +87,97 @@ Please note that the cs249r project is released with a [Contributor Code of Cond Vijay Janapa Reddi
Vijay Janapa Reddi

Ikechukwu Uchendu
Ikechukwu Uchendu

- naeemkh
naeemkh

+ Naeem Khoshnevis
Naeem Khoshnevis

Douwe den Blanken
Douwe den Blanken

- Shanzeh Batool
Shanzeh Batool

+ shanzehbatool
shanzehbatool

- Kleinbard
Kleinbard

+ kai4avaya
kai4avaya

Jared Ping
Jared Ping

- eliasab16
eliasab16

- ishapira
ishapira

+ Elias Nuwara
Elias Nuwara

+ Itai Shapira
Itai Shapira

Maximilian Lam
Maximilian Lam

Marcelo Rovai
Marcelo Rovai

Matthew Stewart
Matthew Stewart

Jayson Lin
Jayson Lin

- Jeffrey Ma
Jeffrey Ma

Sophia Cho
Sophia Cho

+ Jeffrey Ma
Jeffrey Ma

Korneel Van den Berghe
Korneel Van den Berghe

- Zishen
Zishen

+ jasonjabbour
jasonjabbour

+ Zishen Wan
Zishen Wan

Colby Banbury
Colby Banbury

- Andrea Murillo
Andrea Murillo

- Alex Rodriguez
Alex Rodriguez

+ Andrea
Andrea

Srivatsan Krishnan
Srivatsan Krishnan

- Divya
Divya

- Aghyad Deeb
Aghyad Deeb

- Aghyad Deeb
Aghyad Deeb

+ Alex Rodriguez
Alex Rodriguez

+ Divya Amirtharaj
Divya Amirtharaj

arnaumarin
arnaumarin

+ Aghyad Deeb
Aghyad Deeb

Abdulrahman Mahmoud
Abdulrahman Mahmoud

- oishib
oishib

- Emil Njor
Emil Njor

- Michael Schnebly
Michael Schnebly

+ Aghyad Deeb
Aghyad Deeb

ELSuitorHarvard
ELSuitorHarvard

+ oishib
oishib

+ Jared Ni
Jared Ni

- Jared Ni
Jared Ni

- Jae-Won Chung
Jae-Won Chung

+ Emil Njor
Emil Njor

+ Michael Schnebly
Michael Schnebly

Mark Mazumder
Mark Mazumder

- YU SHUN, HSIAO
YU SHUN, HSIAO

+ Jae-Won Chung
Jae-Won Chung

Sara Khosravi
Sara Khosravi

+ Yu-Shun Hsiao
Yu-Shun Hsiao

Henry Bae
Henry Bae

+ eurashin
eurashin

Andrew Bass
Andrew Bass

- Aditi Raju
Aditi Raju

Shvetank Prakash
Shvetank Prakash

- Pong Trairatvorakul
Pong Trairatvorakul

- eurashin
eurashin

- Jennifer Zhou
Jennifer Zhou

+ Aditi Raju
Aditi Raju

Marco Zennaro
Marco Zennaro

- Bruno Scaglione
Bruno Scaglione

+ Pong Trairatvorakul
Pong Trairatvorakul

+ Jennifer Zhou
Jennifer Zhou

Gauri Jain
Gauri Jain

- Alex Oesterling
Alex Oesterling

- Eric D
Eric D

Allen-Kuang
Allen-Kuang

Sercan Aygün
Sercan Aygün

- Costin-Andrei Oncescu
Costin-Andrei Oncescu

+ Bruno Scaglione
Bruno Scaglione

+ gnodipac886
gnodipac886

+ alex-oesterling
alex-oesterling

Annie Laurie Cook
Annie Laurie Cook

+ Yu-Shun Hsiao
Yu-Shun Hsiao

+ Costin-Andrei Oncescu
Costin-Andrei Oncescu

Batur Arslan
Batur Arslan

- Emeka Ezike
Emeka Ezike

- Curren Iyer
Curren Iyer

- Yang Zhou
Yang Zhou

+ Sophia Cho
Sophia Cho

+ Emeka Ezike
Emeka Ezike

abigailswallow
abigailswallow

+ Curren Iyer
Curren Iyer

yanjingl
yanjingl

- Sophia Cho
Sophia Cho

- songhan
songhan

- Yu-Shun Hsiao
Yu-Shun Hsiao

+ Fin Amin
Fin Amin

+ songhan
songhan

+ Yang Zhou
Yang Zhou

Jessica Quaye
Jessica Quaye

Emmanuel Rassou
Emmanuel Rassou

happyappledog
happyappledog

- Jason Yik
Jason Yik

- Shreya Johri
Shreya Johri

+ Jason Yik
Jason Yik

+ Shreya Johri
Shreya Johri

Sonia Murthy
Sonia Murthy

Vijay Edupuganti
Vijay Edupuganti

The Random DIY
The Random DIY

@@ -159,6 +189,3 @@ Please note that the cs249r project is released with a [Contributor Code of Cond - -This project follows the [all-contributors](https://allcontributors.org) specification. Contributions of any kind are -welcome! diff --git a/_extensions/nmfs-opensci/titlepage/_author-affiliation-themes.tex b/_extensions/nmfs-opensci/titlepage/_author-affiliation-themes.tex new file mode 100644 index 000000000..1602ea223 --- /dev/null +++ b/_extensions/nmfs-opensci/titlepage/_author-affiliation-themes.tex @@ -0,0 +1,95 @@ +$if(false)$ +% This sectrion defines the author block and affiliation blocks +% based on the author and affiliation theme +$endif$ +\newcommand{\authorstyle}[1]{{$if(titlepage-theme.author-fontsize)$\fontsize{$titlepage-theme.author-fontsize$}{$titlepage-theme.author-spacing$}\selectfont +$endif$$for(titlepage-theme.author-fontstyle)$\$titlepage-theme.author-fontstyle${$endfor$#1$for(titlepage-theme.author-fontstyle)$}$endfor$}} + +\newcommand{\affiliationstyle}[1]{{$if(titlepage-theme.affiliation-fontsize)$\fontsize{$titlepage-theme.affiliation-fontsize$}{$titlepage-theme.affiliation-spacing$}\selectfont +$endif$$for(titlepage-theme.affiliation-fontstyle)$\$titlepage-theme.affiliation-fontstyle${$endfor$#1$for(titlepage-theme.affiliation-fontstyle)$}$endfor$}} + +$if(titlepage-style-code.author.none)$ +\newcommand{\titlepageauthorblock}{} +$endif$ +$if(titlepage-style-code.author.plain)$ +\newcommand{\titlepageauthorblock}{ +$if(titlepage-theme.author-color)$\textcolor{$titlepage-theme.author-color$}$endif${\authorstyle{$for(by-author)$\nohyphens{$by-author.name.literal$}$sep$$titlepage-theme.author-sep$$if(titlepage-theme.author-space-between)$~\\ \vspace{$titlepage-theme.author-space-between$}$endif$$endfor$\\}} +} +$endif$ +$if(titlepage-style-code.author.plain-with-and)$ +\newcommand{\titlepageauthorblock}{ +$if(titlepage-theme.author-color)$\textcolor{$titlepage-theme.author-color$}$endif${\authorstyle{$if(by-author/allbutlast)$$for(by-author/allbutlast)$\nohyphens{$by-author.name.literal$}$sep$$titlepage-theme.author-sep$ $if(titlepage-theme.author-space-between)$~\\ \vspace{$titlepage-theme.author-space-between$}$endif$$endfor$$for(by-author/last)$ and \nohyphens{$by-author.name.literal$}$endfor$$else$$for(by-author/last)$\nohyphens{$by-author.name.literal$}$if(titlepage-theme.author-space-between)$~\\ \vspace{$titlepage-theme.author-space-between$}$endif$$endfor$$endif$}} +} +$endif$ +$if(titlepage-style-code.author.superscript)$ +\newcommand{\titlepageauthorblock}{ +$if(titlepage-theme.author-color)$\textcolor{$titlepage-theme.author-color$}$endif${\authorstyle{$for(by-author)$\nohyphens{$by-author.name.literal$}$for(by-author.affiliations)${\textsuperscript{$it.number$}}$sep${\textsuperscript{,}}$endfor$$if(by-author.email)$$if(by-author.affiliations)$\textsuperscript{,}$endif${\textsuperscript{*}}$endif$$sep$$titlepage-theme.author-sep$ $if(titlepage-theme.author-space-between)$~\\ \vspace{$titlepage-theme.author-space-between$}$endif$$endfor$}} +} +$endif$ +$if(titlepage-style-code.author.superscript-with-and)$ +\newcommand{\titlepageauthorblock}{ +$if(titlepage-theme.author-color)$\textcolor{$titlepage-theme.author-color$}$endif${\authorstyle{$if(by-author/allbutlast)$$for(by-author/allbutlast)$\nohyphens{$by-author.name.literal$}$for(by-author.affiliations)${\textsuperscript{$it.number$}}$sep$\textsuperscript{,}$endfor$$if(by-author.email)$$if(by-author.affiliations)$\textsuperscript{,}$endif${\textsuperscript{*}}$endif$$sep$$titlepage-theme.author-sep$ 
$if(titlepage-theme.author-space-between)$~\\ \vspace{$titlepage-theme.author-space-between$}$endif$$endfor$$for(by-author/last)$ and $if(titlepage-theme.author-space-between)$~\\ \vspace{$titlepage-theme.author-space-between$}$endif$\nohyphens{$by-author.name.literal$}$for(by-author.affiliations)${\textsuperscript{$it.number$}}$sep$\textsuperscript{,}$endfor$$if(by-author.email)$$if(by-author.affiliations)$\textsuperscript{,}$endif${\textsuperscript{*}}$endif$$endfor$$else$$for(by-author/last)$\nohyphens{$by-author.name.literal$}$for(by-author.affiliations)${\textsuperscript{$it.number$}}$sep$\textsuperscript{,}$endfor$$if(by-author.email)$$if(by-author.affiliations)$\textsuperscript{,}$endif${\textsuperscript{,*}}$endif$$endfor$$endif$}}} +$endif$ +$if(titlepage-style-code.author.author-address)$ +\newcommand{\titlepageauthorblock}{ +$if(titlepage-theme.author-color)$\textcolor{$titlepage-theme.author-color$}$endif${\authorstyle{ +$for(by-author)$$by-author.name.literal$$if(by-author.affiliations)$$titlepage-theme.affiliation-sep$$for(by-author.affiliations)$$by-author.affiliations.name$$if(by-author.affiliations.department)$$titlepage-theme.affiliation-sep$$by-author.affiliations.department$$endif$$if(by-author.affiliations.address)$$titlepage-theme.affiliation-sep$$for(by-author.affiliations.address)$$by-author.affiliations.address$$sep$$titlepage-theme.affiliation-sep$$endfor$$endif$$sep$$titlepage-theme.affiliation-sep$$endfor$$endif$$if(by-author.email)$$titlepage-theme.affiliation-sep$$by-author.email$$endif$$sep$\\ +$if(titlepage-theme.author-space-between)$\vspace{$titlepage-theme.author-space-between$}$endif$$endfor$}}} +$endif$ +$if(titlepage-style-code.author.two-column)$ +\newcommand{\titlepageauthorblock}{ +\newlength{\miniA} +\setlength{\miniA}{0pt} +\newlength{\namelen} +$for(by-author)$\settowidth{\namelen}{$by-author.name.literal$}\setlength{\miniA}{\maxof{\miniA}{\namelen}}$endfor$ +\setlength{\miniA}{\miniA+0.05\textwidth} +\newlength{\miniB} +\setlength{\miniB}{0.99\textwidth - \miniA} +\begin{minipage}{\miniA} +\begin{flushleft} +$if(titlepage-theme.author-color)$\textcolor{$titlepage-theme.author-color$}$endif${\authorstyle{$for(by-author)$$by-author.name.literal$$sep$\\ $if(titlepage-theme.author-space-between)$ +\vspace{$titlepage-theme.author-space-between$}$endif$$endfor$}} +\end{flushleft} +\end{minipage} +\begin{minipage}{\miniB} +\begin{flushright} +$if(titlepage-theme.affiliation-color)$\textcolor{$titlepage-theme.affiliation-color$}$endif${\affiliationstyle{$for(by-author)$$for(by-author.affiliations/first)$$by-author.affiliations.name$ +$endfor$$sep$\\ +$if(titlepage-theme.affiliation-space-between)$\vspace{$titlepage-theme.affiliation-space-between$} +$endif$$endfor$\\}} +\end{flushright} +\end{minipage}} +$endif$ + +$if(titlepage-style-code.affiliation.none)$ +\newcommand{\titlepageaffiliationblock}{} +$endif$ +$if(titlepage-style-code.affiliation.numbered-list)$ +\newcommand{\titlepageaffiliationblock}{ +\hangindent=1em +\hangafter=1 +\affiliationstyle{$for(by-affiliation)${$it.number$}.~$if(by-affiliation.name)$$it.name$$endif$$if(by-affiliation.department)$$if(by-affiliation.name)$$titlepage-theme.affiliation-sep$$endif$$it.department$$endif$$if(by-affiliation.address)$$if(by-affiliation.name)$$titlepage-theme.affiliation-sep$$else$$if(by-affiliation.department)$$titlepage-theme.affiliation-sep$$endif$$endif$$it.address$$endif$% +$sep$\par\hangindent=1em\hangafter=1$if(titlepage-theme.affiliation-space-between)$ 
+\vspace{$titlepage-theme.affiliation-space-between$}$endif$ +$endfor$ +}} +$endif$ +$if(titlepage-style-code.affiliation.numbered-list-with-correspondence)$ +\newcommand{\titlepageaffiliationblock}{ +\hangindent=1em +\hangafter=1 +{\affiliationstyle{ +$for(by-affiliation)${$it.number$}.~$if(by-affiliation.name)$$it.name$$endif$$if(by-affiliation.department)$$if(by-affiliation.name)$$titlepage-theme.affiliation-sep$$endif$$it.department$$endif$$if(by-affiliation.address)$$if(by-affiliation.name)$$titlepage-theme.affiliation-sep$$else$$if(by-affiliation.department)$$titlepage-theme.affiliation-sep$$endif$$endif$$it.address$$endif$ +$sep$\par\hangindent=1em\hangafter=1$if(titlepage-theme.affiliation-space-between)$ +\vspace{$titlepage-theme.affiliation-space-between$}$endif$ +$endfor$ + +\vspace{1\baselineskip} +$if(author)$ +$for(by-author)$ +$if(by-author.email)$ +* \textit{Correspondence:}~$by-author.name.literal$~$by-author.email$ +$endif$$endfor$$endif$ +}} +} +$endif$ diff --git a/_extensions/nmfs-opensci/titlepage/_coverpage.tex b/_extensions/nmfs-opensci/titlepage/_coverpage.tex new file mode 100644 index 000000000..a3caa4701 --- /dev/null +++ b/_extensions/nmfs-opensci/titlepage/_coverpage.tex @@ -0,0 +1,121 @@ +% This is a combination of Pandoc templating and LaTeX +% Pandoc templating https://pandoc.org/MANUAL.html#templates +% See the README for help + +\thispagestyle{empty} + +$if(coverpage-geometry)$ +\newgeometry{$for(coverpage-geometry)$$coverpage-geometry$$sep$,$endfor$} +$else$ +\newgeometry{top=-100in} +$endif$ + +% Page color +$if(coverpage-theme.page-html-color)$ +\definecolor{pgcolor}{HTML}{$coverpage-theme.page-html-color$} +\pagecolor{pgcolor}\afterpage{\nopagecolor} +$endif$ + +\newcommand{\coverauthorstyle}[1]{{$if(coverpage-theme.author-fontsize)$\fontsize{$coverpage-theme.author-fontsize$}{$coverpage-theme.author-spacing$}\selectfont +$endif$$for(coverpage-theme.author-fontstyle)$\$coverpage-theme.author-fontstyle${$endfor$#1$for(coverpage-theme.author-fontstyle)$}$endfor$}} + +\begin{tikzpicture}[remember picture, overlay, inner sep=0pt, outer sep=0pt] + +$if(coverpage-bg-image)$ +\tikzfading[name=fadeout, inner color=transparent!0,outer color=transparent!100] +\tikzfading[name=fadein, inner color=transparent!100,outer color=transparent!0] +\node[$if(coverpage-theme.bg-image-fading)$ scope fading=$coverpage-theme.bg-image-fading$, $endif$anchor=south west$if(coverpage-theme.bg-image-rotate)$, rotate=$coverpage-theme.bg-image-rotate$$endif$$if(coverpage-theme.bg-image-opacity)$, opacity=$coverpage-theme.bg-image-opacity$$endif$] at ($$(current page.south west)+($if(coverpage-theme.bg-image-left)$$coverpage-theme.bg-image-left$$else$0cm$endif$, $if(coverpage-theme.bg-image-bottom)$$coverpage-theme.bg-image-bottom$$else$0cm$endif$)$$) { +\includegraphics[width=$coverpage-theme.bg-image-size$, keepaspectratio]{$coverpage-bg-image$}}; +$endif$ + +$if(coverpage-style-code.title.plain)$ +$if(coverpage-title)$ +% Title +\newcommand{\titlelocationleft}{$coverpage-theme.title-left$} +\newcommand{\titlelocationbottom}{$coverpage-theme.title-bottom$} +\newcommand{\titlealign}{$coverpage-theme.title-align$} + +\begin{scope}{% +$if(coverpage-theme.title-fontsize)$\fontsize{$coverpage-theme.title-fontsize$}{$coverpage-theme.title-spacing$}\selectfont +$endif$$if(coverpage-theme.title-fontfamily)$\coverpagetitlefont +$endif$\node[anchor=$coverpage-theme.title-anchor$, align=$coverpage-theme.title-align$, rotate=$coverpage-theme.title-rotate$] (Title1) at ($$(current page.south 
west)+(\titlelocationleft,\titlelocationbottom)$$) [text width = $coverpage-theme.title-width$$if(coverpage-theme.title-node-spec)$, $coverpage-theme.title-node-spec$$endif$] {$if(coverpage-theme.title-color)$\textcolor{$coverpage-theme.title-color$}$endif${$for(coverpage-theme.title-fontstyle)$\$coverpage-theme.title-fontstyle${$endfor$\nohyphens{$coverpage-title$}$for(coverpage-theme.title-fontstyle)$}$endfor$}}; +} +\end{scope} +$endif$ +$endif$ +$if(coverpage-style-code.author.plain)$ +$if(coverpage-author)$ + +% Author +\newcommand{\authorlocationleft}{$coverpage-theme.author-left$} +\newcommand{\authorlocationbottom}{$coverpage-theme.author-bottom$} +\newcommand{\authoralign}{$coverpage-theme.author-align$} + +\begin{scope} +{% +$if(coverpage-theme.author-fontsize)$\fontsize{$coverpage-theme.author-fontsize$}{$coverpage-theme.author-spacing$}\selectfont +$endif$$if(coverpage-theme.author-fontfamily)$\coverpageauthorfont +$endif$\node[anchor=$coverpage-theme.author-anchor$, align=$coverpage-theme.author-align$, rotate=$coverpage-theme.author-rotate$] (Author1) at ($$(current page.south west)+(\authorlocationleft,\authorlocationbottom)$$) [text width = $coverpage-theme.author-width$$if(coverpage-theme.author-node-spec)$, $coverpage-theme.author-node-spec$$endif$] { +$if(coverpage-theme.author-color)$\textcolor{$coverpage-theme.author-color$}$endif$\coverauthorstyle{$for(coverpage-author)$$coverpage-author$$sep$$coverpage-theme.author-sep$$endfor$\\}}; +} +\end{scope} +$endif$ +$endif$ +$if(coverpage-style-code.header.plain)$ +$if(coverpage-header)$ + +% Header +\newcommand{\headerlocationleft}{$coverpage-theme.header-left$} +\newcommand{\headerlocationbottom}{$coverpage-theme.header-bottom$} +\newcommand{\headerlocationalign}{$coverpage-theme.header-align$} + +\begin{scope} +{% +$if(coverpage-theme.header-fontsize)$\fontsize{$coverpage-theme.header-fontsize$}{$coverpage-theme.header-spacing$}\selectfont +$endif$ $if(coverpage-theme.header-fontfamily)$\coverpageheaderfont +$endif$\node[anchor=$coverpage-theme.header-anchor$, align=$coverpage-theme.header-align$, rotate=$coverpage-theme.header-rotate$] (Header1) at % +($$(current page.south west)+(\headerlocationleft,\headerlocationbottom)$$) [text width = $coverpage-theme.header-width$$if(coverpage-theme.header-node-spec)$, $coverpage-theme.header-node-spec$$endif$] {$if(coverpage-theme.header-color)$\textcolor{$coverpage-theme.header-color$}$endif${$for(coverpage-theme.header-fontstyle)$\$coverpage-theme.header-fontstyle${$endfor$\nohyphens{$coverpage-header$}$for(coverpage-theme.header-fontstyle)$}$endfor$}}; +} +\end{scope} +$endif$ +$endif$ +$if(coverpage-style-code.footer.plain)$ +$if(coverpage-footer)$ + +% Footer +\newcommand{\footerlocationleft}{$coverpage-theme.footer-left$} +\newcommand{\footerlocationbottom}{$coverpage-theme.footer-bottom$} +\newcommand{\footerlocationalign}{$coverpage-theme.footer-align$} + +\begin{scope} +{% +$if(coverpage-theme.footer-fontsize)$\fontsize{$coverpage-theme.footer-fontsize$}{$coverpage-theme.footer-spacing$}\selectfont +$endif$ $if(coverpage-theme.footer-fontfamily)$\coverpagefooterfont +$endif$\node[anchor=$coverpage-theme.footer-anchor$, align=$coverpage-theme.footer-align$, rotate=$coverpage-theme.footer-rotate$] (Footer1) at % +($$(current page.south west)+(\footerlocationleft,\footerlocationbottom)$$) [text width = $coverpage-theme.footer-width$$if(coverpage-theme.footer-node-spec)$, $coverpage-theme.footer-node-spec$$endif$] 
{$if(coverpage-theme.footer-color)$\textcolor{$coverpage-theme.footer-color$}$endif${$for(coverpage-theme.footer-fontstyle)$\$coverpage-theme.footer-fontstyle${$endfor$\nohyphens{$coverpage-footer$}$for(coverpage-theme.footer-fontstyle)$}$endfor$}}; +} +\end{scope} +$endif$ +$endif$ +$if(coverpage-style-code.date.plain)$ +$if(coverpage-date)$ + +% Date +\newcommand{\datelocationleft}{$coverpage-theme.date-left$} +\newcommand{\datelocationbottom}{$coverpage-theme.date-bottom$} +\newcommand{\datelocationalign}{$coverpage-theme.date-align$} + +\begin{scope} +{% +$if(coverpage-theme.date-fontsize)$\fontsize{$coverpage-theme.date-fontsize$}{$coverpage-theme.date-spacing$}\selectfont +$endif$ $if(coverpage-theme.date-fontfamily)$\coverpagedatefont +$endif$\node[anchor=$coverpage-theme.date-anchor$, align=$coverpage-theme.date-align$, rotate=$coverpage-theme.date-rotate$] (Date1) at % +($$(current page.south west)+(\datelocationleft,\datelocationbottom)$$) [text width = $coverpage-theme.date-width$$if(coverpage-theme.date-node-spec)$, $coverpage-theme.date-node-spec$$endif$] {$if(coverpage-theme.date-color)$\textcolor{$coverpage-theme.date-color$}$endif${$for(coverpage-theme.date-fontstyle)$\$coverpage-theme.date-fontstyle${$endfor$\nohyphens{$coverpage-date$}$for(coverpage-theme.date-fontstyle)$}$endfor$}}; +} +\end{scope} +$endif$ +$endif$ + +\end{tikzpicture} +\clearpage +\restoregeometry diff --git a/_extensions/nmfs-opensci/titlepage/_extension.yml b/_extensions/nmfs-opensci/titlepage/_extension.yml new file mode 100644 index 000000000..acefbc4f3 --- /dev/null +++ b/_extensions/nmfs-opensci/titlepage/_extension.yml @@ -0,0 +1,20 @@ +title: titlepage +author: Eli Holmes +quarto-required: ">=1.1.0" +version: 3.3.10 +contributes: + format: + pdf: + filters: + - titlepage-theme.lua + - coverpage-theme.lua + template-partials: + - "_coverpage.tex" + - "_author-affiliation-themes.tex" + - "_header-footer-date-themes.tex" + - "_title-themes.tex" + - "_titlepage.tex" + - "before-body.tex" + - "pandoc.tex" + format-resources: + - "fonts/qualitype/opentype/QTDublinIrish.otf" diff --git a/_extensions/nmfs-opensci/titlepage/_header-footer-date-themes.tex b/_extensions/nmfs-opensci/titlepage/_header-footer-date-themes.tex new file mode 100644 index 000000000..7b0bd951d --- /dev/null +++ b/_extensions/nmfs-opensci/titlepage/_header-footer-date-themes.tex @@ -0,0 +1,156 @@ +\newcommand{\headerstyled}{% +{$if(titlepage-theme.header-fontsize)$\fontsize{$titlepage-theme.header-fontsize$}{$titlepage-theme.header-spacing$}\selectfont +$endif$$for(titlepage-theme.header-fontstyle)$\$titlepage-theme.header-fontstyle${$endfor$$titlepage-header$$for(titlepage-theme.header-fontstyle)$}$endfor$} +} +\newcommand{\footerstyled}{% +{$if(titlepage-theme.footer-fontsize)$\fontsize{$titlepage-theme.footer-fontsize$}{$titlepage-theme.footer-spacing$}\selectfont +$endif$$for(titlepage-theme.footer-fontstyle)$\$titlepage-theme.footer-fontstyle${$endfor$$titlepage-footer$$for(titlepage-theme.footer-fontstyle)$}$endfor$} +} +\newcommand{\datestyled}{% +{$if(titlepage-theme.date-fontsize)$\fontsize{$titlepage-theme.date-fontsize$}{$titlepage-theme.date-spacing$}\selectfont +$endif$$for(titlepage-theme.date-fontstyle)$\$titlepage-theme.date-fontstyle${$endfor$$date$$for(titlepage-theme.date-fontstyle)$}$endfor$} +} + +$if(titlepage-style-code.header.none)$ +\newcommand{\titlepageheaderblock}{} +$endif$ + +$if(titlepage-style-code.header.plain)$ +\newcommand{\titlepageheaderblock}{\headerstyled} +$endif$ 
+$if(titlepage-style-code.header.colorbox)$ +\newcommand{\titlepageheaderblock}{ +{\setlength{\fboxrule}{$if(titlepage-theme.header-colorbox-borderwidth)$$titlepage-theme.header-colorbox-borderwidth$$else$0pt$endif$} +\fcolorbox{$if(titlepage-theme.header-colorbox-bordercolor)$$titlepage-theme.header-colorbox-bordercolor$$else$black$endif$}{$if(titlepage-theme.header-colorbox-fill)$$titlepage-theme.header-colorbox-fill$$else$cyan$endif$}{ +\parbox[t]{0.90\minipagewidth}{ % Outer full width box +\parbox[t]{0.85\minipagewidth}{ % Inner box for inner right text margin +$if(titlepage-theme.header-align)$\titlepageheaderalign$else$$if(titlepage-theme.page-align)$\titlepagepagealign$endif$$endif$ +\vspace{0.7cm} + +\headerstyled + +\vspace{0.7cm} +}} % end of parboxes +} % fcolorbox +} % ensure fbox set is restricted +} +$endif$ +$if(titlepage-style-code.header.doublelinewide)$ +\newcommand{\titlepageheaderblock}{ +\rule{\textwidth}{0.4pt} % Thin horizontal rule +\vspace{0.1\textheight} % Whitespace between the top rules and title + +\headerstyled + +\vspace{0.025\textheight} +\rule{0.3\textwidth}{0.4pt} % Short horizontal rule under the title +} +$endif$ +$if(titlepage-style-code.header.doublelinetight)$ +\newcommand{\titlepageheaderblock}{ +\newcommand{\HRule}{\rule{\linewidth}{0.5mm}} + +\HRule\\[0.4cm] + +\headerstyled + +\HRule\\ +} +$endif$ + +$if(titlepage-style-code.footer.none)$ +\newcommand{\titlepagefooterblock}{} +$endif$ +$if(titlepage-style-code.footer.plain)$ +\newcommand{\titlepagefooterblock}{ +\footerstyled +} +$endif$ +$if(titlepage-style-code.footer.colorbox)$ +\newcommand{\titlepagefooterblock}{ +{\setlength{\fboxrule}{$if(titlepage-theme.footer-colorbox-borderwidth)$$titlepage-theme.footer-colorbox-borderwidth$$else$0pt$endif$} +\fcolorbox{$if(titlepage-theme.footer-colorbox-bordercolor)$$titlepage-theme.footer-colorbox-bordercolor$$else$black$endif$}{$if(titlepage-theme.footer-colorbox-fill)$$titlepage-theme.footer-colorbox-fill$$else$cyan$endif$}{ +\parbox[t]{0.90\minipagewidth}{ % Outer full width box +\parbox[t]{0.85\minipagewidth}{ % Inner box for inner right text margin +$if(titlepage-theme.footer-align)$\titlepagefooteralign$else$$if(titlepage-theme.page-align)$\titlepagepagealign$endif$$endif$ +\vspace{0.7cm} + +\footerstyled + +\vspace{0.7cm} +}} % end of parboxes +} % fcolorbox +} % ensure fbox set is restricted +} +$endif$ +$if(titlepage-style-code.footer.doublelinewide)$ +\newcommand{\titlepagefooterblock}{ +\rule{\textwidth}{0.4pt} % Thin horizontal rule +\vspace{0.1\textheight} % Whitespace between the top rules and title + +\footerstyled + +\vspace{0.025\textheight} +\rule{0.3\textwidth}{0.4pt} % Short horizontal rule under the title +} +$endif$ +$if(titlepage-style-code.footer.doublelinetight)$ +\newcommand{\titlepagefooterblock}{ +\newcommand{\HRule}{\rule{\linewidth}{0.5mm}} + +\HRule\\[0.4cm] + +\footerstyled + +\HRule\\ +} +$endif$ + +$if(titlepage-style-code.date.none)$ +\newcommand{\titlepagedateblock}{} +$endif$ +$if(titlepage-style-code.date.plain)$ +\newcommand{\titlepagedateblock}{ +\datestyled +} +$endif$ +$if(titlepage-style-code.date.colorbox)$ +\newcommand{\titlepagedateblock}{ +{\setlength{\fboxrule}{$if(titlepage-theme.date-colorbox-borderwidth)$$titlepage-theme.date-colorbox-borderwidth$$else$0pt$endif$} +\fcolorbox{$if(titlepage-theme.date-colorbox-bordercolor)$$titlepage-theme.date-colorbox-bordercolor$$else$black$endif$}{$if(titlepage-theme.date-colorbox-fill)$$titlepage-theme.date-colorbox-fill$$else$cyan$endif$}{ 
+\parbox[t]{0.90\minipagewidth}{ % Outer full width box +\parbox[t]{0.85\minipagewidth}{ % Inner box for inner right text margin +$if(titlepage-theme.date-align)$\titlepagedatealign$else$$if(titlepage-theme.page-align)$\titlepagepagealign$endif$$endif$ +\vspace{0.7cm} + +\datestyled + +\vspace{0.7cm} +}} % end of parboxes +} % fcolorbox +} % ensure fbox set is restricted +} +$endif$ +$if(titlepage-style-code.date.doublelinewide)$ +\newcommand{\titlepagedateblock}{ +\rule{\textwidth}{0.4pt} % Thin horizontal rule +\vspace{0.1\textheight} % Whitespace between the top rules and title + +\datestyled + +\vspace{0.025\textheight} +\rule{0.3\textwidth}{0.4pt} % Short horizontal rule under the title +} +$endif$ +$if(titlepage-style-code.date.doublelinetight)$ +\newcommand{\titlepagedateblock}{ +\newcommand{\HRule}{\rule{\linewidth}{0.5mm}} + +\HRule\\[0.4cm] + +\datestyled + +\HRule\\ +} +$endif$ + diff --git a/_extensions/nmfs-opensci/titlepage/_title-themes.tex b/_extensions/nmfs-opensci/titlepage/_title-themes.tex new file mode 100644 index 000000000..a1c5c390b --- /dev/null +++ b/_extensions/nmfs-opensci/titlepage/_title-themes.tex @@ -0,0 +1,68 @@ +$if(false)$ +% This file defines the author block and affiliation block +% based on the author and affiliation theme +% none, plain, colorbox, doublelined +$endif$ + +\newcommand{\titleandsubtitle}{ +% Title and subtitle +$if(title)$ +{$if(titlepage-theme.title-fontsize)$\fontsize{$titlepage-theme.title-fontsize$}{$titlepage-theme.title-spacing$}\selectfont +$endif$$if(titlepage-theme.title-color)$\textcolor{$titlepage-theme.title-color$}$endif${$for(titlepage-theme.title-fontstyle)$\$titlepage-theme.title-fontstyle${$endfor$\nohyphens{$title$}$for(titlepage-theme.title-fontstyle)$}$endfor$}\par +}% +$endif$ +$if(subtitle)$ + +\vspace{\betweentitlesubtitle} +{ +$if(titlepage-theme.subtitle-fontsize)$\fontsize{$titlepage-theme.subtitle-fontsize$}{$titlepage-theme.subtitle-spacing$}\selectfont +$endif$$if(titlepage-theme.subtitle-color)$\textcolor{$titlepage-theme.subtitle-color$}$endif${$for(titlepage-theme.subtitle-fontstyle)$\$titlepage-theme.subtitle-fontstyle${$endfor$\nohyphens{$subtitle$}$for(titlepage-theme.subtitle-fontstyle)$}$endfor$}\par +}$endif$} +$-- +$if(titlepage-style-code.title.none)$ +\newcommand{\titlepagetitleblock}{}$endif$ +$-- +$if(titlepage-style-code.title.plain)$ +\newcommand{\titlepagetitleblock}{ +\titleandsubtitle +} +$endif$ +$-- +$if(titlepage-style-code.title.colorbox)$ +\newcommand{\titlepagetitleblock}{ +{\setlength{\fboxrule}{$if(titlepage-theme.title-colorbox-borderwidth)$$titlepage-theme.title-colorbox-borderwidth$$else$0pt$endif$} +\fcolorbox{$if(titlepage-theme.title-colorbox-bordercolor)$$titlepage-theme.title-colorbox-bordercolor$$else$black$endif$}{$if(titlepage-theme.title-colorbox-fill)$$titlepage-theme.title-colorbox-fill$$else$cyan$endif$}{ +\parbox[t]{0.90\minipagewidth}{ % Outer full width box +\parbox[t]{0.85\minipagewidth}{ % Inner box for inner right text margin +$if(titlepage-theme.title-align)$\titlepagetitlealign$else$$if(titlepage-theme.page-align)$\titlepagepagealign$endif$$endif$ +\vspace{0.7cm} + +\titleandsubtitle + +\vspace{0.7cm} +}} % end of parboxes +} % fcolorbox +} % ensure fbox set is restricted +}$endif$ +$-- +$if(titlepage-style-code.title.doublelinewide)$ +\newcommand{\titlepagetitleblock}{ +\rule{\textwidth}{0.4pt} % Thin horizontal rule +\vspace{0.025\textheight} % Whitespace between the top rules and title + +\titleandsubtitle + +\vspace{0.025\textheight} 
+\rule{0.3\textwidth}{0.4pt} % Short horizontal rule under the title +}$endif$ +$-- +$if(titlepage-style-code.title.doublelinetight)$ +\newcommand{\titlepagetitleblock}{ +\newcommand{\HRule}{\rule{\linewidth}{0.5mm}} + +\HRule\\[0.4cm] + +\titleandsubtitle + +\HRule\\ +}$endif$ diff --git a/_extensions/nmfs-opensci/titlepage/_titlepage.tex b/_extensions/nmfs-opensci/titlepage/_titlepage.tex new file mode 100644 index 000000000..e2ebe6082 --- /dev/null +++ b/_extensions/nmfs-opensci/titlepage/_titlepage.tex @@ -0,0 +1,213 @@ +%%% TITLE PAGE START + +% Set up alignment commands +$if(titlepage-theme.page-align)$ +%Page +\newcommand{\titlepagepagealign}{ +\ifthenelse{\equal{$titlepage-theme.page-align$}{right}}{\raggedleft}{} +\ifthenelse{\equal{$titlepage-theme.page-align$}{center}}{\centering}{} +\ifthenelse{\equal{$titlepage-theme.page-align$}{left}}{\raggedright}{} +} +$endif$ +$if(titlepage-theme.title-align)$ +%% Titles +\newcommand{\titlepagetitlealign}{ +\ifthenelse{\equal{$titlepage-theme.title-align$}{right}}{\raggedleft}{} +\ifthenelse{\equal{$titlepage-theme.title-align$}{center}}{\centering}{} +\ifthenelse{\equal{$titlepage-theme.title-align$}{left}}{\raggedright}{} +\ifthenelse{\equal{$titlepage-theme.title-align$}{spread}}{\makebox[\linewidth][s]}{} +} +$endif$ +$if(titlepage-theme.author-align)$ +%Author +\newcommand{\titlepageauthoralign}{ +\ifthenelse{\equal{$titlepage-theme.author-align$}{right}}{\raggedleft}{} +\ifthenelse{\equal{$titlepage-theme.author-align$}{center}}{\centering}{} +\ifthenelse{\equal{$titlepage-theme.author-align$}{left}}{\raggedright}{} +\ifthenelse{\equal{$titlepage-theme.author-align$}{spread}}{\makebox[\linewidth][s]}{} +} +$endif$ +$if(titlepage-theme.affiliation-align)$ +%Affiliation +\newcommand{\titlepageaffiliationalign}{ +\ifthenelse{\equal{$titlepage-theme.affiliation-align$}{right}}{\raggedleft}{} +\ifthenelse{\equal{$titlepage-theme.affiliation-align$}{center}}{\centering}{} +\ifthenelse{\equal{$titlepage-theme.affiliation-align$}{left}}{\raggedright}{} +\ifthenelse{\equal{$titlepage-theme.affiliation-align$}{spread}}{\makebox[\linewidth][s]}{} +} +$endif$ +$if(titlepage-theme.footer-align)$ +%Footer +\newcommand{\titlepagefooteralign}{ +\ifthenelse{\equal{$titlepage-theme.footer-align$}{right}}{\raggedleft}{} +\ifthenelse{\equal{$titlepage-theme.footer-align$}{center}}{\centering}{} +\ifthenelse{\equal{$titlepage-theme.footer-align$}{left}}{\raggedright}{} +\ifthenelse{\equal{$titlepage-theme.footer-align$}{spread}}{\makebox[\linewidth][s]}{} +} +$endif$ +$if(titlepage-theme.header-align)$ +%Header +\newcommand{\titlepageheaderalign}{ +\ifthenelse{\equal{$titlepage-theme.header-align$}{right}}{\raggedleft}{} +\ifthenelse{\equal{$titlepage-theme.header-align$}{center}}{\centering}{} +\ifthenelse{\equal{$titlepage-theme.header-align$}{left}}{\raggedright}{} +\ifthenelse{\equal{$titlepage-theme.header-align$}{spread}}{\makebox[\linewidth][s]}{} +} +$endif$ +$if(titlepage-theme.logo-align)$ +%Logo +\newcommand{\titlepagelogoalign}{ +\ifthenelse{\equal{$titlepage-theme.logo-align$}{right}}{\raggedleft}{} +\ifthenelse{\equal{$titlepage-theme.logo-align$}{center}}{\centering}{} +\ifthenelse{\equal{$titlepage-theme.logo-align$}{left}}{\raggedright}{} +} +$endif$ +$if(titlepage-theme.date-align)$ +%% Titles +\newcommand{\titlepagedatealign}{ +\ifthenelse{\equal{$titlepage-theme.title-align$}{right}}{\raggedleft}{} +\ifthenelse{\equal{$titlepage-theme.title-align$}{center}}{\centering}{} 
+\ifthenelse{\equal{$titlepage-theme.title-align$}{left}}{\raggedright}{} +} +$endif$ + +$-- % Set up commands based on themes +$_title-themes.tex()$ +$_author-affiliation-themes.tex()$ +$_header-footer-date-themes.tex()$ + +%set up blocks so user can specify order +\newcommand{\titleblock}{$if(title)$ +$if(subtitle)$ +\newlength{\betweentitlesubtitle} +\setlength{\betweentitlesubtitle}{$if(titlepage-theme.title-subtitle-space-between)$$titlepage-theme.title-subtitle-space-between$$else$\baselineskip$endif$} +$endif$ +{$if(titlepage-theme.title-align)$\titlepagetitlealign$endif$ +$if(titlepage-theme.title-fontfamily)$\titlepagetitlefont$endif$ +{\titlepagetitleblock} +} + +\vspace{$if(titlepage-theme.title-space-after)$$titlepage-theme.title-space-after$$else$0pt$endif$} +$else$$endif$} + +\newcommand{\authorblock}{$if(author)$ +{$if(titlepage-theme.author-align)$\titlepageauthoralign +$endif$$if(titlepage-theme.author-fontfamily)$\titlepageauthorfont +$endif$\titlepageauthorblock} + +\vspace{$if(titlepage-theme.author-space-after)$$titlepage-theme.author-space-after$$else$0pt$endif$} +$else$$endif$} + +\newcommand{\affiliationblock}{$if(author)$ +$if(titlepage-theme.affiliation-color)$\textcolor{$titlepage-theme.affiliation-color$}$endif${$if(titlepage-theme.affiliation-align)$\titlepageaffiliationalign +$endif$$if(titlepage-theme.affiliation-fontfamily)$\titlepageaffiliationfont +$endif$\titlepageaffiliationblock} + +\vspace{$if(titlepage-theme.affiliation-space-after)$$titlepage-theme.affiliation-space-after$$else$0pt$endif$} +$else$$endif$} + +\newcommand{\logoblock}{$if(titlepage-logo)$ +{$if(titlepage-theme.logo-align)$\titlepagelogoalign +$endif$\includegraphics[width=$if(titlepage-theme.logo-size)$$titlepage-theme.logo-size$$else$0.2\textwidth$endif$]{$titlepage-logo$}} + +\vspace{$if(titlepage-theme.logo-space-after)$$titlepage-theme.logo-space-after$$else$0pt$endif$} +$else$$endif$} + +\newcommand{\footerblock}{$if(titlepage-footer)$ +$if(titlepage-theme.footer-color)$\textcolor{$titlepage-theme.footer-color$}$endif${$if(titlepage-theme.footer-align)$\titlepagefooteralign +$endif$$if(titlepage-theme.footer-fontfamily)$\titlepagefooterfont +$endif$\titlepagefooterblock} + +\vspace{$if(titlepage-theme.footer-space-after)$$titlepage-theme.footer-space-after$$else$0pt$endif$} +$else$$endif$} + +\newcommand{\dateblock}{$if(date)$ +$if(titlepage-theme.date-color)$\textcolor{$titlepage-theme.date-color$}$endif${$if(titlepage-theme.date-align)$\titlepagedatealign +$endif$$if(titlepage-theme.date-fontfamily)$\titlepagedatefont +$endif$\titlepagedateblock} + +\vspace{$if(titlepage-theme.date-space-after)$$titlepage-theme.date-space-after$$else$0pt$endif$} +$else$$endif$} + +\newcommand{\headerblock}{$if(titlepage-header)$ +$if(titlepage-theme.header-color)$\textcolor{$titlepage-theme.header-color$}$endif${$if(titlepage-theme.header-align)$\titlepageheaderalign +$endif$$if(titlepage-theme.header-fontfamily)$\titlepageheaderfont$endif$\titlepageheaderblock + +\vspace{$if(titlepage-theme.header-space-after)$$titlepage-theme.header-space-after$$else$0pt$endif$} +}$else$$endif$} +$-- +$if(titlepage-geometry)$ +\newgeometry{$for(titlepage-geometry)$$titlepage-geometry$$sep$,$endfor$} +$endif$ +$-- +$if(titlepage-theme.page-html-color)$ +\definecolor{pgcolor}{HTML}{$titlepage-theme.page-html-color$} +\pagecolor{pgcolor}\afterpage{\nopagecolor} +$else$$if(titlepage-theme.page-color)$ +\colorlet{pgcolor}{$titlepage-theme.page-color$} +\pagecolor{pgcolor}\afterpage{\nopagecolor} +$endif$$endif$ +$-- 
+$if(titlepage-bg-image)$ +% background image +\newlength{\bgimagesize} +\setlength{\bgimagesize}{$if(titlepage-theme.bg-image-size)$$titlepage-theme.bg-image-size$$else$\paperwidth$endif$} +\LENGTHDIVIDE{\bgimagesize}{\paperwidth}{\theRatio} % from calculator pkg +\This$if(titlepage-theme.bg-image-location)$$titlepage-theme.bg-image-location$$else$ULCorner$endif$WallPaper{\theRatio}{$titlepage-bg-image$} +$endif$ + +\thispagestyle{empty} % no page numbers on titlepages +$if(titlepage-theme.page-fontfamily)$\titlepagefont$endif$ + +$if(titlepage-theme.vrule-width)$ +$if(titlepage-theme.vrule-text)$ +\newcommand{\vrulecode}{\noindent\colorbox{$titlepage-theme.vrule-color$}{\begin{minipage}[b][0.99\textheight][c]{\vrulewidth}$if(titlepage-theme.vrule-text)$ +\centering\rotatebox{90}{\makebox[0.88\textheight][r]{$if(titlepage-theme.vrule-text-fontstyle)$$for(titlepage-theme.vrule-text-fontstyle)$\$titlepage-theme.vrule-text-fontstyle${$endfor$$endif$$if(titlepage-theme.vrule-text-color)$\color{$titlepage-theme.vrule-text-color$}$endif$$titlepage-theme.vrule-text$$for(titlepage-theme.vrule-text-fontstyle)$}$endfor$}}$endif$\end{minipage}}} +$else$ +\newcommand{\vrulecode}{\rule{\vrulewidth}{\textheight}} +$endif$ +\newlength{\vrulewidth} +\setlength{\vrulewidth}{$titlepage-theme.vrule-width$} +\newlength{\B} +\setlength{\B}{\ifdim\vrulewidth > 0pt $titlepage-theme.vrule-space$\else 0pt\fi} +\newlength{\minipagewidth} +\ifthenelse{\equal{$titlepage-theme.vrule-align$}{left} \OR \equal{$titlepage-theme.vrule-align$}{right} } +{% True case +\setlength{\minipagewidth}{\textwidth - \vrulewidth - \B - 0.1\textwidth} +}{ +\setlength{\minipagewidth}{\textwidth - 2\vrulewidth - 2\B - 0.1\textwidth} +} +$else$ +\newlength{\minipagewidth} +\setlength{\minipagewidth}{\textwidth} +$endif$ +$if(titlepage-theme.vrule-width)$ +\ifthenelse{\equal{$titlepage-theme.vrule-align$}{left} \OR \equal{$titlepage-theme.vrule-align$}{leftright}} +{% True case +\raggedleft % needed for the minipage to work +\vrulecode +\hspace{\B} +}{% +\raggedright % else it is right only and width is not 0 +} +$else$ +\raggedright % single minipage +$endif$ +% [position of box][box height][inner position]{width} +% [s] means stretch out vertically; assuming there is a vfill +\begin{minipage}[b][\textheight][s]{\minipagewidth} +$if(titlepage-theme.page-align)$\titlepagepagealign$endif$ +$for(titlepage-theme.elements)$ +$titlepage-theme.elements$ +$sep$ + +$endfor$ +\par + +\end{minipage}\ifthenelse{\equal{$titlepage-theme.vrule-align$}{right} \OR \equal{$titlepage-theme.vrule-align$}{leftright} }{ +\hspace{\B} +\vrulecode}{} +\clearpage +$if(titlepage-geometry)$\restoregeometry +$endif$%%% TITLE PAGE END + diff --git a/_extensions/nmfs-opensci/titlepage/before-body.tex b/_extensions/nmfs-opensci/titlepage/before-body.tex new file mode 100644 index 000000000..2f483ecf3 --- /dev/null +++ b/_extensions/nmfs-opensci/titlepage/before-body.tex @@ -0,0 +1,39 @@ +%%%%% begin titlepage extension code + +$if(has-frontmatter)$ + \begin{frontmatter} +$endif$ + +\begin{titlepage} +$-- % Coverpage +$if(coverpage-true)$ +$_coverpage.tex()$ +$endif$ +$if(coverpage-include-file)$ + +$for(coverpage-include-file)$\input{$coverpage-include-file$} +\clearpage +$endfor$$endif$ + +$-- % Titlepage +$if(titlepage-true)$ +$if(titlepage-file)$ +% Use the file +\input{$titlepage-filename$} +$else$ +$_titlepage.tex()$ +$endif$ +$endif$ +$if(titlepage-include-file)$ + +$for(titlepage-include-file)$\input{$titlepage-include-file$} +\clearpage +$endfor$$endif$ 
+\end{titlepage} +\setcounter{page}{1} +$if(has-frontmatter)$ +\end{frontmatter} +$endif$ + +%%%%% end titlepage extension code + diff --git a/_extensions/nmfs-opensci/titlepage/coverpage-theme.lua b/_extensions/nmfs-opensci/titlepage/coverpage-theme.lua new file mode 100644 index 000000000..82c09455a --- /dev/null +++ b/_extensions/nmfs-opensci/titlepage/coverpage-theme.lua @@ -0,0 +1,491 @@ +local function isEmpty(s) + return s == nil or s == '' +end + +local function file_exists(name) + local f=io.open(name,"r") + if f~=nil then io.close(f) return true else return false end +end + +local function getVal(s) + return pandoc.utils.stringify(s) +end + +function script_path() + local str = debug.getinfo(2, "S").source:sub(2) + return str:match("(.*/)") +end + +local function has_value (tab, val) + for index, value in ipairs(tab) do + if value == val then + return true + end + end + + return false +end + +local function dump(o) + if type(o) == 'table' then + local s = '{ ' + for k,v in pairs(o) do + if type(k) ~= 'number' then k = '"'..k..'"' end + s = s .. '['..k..'] = ' .. dump(v) .. ',' + end + return s .. '} ' + else + return tostring(o) + end +end + +local function table_concat(t1,t2) + for _,v in ipairs(t2) do table.insert(t1, v) end + return t1 +end + +function Meta(m) +--[[ +This function checks that the value the user set is ok and stops with an error message if no. +yamlelement: the yaml metadata. e.g. m["coverpage-theme"]["page-align"] +yamltext: page, how to print the yaml value in the error message. e.g. coverpage-theme: page-align +okvals: a text table of ok styles. e.g. {"right", "center"} +--]] + local function check_yaml (yamlelement, yamltext, okvals) + choice = pandoc.utils.stringify(yamlelement) + if not has_value(okvals, choice) then + print("\n\ntitlepage extension error: " .. yamltext .. " is set to " .. choice .. ". It can be " .. pandoc.utils.stringify(table.concat(okvals, ", ")) .. ".\n\n") + return false + else + return true + end + + return true + end + +--[[ +This function gets the value of something like coverpage-theme.title-style and sets a value coverpage-theme.title-style.plain (for example). It also +does error checking against okvals. "plain" is always ok and if no value is set then the style is set to plain. +page: titlepage or coverpage +styleelement: page, title, subtitle, header, footer, affiliation, date, etc +okvals: a text table of ok styles. e.g. {"plain", "two-column"} +--]] + local function set_style (page, styleelement, okvals) + yamltext = page .. "-theme" .. ": " .. styleelement .. "-style" + yamlelement = m[page .. "-theme"][styleelement .. "-style"] + if not isEmpty(yamlelement) then + ok = check_yaml (yamlelement, yamltext, okvals) + if ok then + m[page .. "-style-code"][styleelement] = {} + m[page .. "-style-code"][styleelement][getVal(yamlelement)] = true + else + error() + end + else + m[page .. "-style-code"][styleelement] = {} + m[page .. 
"-style-code"][styleelement]["plain"] = true + end + end + +--[[ +This function assigns the themevals to the meta data +--]] + local function assign_value (tab) + for i, value in pairs(tab) do + if isEmpty(m['coverpage-theme'][i]) then + m['coverpage-theme'][i] = value + end + end + + return m + end + + local coverpage_table = { + ["title"] = function (m) + themevals = { + ["page-align"] = "left", + ["title-style"] = "plain", + ["author-style"] = "none", + ["footer-style"] = "none", + ["header-style"] = "none", + ["date-style"] = "none", + } + assign_value(themevals) + + return m + end, + ["author"] = function (m) + themevals = { + ["page-align"] = "left", + ["title-style"] = "none", + ["author-style"] = "plain", + ["footer-style"] = "none", + ["header-style"] = "none", + ["date-style"] = "none", + } + assign_value(themevals) + + return m + end, + ["titleauthor"] = function (m) + themevals = { + ["page-align"] = "left", + ["title-style"] = "plain", + ["author-style"] = "plain", + ["footer-style"] = "none", + ["header-style"] = "none", + ["date-style"] = "none", + } + assign_value(themevals) + + return m + end, + ["true"] = function (m) + themevals = { + ["page-align"] = "left" + } + assign_value(themevals) + + return m + end, + ["great-wave"] = function (m) + themevals = { + ["page-align"] = "right", + ["title-style"] = "plain", + ["author-style"] = "none", + ["footer-style"] = "plain", + ["header-style"] = "none", + ["date-style"] = "none", + } + assign_value(themevals) + + return m + end, + ["otter"] = function (m) + themevals = { + ["page-align"] = "left", + ["title-style"] = "plain", + ["author-style"] = "plain", + ["footer-style"] = "none", + ["header-style"] = "none", + ["date-style"] = "none", + } + assign_value(themevals) + + return m + end, + } + + m['coverpage-file'] = false + if m.coverpage then + choice = pandoc.utils.stringify(m.coverpage) + okvals = {"none", "true", "title", "author", "titleauthor", "otter", "great-wave"} + isatheme = has_value (okvals, choice) + if not isatheme then + if not file_exists(choice) then + error("titlepage extension error: coverpage can be a tex file or one of the themes: " .. pandoc.utils.stringify(table.concat(okvals, ", ")) .. 
".") + else + m['coverpage-file'] = true + m['coverpage-filename'] = choice + m['coverpage'] = "file" + end + else + ok = check_yaml (m.coverpage, "coverpage", okvals) + if not ok then error("") end + end + if not m['coverpage-file'] and choice ~= "none" then + m["coverpage-true"] = true + if isEmpty(m['coverpage-theme']) then + m['coverpage-theme'] = {} + end + coverpage_table[choice](m) -- add the theme defaults + end + if m['coverpage-file'] then + m["coverpage-true"] = true + if not isEmpty(m['coverpage-theme']) then + print("\n\ntitlepage extension message: since you passed in a static coverpage file, coverpage-theme is ignored.n\n") + end + end + if choice == "none" then + m["coverpage-true"] = false + end + else -- coverpage is false or not passed in + m["coverpage-true"] = false + m.coverpage = "none" + end + +-- Only for themes +-- coverpage-theme will exist if using a theme +if not m['coverpage-file'] and m['coverpage-true'] then + +--[[ +Set up the demos +--]] + choice = pandoc.utils.stringify(m.coverpage) + if choice == "great-wave" then + if isEmpty(m['coverpage-bg-image']) then +-- m['coverpage-bg-image'] = script_path().."images/TheGreatWaveoffKanagawa.jpeg" + m['coverpage-bg-image'] = "img/TheGreatWaveoffKanagawa.jpeg" + end + if isEmpty(m['coverpage-title']) then + m['coverpage-title'] = "quarto_titlepages" + end + if isEmpty(m['coverpage-footer']) then + m['coverpage-footer'] = "Templates for title pages and covers" + end + demovals = {["title-align"] = "right", ["title-fontsize"] = 40, ["title-fontfamily"] = "QTDublinIrish.otf", ["title-bottom"] = "10in", ["author-style"] = "none", ["footer-fontsize"] = 20, ["footer-fontfamily"] = "QTDublinIrish.otf", ["footer-align"] = "right", ["footer-bottom"] = "9.5in", ["page-html-color"] = "F6D5A8", ["bg-image-fading"] = "north"} + for dkey, val in pairs(demovals) do + if isEmpty(m['coverpage-theme'][dkey]) then + m['coverpage-theme'][dkey] = val + end + end + end + if choice == "otter" then + if isEmpty(m['coverpage-bg-image']) then +-- m['coverpage-bg-image'] = script_path().."images/otter-bar.jpeg" + m['coverpage-bg-image'] = "img/otter-bar.jpeg" + end + if isEmpty(m['coverpage-title']) then + m['coverpage-title'] = "Otters" + end + if isEmpty(m['coverpage-author']) then + m['coverpage-author'] = {"EE", "Holmes"} + end + demovals = {["title-color"] = "white", ["title-fontfamily"] = "QTDublinIrish.otf", ["title-fontsize"] = 100, ["author-fontstyle"] = {"textsc"}, ["author-sep"] = "newline", ["author-align"] = "right", ["author-fontsize"] = 30, ["author-bottom"] = "2in"} + for dkey, val in pairs(demovals) do + if isEmpty(m['coverpage-theme'][dkey]) then + m['coverpage-theme'][dkey] = val + end + end + end + +-- set the coverpage values unless user passed them in as coverpage-key + for key, val in pairs({"title", "author", "date"}) do + if isEmpty(m['coverpage-' .. val]) then + if not isEmpty(m[val]) then + m['coverpage-' .. val] = m[val] + end + end + end +-- make a bit more robust to whatever user passes in for coverpage-author + if not isEmpty(m['coverpage-author']) then + for key, val in pairs(m['coverpage-author']) do + m['coverpage-author'][key] = getVal(m['coverpage-author'][key]) + end + end + +-- fix "true" to figure out what was passed in + if choice == "true" then + for key, val in pairs({"title", "author", "footer", "header", "date"}) do + if not isEmpty(m['coverpage-' .. val]) then + if isEmpty(m['coverpage-theme'][val .. "-style"]) then + m['coverpage-theme'][val .. 
"-style"] = "plain" + end + else + m['coverpage-theme'][val .. "-style"] = "none" + end + end + end + + +--[[ +Error checking and setting the style codes +--]] + -- Style codes + m["coverpage-style-code"] = {} + okvals = {"none", "plain"} + set_style("coverpage", "title", okvals) + set_style("coverpage", "footer", okvals) + set_style("coverpage", "header", okvals) + set_style("coverpage", "author", okvals) + set_style("coverpage", "date", okvals) + + if isEmpty(m['coverpage-bg-image']) then + m['coverpage-bg-image'] = "none" -- need for stringify to work + end + choice = pandoc.utils.stringify(m['coverpage-bg-image']) + if choice == "none" then + m['coverpage-bg-image'] = false + else + m['coverpage-theme']['bg-image-anchor'] = "south west" -- fixed + image_table = {["bottom"] = 0.0, ["left"] = 0.0, ["rotate"] = 0.0, ["opacity"] = 1.0} + for key, val in pairs(image_table) do + if isEmpty(m['coverpage-theme']['bg-image-' .. key]) then + m['coverpage-theme']['bg-image-' .. key] = val + end + end + if isEmpty(m['coverpage-theme']['bg-image-size']) then + m['coverpage-theme']['bg-image-size'] = pandoc.MetaInlines{ + pandoc.RawInline("latex","\\paperwidth")} + end + if not isEmpty(m['coverpage-theme']['bg-image-fading']) then + okvals = {"top", "bottom", "left", "right", "north", "south", "east", "west", "fadeout" } + ok = check_yaml (m["coverpage-theme"]["bg-image-fading"], "coverpage-theme: bg-image-fading", okvals) + if not ok then error("") end + if getVal(m['coverpage-theme']['bg-image-fading']) == "left" then m['coverpage-theme']['bg-image-fading'] = "west" end + if getVal(m['coverpage-theme']['bg-image-fading']) == "right" then m['coverpage-theme']['bg-image-fading'] = "east" end + if getVal(m['coverpage-theme']['bg-image-fading']) == "top" then m['coverpage-theme']['bg-image-fading'] = "north" end + if getVal(m['coverpage-theme']['bg-image-fading']) == "bottom" then m['coverpage-theme']['bg-image-fading'] = "south" end + end + end -- bg-image attributes + if m['coverpage-bg-image'] then -- not false + choice = pandoc.utils.stringify(m['coverpage-bg-image']) + if not file_exists(choice) then + error("\n\ntitlepage extension error: coverpage-bg-image file " .. choice .. " cannot be opened. Is the file path and name correct? Using a demo? Demo options are great-wave and otter.\n\n") + end + end + +--[[ +Set the fontsize spacing defaults +if page-fontsize was passed in or if fontsize passed in but not spacing +--]] + + -- if not passed in then it will take page-fontsize and page-spacing + for key, val in pairs({"title", "author", "footer", "header", "date"}) do + if getVal(m["coverpage-theme"][val .. "-style"]) ~= "none" then + if not isEmpty(m["coverpage-theme"]["page-fontsize"]) then + if isEmpty(m["coverpage-theme"][val .. "-fontsize"]) then + m["coverpage-theme"][val .. "-fontsize"] = getVal(m["coverpage-theme"]["page-fontsize"]) + end + end + end + end + -- make sure spacing is set if user passed in fontsize + for key, val in pairs({"page", "title", "author", "footer", "header", "date"}) do + if not isEmpty(m['coverpage-theme'][val .. "-fontsize"]) then + if isEmpty(m['coverpage-theme'][val .. "-spacing"]) then + m['coverpage-theme'][val .. "-spacing"] = 1.2*getVal(m['coverpage-theme'][val .. 
"-fontsize"]) + end + end + end + +--[[ +Set author sep character +--]] + if isEmpty(m['coverpage-theme']["author-sep"]) then + m['coverpage-theme']["author-sep"] = pandoc.MetaInlines{ + pandoc.RawInline("latex",", ")} + end + if getVal(m['coverpage-theme']["author-sep"]) == "newline" then + m['coverpage-theme']["author-sep"] = pandoc.MetaInlines{ + pandoc.RawInline("latex","\\\\")} + end + +--[[ +Set affiliation sep character +--]] + if isEmpty(m['coverpage-theme']["affiliation-sep"]) then + m['coverpage-theme']["affiliation-sep"] = pandoc.MetaInlines{ + pandoc.RawInline("latex",",~")} + end + if getVal(m['coverpage-theme']["affiliation-sep"]) == "newline" then + m['coverpage-theme']["affiliation-sep"] = pandoc.MetaInlines{ + pandoc.RawInline("latex","\\\\")} + end + +--[[ +Set the defaults for the coverpage alignments +default coverpage alignment is left +because coverpage uses tikzpicture, the alignments of the elements must be set +--]] + if isEmpty(m['coverpage-theme']["page-align"]) then + m['coverpage-theme']["page-align"] = "left" + end + for key, val in pairs({"page", "title", "author", "footer", "header", "logo", "date"}) do + if not isEmpty(m["coverpage-theme"][val .. "-align"]) then + okvals = {"right", "left", "center"} + if has_value({"title", "author", "footer", "header", "date"}, val) then table.insert(okvals, "spread") end + ok = check_yaml (m["coverpage-theme"][val .. "-align"], "coverpage-theme: " .. val .. "-align", okvals) + if not ok then error("") end + else + m["coverpage-theme"][val .. "-align"] = getVal(m['coverpage-theme']["page-align"]) + end + end + +--[[ +Set left and width alignments, bottom distance and rotation +--]] + for key, val in pairs({"title", "author", "footer", "header", "date"}) do + if m['coverpage-theme'][val .. "-style"] ~= "none" then + if getVal(m['coverpage-theme'][val .. "-align"]) == "left" then + m['coverpage-theme'][val .. "-anchor"] = "north west" -- not user controlled + if isEmpty(m['coverpage-theme'][val .. "-left"]) then + m['coverpage-theme'][val .. '-left'] = pandoc.MetaInlines{ + pandoc.RawInline("latex", "0.2\\paperwidth")} + if isEmpty(m['coverpage-theme'][val .. '-width']) then + m['coverpage-theme'][val .. '-width'] = pandoc.MetaInlines{ + pandoc.RawInline("latex", "0.7\\paperwidth")} + end + else + if isEmpty(m['coverpage-theme'][val .. '-width']) then + error("titlepage extension error: if you specify coverpage-theme "..val.."-left, you must also specify "..val.."-width.") + end + end + end -- left + if getVal(m['coverpage-theme'][val .. '-align']) == "right" then + m['coverpage-theme'][val .. '-anchor'] = "north east" -- not user controlled + if isEmpty(m['coverpage-theme'][val .. '-left']) then + m['coverpage-theme'][val .. '-left'] = pandoc.MetaInlines{ + pandoc.RawInline("latex", "0.8\\paperwidth")} + if isEmpty(m['coverpage-theme'][val .. '-width']) then + m['coverpage-theme'][val .. '-width'] = pandoc.MetaInlines{ + pandoc.RawInline("latex", "0.7\\paperwidth")} + end + else + if isEmpty(m['coverpage-theme'][val .. '-width']) then + error("titlepage extension error: if you specify coverpage-theme "..val.."-left, you must also specify "..val.."-width.") + end + end + end -- right + if getVal(m['coverpage-theme'][val .. '-align']) == "center" then + m['coverpage-theme'][val .. '-anchor'] = "north" -- not user controlled + if isEmpty(m['coverpage-theme'][val .. '-left']) then + m['coverpage-theme'][val .. 
'-left'] = pandoc.MetaInlines{ + pandoc.RawInline("latex", "0.5\\paperwidth")} + if isEmpty(m['coverpage-theme'][val .. '-width']) then + m['coverpage-theme'][val .. '-width'] = pandoc.MetaInlines{ + pandoc.RawInline("latex", "0.8\\paperwidth")} + end + else + if isEmpty(m['coverpage-theme'][val .. '-width']) then + error("titlepage extension error: if you specify coverpage-theme "..val.."-left, you must also specify "..val.."-width.") + end + end + end -- center + -- Set the bottom distances + bottom_table = {["title"] = pandoc.MetaInlines{ + pandoc.RawInline("latex", "0.8\\paperheight")}, ["author"] = pandoc.MetaInlines{ + pandoc.RawInline("latex", "0.25\\paperheight")}, ["footer"] = pandoc.MetaInlines{ + pandoc.RawInline("latex", "0.1\\paperheight")}, ["header"] = pandoc.MetaInlines{ + pandoc.RawInline("latex", "0.9\\paperheight")}, ["date"] = pandoc.MetaInlines{ + pandoc.RawInline("latex", "0.05\\paperheight")}} + for bkey, bval in pairs(bottom_table) do + if isEmpty(m['coverpage-theme'][bkey .. '-bottom']) then + m['coverpage-theme'][bkey .. '-bottom'] = bval + end + end -- bottom distance + -- set rotation + if isEmpty(m['coverpage-theme'][val .. '-rotate']) then + m['coverpage-theme'][val .. '-rotate'] = 0 + end -- rotate + end -- if style not none + end -- for loop + + +--[[ +Set logo defaults +--]] + if not isEmpty(m['coverpage-logo']) then + if isEmpty(m['coverpage-theme']["logo-size"]) then + m['coverpage-theme']["logo-size"] = pandoc.MetaInlines{ + pandoc.RawInline("latex","0.2\\paperwidth")} + end + end + +end -- end the theme section + + return m + +end + + diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/COPYING-QUALITYPE b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/COPYING-QUALITYPE new file mode 100644 index 000000000..be6400ccc --- /dev/null +++ b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/COPYING-QUALITYPE @@ -0,0 +1,449 @@ +Copyright (c) 1992 QualiType. + +These fonts are distributed, at your option, under the terms of the SIL Open +Font License (https://scripts.sil.org/OFL) or the GNU General Public License +(https://www.gnu.org/licenses/gpl-2.0.txt), either version 2.0 or, at your +option, any later version. As a special exception, if you create a document +that uses this font, and embed this font or unaltered portions of this font in +the document, this font does not by itself cause the resulting document to be +covered by the GNU General Public License. This exception does not, however, +invalidate any other reasons why the document might be covered by the GNU +General Public License. + +*** + +This Font Software is licensed under the SIL Open Font License, Version 1.1. + +This license is copied below, and is also available with a FAQ at: +http://scripts.sil.org/OFL + +SIL OPEN FONT LICENSE + +Version 1.1 – 26 February 2007 + +PREAMBLE + +The goals of the Open Font License (OFL) are to stimulate worldwide +development of collaborative font projects, to support the font creation +efforts of academic and linguistic communities, and to provide a free and open +framework in which fonts may be shared and improved in partnership with +others. + +The OFL allows the licensed fonts to be used, studied, modified and +redistributed freely as long as they are not sold by themselves. The fonts, +including any derivative works, can be bundled, embedded, redistributed and/or +sold with any software provided that any reserved names are not used by +derivative works. 
The fonts and derivatives, however, cannot be released under +any other type of license. The requirement for fonts to remain under this +license does not apply to any document created using the fonts or their +derivatives. + +DEFINITIONS + +“Font Software” refers to the set of files released by the Copyright Holder(s) +under this license and clearly marked as such. This may include source files, +build scripts and documentation. + +“Reserved Font Name” refers to any names specified as such after the copyright +statement(s). + +“Original Version” refers to the collection of Font Software components as +distributed by the Copyright Holder(s). + +“Modified Version” refers to any derivative made by adding to, deleting, or +substituting — in part or in whole — any of the components of the Original +Version, by changing formats or by porting the Font Software to a new +environment. + +“Author” refers to any designer, engineer, programmer, technical writer or +other person who contributed to the Font Software. + +PERMISSION & CONDITIONS + +Permission is hereby granted, free of charge, to any person obtaining a copy +of the Font Software, to use, study, copy, merge, embed, modify, redistribute, +and sell modified and unmodified copies of the Font Software, subject to the +following conditions: + +1) Neither the Font Software nor any of its individual components, in Original +or Modified Versions, may be sold by itself. + +2) Original or Modified Versions of the Font Software may be +bundled,redistributed and/or sold with any software, provided that each copy +contains the above copyright notice and this license. These can be included +either as stand-alone text files, human-readable headers or in the appropriate +machine-readable metadata fields within text or binary files as long as those +fields can be easily viewed by the user. + +3) No Modified Version of the Font Software may use the Reserved Font Νame(s) +unless explicit written permission is granted by the corresponding Copyright +Holder. This restriction only applies to the primary font name as presented to +the users. + +4) The name(s) of the Copyright Holder(s) or the Author(s) of the Font +Software shall not be used to promote, endorse or advertise any Modified +Version, except to acknowledge the contribution(s) of the Copyright Holder(s) +and the Author(s) or with their explicit written permission. + +5) The Font Software, modified or unmodified, in part or in whole, must be +distributed entirely under this license, and must not be distributed under any +other license. The requirement for fonts to remain under this license does not +apply to any document created using the Font Software. + +TERMINATION + +This license becomes null and void if any of the above conditions are not met. + +DISCLAIMER + +THE FONT SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF COPYRIGHT, PATENT, +TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE +FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, INCLUDING ANY GENERAL, SPECIAL, +INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, WHETHER IN AN ACTION OF +CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF THE USE OR INABILITY TO USE +THE FONT SOFTWARE OR FROM OTHER DEALINGS IN THE FONT SOFTWARE. 
+ +*** + + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. 
(Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. 
You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. 
If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. 
BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. 
Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+  `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+  <signature of Ty Coon>, 1 April 1989
+  Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.
+
diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/README b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/README
new file mode 100644
index 000000000..15363a651
--- /dev/null
+++ b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/README
@@ -0,0 +1,10 @@
+QualiType font collection
+Copyright (c) 1992 QualiType
+Version 2019-12-26
+***
+These 45 fonts were created by QualiType. With the kind permission of John
+Colletti, these fonts have been released as free and open-source. The fonts
+are usable under the SIL OFL 1.1 or the GNU GPL 2.0 (or later, at your option)
+with a font exception. See COPYING for more details.
+
+This package is maintained by Daniel Benjamin Miller
diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/doc/qualitype-doc.pdf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/doc/qualitype-doc.pdf
new file mode 100644
index 000000000..92fa054a2
Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/doc/qualitype-doc.pdf differ
diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/doc/qualitype-doc.tex b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/doc/qualitype-doc.tex
new file mode 100644
index 000000000..862be3eb8
--- /dev/null
+++ b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/doc/qualitype-doc.tex
@@ -0,0 +1,18 @@
+\documentclass[12pt]{article}
+\usepackage{fontspec}
+\setmainfont{QTCaslan}
+\usepackage{microtype}
+\author{Daniel Benjamin Miller}
+\title{The QualiType font collection}
+\begin{document}
+\maketitle
+\section{Description}
+These 45 fonts were created by QualiType. With the kind permission of John
+Colletti, these fonts have been released as free and open-source. The fonts
+are usable under the SIL OFL 1.1 or the GNU GPL 2.0 (or later, at your option)
+with a font exception. See \texttt{COPYING} for more details. The fonts do not include small capitals, old-style figures or any other OpenType goodies. They are released in part with future extension in mind. The \texttt{qualitype} package will only carry the OpenType versions of the original 1992 fonts; improvements should be given a new name and a corresponding separate package.
+\section{Fonts Included} +\begin{itemize} +\item QTAbbie\item QTAgateType-Bold\item QTAgateType-Italic\item QTAgateType\item QTAncientOlive-Bold\item QTAncientOlive\item QTAntiquePost\item QTArabian\item QTArnieB\item QTArtiston\item QTAtchen\item QTAvanti-Italic\item QTAvanti\item QTBasker-Bold\item QTBasker-Italic\item QTBasker\item QTBeckman\item QTBengal-Bold\item QTBengal\item QTBlackForest\item QTBlimpo\item QTBodini-Bold\item QTBodini-Italic\item QTBodini\item QTBodiniPoster-Italic\item QTBodiniPoster\item QTBookmann-Bold\item QTBookmann-BoldItalic\item QTBookmann-Italic\item QTBookmann\item QTBoulevard\item QTBrushStroke\item QTCaligulatype\item QTCanaithtype\item QTCascadetype\item QTCaslan-Bold\item QTCaslan-BoldItalic\item QTCaslan-Italic\item QTCaslan\item QTCaslanOpen\item QTCasual\item QTChanceryType-Bold\item QTChanceryType-Italic\item QTChanceryType\item QTChicagoland\item QTClaytablet\item QTCloisteredMonk\item QTCoronation\item QTDeuce\item QTDingBits\item QTDoghaus\item QTDoghausHeavy\item QTDoghausLight\item QTDublinIrish\item QTEraType-Bold\item QTEraType\item QTEurotype-Bold\item QTEurotype\item QTFloraline-Bold\item QTFloraline\item QTFlorencia\item QTFraktur\item QTFrank\item QTFrankHeavy\item QTFrizQuad-Bold\item QTFrizQuad\item QTFuture-Italic\item QTFuture\item QTFuturePoster\item QTGaromand-Bold\item QTGaromand-BoldItalic\item QTGaromand-Italic\item QTGaromand\item QTGhoulFace\item QTGraphLite\item QTGraveure-Bold\item QTGraveure\item QTGreece\item QTHandwriting\item QTHeidelbergType\item QTHelvet-Black\item QTHelvet-BoldOutline\item QTHelvetCnd-Black\item QTHelvetCnd-Light\item QTHelvetCnd\item QTHoboken\item QTHowardType\item QTHowardTypeFat\item QTImpromptu\item QTJupiter\item QTKooper-Italic\item QTKooper\item QTKorrin-Italic\item QTKorrin\item QTKung-Fu\item QTLautrecType\item QTLetterGoth-Bold\item QTLetterGoth-BoldItalic\item QTLetterGoth-Italic\item QTLetterGoth\item QTLinoscroll\item QTLinostroke\item QTLondonScroll\item QTMagicMarker\item QTMerryScript\item QTMilitary\item QTOKCorral-Cnd\item QTOKCorral-Ext\item QTOKCorral\item QTOldGoudy-Bold\item QTOldGoudy-Italic\item QTOldGoudy\item QTOptimum-Bold\item QTOptimum-BoldItalic\item QTOptimum-Italic\item QTOptimum\item QTPalatine-Bold\item QTPalatine-Italic\item QTPalatine\item QTPandora\item QTParisFrance\item QTPeignoir-Lite\item QTPeignoir\item QTPiltdown\item QTPristine-Bold\item QTPristine-BoldItalic\item QTPristine-Italic\item QTPristine\item QTRobotic2000\item QTSanDiego\item QTSchoolCentury-Bold\item QTSchoolCentury-BoldItalic\item QTSchoolCentury-Italic\item QTSchoolCentury\item QTSlogantype\item QTSnowCaps\item QTStoryTimeCaps\item QTTechtone-Bold\item QTTechtone-BoldItalic\item QTTechtone-Italic\item QTTechtone\item QTTheatre\item QTTimeOutline\item QTTumbleweed\item QTUSA-Uncial\item QTVagaRound-Bold\item QTVagaRound\item QTWeise-Bold\item QTWeise-Italic\item QTWeise\item QTWestEnd +\end{itemize} +\end{document} diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTAbbie.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTAbbie.otf new file mode 100644 index 000000000..53c0879c7 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTAbbie.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTAgateType-Bold.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTAgateType-Bold.otf new file mode 100644 index 000000000..2dc04f47f Binary files /dev/null and 
b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTAgateType-Bold.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTAgateType-Italic.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTAgateType-Italic.otf new file mode 100644 index 000000000..906e6e098 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTAgateType-Italic.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTAgateType.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTAgateType.otf new file mode 100644 index 000000000..c823a6eca Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTAgateType.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTAncientOlive-Bold.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTAncientOlive-Bold.otf new file mode 100644 index 000000000..524ed4d2c Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTAncientOlive-Bold.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTAncientOlive.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTAncientOlive.otf new file mode 100644 index 000000000..1bcbf2464 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTAncientOlive.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTAntiquePost.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTAntiquePost.otf new file mode 100644 index 000000000..a414578c9 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTAntiquePost.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTArabian.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTArabian.otf new file mode 100644 index 000000000..68a396002 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTArabian.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTArnieB.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTArnieB.otf new file mode 100644 index 000000000..5f5c3ebc7 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTArnieB.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTArtiston.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTArtiston.otf new file mode 100644 index 000000000..e2e5a8cb1 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTArtiston.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTAtchen.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTAtchen.otf new file mode 100644 index 000000000..6038a6f33 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTAtchen.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTAvanti-Italic.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTAvanti-Italic.otf new file mode 100644 index 000000000..db4683846 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTAvanti-Italic.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTAvanti.otf 
b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTAvanti.otf new file mode 100644 index 000000000..7fc1f35f1 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTAvanti.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTBasker-Bold.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTBasker-Bold.otf new file mode 100644 index 000000000..d3cc207dc Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTBasker-Bold.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTBasker-Italic.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTBasker-Italic.otf new file mode 100644 index 000000000..328295ab8 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTBasker-Italic.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTBasker.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTBasker.otf new file mode 100644 index 000000000..7408c8dda Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTBasker.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTBeckman.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTBeckman.otf new file mode 100644 index 000000000..e5fec165c Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTBeckman.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTBengal-Bold.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTBengal-Bold.otf new file mode 100644 index 000000000..09561816c Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTBengal-Bold.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTBengal.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTBengal.otf new file mode 100644 index 000000000..bbb84d35f Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTBengal.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTBlackForest.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTBlackForest.otf new file mode 100644 index 000000000..dfe335953 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTBlackForest.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTBlimpo.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTBlimpo.otf new file mode 100644 index 000000000..fa2c6443b Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTBlimpo.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTBodini-Bold.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTBodini-Bold.otf new file mode 100644 index 000000000..e8fbb8669 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTBodini-Bold.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTBodini-Italic.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTBodini-Italic.otf new file mode 100644 index 000000000..7e4b55377 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTBodini-Italic.otf differ diff --git 
a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTBodini.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTBodini.otf new file mode 100644 index 000000000..129f01ce1 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTBodini.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTBodiniPoster-Italic.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTBodiniPoster-Italic.otf new file mode 100644 index 000000000..4ce6720b1 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTBodiniPoster-Italic.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTBodiniPoster.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTBodiniPoster.otf new file mode 100644 index 000000000..ce4c4476c Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTBodiniPoster.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTBookmann-Bold.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTBookmann-Bold.otf new file mode 100644 index 000000000..921f7a4f4 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTBookmann-Bold.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTBookmann-BoldItalic.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTBookmann-BoldItalic.otf new file mode 100644 index 000000000..e302cd3da Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTBookmann-BoldItalic.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTBookmann-Italic.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTBookmann-Italic.otf new file mode 100644 index 000000000..8c3590201 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTBookmann-Italic.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTBookmann.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTBookmann.otf new file mode 100644 index 000000000..5ae056806 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTBookmann.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTBoulevard.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTBoulevard.otf new file mode 100644 index 000000000..84b84fc3c Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTBoulevard.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTBrushStroke.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTBrushStroke.otf new file mode 100644 index 000000000..1195d8ea9 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTBrushStroke.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTCaligulatype.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTCaligulatype.otf new file mode 100644 index 000000000..a4449763a Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTCaligulatype.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTCanaithtype.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTCanaithtype.otf new file mode 100644 
index 000000000..8d12667e6 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTCanaithtype.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTCascadetype.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTCascadetype.otf new file mode 100644 index 000000000..0763ac053 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTCascadetype.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTCaslan-Bold.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTCaslan-Bold.otf new file mode 100644 index 000000000..2ca89a550 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTCaslan-Bold.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTCaslan-BoldItalic.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTCaslan-BoldItalic.otf new file mode 100644 index 000000000..d4564e472 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTCaslan-BoldItalic.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTCaslan-Italic.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTCaslan-Italic.otf new file mode 100644 index 000000000..ec7ba1dfc Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTCaslan-Italic.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTCaslan.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTCaslan.otf new file mode 100644 index 000000000..37a91b247 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTCaslan.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTCaslanOpen.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTCaslanOpen.otf new file mode 100644 index 000000000..15cda142d Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTCaslanOpen.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTCasual.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTCasual.otf new file mode 100644 index 000000000..a0fe65852 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTCasual.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTChanceryType-Bold.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTChanceryType-Bold.otf new file mode 100644 index 000000000..1cf640339 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTChanceryType-Bold.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTChanceryType-Italic.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTChanceryType-Italic.otf new file mode 100644 index 000000000..eb568dfe7 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTChanceryType-Italic.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTChanceryType.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTChanceryType.otf new file mode 100644 index 000000000..bdc24a22c Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTChanceryType.otf differ diff --git 
a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTChicagoland.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTChicagoland.otf new file mode 100644 index 000000000..06cf15501 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTChicagoland.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTClaytablet.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTClaytablet.otf new file mode 100644 index 000000000..e328ea2cc Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTClaytablet.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTCloisteredMonk.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTCloisteredMonk.otf new file mode 100644 index 000000000..77362bfe1 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTCloisteredMonk.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTCoronation.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTCoronation.otf new file mode 100644 index 000000000..be310fde1 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTCoronation.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTDeuce.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTDeuce.otf new file mode 100644 index 000000000..4aa326da0 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTDeuce.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTDingBits.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTDingBits.otf new file mode 100644 index 000000000..cf30c494b Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTDingBits.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTDoghaus.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTDoghaus.otf new file mode 100644 index 000000000..672c24068 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTDoghaus.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTDoghausHeavy.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTDoghausHeavy.otf new file mode 100644 index 000000000..6b0b90a77 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTDoghausHeavy.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTDoghausLight.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTDoghausLight.otf new file mode 100644 index 000000000..2157fa4df Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTDoghausLight.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTDublinIrish.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTDublinIrish.otf new file mode 100644 index 000000000..6067986ec Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTDublinIrish.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTEraType-Bold.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTEraType-Bold.otf new file mode 100644 index 000000000..7a1e3d066 Binary files /dev/null and 
b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTEraType-Bold.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTEraType.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTEraType.otf new file mode 100644 index 000000000..c2b2c4d33 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTEraType.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTEurotype-Bold.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTEurotype-Bold.otf new file mode 100644 index 000000000..81208f74b Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTEurotype-Bold.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTEurotype.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTEurotype.otf new file mode 100644 index 000000000..2209ab5fa Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTEurotype.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTFloraline-Bold.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTFloraline-Bold.otf new file mode 100644 index 000000000..5cee6ddb0 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTFloraline-Bold.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTFloraline.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTFloraline.otf new file mode 100644 index 000000000..a233a2af6 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTFloraline.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTFlorencia.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTFlorencia.otf new file mode 100644 index 000000000..d6d27a137 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTFlorencia.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTFraktur.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTFraktur.otf new file mode 100644 index 000000000..baf6b9e15 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTFraktur.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTFrank.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTFrank.otf new file mode 100644 index 000000000..fa84ecabc Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTFrank.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTFrankHeavy.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTFrankHeavy.otf new file mode 100644 index 000000000..0c312e510 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTFrankHeavy.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTFrizQuad-Bold.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTFrizQuad-Bold.otf new file mode 100644 index 000000000..44d7a56da Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTFrizQuad-Bold.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTFrizQuad.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTFrizQuad.otf new file mode 
100644 index 000000000..8efa492eb Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTFrizQuad.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTFuture-Italic.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTFuture-Italic.otf new file mode 100644 index 000000000..7a3ff8245 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTFuture-Italic.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTFuture.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTFuture.otf new file mode 100644 index 000000000..3d232d7b9 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTFuture.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTFuturePoster.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTFuturePoster.otf new file mode 100644 index 000000000..a8e41fa5c Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTFuturePoster.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTGaromand-Bold.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTGaromand-Bold.otf new file mode 100644 index 000000000..a971e0566 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTGaromand-Bold.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTGaromand-BoldItalic.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTGaromand-BoldItalic.otf new file mode 100644 index 000000000..e867c830b Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTGaromand-BoldItalic.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTGaromand-Italic.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTGaromand-Italic.otf new file mode 100644 index 000000000..70858a764 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTGaromand-Italic.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTGaromand.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTGaromand.otf new file mode 100644 index 000000000..400e880a6 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTGaromand.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTGhoulFace.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTGhoulFace.otf new file mode 100644 index 000000000..5625f1997 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTGhoulFace.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTGraphLite.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTGraphLite.otf new file mode 100644 index 000000000..3fe3aff5d Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTGraphLite.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTGraveure-Bold.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTGraveure-Bold.otf new file mode 100644 index 000000000..cc9d2e175 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTGraveure-Bold.otf differ diff --git 
a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTGraveure.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTGraveure.otf new file mode 100644 index 000000000..ce0fabac5 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTGraveure.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTGreece.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTGreece.otf new file mode 100644 index 000000000..82377f544 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTGreece.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTHandwriting.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTHandwriting.otf new file mode 100644 index 000000000..070a4c345 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTHandwriting.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTHeidelbergType.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTHeidelbergType.otf new file mode 100644 index 000000000..69bdc238a Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTHeidelbergType.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTHelvet-Black.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTHelvet-Black.otf new file mode 100644 index 000000000..f28031470 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTHelvet-Black.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTHelvet-BoldOutline.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTHelvet-BoldOutline.otf new file mode 100644 index 000000000..4ea293ce4 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTHelvet-BoldOutline.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTHelvetCnd-Black.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTHelvetCnd-Black.otf new file mode 100644 index 000000000..d7cd3fc83 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTHelvetCnd-Black.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTHelvetCnd-Light.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTHelvetCnd-Light.otf new file mode 100644 index 000000000..01443e692 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTHelvetCnd-Light.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTHelvetCnd.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTHelvetCnd.otf new file mode 100644 index 000000000..2a7b96d4b Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTHelvetCnd.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTHoboken.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTHoboken.otf new file mode 100644 index 000000000..17abd2356 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTHoboken.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTHowardType.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTHowardType.otf new file mode 100644 index 000000000..67863e760 
Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTHowardType.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTHowardTypeFat.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTHowardTypeFat.otf new file mode 100644 index 000000000..22b90b736 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTHowardTypeFat.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTImpromptu.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTImpromptu.otf new file mode 100644 index 000000000..052f075ef Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTImpromptu.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTJupiter.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTJupiter.otf new file mode 100644 index 000000000..d9eb343a7 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTJupiter.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTKooper-Italic.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTKooper-Italic.otf new file mode 100644 index 000000000..76c9b082a Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTKooper-Italic.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTKooper.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTKooper.otf new file mode 100644 index 000000000..b95216eab Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTKooper.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTKorrin-Italic.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTKorrin-Italic.otf new file mode 100644 index 000000000..46e0ceef1 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTKorrin-Italic.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTKorrin.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTKorrin.otf new file mode 100644 index 000000000..60d0d1e7f Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTKorrin.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTKung-Fu.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTKung-Fu.otf new file mode 100644 index 000000000..1d73fdd7e Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTKung-Fu.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTLautrecType.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTLautrecType.otf new file mode 100644 index 000000000..0cd067b4b Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTLautrecType.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTLetterGoth-Bold.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTLetterGoth-Bold.otf new file mode 100644 index 000000000..b1f679843 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTLetterGoth-Bold.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTLetterGoth-BoldItalic.otf 
b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTLetterGoth-BoldItalic.otf new file mode 100644 index 000000000..c5f171dcf Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTLetterGoth-BoldItalic.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTLetterGoth-Italic.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTLetterGoth-Italic.otf new file mode 100644 index 000000000..392368aaf Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTLetterGoth-Italic.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTLetterGoth.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTLetterGoth.otf new file mode 100644 index 000000000..d55c8783c Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTLetterGoth.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTLinoscroll.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTLinoscroll.otf new file mode 100644 index 000000000..1024dbb53 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTLinoscroll.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTLinostroke.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTLinostroke.otf new file mode 100644 index 000000000..0a7ab7514 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTLinostroke.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTLondonScroll.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTLondonScroll.otf new file mode 100644 index 000000000..37e39656c Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTLondonScroll.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTMagicMarker.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTMagicMarker.otf new file mode 100644 index 000000000..daf24942c Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTMagicMarker.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTMerryScript.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTMerryScript.otf new file mode 100644 index 000000000..bfae7a5c9 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTMerryScript.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTMilitary.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTMilitary.otf new file mode 100644 index 000000000..b3a06a5b7 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTMilitary.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTOKCorral-Cnd.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTOKCorral-Cnd.otf new file mode 100644 index 000000000..1dfebd070 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTOKCorral-Cnd.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTOKCorral-Ext.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTOKCorral-Ext.otf new file mode 100644 index 000000000..33cff4737 Binary files /dev/null and 
b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTOKCorral-Ext.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTOKCorral.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTOKCorral.otf new file mode 100644 index 000000000..fbbabccdc Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTOKCorral.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTOldGoudy-Bold.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTOldGoudy-Bold.otf new file mode 100644 index 000000000..40c3bb4c5 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTOldGoudy-Bold.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTOldGoudy-Italic.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTOldGoudy-Italic.otf new file mode 100644 index 000000000..6cc6233bc Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTOldGoudy-Italic.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTOldGoudy.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTOldGoudy.otf new file mode 100644 index 000000000..462fca382 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTOldGoudy.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTOptimum-Bold.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTOptimum-Bold.otf new file mode 100644 index 000000000..24adcf81b Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTOptimum-Bold.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTOptimum-BoldItalic.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTOptimum-BoldItalic.otf new file mode 100644 index 000000000..c5f373125 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTOptimum-BoldItalic.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTOptimum-Italic.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTOptimum-Italic.otf new file mode 100644 index 000000000..bf82803d2 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTOptimum-Italic.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTOptimum.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTOptimum.otf new file mode 100644 index 000000000..ca17f0099 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTOptimum.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTPalatine-Bold.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTPalatine-Bold.otf new file mode 100644 index 000000000..86c4cfc22 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTPalatine-Bold.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTPalatine-Italic.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTPalatine-Italic.otf new file mode 100644 index 000000000..24f08680e Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTPalatine-Italic.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTPalatine.otf 
b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTPalatine.otf new file mode 100644 index 000000000..7ce3a5322 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTPalatine.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTPandora.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTPandora.otf new file mode 100644 index 000000000..a280442f4 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTPandora.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTParisFrance.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTParisFrance.otf new file mode 100644 index 000000000..37e2c2720 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTParisFrance.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTPeignoir-Lite.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTPeignoir-Lite.otf new file mode 100644 index 000000000..eeecfd076 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTPeignoir-Lite.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTPeignoir.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTPeignoir.otf new file mode 100644 index 000000000..8ed236951 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTPeignoir.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTPiltdown.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTPiltdown.otf new file mode 100644 index 000000000..f41097c66 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTPiltdown.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTPristine-Bold.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTPristine-Bold.otf new file mode 100644 index 000000000..d5cd2c752 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTPristine-Bold.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTPristine-BoldItalic.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTPristine-BoldItalic.otf new file mode 100644 index 000000000..5a6832dab Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTPristine-BoldItalic.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTPristine-Italic.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTPristine-Italic.otf new file mode 100644 index 000000000..aa09bbfef Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTPristine-Italic.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTPristine.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTPristine.otf new file mode 100644 index 000000000..daf305c24 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTPristine.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTRobotic2000.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTRobotic2000.otf new file mode 100644 index 000000000..622f0e9a2 Binary files /dev/null and 
b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTRobotic2000.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTSanDiego.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTSanDiego.otf new file mode 100644 index 000000000..5e1fd8141 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTSanDiego.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTSchoolCentury-Bold.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTSchoolCentury-Bold.otf new file mode 100644 index 000000000..111b71a76 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTSchoolCentury-Bold.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTSchoolCentury-BoldItalic.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTSchoolCentury-BoldItalic.otf new file mode 100644 index 000000000..aa1734bb5 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTSchoolCentury-BoldItalic.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTSchoolCentury-Italic.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTSchoolCentury-Italic.otf new file mode 100644 index 000000000..bd2f2ebe7 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTSchoolCentury-Italic.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTSchoolCentury.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTSchoolCentury.otf new file mode 100644 index 000000000..dbff5fb84 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTSchoolCentury.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTSlogantype.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTSlogantype.otf new file mode 100644 index 000000000..e7da0bf0d Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTSlogantype.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTSnowCaps.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTSnowCaps.otf new file mode 100644 index 000000000..382d077a7 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTSnowCaps.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTStoryTimeCaps.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTStoryTimeCaps.otf new file mode 100644 index 000000000..6d80ea643 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTStoryTimeCaps.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTTechtone-Bold.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTTechtone-Bold.otf new file mode 100644 index 000000000..d427375cf Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTTechtone-Bold.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTTechtone-BoldItalic.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTTechtone-BoldItalic.otf new file mode 100644 index 000000000..6f1e096f2 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTTechtone-BoldItalic.otf differ diff --git 
a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTTechtone-Italic.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTTechtone-Italic.otf new file mode 100644 index 000000000..bfff756eb Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTTechtone-Italic.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTTechtone.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTTechtone.otf new file mode 100644 index 000000000..e3fdfe506 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTTechtone.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTTheatre.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTTheatre.otf new file mode 100644 index 000000000..116486be7 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTTheatre.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTTimeOutline.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTTimeOutline.otf new file mode 100644 index 000000000..5d2ed373d Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTTimeOutline.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTTumbleweed.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTTumbleweed.otf new file mode 100644 index 000000000..8a7cfec91 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTTumbleweed.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTUSA-Uncial.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTUSA-Uncial.otf new file mode 100644 index 000000000..8d5773983 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTUSA-Uncial.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTVagaRound-Bold.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTVagaRound-Bold.otf new file mode 100644 index 000000000..981882e7c Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTVagaRound-Bold.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTVagaRound.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTVagaRound.otf new file mode 100644 index 000000000..0db86c649 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTVagaRound.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTWeise-Bold.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTWeise-Bold.otf new file mode 100644 index 000000000..c60438efa Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTWeise-Bold.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTWeise-Italic.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTWeise-Italic.otf new file mode 100644 index 000000000..913652603 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTWeise-Italic.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTWeise.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTWeise.otf new file mode 100644 index 000000000..c9a551de5 Binary files /dev/null and 
b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTWeise.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTWestEnd.otf b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTWestEnd.otf new file mode 100644 index 000000000..b5462ecf6 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/fonts/qualitype/opentype/QTWestEnd.otf differ diff --git a/_extensions/nmfs-opensci/titlepage/images/TheGreatWaveoffKanagawa.jpeg b/_extensions/nmfs-opensci/titlepage/images/TheGreatWaveoffKanagawa.jpeg new file mode 100755 index 000000000..a6d11c0ef Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/images/TheGreatWaveoffKanagawa.jpeg differ diff --git a/_extensions/nmfs-opensci/titlepage/images/corner-bg.png b/_extensions/nmfs-opensci/titlepage/images/corner-bg.png new file mode 100755 index 000000000..02e4127aa Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/images/corner-bg.png differ diff --git a/_extensions/nmfs-opensci/titlepage/images/logo.png b/_extensions/nmfs-opensci/titlepage/images/logo.png new file mode 100755 index 000000000..bc5e6a666 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/images/logo.png differ diff --git a/_extensions/nmfs-opensci/titlepage/images/nmfs-opensci-logo.png b/_extensions/nmfs-opensci/titlepage/images/nmfs-opensci-logo.png new file mode 100755 index 000000000..8014cbff2 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/images/nmfs-opensci-logo.png differ diff --git a/_extensions/nmfs-opensci/titlepage/images/otter-bar.jpeg b/_extensions/nmfs-opensci/titlepage/images/otter-bar.jpeg new file mode 100644 index 000000000..ceb5dafe7 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/images/otter-bar.jpeg differ diff --git a/_extensions/nmfs-opensci/titlepage/images/ringed-seal.png b/_extensions/nmfs-opensci/titlepage/images/ringed-seal.png new file mode 100755 index 000000000..0f604cf28 Binary files /dev/null and b/_extensions/nmfs-opensci/titlepage/images/ringed-seal.png differ diff --git a/_extensions/nmfs-opensci/titlepage/mathjax.html b/_extensions/nmfs-opensci/titlepage/mathjax.html new file mode 100644 index 000000000..226a06192 --- /dev/null +++ b/_extensions/nmfs-opensci/titlepage/mathjax.html @@ -0,0 +1,15 @@ + + + + \ No newline at end of file diff --git a/_extensions/nmfs-opensci/titlepage/pandoc.tex b/_extensions/nmfs-opensci/titlepage/pandoc.tex new file mode 100644 index 000000000..62b14cdf6 --- /dev/null +++ b/_extensions/nmfs-opensci/titlepage/pandoc.tex @@ -0,0 +1,104 @@ +$if(highlighting-macros)$ +$highlighting-macros$ +$endif$ + +$tightlist.tex()$ +$tables.tex()$ +$graphics.tex()$ +$citations.tex()$ + +$for(header-includes)$ +$header-includes$ +$endfor$ + +\usepackage{hyphenat} +\usepackage{ifthen} +\usepackage{calc} +\usepackage{calculator} + +$if(titlepage-bg-image)$ +\usepackage{graphicx} +\usepackage{wallpaper} +$endif$ + +$if(titlepage-geometry)$ +\usepackage{geometry} +$endif$ + +$if(coverpage)$ +\usepackage{graphicx} +\usepackage{geometry} +\usepackage{afterpage} +\usepackage{tikz} +\usetikzlibrary{calc} +\usetikzlibrary{fadings} +\usepackage[pagecolor=none]{pagecolor} +$endif$ + +$if(titlepage-theme.page-color)$ +\usepackage[pagecolor=none]{pagecolor} +$endif$ + +$if(titlepage)$ +% Set the titlepage font families +$if(titlepage-theme.page-fontfamily)$ +\usepackage{fontspec} +\newfontfamily{\titlepagefont}{$titlepage-theme.page-fontfamily$} +$endif$ + +$if(titlepage-theme.title-fontfamily)$ 
+\usepackage{fontspec} +\newfontfamily{\titlepagetitlefont}{$titlepage-theme.title-fontfamily$} +$endif$ + +$if(titlepage-theme.author-fontfamily)$ +\usepackage{fontspec} +\newfontfamily{\titlepageauthorfont}{$titlepage-theme.author-fontfamily$} +$endif$ + +$if(titlepage-theme.affiliation-fontfamily)$ +\usepackage{fontspec} +\newfontfamily{\titlepageaffiliationfont}{$titlepage-theme.affiliation-fontfamily$} +$endif$ + +$if(titlepage-theme.footer-fontfamily)$ +\usepackage{fontspec} +\newfontfamily{\titlepagefooterfont}{$titlepage-theme.footer-fontfamily$} +$endif$ + +$if(titlepage-theme.header-fontfamily)$ +\usepackage{fontspec} +\newfontfamily{\titlepageheaderfont}{$titlepage-theme.header-fontfamily$} +$endif$ + +$endif$ + +$if(coverpage)$ +% Set the coverpage font families +$if(coverpage-theme.page-fontfamily)$ +\usepackage{fontspec} +\newfontfamily{\coverpagefont}{$coverpage-theme.page-fontfamily$} +$endif$ +$if(coverpage-theme.title-fontfamily)$ +\usepackage{fontspec} +\newfontfamily{\coverpagetitlefont}{$coverpage-theme.title-fontfamily$} +$endif$ +$if(coverpage-theme.author-fontfamily)$ +\usepackage{fontspec} +\newfontfamily{\coverpageauthorfont}{$coverpage-theme.author-fontfamily$} +$endif$ +$if(coverpage-theme.footer-fontfamily)$ +\usepackage{fontspec} +\newfontfamily{\coverpagefooterfont}{$coverpage-theme.footer-fontfamily$} +$endif$ +$if(coverpage-theme.header-fontfamily)$ +\usepackage{fontspec} +\newfontfamily{\coverpageheaderfont}{$coverpage-theme.header-fontfamily$} +$endif$ +$if(coverpage-theme.date-fontfamily)$ +\usepackage{fontspec} +\newfontfamily{\coverpagedatefont}{$coverpage-theme.date-fontfamily$} +$endif$ + +$endif$ + diff --git a/_extensions/nmfs-opensci/titlepage/titlepage-theme.lua b/_extensions/nmfs-opensci/titlepage/titlepage-theme.lua new file mode 100644 index 000000000..7b1f8fb95 --- /dev/null +++ b/_extensions/nmfs-opensci/titlepage/titlepage-theme.lua @@ -0,0 +1,558 @@ +local function isEmpty(s) + return s == nil or s == '' +end + +local function file_exists(name) + local f=io.open(name,"r") + if f~=nil then io.close(f) return true else return false end +end + +local function getVal(s) + return pandoc.utils.stringify(s) +end + +local function is_equal (s, val) + if isEmpty(s) then return false end + if getVal(s) == val then return true end + + return false +end + +local function has_value (tab, val) + for index, value in ipairs(tab) do + if value == val then + return true + end + end + + return false +end + +local function dump(o) + if type(o) == 'table' then + local s = '{ ' + for k,v in pairs(o) do + if type(k) ~= 'number' then k = '"'..k..'"' end + s = s .. '['..k..'] = ' .. dump(v) .. ',' + end + return s .. '} ' + else + return tostring(o) + end +end + +function Meta(m) +--[[ +This function checks that the value the user set is ok and stops with an error message if no. +yamlelement: the yaml metadata. e.g. m["titlepage-theme"]["page-align"] +yamltext: page, how to print the yaml value in the error message. e.g. titlepage-theme: page-align +okvals: a text table of ok styles. e.g. {"right", "center"} +--]] + local function check_yaml (yamlelement, yamltext, okvals) + choice = pandoc.utils.stringify(yamlelement) + if not has_value(okvals, choice) then + print("\n\ntitlepage extension error: " .. yamltext .. " is set to " .. choice .. ". It can be " .. pandoc.utils.stringify(table.concat(okvals, ", ")) .. 
".\n\n") + return false + else + return true + end + + return true + end + +--[[ +This function gets the value of something like titlepage-theme.title-style and sets a value titlepage-theme.title-style.plain (for example). It also +does error checking against okvals. "plain" is always ok and if no value is set then the style is set to plain. +page: titlepage or coverpage +styleement: page, title, subtitle, header, footer, affiliation, etc +okvals: a text table of ok styles. e.g. {"plain", "two-column"} +--]] + local function set_style (page, styleelement, okvals) + yamltext = page .. "-theme" .. ": " .. styleelement .. "-style" + yamlelement = m[page .. "-theme"][styleelement .. "-style"] + if not isEmpty(yamlelement) then + ok = check_yaml (yamlelement, yamltext, okvals) + if ok then + m[page .. "-style-code"][styleelement] = {} + m[page .. "-style-code"][styleelement][getVal(yamlelement)] = true + else + error() + end + else +-- print("\n\ntitlepage extension error: " .. yamltext .. " needs a value. Should have been set in titlepage-theme lua filter.\n\n") +-- error() + m[page .. "-style-code"][styleelement] = {} + m[page .. "-style-code"][styleelement]["plain"] = true + end + end + +--[[ +This function assigns the themevals to the meta data +--]] + local function assign_value (tab) + for i, value in pairs(tab) do + if isEmpty(m['titlepage-theme'][i]) then + m['titlepage-theme'][i] = value + end + end + + return m + end + + local titlepage_table = { + ["academic"] = function (m) + themevals = { + ["elements"] = { + pandoc.MetaInlines{pandoc.RawInline("latex","\\headerblock")}, + pandoc.MetaInlines{pandoc.RawInline("latex","\\logoblock")}, + pandoc.MetaInlines{pandoc.RawInline("latex","\\titleblock")}, + pandoc.MetaInlines{pandoc.RawInline("latex","\\authorblock")}, + pandoc.MetaInlines{pandoc.RawInline("latex","\\vfill")}, + pandoc.MetaInlines{pandoc.RawInline("latex","\\dateblock")} + }, + ["page-align"] = "center", + ["title-style"] = "doublelinetight", + ["title-fontstyle"] = {"huge", "bfseries"}, + ["title-space-after"] = "1.5cm", + ["subtitle-fontstyle"] = {"Large"}, + ["author-style"] = "two-column", + ["affiliation-style"] = "none", + ["author-fontstyle"] = {"textsc"}, + ["affiliation-fontstyle"] = {"large"}, + ["logo-space-after"] = pandoc.MetaInlines{pandoc.RawInline("latex","2\\baselineskip")}, + ["header-fontstyle"] = {"textsc", "LARGE"}, + ["header-space-after"] = "1.5cm", + ["date-fontstyle"] = {"large"} + } + assign_value(themevals) + + return m + end, + ["bg-image"] = function (m) + if isEmpty(m['titlepage-bg-image']) then + m['titlepage-bg-image'] = "corner-bg.png" + end + if isEmpty(m['titlepage-geometry']) then + m['titlepage-geometry'] = pandoc.List({"top=3in", "bottom=1in", "right=1in", "left=1in"}) + end + themevals = { + ["elements"] = { + pandoc.MetaInlines{pandoc.RawInline("latex","\\titleblock")}, + pandoc.MetaInlines{pandoc.RawInline("latex","\\authorblock")}, + pandoc.MetaInlines{pandoc.RawInline("latex","\\affiliationblock")}, + pandoc.MetaInlines{pandoc.RawInline("latex","\\vfill")}, + pandoc.MetaInlines{pandoc.RawInline("latex","\\logoblock")}, + pandoc.MetaInlines{pandoc.RawInline("latex","\\footerblock")} + }, + ["page-align"] = "left", + ["title-style"] = "plain", + ["title-fontstyle"] = {"large", "bfseries"}, + ["title-space-after"] = pandoc.MetaInlines{ + pandoc.RawInline("latex","4\\baselineskip")}, + ["subtitle-fontstyle"] = {"large", "textit"}, + ["author-style"] = "superscript-with-and", + ["author-fontstyle"] = {"large"}, + ["author-space-after"] = 
pandoc.MetaInlines{ + pandoc.RawInline("latex","2\\baselineskip")}, + ["affiliation-style"] = "numbered-list-with-correspondence", + ["affiliation-fontstyle"] = {"large"}, + ["footer-space-after"] = "1pt", + ["affiliation-space-after"] = "1pt", + ["footer-style"] = "plain", + ["footer-fontstyle"] = {"large"}, + ["logo-size"] = pandoc.MetaInlines{ + pandoc.RawInline("latex","0.25\\textheight")}, + ["logo-space-after"] = pandoc.MetaInlines{pandoc.RawInline("latex","2\\baselineskip")}, + ["vrule-width"] = "1pt", + ["bg-image-size"] = pandoc.MetaInlines{ + pandoc.RawInline("latex","0.5\\paperwidth")}, + ["bg-image-location"] = "ULCorner", + } + assign_value(themevals) + + return m + end, + ["classic-lined"] = function (m) + themevals = { + ["elements"] = { + pandoc.MetaInlines{pandoc.RawInline("latex","\\titleblock")}, + pandoc.MetaInlines{pandoc.RawInline("latex","\\authorblock")}, + pandoc.MetaInlines{pandoc.RawInline("latex","\\vfill")}, + pandoc.MetaInlines{pandoc.RawInline("latex","\\logoblock")}, + pandoc.MetaInlines{pandoc.RawInline("latex","\\footerblock")} + }, + ["page-align"] = "center", + ["title-style"] = "doublelinewide", + ["title-fontsize"] = 30, + ["title-fontstyle"] = {"uppercase"}, + ["title-space-after"] = pandoc.MetaInlines{ + pandoc.RawInline("latex","0.1\\textheight")}, + ["subtitle-fontstyle"] = {"Large", "textit"}, + ["author-style"] = "plain", + ["author-sep"] = pandoc.MetaInlines{ + pandoc.RawInline("latex","\\hskip1em")}, + ["author-fontstyle"] = {"Large"}, + ["author-space-after"] = pandoc.MetaInlines{ + pandoc.RawInline("latex","2\\baselineskip")}, + ["affiliation-style"] = "numbered-list-with-correspondence", + ["affiliation-fontstyle"] = {"large"}, + ["affiliation-space-after"] = "1pt", + ["footer-style"] = "plain", + ["footer-fontstyle"] = {"large", "textsc"}, + ["footer-space-after"] = "1pt", + ["logo-size"] = pandoc.MetaInlines{ + pandoc.RawInline("latex","0.25\\textheight")}, + ["logo-space-after"] = "1cm", + } + assign_value(themevals) + + return m + end, + ["colorbox"] = function (m) + themevals = { + ["elements"] = { + pandoc.MetaInlines{pandoc.RawInline("latex","\\titleblock")}, + pandoc.MetaInlines{pandoc.RawInline("latex","\\vfill")}, + pandoc.MetaInlines{pandoc.RawInline("latex","\\authorblock")} + }, + ["page-align"] = "left", + ["title-style"] = "colorbox", + ["title-fontsize"] = 40, + ["title-space-after"] = pandoc.MetaInlines{ + pandoc.RawInline("latex","2\\baselineskip")}, + ["subtitle-fontsize"] = 25, + ["subtitle-fontstyle"] = {"bfseries"}, + ["title-subtitle-space-between"] = pandoc.MetaInlines{ + pandoc.RawInline("latex","5\\baselineskip")}, + ["author-style"] = "plain", + ["author-sep"] = "newline", + ["author-fontstyle"] = {"Large"}, + ["author-align"] = "right", + ["author-space-after"] = pandoc.MetaInlines{ + pandoc.RawInline("latex","2\\baselineskip")}, + ["title-colorbox-borderwidth"] = "2mm", + ["title-colorbox-bordercolor"] = "black", + } + assign_value(themevals) + + return m + end, + ["formal"] = function (m) + themevals = { + ["elements"] = { + pandoc.MetaInlines{pandoc.RawInline("latex","\\titleblock")}, + pandoc.MetaInlines{pandoc.RawInline("latex","\\authorblock")}, + pandoc.MetaInlines{pandoc.RawInline("latex","\\vfill")}, + pandoc.MetaInlines{pandoc.RawInline("latex","A report presented at the annual\\\\meeting on 10 August 2025\\\\ \\vspace{0.8cm}")}, + pandoc.MetaInlines{pandoc.RawInline("latex","\\logoblock")}, + pandoc.MetaInlines{pandoc.RawInline("latex","\\footerblock")} + }, + ["page-align"] = "center", + 
["title-style"] = "plain", + ["title-fontstyle"] = {"Huge", "textbf"}, + ["title-space-after"] = "1.5cm", + ["subtitle-fontstyle"] = {"LARGE"}, + ["title-subtitle-space-between"] = "0.5cm", + ["author-style"] = "plain", + ["author-sep"] = "newline", + ["author-fontstyle"] = {"textbf"}, + ["author-space-after"] = pandoc.MetaInlines{ + pandoc.RawInline("latex","2\\baselineskip")}, + ["affiliation-style"] = "numbered-list-with-correspondence", + ["affiliation-fontstyle"] = {"large"}, + ["affiliation-space-after"] = "1pt", + ["footer-style"] = "plain", + ["footer-fontstyle"] = {"Large", "textsc"}, + ["footer-space-after"] = "1pt", + ["logo-size"] = pandoc.MetaInlines{ + pandoc.RawInline("latex","0.4\\textwidth")}, + ["logo-space-after"] = "1cm", + } + assign_value(themevals) + + return m + end, + ["vline"] = function (m) + themevals = { + ["elements"] = { + pandoc.MetaInlines{pandoc.RawInline("latex","\\titleblock")}, + pandoc.MetaInlines{pandoc.RawInline("latex","\\authorblock")}, + pandoc.MetaInlines{pandoc.RawInline("latex","\\affiliationblock")}, + pandoc.MetaInlines{pandoc.RawInline("latex","\\vfill")}, + pandoc.MetaInlines{pandoc.RawInline("latex","\\logoblock")}, + pandoc.MetaInlines{pandoc.RawInline("latex","\\footerblock")} + }, + ["page-align"] = "left", + ["title-style"] = "plain", + ["title-fontstyle"] = {"large", "bfseries"}, + ["title-space-after"] = pandoc.MetaInlines{ + pandoc.RawInline("latex","4\\baselineskip")}, + ["subtitle-fontstyle"] = {"large", "textit"}, + ["author-style"] = "superscript-with-and", + ["author-fontstyle"] = {"large"}, + ["author-space-after"] = pandoc.MetaInlines{ + pandoc.RawInline("latex","2\\baselineskip")}, + ["affiliation-style"] = "numbered-list-with-correspondence", + ["affiliation-fontstyle"] = {"large"}, + ["affiliation-space-after"] = "1pt", + ["footer-style"] = "plain", + ["footer-fontstyle"] = {"large"}, + ["footer-space-after"] = "1pt", + ["logo-size"] = pandoc.MetaInlines{ + pandoc.RawInline("latex","0.15\\textheight")}, + ["logo-space-after"] = pandoc.MetaInlines{ + pandoc.RawInline("latex","0.1\\textheight")}, + ["vrule-width"] = "2pt", + ["vrule-align"] = "left", + ["vrule-color"] = "black", + } + assign_value(themevals) + + return m + end, + ["vline-text"] = function (m) + themevals = { + ["elements"] = { + pandoc.MetaInlines{pandoc.RawInline("latex","\\titleblock")}, + pandoc.MetaInlines{pandoc.RawInline("latex","\\authorblock")}, + pandoc.MetaInlines{pandoc.RawInline("latex","\\affiliationblock")}, + pandoc.MetaInlines{pandoc.RawInline("latex","\\vfill")}, + pandoc.MetaInlines{pandoc.RawInline("latex","\\logoblock")}, + pandoc.MetaInlines{pandoc.RawInline("latex","\\footerblock")} + }, + ["page-align"] = "left", + ["title-style"] = "plain", + ["title-fontstyle"] = {"large", "bfseries"}, + ["title-space-after"] = pandoc.MetaInlines{ + pandoc.RawInline("latex","4\\baselineskip")}, + ["subtitle-fontstyle"] = {"large", "textit"}, + ["author-style"] = "superscript-with-and", + ["author-fontstyle"] = {"large"}, + ["author-space-after"] = pandoc.MetaInlines{ + pandoc.RawInline("latex","2\\baselineskip")}, + ["affiliation-style"] = "numbered-list-with-correspondence", + ["affiliation-fontstyle"] = {"large"}, + ["affiliation-space-after"] = "1pt", + ["footer-style"] = "plain", + ["footer-fontstyle"] = {"large"}, + ["footer-space-after"] = "1pt", + ["logo-size"] = pandoc.MetaInlines{ + pandoc.RawInline("latex","0.15\\textheight")}, + ["logo-space-after"] = pandoc.MetaInlines{ + pandoc.RawInline("latex","0.1\\textheight")}, + ["vrule-width"] = 
"0.5in", + ["vrule-align"] = "left", + ["vrule-color"] = "blue", + ["vrule-text-color"] = "white", + ["vrule-text-fontstyle"] = {"bfseries", "Large"}, + ["vrule-text"] = "Add your text in vrule-text" + } + assign_value(themevals) + + return m + end, + ["plain"] = function (m) + themevals = { + ["elements"] = { + pandoc.MetaInlines{pandoc.RawInline("latex","\\headerblock")}, + pandoc.MetaInlines{pandoc.RawInline("latex","\\titleblock")}, + pandoc.MetaInlines{pandoc.RawInline("latex","\\authorblock")}, + pandoc.MetaInlines{pandoc.RawInline("latex","\\affiliationblock")}, + pandoc.MetaInlines{pandoc.RawInline("latex","\\vfill")}, + pandoc.MetaInlines{pandoc.RawInline("latex","\\logoblock")}, + pandoc.MetaInlines{pandoc.RawInline("latex","\\footerblock")} + }, + ["page-align"] = "left", + ["title-style"] = "plain", + ["title-fontstyle"] = {"Large"}, + ["title-space-after"] = pandoc.MetaInlines{ + pandoc.RawInline("latex","4\\baselineskip")}, + ["title-subtitle-space-between"] = "1pt", + ["subtitle-fontstyle"] = {"textit"}, + ["author-style"] = "superscript-with-and", + ["author-space-after"] = pandoc.MetaInlines{ + pandoc.RawInline("latex","2\\baselineskip")}, + ["affiliation-style"] = "numbered-list-with-correspondence", + ["affiliation-space-after"] = pandoc.MetaInlines{ + pandoc.RawInline("latex","2\\baselineskip")}, + ["header-style"] = "plain", + ["header-space-after"] = pandoc.MetaInlines{ + pandoc.RawInline("latex","0.2\\textheight")}, + ["footer-style"] = "plain", + ["footer-space-after"] = "1pt", + ["logo-size"] = pandoc.MetaInlines{ + pandoc.RawInline("latex","0.1\\textheight")}, + ["logo-space-after"] = pandoc.MetaInlines{ + pandoc.RawInline("latex","1\\baselineskip")}, + } + assign_value(themevals) + + return m + end, + ["none"] = function (m) return m end + } + + m['titlepage-file'] = false + if isEmpty(m.titlepage) then m['titlepage'] = "plain" end + if getVal(m.titlepage) == "false" then m['titlepage'] = "none" end + if getVal(m.titlepage) == "true" then m['titlepage'] = "plain" end + if getVal(m.titlepage) == "none" then + m['titlepage-true'] = false + else + m['titlepage-true'] = true + end + choice = pandoc.utils.stringify(m.titlepage) + okvals = {"plain", "vline", "vline-text", "bg-image", "colorbox", "academic", "formal", "classic-lined"} + isatheme = has_value (okvals, choice) + if not isatheme and choice ~= "none" then + if not file_exists(choice) then + error("titlepage extension error: titlepage can be a tex file or one of the themes: " .. pandoc.utils.stringify(table.concat(okvals, ", ")) .. 
".") + else + m['titlepage-file'] = true + m['titlepage-filename'] = choice + m['titlepage'] = "file" + end + end + if m['titlepage-file'] and not isEmpty(m['titlepage-theme']) then + print("\n\ntitlepage extension message: since you passed in a static titlepage file, titlepage-theme is ignored.n\n") + end + if not m['titlepage-file'] and choice ~= "none" then + if isEmpty(m['titlepage-theme']) then + m['titlepage-theme'] = {} + end + titlepage_table[choice](m) -- add the theme defaults + end + +-- Only for themes +-- titlepage-theme will exist if using a theme +if not m['titlepage-file'] and m['titlepage-true'] then +--[[ +Error checking and setting the style codes +--]] + -- Style codes + m["titlepage-style-code"] = {} + okvals = {"none", "plain", "colorbox", "doublelinewide", "doublelinetight"} + set_style("titlepage", "title", okvals) + set_style("titlepage", "footer", okvals) + set_style("titlepage", "header", okvals) + set_style("titlepage", "date", okvals) + okvals = {"none", "plain", "plain-with-and", "superscript", "superscript-with-and", "two-column", "author-address"} + set_style("titlepage", "author", okvals) + okvals = {"none", "numbered-list", "numbered-list-with-correspondence"} + set_style("titlepage", "affiliation", okvals) + if is_equal(m['titlepage-theme']["author-style"], "author-address") and is_equal(m['titlepage-theme']["author-align"], "spread") then + error("\n\nquarto_titlepages error: If author-style is two-column, then author-align cannot be spread.\n\n") + end + +--[[ +Set the fontsize defaults +if page-fontsize was passed in or if fontsize passed in but not spacing +--]] + for key, val in pairs({"title", "author", "affiliation", "footer", "header", "date"}) do + if isEmpty(m["titlepage-theme"][val .. "-fontsize"]) then + if not isEmpty(m["titlepage-theme"]["page-fontsize"]) then + m["titlepage-theme"][val .. "-fontsize"] = getVal(m["titlepage-theme"]["page-fontsize"]) + end + end + end + for key, val in pairs({"page", "title", "subtitle", "author", "affiliation", "footer", "header", "date"}) do + if not isEmpty(m['titlepage-theme'][val .. "-fontsize"]) then + if isEmpty(m['titlepage-theme'][val .. "-spacing"]) then + m['titlepage-theme'][val .. "-spacing"] = 1.2*getVal(m['titlepage-theme'][val .. 
"-fontsize"]) + end + end + end + +--[[ +Set author sep character +--]] + if isEmpty(m['titlepage-theme']["author-sep"]) then + m['titlepage-theme']["author-sep"] = pandoc.MetaInlines{ + pandoc.RawInline("latex",", ")} + end + if getVal(m['titlepage-theme']["author-sep"]) == "newline" then + m['titlepage-theme']["author-sep"] = pandoc.MetaInlines{ + pandoc.RawInline("latex","\\\\")} + end + +--[[ +Set affiliation sep character +--]] + if isEmpty(m['titlepage-theme']["affiliation-sep"]) then + m['titlepage-theme']["affiliation-sep"] = pandoc.MetaInlines{ + pandoc.RawInline("latex",",~")} + end + if getVal(m['titlepage-theme']["affiliation-sep"]) == "newline" then + m['titlepage-theme']["affiliation-sep"] = pandoc.MetaInlines{ + pandoc.RawInline("latex","\\\\")} + end + +--[[ +Set vrule defaults +--]] + if not isEmpty(m['titlepage-theme']["vrule-width"]) then + if isEmpty(m['titlepage-theme']["vrule-color"]) then + m['titlepage-theme']["vrule-color"] = "black" + end + if isEmpty(m['titlepage-theme']["vrule-space"]) then + m['titlepage-theme']["vrule-space"] = pandoc.MetaInlines{ + pandoc.RawInline("latex","0.05\\textwidth")} + end + if isEmpty(m['titlepage-theme']["vrule-align"]) then + m['titlepage-theme']["vrule-align"] = "left" + end + end + if not isEmpty(m["titlepage-theme"]["vrule-align"]) then + okvals = {"left", "right", "leftright"} + ok = check_yaml (m["titlepage-theme"]["vrule-align"], "titlepage-theme: vrule-align", okvals) + if not ok then error("") end + end + +--[[ +Set the defaults for the titlepage alignments +default titlepage alignment is left +--]] + if isEmpty(m['titlepage-theme']["page-align"]) then + m['titlepage-theme']["page-align"] = "left" + end + for key, val in pairs({"page", "title", "author", "affiliation", "footer", "header", "logo", "date"}) do + if not isEmpty(m["titlepage-theme"][val .. "-align"]) then + okvals = {"right", "left", "center"} + if has_value({"title", "author", "footer", "header"}, val) then table.insert(okvals, "spread") end + ok = check_yaml (m["titlepage-theme"][val .. "-align"], "titlepage-theme: " .. val .. "-align", okvals) + if not ok then error("") end + end + end + +--[[ +Set bg-image defaults +--]] + if not isEmpty(m['titlepage-bg-image']) then + if isEmpty(m['titlepage-theme']["bg-image-size"]) then + m['titlepage-theme']["bg-image-size"] = pandoc.MetaInlines{ + pandoc.RawInline("latex","\\paperwidth")} + end + if not isEmpty(m["titlepage-theme"]["bg-image-location"]) then + okvals = {"ULCorner", "URCorner", "LLCorner", "LRCorner", "TileSquare", "Center"} + ok = check_yaml (m["titlepage-theme"]["bg-image-location"], "titlepage-theme: bg-image-location", okvals) + if not ok then error("") end + end + end + +--[[ +Set logo defaults +--]] + if not isEmpty(m['titlepage-logo']) then + if isEmpty(m['titlepage-theme']["logo-size"]) then + m['titlepage-theme']["logo-size"] = pandoc.MetaInlines{ + pandoc.RawInline("latex","0.2\\paperwidth")} + end + end + +end -- end the theme section + + return m + +end + + diff --git a/_quarto.yml b/_quarto.yml index 39b16bb6e..dfb9eb409 100644 --- a/_quarto.yml +++ b/_quarto.yml @@ -4,13 +4,14 @@ project: preview: browser: true navigate: true - render: - - "*.qmd" - - "contents/*.qmd" - - "contents/*/*.qmd" - - "contents/*/*/*.qmd" - - "contents/*/*/*/*.qmd" - - "contents/*/*/*/*/*.qmd" # contents/labs////*.qmd + +website: + announcement: + icon: star-half + dismissable: false + content: 🌟 Help Us Reach 1,000 GitHub Stars! 
🌟 For every 25 stars, Arduino and SEEED will donate a NiclaVision or XIAO ESP32S3 for AI education. Click here to ⭐ + type: info + position: below-navbar book: google-analytics: @@ -31,10 +32,6 @@ book: pinned: false collapse: true back-to-top-navigation: true - right: - - text: 🌟 Help Us Reach 1,000 GitHub Stars! 🌟
For every 25 stars, Arduino and SEEED donate
a NiclaVision or XIAO ESP32S3 for AI education.
-        href: https://github.com/harvard-edge/cs249r_book
-  favicon: favicon.png
   cover-image: cover-image-transparent.png
   cover-image-alt: "Cover image."
@@ -49,8 +46,29 @@ book:
   page-navigation: true
   title: "Machine Learning Systems"
-  subtitle: "with tinyML"
-  abstract: "{{< var title.long >}} offers readers an entry point to understand machine learning (ML) systems by grounding concepts in applied ML. As the demand for efficient and scalable ML solutions grows, the ability to construct robust ML pipelines becomes increasingly crucial. This book aims to demystify the process of developing complete ML systems suitable for deployment, spanning key phases like data collection, model design, optimization, acceleration, security hardening, and integration, all from a systems perspective. The text covers a wide range of concepts relevant to general ML engineering across industries and applications, using TinyML as a pedagogical tool due to its global accessibility. Readers will learn basic principles around designing ML model architectures, hardware-aware training strategies, performant inference optimization, and benchmarking methodologies. The book also explores crucial systems considerations in areas like reliability, privacy, responsible AI, and solution validation. Enjoy reading it!"
+  subtitle: "Principles and Practices of Engineering Artificially Intelligent Systems"
+  date: "`r format(Sys.Date(), '%B %d, %Y')`"
+
+  author:
+    name: Vijay Janapa Reddi
+    affiliation: Harvard University
+
+  abstract: |
+    {{< var title.long >}} offers readers an entry point to understand
+    machine learning (ML) systems by grounding concepts in applied ML. As
+    the demand for efficient and scalable ML solutions grows, the ability
+    to construct robust ML pipelines becomes increasingly crucial. This
+    book aims to demystify the process of developing complete ML systems
+    suitable for deployment, spanning key phases like data collection,
+    model design, optimization, acceleration, security hardening, and
+    integration, all from a systems perspective. The text covers a wide
+    range of concepts relevant to general ML engineering across industries
+    and applications, using TinyML as a pedagogical tool due to its global
+    accessibility. Readers will learn basic principles around designing ML
+    model architectures, hardware-aware training strategies, performant
+    inference optimization, and benchmarking methodologies. The book also
+    explores crucial systems considerations in areas like reliability,
+    privacy, responsible AI, and solution validation. Enjoy reading it!
repo-url: https://github.com/harvard-edge/cs249r_book repo-branch: dev @@ -185,22 +203,47 @@ comments: giscus: repo: harvard-edge/cs249r_book +crossref: + appendix-title: "Appendix" + appendix-delim: ":" + + custom: + - kind: float + reference-prefix: Lab + key: labq + latex-env: lab + + - kind: float + reference-prefix: Exercise + key: exr + latex-env: exr + + - kind: float + reference-prefix: Video + key: vid + latex-env: vid + +editor: + render-on-save: true + format: html: - reference-location: margin - citation-location: margin - theme: light: - - spacelab + - default - style.scss - style-light.scss dark: - darkly - style.scss - style-dark.scss - mainfont: Nunito - fontsize: 1rem + + table: + classes: [table-striped, table-hover] + + reference-location: margin + citation-location: margin + sidenote: true # Enable sidenotes for Tufte style linkcolor: "#A51C30" urlcolor: "#A51C30" highlight-style: github @@ -215,132 +258,104 @@ format: fig-height: 6 number-depth: 3 toc: true - toc-depth: 4 + toc-depth: 4 include-in-header: text: | - - - - - - +# +# +# +# +# + citeproc: true - pdf: + titlepage-pdf: documentclass: scrbook - classoption: [abstract] - keep-tex: true - toc: true # Table of Contents - toc-depth: 3 # Depth of headings to include in TOC - number-sections: true - latex-engine: xelatex # or pdflatex, lualatex, etc. - geometry: margin=1in - fontsize: 10pt - papersize: letter # or a4, etc. - fig_caption: true - link-citations: true - citation_package: natbib # or biblatex - fig-cap-location: bottom - tbl-cap-location: top - - include-in-header: - text: | - \usepackage{fancyhdr} - \usepackage{graphicx} - \usepackage{mathptmx} - \usepackage{fontspec} - \usepackage{underscore} - \usepackage[english]{babel} - \usepackage{etoolbox} - \usepackage{fontspec} - \usepackage{newpxtext} % Palatino-like font - \usepackage{hyperref} % For hyperlinks - \usepackage{xcolor} - \usepackage[format=plain, - labelfont={bf,it}, - textfont=it, labelsep=space]{caption} - - \definecolor{crimson}{RGB}{165, 28, 48} - - \hypersetup{ - colorlinks=true, % Enable colored links - linkcolor=crimson, % Color of internal links - citecolor=crimson, % Color of citations - urlcolor=crimson % Color of URLs - } - - \patchcmd{\chapter}{\thispagestyle{plain}}{\thispagestyle{fancy}}{}{} - - %\newfontfamily\tocfont{Times New Roman} - - \let\endtitlepage\relax - - \AtBeginDocument{ - \begin{titlepage} - \centering - \vspace{-3em} - \includegraphics[width=\textwidth]{cover-image-white.png} % Adjust the size and path to your image - - {{\Huge\bfseries Machine Learning Systems}\\[1em] \Large with TinyML\par} - - \vspace*{\fill} - {\large Written, edited and curated by \\[.2cm] Prof. 
Vijay Janapa Reddi \\[.2cm] Harvard University \\[1em] \normalsize {\itshape With special thanks to the community for their contributions and support.} \\[1em] \pagebreak \vfill \scriptsize Last Modified: \today\par \vfill} - \vspace*{\fill} - - \end{titlepage} - - %\addtocontents{toc}{\tocfont} - } - - \let\endtitlepage\relax - - \pagestyle{fancy} - \fancyhf{} % Clear all header and footer fields - \fancyhead[LE,RO]{\thepage} % Page number on the left on even pages, right on odd pages - \fancyhead[RE,LO]{\nouppercase{\leftmark}} % Chapter name on both sides - \renewcommand{\headrulewidth}{0.4pt} - \renewcommand{\footrulewidth}{0pt} + classoption: [abstract,titlepage] + + coverpage: true + coverpage-title: "Machine Learning Systems" + coverpage-bg-image: "cover-image-transparent.png" + coverpage-author: ["Vijay", "Janapa Reddi"] + coverpage-theme: + page-text-align: "center" - \fancypagestyle{plain}{% - \fancyhf{} % clear all header and footer fields - \fancyhead[LE,RO]{\thepage} % Page number - \renewcommand{\headrulewidth}{0.4pt} - \renewcommand{\footrulewidth}{0pt} - } - - \addtokomafont{disposition}{\rmfamily\color{crimson}} - \addtokomafont{chapter}{\color{crimson}} - \addtokomafont{section}{\color{crimson}} - - % Define the abstract environment - \newenvironment{abstract}{% - \chapter*{\abstractname}% - \addcontentsline{toc}{chapter}{\abstractname}% - \small - }{% - \clearpage - } + bg-image-left: "0.225\\paperwidth" + bg-image-bottom: 7 + bg-image-rotate: 0 + bg-image-opacity: 1.0 + + author-style: "plain" + author-sep: "newline" + author-fontsize: 20 + author-align: "right" + author-bottom: "0.15\\paperwidth" + author-left: 7in + author-width: 6in -crossref: - appendix-title: "Appendix" - appendix-delim: ":" - - custom: - - kind: float - reference-prefix: Lab - key: labq - latex-env: lab - - - kind: float - reference-prefix: Exercise - key: exr - latex-env: exr - - - kind: float - reference-prefix: Video - key: vid - latex-env: vid + footer-style: "none" + header-style: "none" + date-style: "none" + + title-fontsize: 57 + title-left: "0.075\\paperwidth" + title-bottom: "0.375\\paperwidth" + title-width: "0.9\\paperwidth" + + titlepage: true + titlepage-theme: + elements: [ "\\titleblock", + "Prof. 
Vijay Janapa Reddi", + "School of Engineering and Applied Sciences", + "Harvard University", + "\\vfill", + "With heartfelt gratitude to the community for their invaluable contributions and steadfast support.", + "\\vfill"] + + page-align: "left" + title-style: "plain" + title-fontstyle: ["huge", "bfseries"] + title-space-after: "4\\baselineskip" + title-subtitle-space-between: "0.05\\textheight" + subtitle-fontstyle: ["large", "textit"] + author-style: "superscript-with-and" + author-fontstyle: "large" + affiliation-style: "numbered-list-with-correspondence" + affiliation-fontstyle: "large" + affiliation-space-after: "0pt" + footer-style: "plain" + footer-fontstyle: "large" + logo-size: "0.15\\textheight" + logo-space-after: "1\\baselineskip" + vrule-width: "2pt" + vrule-align: "left" + vrule-color: "black" -editor: - render-on-save: true + toc: true + lof: true + lot: true + top-level-division: chapter + latex-engine: xelatex + number-sections: true + toc-depth: 3 + keep-tex: true + citation-package: natbib + fig-caption: true + link-citations: true + biblio-title: "References" + cite-method: citeproc + reference-location: margin + citation-location: block + title-block-style: none + indent: 0px + fontsize: 10pt + fig-cap-location: margin + tbl-cap-location: margin + hyperrefoptions: + - linktoc=all + - pdfwindowui + - pdfpagemode=FullScreen + - pdfpagelayout=TwoPageRight + include-in-header: + - file: "tex/header-includes.tex" diff --git a/_variables.yml b/_variables.yml index a9ad80e26..23ea952bf 100644 --- a/_variables.yml +++ b/_variables.yml @@ -4,5 +4,5 @@ email: info: mailto:vj@eecs.harvard.edu?subject="CS249r%20MLSys%20with%20TinyML%20Book%20-%20" title: - long: "Machine Learning Systems with TinyML" + long: "Machine Learning Systems" short: "Machine Learning Systems" diff --git a/contents/ai_for_good/ai_for_good.qmd b/contents/ai_for_good/ai_for_good.qmd index 0ba45801e..4f1ce2668 100644 --- a/contents/ai_for_good/ai_for_good.qmd +++ b/contents/ai_for_good/ai_for_good.qmd @@ -38,7 +38,7 @@ What is special about the SDGs is that they are a collection of interlinked obje A recent study [@vinuesa2020role] highlights the influence of AI on all aspects of sustainable development, particularly on the 17 Sustainable Development Goals (SDGs) and 169 targets internationally defined in the 2030 Agenda for Sustainable Development. The study shows that AI can act as an enabler for 134 targets through technological improvements, but it also highlights the challenges of AI on some targets. The study shows that AI can benefit 67 targets when considering AI and societal outcomes. Still, it also warns about the issues related to the implementation of AI in countries with different cultural values and wealth. -![United Nations Sustainable Development Goals (SDG). Credit: [United Nations](https://sdgs.un.org/goals).](https://www.un.org/sustainabledevelopment/wp-content/uploads/2015/12/english_SDG_17goals_poster_all_languages_with_UN_emblem_1.png){#fig-sdg} +![United Nations Sustainable Development Goals (SDG). Source: [United Nations](https://sdgs.un.org/goals).](https://www.un.org/sustainabledevelopment/wp-content/uploads/2015/12/english_SDG_17goals_poster_all_languages_with_UN_emblem_1.png){#fig-sdg} In our book's context, TinyML could help advance at least some of these SDG goals. @@ -166,7 +166,7 @@ When buildings collapse after earthquakes, small drones can prove invaluable. 
Eq # Learning to Seek -{{< video >}} +{{< video https://www.youtube.com/watch?v=wmVKbX7MOnU >}} ::: @@ -174,7 +174,7 @@ Crucially, onboard sensors and TinyML processors analyze real-time data to ident :::{#vid-sniffybug .callout-important} -{{< video >}} +{{< video https://www.youtube.com/watch?v=hj_SBSpK5qg >}} ::: @@ -206,7 +206,7 @@ Similarly, mobility devices could use on-device vision processing to identify ob :::{#vid-envision .callout-important} -{{< video >}} +{{< video https://www.youtube.com/watch?v=oGWinIKDOdc >}} ::: @@ -290,3 +290,6 @@ In addition to exercises, we offer a series of hands-on labs allowing students t * _Coming soon._ ::: + + + diff --git a/contents/benchmarking/benchmarking.qmd b/contents/benchmarking/benchmarking.qmd index be40b9324..0740c22e8 100644 --- a/contents/benchmarking/benchmarking.qmd +++ b/contents/benchmarking/benchmarking.qmd @@ -551,7 +551,7 @@ One way to achieve transparency is through the use of open-source benchmarks. Op One example is the MLPerf Tiny. It's an open-source framework designed to make it easy to compare different solutions in the world of TinyML. Its modular design allows components to be swapped out for comparison or improvement. The reference implementations, shown in green and orange in @fig-ml-perf, act as the baseline for results. TinyML often needs optimization across the entire system, and users can contribute by focusing on specific parts, like quantization. The modular benchmark design allows users to showcase their contributions and competitive advantage by modifying a reference implementation. In short, MLPerf Tiny offers a flexible and modular way to assess and enhance TinyML applications, making it easier to compare and improve different aspects of the technology. -![MLPerf Tiny modular design. Credit: @mattson2020mlperf.](images/png/mlperf_tiny.png){#fig-ml-perf} +![MLPerf Tiny modular design. Source: @mattson2020mlperf.](images/png/mlperf_tiny.png){#fig-ml-perf} Another method for achieving transparency is through peer review of benchmarks. This involves having independent experts review and validate the benchmark's methodology, data sets, and results to ensure their credibility and reliability. Peer review can provide a valuable means of verifying the accuracy of benchmark tests and help build confidence in the results. @@ -573,7 +573,7 @@ Machine learning datasets have a rich history and have evolved significantly ove The [MNIST dataset](https://www.tensorflow.org/datasets/catalog/mnist), created by Yann LeCun, Corinna Cortes, and Christopher J.C. Burges in 1998, can be considered a cornerstone in the history of machine learning datasets. It comprises 70,000 labeled 28x28 pixel grayscale images of handwritten digits (0-9). MNIST has been widely used for benchmarking algorithms in image processing and machine learning as a starting point for many researchers and practitioners. @fig-mnist shows some examples of handwritten digits. -![MNIST handwritten digits. Credit: [Suvanjanprasai.](https://en.wikipedia.org/wiki/File:MnistExamplesModified.png)](images/png/mnist.png){#fig-mnist} +![MNIST handwritten digits. 
Source: [Suvanjanprasai.](https://en.wikipedia.org/wiki/File:MnistExamplesModified.png)](images/png/mnist.png){#fig-mnist} #### ImageNet (2009) @@ -583,7 +583,7 @@ Fast forward to 2009, and we see the introduction of the [ImageNet dataset](http The [Common Objects in Context (COCO) dataset](https://cocodataset.org/) [@lin2014microsoft], released in 2014, further expanded the landscape of machine learning datasets by introducing a richer set of annotations. COCO consists of images containing complex scenes with multiple objects, and each image is annotated with object bounding boxes, segmentation masks, and captions. This dataset has been instrumental in advancing research in object detection, segmentation, and image captioning. -![Coco dataset. Credit: Coco.](images/png/coco.png){#fig-coco} +![Coco dataset. Source: Coco.](images/png/coco.png){#fig-coco} #### GPT-3 (2020) @@ -659,7 +659,7 @@ FLOPs measure the number of floating-point operations a model performs to genera @fig-flops, from [@bianco2018benchmark], shows the relationship between Top-1 Accuracy on ImageNet (y-axis), the model's G-FLOPs (x-axis), and the model's parameter count (circle-size). -![A graph that depicts the top-1 imagenet accuracy vs. the FLOP count of a model along with the model's parameter count. The figure shows a overall tradeoff between model complexity and accuracy, although some model architectures are more efficiency than others. Credit: @bianco2018benchmark.](images/png/model_FLOPS_VS_TOP_1.png){#fig-flops} +![A graph that depicts the top-1 imagenet accuracy vs. the FLOP count of a model along with the model's parameter count. The figure shows a overall tradeoff between model complexity and accuracy, although some model architectures are more efficiency than others. Source: @bianco2018benchmark.](images/png/model_FLOPS_VS_TOP_1.png){#fig-flops} Let's consider an example. BERT [Bidirectional Encoder Representations from Transformers] [@devlin2018bert], a popular natural language processing model, has over 340 million parameters, making it a large model with high accuracy and impressive performance across various tasks. However, the sheer size of BERT, coupled with its high FLOP count, makes it a computationally intensive model that may not be suitable for real-time applications or deployment on edge devices with limited computational capabilities. @@ -723,7 +723,7 @@ For the past several years, AI has focused on developing increasingly sophistica However, growing concerns about issues like bias, safety, and robustness persist even in models that achieve high accuracy on standard benchmarks. Additionally, some popular datasets used for evaluating models are beginning to saturate, with models reaching near-perfect performance on existing test splits [@kiela2021dynabench]. As a simple example, there are test images in the classic MNIST handwritten digit dataset that may look indecipherable to most human evaluators but were assigned a label when the dataset was created - models that happen to agree with those labels may appear to exhibit superhuman performance but instead may only be capturing idiosyncrasies of the labeling and acquisition process from the dataset's creation in 1994. In the same spirit, computer vision researchers now ask, "Are we done with ImageNet?" [@beyer2020we]. This highlights limitations in the conventional model-centric approach of optimizing accuracy on fixed datasets through architectural innovations. -![AI vs human performane. 
Credit: @kiela2021dynabench.](images/png/dynabench.png){#fig-superhuman-perf} +![AI vs human performane. Source: @kiela2021dynabench.](images/png/dynabench.png){#fig-superhuman-perf} An alternative paradigm is emerging called data-centric AI. Rather than treating data as static and focusing narrowly on model performance, this approach recognizes that models are only as good as their training data. So, the emphasis shifts to curating high-quality datasets that better reflect real-world complexity, developing more informative evaluation benchmarks, and carefully considering how data is sampled, preprocessed, and augmented. The goal is to optimize model behavior by improving the data rather than just optimizing metrics on flawed datasets. Data-centric AI critically examines and enhances the data itself to produce beneficial AI. This reflects an important evolution in mindset as the field addresses the shortcomings of narrow benchmarking. diff --git a/contents/contributors.qmd b/contents/contributors.qmd index 9b26cedfa..690734dd2 100644 --- a/contents/contributors.qmd +++ b/contents/contributors.qmd @@ -75,95 +75,97 @@ We extend our sincere thanks to the diverse group of individuals who have genero Vijay Janapa Reddi
  Ikechukwu Uchendu<br>
- naeemkh<br>
+ Naeem Khoshnevis<br>
  Douwe den Blanken<br>
- Shanzeh Batool<br>
+ shanzehbatool<br>
- Kleinbard<br>
+ kai4avaya<br>
  Jared Ping<br>
- eliasab16<br>
- ishapira<br>
+ Elias Nuwara<br>
+ Itai Shapira<br>
  Maximilian Lam<br>
  Marcelo Rovai<br>
  Matthew Stewart<br>
  Jayson Lin<br>
- Jeffrey Ma<br>
  Sophia Cho<br>
+ Jeffrey Ma<br>
  Korneel Van den Berghe<br>
- Zishen<br>
+ jasonjabbour<br>
+ Zishen Wan<br>
  Colby Banbury<br>
- Andrea Murillo<br>
- Alex Rodriguez<br>
+ Andrea<br>
  Srivatsan Krishnan<br>
- Divya<br>
- Aghyad Deeb<br>
- Aghyad Deeb<br>
+ Alex Rodriguez<br>
+ Divya Amirtharaj<br>
  arnaumarin<br>
+ Aghyad Deeb<br>
  Abdulrahman Mahmoud<br>
- oishib<br>
- Emil Njor<br>
- Michael Schnebly<br>
+ Aghyad Deeb<br>
  ELSuitorHarvard<br>
+ oishib<br>
+ Jared Ni<br>
- Jared Ni<br>
- Jae-Won Chung<br>
+ Emil Njor<br>
+ Michael Schnebly<br>
  Mark Mazumder<br>
- YU SHUN, HSIAO<br>
+ Jae-Won Chung<br>
  Sara Khosravi<br>
+ Yu-Shun Hsiao<br>
  Henry Bae<br>
+ eurashin<br>
  Andrew Bass<br>
- Aditi Raju<br>
  Shvetank Prakash<br>
- Pong Trairatvorakul<br>
- eurashin<br>
- Jennifer Zhou<br>
+ Aditi Raju<br>
  Marco Zennaro<br>
- Bruno Scaglione<br>
+ Pong Trairatvorakul<br>
+ Jennifer Zhou<br>
  Gauri Jain<br>
- Alex Oesterling<br>
- Eric D<br>
  Allen-Kuang<br>
  Sercan Aygün<br>
- Costin-Andrei Oncescu<br>
+ Bruno Scaglione<br>
+ gnodipac886<br>
+ alex-oesterling<br>
  Annie Laurie Cook<br>
+ Yu-Shun Hsiao<br>
+ Costin-Andrei Oncescu<br>
  Batur Arslan<br>
- Emeka Ezike<br>
- Curren Iyer<br>
- Yang Zhou<br>
+ Sophia Cho<br>
+ Emeka Ezike<br>
  abigailswallow<br>
+ Curren Iyer<br>
  yanjingl<br>
- Sophia Cho<br>
- songhan<br>
- Yu-Shun Hsiao<br>
+ Fin Amin<br>
+ songhan<br>
+ Yang Zhou<br>
  Jessica Quaye<br>
  Emmanuel Rassou<br>
  happyappledog<br>
- Jason Yik<br>
- Shreya Johri<br>
+ Jason Yik<br>
+ Shreya Johri<br>
  Sonia Murthy<br>
  Vijay Edupuganti<br>
  The Random DIY<br>

diff --git a/contents/data_engineering/data_engineering.bib b/contents/data_engineering/data_engineering.bib index d2b9c7821..f69bf481e 100644 --- a/contents/data_engineering/data_engineering.bib +++ b/contents/data_engineering/data_engineering.bib @@ -222,3 +222,11 @@ @article{kuznetsova2020open year = {2020}, publisher = {Springer}, } + +@inproceedings{sambasivan2021everyone, + author = {Sambasivan, Nithya and Kapania, Shivani and Highfill, Hannah and Akrong, Diana and Paritosh, Praveen and Aroyo, Lora M}, + title = {{{\textquotedblleft}Everyone} wants to do the model work, not the data work{\textquotedblright}: {Data} Cascades in High-Stakes {AI}}, + booktitle = {Proceedings of the 2021 CHI Conference on Human Factors in Computing Systems}, + pages = {1--15}, + year = {2021}, +} diff --git a/contents/data_engineering/data_engineering.qmd b/contents/data_engineering/data_engineering.qmd index 533f1930a..034977c29 100644 --- a/contents/data_engineering/data_engineering.qmd +++ b/contents/data_engineering/data_engineering.qmd @@ -45,9 +45,9 @@ We begin by discussing data collection: Where do we source data, and how do we g ## Problem Definition -In many machine learning domains, sophisticated algorithms take center stage, while the fundamental importance of data quality is often overlooked. This neglect gives rise to ["Data Cascades"](https://research.google/pubs/pub49953/) by @Data_Cascades_2021 (see @fig-cascades)—events where lapses in data quality compound, leading to negative downstream consequences such as flawed predictions, project terminations, and even potential harm to communities. In @fig-cascades, we have an illustration of potential data pitfalls at every stage and how they influence the entire process down the line. The influence of data collection errors is especially pronounced. Any lapses in this stage will become apparent at later stages (in model evaluation and deployment) and might lead to costly consequences, such as abandoning the entire model and restarting anew. Therefore, investing in data engineering techniques from the onset will help us detect errors early. +In many machine learning domains, sophisticated algorithms take center stage, while the fundamental importance of data quality is often overlooked. This neglect gives rise to ["Data Cascades"](https://research.google/pubs/pub49953/) by @sambasivan2021everyone (see @fig-cascades)—events where lapses in data quality compound, leading to negative downstream consequences such as flawed predictions, project terminations, and even potential harm to communities. In @fig-cascades, we have an illustration of potential data pitfalls at every stage and how they influence the entire process down the line. The influence of data collection errors is especially pronounced. Any lapses in this stage will become apparent at later stages (in model evaluation and deployment) and might lead to costly consequences, such as abandoning the entire model and restarting anew. Therefore, investing in data engineering techniques from the onset will help us detect errors early. -![Data cascades: compounded costs. Credit: @Data_Cascades_2021.](images/png/data_engineering_cascades.png){#fig-cascades} +![Data cascades: compounded costs. Source: @sambasivan2021everyone.](images/png/data_engineering_cascades.png){#fig-cascades} Despite many ML professionals recognizing the importance of data, numerous practitioners report facing these cascades. 
This highlights a systemic issue: while the allure of developing advanced models remains, data often needs to be more appreciated. @@ -55,7 +55,7 @@ Take, for example, Keyword Spotting (KWS) (see @fig-keywords). KWS is a prime ex It is important to appreciate that these keyword-spotting technologies are not isolated; they integrate seamlessly into larger systems, processing signals continuously while managing low power consumption. These systems extend beyond simple keyword recognition, evolving to facilitate diverse sound detections, such as glass breaking. This evolution is geared towards creating intelligent devices capable of understanding and responding to vocal commands, heralding a future where even household appliances can be controlled through voice interactions. -![Keyword Spotting example: interacting with Alexa. Credit: Amazon.](images/png/data_engineering_kws.png){#fig-keywords} +![Keyword Spotting example: interacting with Alexa. Source: Amazon.](images/png/data_engineering_kws.png){#fig-keywords} Building a reliable KWS model is a complex task. It demands a deep understanding of the deployment scenario, encompassing where and how these devices will operate. For instance, a KWS model's effectiveness is not just about recognizing a word; it's about discerning it among various accents and background noises, whether in a bustling cafe or amid the blaring sound of a television in a living room or a kitchen where these devices are commonly found. It's about ensuring that a whispered "Alexa" in the dead of night or a shouted "OK Google" in a noisy marketplace are recognized with equal precision. @@ -146,7 +146,7 @@ While platforms like Kaggle and UCI Machine Learning Repository are invaluable r In addition, bias, validity, and reproducibility issues may exist in these datasets, and there has been a growing awareness of these issues in recent years. Furthermore, using the same dataset to train different models as shown in @fig-misalignment can sometimes create misalignment: training multiple models using the same dataset resultsi in a 'misalignment' between the models and the world, in which an entire ecosystem of models reflects only a narrow subset of the real-world data. -![Training different models on the same dataset. Credit: (icons from left to right: Becris; Freepik; Freepik; Paul J; SBTS2018).](images/png/dataset_myopia.png){#fig-misalignment} +![Training different models on the same dataset. Source: (icons from left to right: Becris; Freepik; Freepik; Paul J; SBTS2018).](images/png/dataset_myopia.png){#fig-misalignment} ### Web Scraping @@ -170,7 +170,7 @@ While web scraping can be a scalable method to amass large training datasets for Web scraping can yield inconsistent or inaccurate data. For example, the photo in @fig-traffic-light shows up when you search for 'traffic light' on Google Images. It is an image from 1914 that shows outdated traffic lights, which are also barely discernible because of the image's poor quality. This can be problematic for web-scraped datasets, as it pollutes the dataset with inapplicable (old) data samples. -![A picture of old traffic lights (1914). Credit: [Vox.](https://www.vox.com/2015/8/5/9097713/when-was-the-first-traffic-light-installed)](images/jpg/1914_traffic.jpeg){#fig-traffic-light} +![A picture of old traffic lights (1914). 
Source: [Vox.](https://www.vox.com/2015/8/5/9097713/when-was-the-first-traffic-light-installed)](images/jpg/1914_traffic.jpeg){#fig-traffic-light} :::{#exr-ws .callout-caution collapse="true"} @@ -215,7 +215,7 @@ Many embedded use cases deal with unique situations, such as manufacturing plant While synthetic data offers numerous advantages, it is essential to use it judiciously. Care must be taken to ensure that the generated data accurately represents the underlying real-world distributions and does not introduce unintended biases. -![Increasing training data size with synthetic data generation. Credit: [AnyLogic](https://www.anylogic.com/features/artificial-intelligence/synthetic-data/).](images/jpg/synthetic_data.jpg){#fig-synthetic-data} +![Increasing training data size with synthetic data generation. Source: [AnyLogic](https://www.anylogic.com/features/artificial-intelligence/synthetic-data/).](images/jpg/synthetic_data.jpg){#fig-synthetic-data} :::{#exr-sd .callout-caution collapse="true"} @@ -228,18 +228,26 @@ Let us learn about synthetic data generation using Generative Adversarial Networ ## Data Storage -Data sourcing and data storage go hand in hand, and data must be stored in a format that facilitates easy access and processing. Depending on the use case, various kinds of data storage systems can be used to store your datasets. Some examples are shown in @tbl-databases. +Data sourcing and data storage go hand in hand, and data must be stored in a format that facilitates easy access and processing. Depending on the use case, various kinds of data storage systems can be used to store your datasets. Some examples are shown in @tbl-storage. -| **Database** | **Data Warehouse** | **Data Lake** | -|------------------------------------|--------------------------|------------------------| -| **Purpose** | Operational and transactional | Analytical | -| **Data type** | Structured | Structured, semi-structured, and/or unstructured | -| **Scale** | Small to large volumes of data | Large volumes of integrated data | Large volumes of diverse data | -| **Examples** | MySQL | Google BigQuery, Amazon Redshift, Microsoft Azure Synapse, Google Cloud Storage, AWS S3, Azure Data Lake Storage | ++-------------------------------------+--------------------------+-------------------------------------------------------------+ +| Database | Data Warehouse | Data Lake | ++:====================================+:=========================+:============================================================+ +| Purpose | Operational and | Analytical | +| | transactional | | ++-------------------------------------+--------------------------+-------------------------------------------------------------+ +| Data type | Structured | Structured, semi-structured, and/or unstructured | ++-------------------------------------+--------------------------+-------------------------------------------------------------+ +| Scale | Small to large volumes | Large volumes of integrated data | +| | of data | Large volumes of diverse data | ++-------------------------------------+--------------------------+-------------------------------------------------------------+ +| Examples | MySQL | Google BigQuery, Amazon Redshift, Microsoft Azure Synapse, | +| | | Google Cloud Storage, AWS S3, Azure Data Lake Storage | ++-------------------------------------+--------------------------+-------------------------------------------------------------+ - : Comparative overview of the database, data warehouse, and data lake. 
{#tbl-databases} +: Comparative overview of the database, data warehouse, and data lake. {#tbl-storage .striped .hover} -The stored data is often accompanied by metadata, defined as 'data about data .'It provides detailed contextual information about the data, such as means of data creation, time of creation, attached data use license, etc. For example, [[Hugging Face]{.underline}](https://huggingface.co/) has [[Dataset Cards]{.underline}](https://huggingface.co/docs/hub/datasets-cards). To promote responsible data use, dataset creators should disclose potential biases through the dataset cards. These cards can educate users about a dataset's contents and limitations. The cards also give vital context on appropriate dataset usage by highlighting biases and other important details. Having this type of metadata can also allow fast retrieval if structured properly. Once the model is developed and deployed to edge devices, the storage systems can continue to store incoming data, model updates, or analytical results. +The stored data is often accompanied by metadata, defined as 'data about data .'It provides detailed contextual information about the data, such as means of data creation, time of creation, attached data use license, etc. For example, [Hugging Face](https://huggingface.co/) has [Dataset Cards](https://huggingface.co/docs/hub/datasets-cards). To promote responsible data use, dataset creators should disclose potential biases through the dataset cards. These cards can educate users about a dataset's contents and limitations. The cards also give vital context on appropriate dataset usage by highlighting biases and other important details. Having this type of metadata can also allow fast retrieval if structured properly. Once the model is developed and deployed to edge devices, the storage systems can continue to store incoming data, model updates, or analytical results. **Data Governance:** With a large amount of data storage, it is also imperative to have policies and practices (i.e., data governance) that help manage data during its life cycle, from acquisition to disposal. Data governance frames how data is managed and includes making pivotal decisions about data access and control. @fig-governance illustrates the different domains involved in data governance. It involves exercising authority and making decisions concerning data to uphold its quality, ensure compliance, maintain security, and derive value. Data governance is operationalized by developing policies, incentives, and penalties, cultivating a culture that perceives data as a valuable asset. Specific procedures and assigned authorities are implemented to safeguard data quality and monitor its utilization and related risks. @@ -251,13 +259,13 @@ Data governance utilizes three integrative approaches: planning and control, org * **The risk-based approach**, intensified by AI advancements, focuses on identifying and managing inherent risks in data and algorithms. It especially addresses AI-specific issues through regular assessments and proactive risk management strategies, allowing for incidental and preventive actions to mitigate undesired algorithm impacts. -![An overview of the data governance framework. Credit: [StarCIO.](https://www.groundwatergovernance.org/the-importance-of-governance-for-all-stakeholders/).](images/jpg/data_governance.jpg){#fig-governance} +![An overview of the data governance framework. 
Source: [StarCIO.](https://www.groundwatergovernance.org/the-importance-of-governance-for-all-stakeholders/).](images/jpg/data_governance.jpg){#fig-governance} Some examples of data governance across different sectors include: -* **Medicine:** [[Health Information Exchanges(HIEs)]{.underline}](https://www.healthit.gov/topic/health-it-and-health-information-exchange-basics/what-hie) enable the sharing of health information across different healthcare providers to improve patient care. They implement strict data governance practices to maintain data accuracy, integrity, privacy, and security, complying with regulations such as the [[Health Insurance Portability and Accountability Act (HIPAA)]{.underline}](https://www.cdc.gov/phlp/publications/topic/hipaa.html). Governance policies ensure that patient data is only shared with authorized entities and that patients can control access to their information. +* **Medicine:** [Health Information Exchanges(HIEs)](https://www.healthit.gov/topic/health-it-and-health-information-exchange-basics/what-hie) enable the sharing of health information across different healthcare providers to improve patient care. They implement strict data governance practices to maintain data accuracy, integrity, privacy, and security, complying with regulations such as the [Health Insurance Portability and Accountability Act (HIPAA)](https://www.cdc.gov/phlp/publications/topic/hipaa.html). Governance policies ensure that patient data is only shared with authorized entities and that patients can control access to their information. -* **Finance:** [[Basel III Framework]{.underline}](https://www.bis.org/bcbs/basel3.htm) is an international regulatory framework for banks. It ensures that banks establish clear policies, practices, and responsibilities for data management, ensuring data accuracy, completeness, and timeliness. Not only does it enable banks to meet regulatory compliance, but it also prevents financial crises by more effectively managing risks. +* **Finance:** [Basel III Framework](https://www.bis.org/bcbs/basel3.htm) is an international regulatory framework for banks. It ensures that banks establish clear policies, practices, and responsibilities for data management, ensuring data accuracy, completeness, and timeliness. Not only does it enable banks to meet regulatory compliance, but it also prevents financial crises by more effectively managing risks. * **Government:** Government agencies managing citizen data, public records, and administrative information implement data governance to manage data transparently and securely. The Social Security System in the US and the Aadhar system in India are good examples of such governance systems. @@ -281,7 +289,7 @@ _**Selective Network Output Storage:**_ Another technique for reducing storage i Data processing refers to the steps involved in transforming raw data into a format suitable for feeding into machine learning algorithms. It is a crucial stage in any ML workflow, yet often overlooked. With proper data processing, ML models are likely to achieve optimal performance. @fig-data-engineering shows a breakdown of a data scientist's time allocation, highlighting the significant portion spent on data cleaning and organizing (%60). -![Data scientists' tasks breakdown by time spent. 
Credit: [Forbes.](https://www.forbes.com/sites/gilpress/2016/03/23/data-preparation-most-time-consuming-least-enjoyable-data-science-task-survey-says/?sh=20c55a266f63)](images/jpg/data_engineering_features.jpg){#fig-data-engineering} +![Data scientists' tasks breakdown by time spent. Source: [Forbes.](https://www.forbes.com/sites/gilpress/2016/03/23/data-preparation-most-time-consuming-least-enjoyable-data-science-task-survey-says/?sh=20c55a266f63)](images/jpg/data_engineering_features.jpg){#fig-data-engineering} Proper data cleaning is a crucial step that directly impacts model performance. Real-world data is often dirty, containing errors, missing values, noise, anomalies, and inconsistencies. Data cleaning involves detecting and fixing these issues to prepare high-quality data for modeling. By carefully selecting appropriate techniques, data scientists can improve model accuracy, reduce overfitting, and enable algorithms to learn more robust patterns. Overall, thoughtful data processing allows machine learning systems to uncover insights better and make predictions from real-world data. @@ -294,7 +302,7 @@ Data often comes from diverse sources and can be unstructured or semi-structured Data validation serves a broader role than ensuring adherence to certain standards, like preventing temperature values from falling below absolute zero. These issues arise in TinyML because sensors may malfunction or temporarily produce incorrect readings; such transients are not uncommon. Therefore, it is imperative to catch data errors early before propagating through the data pipeline. Rigorous validation processes, including verifying the initial annotation practices, detecting outliers, and handling missing values through techniques like mean imputation, contribute directly to the quality of datasets. This, in turn, impacts the performance, fairness, and safety of the models trained on them. Let’s take a look at @fig-data-engineering-kws2 for an example of a data processing pipeline. In the context of TinyML, the Multilingual Spoken Words Corpus (MSWC) is an example of data processing pipelines—systematic and automated workflows for data transformation, storage, and processing. The input data (which's a collection of short recordings) goes through sevreral phases of processing, such as audio-word alignemnt and keyword extraction. By streamlining the data flow, from raw data to usable datasets, data pipelines enhance productivity and facilitate the rapid development of machine learning models. The MSWC is an expansive and expanding collection of audio recordings of spoken words in 50 different languages, which are collectively used by over 5 billion people. This dataset is intended for academic study and business uses in areas like keyword identification and speech-based search. It is openly licensed under Creative Commons Attribution 4.0 for broad usage. -![An overview of the Multilingual Spoken Words Corpus (MSWC) data processing pipeline. Credit: @mazumder2021multilingual.](images/png/data_engineering_kws2.png){#fig-data-engineering-kws2} +![An overview of the Multilingual Spoken Words Corpus (MSWC) data processing pipeline. 
Source: @mazumder2021multilingual.](images/png/data_engineering_kws2.png){#fig-data-engineering-kws2} The MSWC used a [forced alignment](https://montreal-forced-aligner.readthedocs.io/en/latest/) method to automatically extract individual word recordings to train keyword-spotting models from the [Common Voice](https://commonvoice.mozilla.org/) project, which features crowdsourced sentence-level recordings. Forced alignment refers to long-standing methods in speech processing that predict when speech phenomena like syllables, words, or sentences start and end within an audio recording. In the MSWC data, crowdsourced recordings often feature background noises, such as static and wind. Depending on the model's requirements, these noises can be removed or intentionally retained. @@ -354,7 +362,7 @@ Let's get started! There is no guarantee that the data labels are actually correct. @fig-hard-labels shows some examples of hard labeling cases: some errors arise from blurred pictures that make them hard to identify (the frog image), and others stem from a lack of domain knowledge (the black stork case). It is possible that despite the best instructions being given to labelers, they still mislabel some images [@northcutt2021pervasive]. Strategies like quality checks, training annotators, and collecting multiple labels per datapoint can help ensure label quality. For ambiguous tasks, multiple annotators can help identify controversial datapoints and quantify disagreement levels. -![Some examples of hard labeling cases. Credit: @northcutt2021pervasive.](https://raw.githubusercontent.com/cleanlab/assets/master/cleanlab/label-errors-examples.png){#fig-hard-labels} +![Some examples of hard labeling cases. Source: @northcutt2021pervasive.](https://raw.githubusercontent.com/cleanlab/assets/master/cleanlab/label-errors-examples.png){#fig-hard-labels} When working with human annotators, offering fair compensation and otherwise prioritizing ethical treatment is important, as annotators can be exploited or otherwise harmed during the labeling process (Perrigo, 2023). For example, if a dataset is likely to contain disturbing content, annotators may benefit from having the option to view images in grayscale [@googleinformation]. @@ -372,7 +380,7 @@ Here are some examples of how AI-assisted annotation has been proposed to be use * **Self-driving cars:** AI-assisted annotation is being used to label images and videos from self-driving cars. This can help to train AI models to identify objects on the road, such as other vehicles, pedestrians, and traffic signs. * **Social media:** AI-assisted annotation labels social media posts like images and videos. This can help to train AI models to identify and classify different types of content, such as news, advertising, and personal posts. -![Strategies for acquiring additional labeled training data. Credit: [Standford AI Lab.](https://ai.stanford.edu/blog/weak-supervision/)](https://dawn.cs.stanford.edu/assets/img/2017-07-16-weak-supervision/WS_mapping.png){#fig-weak-supervision} +![Strategies for acquiring additional labeled training data. 
Source: [Standford AI Lab.](https://ai.stanford.edu/blog/weak-supervision/)](https://dawn.cs.stanford.edu/assets/img/2017-07-16-weak-supervision/WS_mapping.png){#fig-weak-supervision} ## Data Version Control @@ -403,9 +411,9 @@ With data version control in place, we can track the changes shown in @fig-data- [**[DVC]{.underline}**](https://dvc.org/doc): It stands for Data Version Control in short and is an open-source, lightweight tool that works on top of Git Hub and supports all kinds of data formats. It can seamlessly integrate into the workflow if Git is used to manage code. It captures the versions of data and models in the Git commits while storing them on-premises or on the cloud (e.g., AWS, Google Cloud, Azure). These data and models (e.g., ML artifacts) are defined in the metadata files, which get updated in every commit. It can allow metrics tracking of models on different versions of the data. -**[[lakeFS]{.underline}](https://docs.lakefs.io/):** It is an open-source tool that supports the data version control on data lakes. It supports many git-like operations, such as branching and merging of data, as well as reverting to previous versions of the data. It also has a unique UI feature, making exploring and managing data much easier. +**[lakeFS](https://docs.lakefs.io/):** It is an open-source tool that supports the data version control on data lakes. It supports many git-like operations, such as branching and merging of data, as well as reverting to previous versions of the data. It also has a unique UI feature, making exploring and managing data much easier. -**[[Git LFS]{.underline}](https://git-lfs.com/):** It is useful for data version control on smaller-sized datasets. It uses Git's inbuilt branching and merging features but is limited in tracking metrics, reverting to previous versions, or integrating with data lakes. +**[Git LFS](https://git-lfs.com/):** It is useful for data version control on smaller-sized datasets. It uses Git's inbuilt branching and merging features but is limited in tracking metrics, reverting to previous versions, or integrating with data lakes. ## Optimizing Data for Embedded AI @@ -426,7 +434,7 @@ By providing clear, detailed documentation, creators can help developers underst @fig-data-card shows an example of a data card for a computer vision (CV) dataset. It includes some basic information about the dataset and instructions on how to use it, including known biases. -![Data card describing a CV dataset. Credit: @pushkarna2022data.](images/png/data_card.png){#fig-data-card} +![Data card describing a CV dataset. Source: @pushkarna2022data.](images/png/data_card.png){#fig-data-card} Keeping track of data provenance- essentially the origins and the journey of each data point through the data pipeline- is not merely a good practice but an essential requirement for data quality. Data provenance contributes significantly to the transparency of machine learning systems. Transparent systems make it easier to scrutinize data points, enabling better identification and rectification of errors, biases, or inconsistencies. For instance, if an ML model trained on medical data is underperforming in particular areas, tracing the provenance can help identify whether the issue is with the data collection methods, the demographic groups represented in the data or other factors. This level of transparency doesn't just help debug the system but also plays a crucial role in enhancing the overall data quality. 
By improving the reliability and credibility of the dataset, data provenance also enhances the model's performance and its acceptability among end-users. @@ -443,13 +451,13 @@ Ensured data transparency presents several challenges, especially because it req Many high-quality datasets either come from proprietary sources or contain copyrighted information. This introduces licensing as a challenging legal domain. Companies eager to train ML systems must engage in negotiations to obtain licenses that grant legal access to these datasets. Furthermore, licensing terms can impose restrictions on data applications and sharing methods. Failure to comply with these licenses can have severe consequences. -For instance, ImageNet, one of the most extensively utilized datasets for computer vision research, is a case in point. Most of its images were procured from public online sources without explicit permission, sparking ethical concerns (Prabhu and Birhane, 2020). Accessing the ImageNet dataset for corporations requires registration and adherence to its terms of use, which restricts commercial usage ([[ImageNet]{.underline}](https://www.image-net.org/#), 2021). Major players like Google and Microsoft invest significantly in licensing datasets to enhance their ML vision systems. However, the cost factor restricts accessibility for researchers from smaller companies with constrained budgets. +For instance, ImageNet, one of the most extensively utilized datasets for computer vision research, is a case in point. Most of its images were procured from public online sources without explicit permission, sparking ethical concerns (Prabhu and Birhane, 2020). Accessing the ImageNet dataset for corporations requires registration and adherence to its terms of use, which restricts commercial usage ([ImageNet](https://www.image-net.org/#), 2021). Major players like Google and Microsoft invest significantly in licensing datasets to enhance their ML vision systems. However, the cost factor restricts accessibility for researchers from smaller companies with constrained budgets. The legal domain of data licensing has seen major cases that help define fair use parameters. A prominent example is _Authors Guild, Inc. v. Google, Inc._ This 2005 lawsuit alleged that Google's book scanning project infringed copyrights by displaying snippets without permission. However, the courts ultimately ruled in Google's favor, upholding fair use based on the transformative nature of creating a searchable index and showing limited text excerpts. This precedent provides some legal grounds for arguing fair use protections apply to indexing datasets and generating representative samples for machine learning. However, license restrictions remain binding, so a comprehensive analysis of licensing terms is critical. The case demonstrates why negotiations with data providers are important to enable legal usage within acceptable bounds. **New Data Regulations and Their Implications** -New data regulations also impact licensing practices. The legislative landscape is evolving with regulations like the EU's [[Artificial Intelligence Act]{.underline}](https://digital-strategy.ec.europa.eu/en/policies/european-approach-artificial-intelligence), which is poised to regulate AI system development and use within the European Union (EU). This legislation: +New data regulations also impact licensing practices. 
The legislative landscape is evolving with regulations like the EU's [Artificial Intelligence Act](https://digital-strategy.ec.europa.eu/en/policies/european-approach-artificial-intelligence), which is poised to regulate AI system development and use within the European Union (EU). This legislation: 1. Classifies AI systems by risk. @@ -457,13 +465,13 @@ New data regulations also impact licensing practices. The legislative landscape 3. Emphasizes data quality, transparency, human oversight, and accountability. -Additionally, the EU Act addresses the ethical dimensions and operational challenges in sectors such as healthcare and finance. Key elements include the prohibition of AI systems posing \"unacceptable\" risks, stringent conditions for high-risk systems, and minimal obligations for \"limited risk\" AI systems. The proposed European AI Board will oversee and ensure the implementation of efficient regulation. +Additionally, the EU Act addresses the ethical dimensions and operational challenges in sectors such as healthcare and finance. Key elements include the prohibition of AI systems posing "unacceptable" risks, stringent conditions for high-risk systems, and minimal obligations for "limited risk" AI systems. The proposed European AI Board will oversee and ensure the implementation of efficient regulation. **Challenges in Assembling ML Training Datasets** Complex licensing issues around proprietary data, copyright law, and privacy regulations constrain options for assembling ML training datasets. However, expanding accessibility through more open licensing or public-private data collaborations could greatly accelerate industry progress and ethical standards. -Sometimes, certain portions of a dataset may need to be removed or obscured to comply with data usage agreements or protect sensitive information. For example, a dataset of user information may have names, contact details, and other identifying data that may need to be removed from the dataset; this is well after the dataset has already been actively sourced and used for training models. Similarly, a dataset that includes copyrighted content or trade secrets may need to filter out those portions before being distributed. Laws such as the General Data Protection Regulation (GDPR), the California Consumer Privacy Act (CCPA), and the Amended Act on the Protection of Personal Information ([[APPI]{.underline}](https://www.ppc.go.jp/files/pdf/280222_amendedlaw.pdf)) have been passed to guarantee the right to be forgotten. These regulations legally require model providers to erase user data upon request. +Sometimes, certain portions of a dataset may need to be removed or obscured to comply with data usage agreements or protect sensitive information. For example, a dataset of user information may have names, contact details, and other identifying data that may need to be removed from the dataset; this is well after the dataset has already been actively sourced and used for training models. Similarly, a dataset that includes copyrighted content or trade secrets may need to filter out those portions before being distributed. Laws such as the General Data Protection Regulation (GDPR), the California Consumer Privacy Act (CCPA), and the Amended Act on the Protection of Personal Information ([APPI](https://www.ppc.go.jp/files/pdf/280222_amendedlaw.pdf)) have been passed to guarantee the right to be forgotten. These regulations legally require model providers to erase user data upon request. 
Data collectors and providers need to be able to take appropriate measures to de-identify or filter out any proprietary, licensed, confidential, or regulated information as needed. Sometimes, the users may explicitly request that their data be removed. @@ -541,3 +549,6 @@ In addition to exercises, we offer a series of hands-on labs allowing students t * _Coming soon._ ::: + + + diff --git a/contents/dl_primer/dl_primer.bib b/contents/dl_primer/dl_primer.bib index 745f5b374..8daf6b12b 100644 --- a/contents/dl_primer/dl_primer.bib +++ b/contents/dl_primer/dl_primer.bib @@ -90,3 +90,10 @@ @article{vaswani2017attention volume = {30}, year = {2017}, } + +@misc{author2023using, + author = {Kun, Sachin}, + title = {Using a Linear Model to Deal with Nonlinear Dataset}, + year = {2023}, + howpublished = {\ensuremath{<}https://medium.com/@sachinkun21/using-a-linear-model-to-deal-with-nonlinear-dataset-c6ed0f7f3f51\ensuremath{>}}, +} diff --git a/contents/dl_primer/dl_primer.qmd b/contents/dl_primer/dl_primer.qmd index 34b042e48..626068050 100644 --- a/contents/dl_primer/dl_primer.qmd +++ b/contents/dl_primer/dl_primer.qmd @@ -32,7 +32,7 @@ This section briefly introduces deep learning, starting with an overview of its Deep learning, a specialized area within machine learning and artificial intelligence (AI), utilizes algorithms modeled after the structure and function of the human brain, known as artificial neural networks. This field is a foundational element in AI, driving progress in diverse sectors such as computer vision, natural language processing, and self-driving vehicles. Its significance in embedded AI systems is highlighted by its capability to handle intricate calculations and predictions, optimizing the limited resources in embedded settings. @fig-ai-ml-dl illustrates the chronological development and relative segmentation of the three fields. -![Artificial intelligence subfields. Credit: NVIDIA.](images/png/ai_dl_progress_nvidia.png){#fig-ai-ml-dl} +![The diagram illustrates artificial intelligence as the overarching field encompassing all computational methods that mimic human cognitive functions. Machine learning is a subset of AI that includes algorithms capable of learning from data. Deep learning, a further subset of ML, specifically involves neural networks that are able to learn more complex patterns in large volumes of data. Source: NVIDIA.](images/png/ai_dl_progress_nvidia.png){#fig-ai-ml-dl} ### Brief History of Deep Learning @@ -48,17 +48,17 @@ Multiple factors have contributed to this surge, including advancements in compu Second, the digital revolution has yielded a wealth of big data, offering rich material for deep learning models to learn from and excel in tasks such as image and speech recognition, language translation, and game playing. Large, labeled datasets have been key in refining and successfully deploying deep learning applications in real-world settings. -Additionally, collaborations and open-source efforts have nurtured a dynamic community of researchers and practitioners, accelerating advancements in deep learning techniques. Innovations like deep reinforcement learning, transfer learning, and generative adversarial networks have broadened the scope of what is achievable with deep learning, opening new possibilities in various sectors, including healthcare, finance, transportation, and entertainment. 
+Additionally, collaborations and open-source efforts have nurtured a dynamic community of researchers and practitioners, accelerating advancements in deep learning techniques. Innovations like deep reinforcement learning, transfer learning, and generative artificial intelligence have broadened the scope of what is achievable with deep learning, opening new possibilities in various sectors, including healthcare, finance, transportation, and entertainment. Organizations worldwide recognize deep learning's transformative potential and invest heavily in research and development to leverage its capabilities in providing innovative solutions, optimizing operations, and creating new business opportunities. As deep learning continues its upward trajectory, it is set to redefine how we interact with technology, enhancing convenience, safety, and connectivity in our lives. ### Applications of Deep Learning -Deep learning is extensively used across numerous industries today, and its transformative impact on society is evident. In finance, it is employed for stock market prediction, risk assessment, and fraud detection. For instance, deep learning algorithms can predict stock market trends, guide investment strategies, and enhance financial decisions. Marketing uses it for customer segmentation, personalization, and content optimization. In healthcare, machine learning aids in diagnosis, treatment planning, and patient monitoring. Similarly, deep learning can make medical predictions that improve patient diagnosis and save lives. The benefits are clear: machine learning predicts with greater accuracy than humans and does so much more quickly. +Deep learning is extensively used across numerous industries today, and its transformative impact on society is evident. In finance, it powers stock market prediction, risk assessment, and fraud detection. For instance, deep learning algorithms can predict stock market trends, guide investment strategies, and enhance financial decisions. In marketing, it drives customer segmentation, personalization, and content optimization. Deep learning analyzes consumer behavior and preferences to enable highly targeted advertising and personalized content delivery. In manufacturing, deep learning streamlines production processes and enhances quality control by continuously analyzing large volumes of data. This allows companies to boost productivity and minimize waste, leading to the production of higher quality goods at lower costs. In healthcare, machine learning aids in diagnosis, treatment planning, and patient monitoring. Similarly, deep learning can make medical predictions that improve patient diagnosis and save lives. The benefits are clear: machine learning predicts with greater accuracy than humans and does so much more quickly. + +Deep learning enhances everyday products, such as strengthening Netflix's recommender systems to provide users with more [personalized recommendations](https://dl.acm.org/doi/abs/10.1145/3543873.3587675). At Google, deep learning models have driven significant improvements in [Google Translate](https://research.google/blog/recent-advances-in-google-translate/), enabling it to handle over [100 languages](https://cloud.google.com/translate/docs/languages). Autonomous vehicles from companies like Waymo, Cruise, and Motional have become a reality through the use of deep learning in their [perception system](https://motional.com/news/technically-speaking-improving-av-perception-through-transformative-machine-learning). 
Additionally, Amazon employs deep learning at the edge in their Alexa devices to perform [keyword spotting](https://towardsdatascience.com/how-amazon-alexa-works-your-guide-to-natural-language-processing-ai-7506004709d3). -In manufacturing, deep learning has had a significant impact. By continuously learning from vast amounts of data collected during manufacturing, companies can boost productivity while minimizing waste through improved efficiency. This financial benefit for companies translates to better quality products at lower customer prices. Machine learning enables manufacturers to continually refine their processes, producing higher quality goods more efficiently than ever. -Deep learning enhances everyday products like Netflix recommendations and Google Translate text translations. Moreover, it helps companies like Amazon and Uber reduce customer service costs by swiftly identifying dissatisfied customers. ### Relevance to Embedded AI @@ -68,25 +68,59 @@ Embedded AI, the integration of AI algorithms directly into hardware devices, na Deep learning draws inspiration from the human brain's neural networks to create decision-making patterns. This section delves into the foundational concepts of deep learning, providing insights into the more complex topics discussed later in this primer. -Neural networks serve as the foundation of deep learning, inspired by the biological neural networks in the human brain to process and analyze data hierarchically. Below, we examine the primary components and structures in neural networks. +Neural networks serve as the foundation of deep learning, inspired by the biological neural networks in the human brain to process and analyze data hierarchically. Neural networks are composed of basic units called perceptrons, which are typically organized into layers. Each layer consists of several perceptrons, and multiple layers are stacked to form the entire network. The connections between these layers are defined by sets of weights or parameters that determine how data is processed as it flows from the input to the output of the network. + +Below, we examine the primary components and structures in neural networks. ### Perceptrons -The Perceptron is the basic unit or node that is the foundation for more complex structures. It takes various inputs, applies weights and biases to them, and then uses an activation function to produce an output. @fig-perceptron illustrates the building blocks of a perceptron. In simple terms, think of a perceptron as a tiny decision-maker that learns to make a binary decision (e.g. 'yes' or 'no'). It takes in numbers as inputs (`x_1, x_2, ...`), each representing a feature of an object we wish to analyze (an image for example). Then it multiplies each input by a weight, adds them up, and if the total is high enough (crosses a certain threshold), it returns "yes" as an asnwer, otherwise, it outputs "no." +The Perceptron is the basic unit or node that forms the foundation for more complex structures. It functions by taking multiple inputs, each representing a feature of the object under analysis, such as the characteristics of a home for predicting its price or the attributes of a song to forecast its popularity in music streaming services. These inputs are denoted as $x_1, x_2, ..., x_n$. + +Each input $x_i$ has a corresponding weight $w_{ij}$, and the perceptron simply multiplies each input by its matching weight. 
This operation is similar to linear regression, where the intermediate output, $z$, is computed as the sum of the products of inputs and their weights:
+
+$$
+z = \sum (x_i \cdot w_{ij})
+$$
+
+To this intermediate calculation, a bias term $b$ is added, allowing the model to better fit the data by shifting the linear output function up or down. Thus, the intermediate linear combination computed by the perceptron including the bias becomes:
+
+$$
+z = \sum (x_i \cdot w_{ij}) + b
+$$
+
+This basic form of a perceptron can only model linear relationships between the input and output. Patterns found in nature are often complex and extend beyond linear relationships. To enable the perceptron to handle non-linear relationships, an activation function is applied to the linear output $z$.
+
+$$
+\hat{y} = \sigma(z)
+$$
+
+@fig-nonlinear illustrates an example where data exhibit a nonlinear pattern that could not be adequately modeled with a linear approach. The activation function, such as sigmoid, tanh, or ReLU, transforms the linear input sum into a non-linear output. The primary objective of this function is to introduce non-linearity into the model, enabling it to learn and perform more sophisticated tasks. Thus, the final output of the perceptron, including the activation function, can be expressed as $\hat{y} = \sigma(z)$, as given above.

-![Perceptron. Credit: Wikimedia - Chrislb.](images/png/Rosenblattperceptron.png){#fig-perceptron}
+![Activation functions enable the modeling of complex non-linear relationships. Source: Medium - Sachin Kaushik.](images/png/nonlinear_patterns.png){#fig-nonlinear}

-Conceived in the 1950s, perceptrons paved the way for developing more intricate neural networks and have been a fundamental building block in deep learning.
+A perceptron can be configured to perform either regression or classification tasks. For regression, the actual numerical output $\hat{y}$ is used. For classification, the output depends on whether $\hat{y}$ crosses a certain threshold. If $\hat{y}$ exceeds this threshold, the perceptron might output one class (e.g., 'yes'), and if it does not, another class (e.g., 'no').
+
+![Perceptron. Conceived in the 1950s, perceptrons paved the way for developing more intricate neural networks and have been a fundamental building block in deep learning. Source: Wikimedia - Chrislb.](images/png/Rosenblattperceptron.png){#fig-perceptron}
+
+@fig-perceptron illustrates the fundamental building blocks of a perceptron, which serves as the foundation for more complex neural networks. A perceptron can be thought of as a miniature decision-maker, utilizing its weights, bias, and activation function to process inputs and generate outputs based on learned parameters. This concept forms the basis for understanding more intricate neural network architectures, such as multilayer perceptrons. In these advanced structures, layers of perceptrons work in concert, with each layer's output serving as the input for the subsequent layer. This hierarchical arrangement creates a deep learning model capable of comprehending and modeling complex, abstract patterns within data. By stacking these simple units, neural networks gain the ability to tackle increasingly sophisticated tasks, from image recognition to natural language processing.

### Multilayer Perceptrons

-Multilayer perceptrons (MLPs) are an evolution of the single-layer perceptron model, featuring multiple layers of nodes connected in a feedforward manner, as shown in @fig-mlp.
These layers include an input layer for data reception, several hidden layers for data processing, and an output layer for final result generation. MLPs are skilled at identifying non-linear relationships and use a backpropagation technique for training, where weights are optimized through a gradient descent algorithm. +Multilayer perceptrons (MLPs) are an evolution of the single-layer perceptron model, featuring multiple layers of nodes connected in a feedforward manner. In a feedforward network, information moves in only one direction - from the input layer, through the hidden layers, to the output layer, without any cycles or loops. This structure is illustrated in @fig-mlp. The network layers include an input layer for data reception, several hidden layers for data processing, and an output layer for final result generation. + +While a single perceptron is limited in its capacity to model complex patterns, the real strength of neural networks emerges from the assembly of multiple layers. Each layer consists of numerous perceptrons working together, allowing the network to capture intricate and non-linear relationships within the data. With sufficient depth and breadth, these networks can approximate virtually any function, no matter how complex. + +![Multilayer Perceptron. Source: Wikimedia - Charlie.](https://www.nomidl.com/wp-content/uploads/2022/04/image-7.png){width=70%, #fig-mlp} + +### Training Process -![Multilayer Perceptron. Credit: Wikimedia - Charlie.](https://www.nomidl.com/wp-content/uploads/2022/04/image-7.png){width=70%, #fig-mlp} +A neural network receives an input, performs a calculation, and produces a prediction. The prediction is determined by the calculations performed within the sets of perceptrons found between the input and output layers. These calculations depend primarily on the input and the weights. Since you do not have control over the input, the objective during training is to adjust the weights in such a way that the output of the network provides the most accurate prediction. + +The training process involves several key steps, beginning with the forward pass, where the existing weights of the network are used to calculate the output for a given input. This output is then compared to the true target values to calculate an error, which measures how well the network's prediction matches the expected outcome. Following this, a backward pass is performed. This involves using the error to make adjustments to the weights of the network through a process called backpropagation. This adjustment aims to reduce the error in subsequent predictions. The cycle of forward pass, error calculation, and backward pass is repeated iteratively. This process continues until the network's predictions are sufficiently accurate or a predefined number of iterations is reached, effectively minimizing the loss function used to measure the error. #### Forward Pass -The forward pass is the initial phase where data moves through the network from the input to the output layer. During this phase, each layer performs specific computations on the input data, using weights and biases before passing the resulting values to subsequent layers. The final output of this phase is used to compute the loss, indicating the difference between the predicted output and actual target values. +The forward pass is the initial phase where data moves through the network from the input to the output layer. 
At the start of training, the network's weights are randomly initialized, setting the initial conditions for learning. During the forward pass, each layer performs specific computations on the input data using these weights and biases, and the results are then passed to the subsequent layer. The final output of this phase is the network’s prediction. This prediction is compared to the actual target values present in the dataset to calculate the loss, which can be thought of as the difference between the predicted outputs and the target values. The loss quantifies the network’s performance at this stage, providing a crucial metric for the subsequent adjustment of weights during the backward pass.

@vid-nn below explains how neural networks work using handwritten digit recognition as an example application. It also touches on the math underlying neural nets.

@@ -94,13 +128,15 @@ The forward pass is the initial phase where data moves through the network from

# Neural Networks

-{{< video >}}
+{{< video https://www.youtube.com/watch?v=aircAruvnKk&list=PLZHQObOWTQDNU6R1_67000Dx_ZCJB-3pi&index=1 >}}

:::

-#### Backward Pass (Backpropagation)
+#### Backward Pass (Backpropagation) {#sec-backward_pass}
+
+After completing the forward pass and computing the loss, which measures how far the model's predictions deviate from the actual target values, the next step is to improve the model's performance by adjusting the network’s weights. Since we cannot control the inputs to the model, adjusting the weights becomes our primary method for refining the model.

-Backpropagation is a key algorithm in training deep neural networks. This phase involves calculating the gradient of the loss function concerning each weight using the chain rule, effectively moving backward through the network. The gradients calculated in this step guide the adjustment of weights to minimize the loss function, thereby enhancing the network's performance with each iteration of training.
+We determine how to adjust the weights of our model through a key algorithm called backpropagation. Backpropagation uses the calculated loss to determine the gradient of each weight. These gradients describe the direction and magnitude in which the weights should be adjusted. By tuning the weights based on these gradients, the model is better positioned to make predictions that are closer to the actual target values in the next forward pass.

Grasping these foundational concepts paves the way to understanding more intricate deep learning architectures and techniques, fostering the development of more sophisticated and productive applications, especially within embedded AI systems.

@@ -110,7 +146,7 @@ Grasping these foundational concepts paves the way to understanding more intrica

# Gradient descent

-{{< video >}}
+{{< video https://www.youtube.com/watch?v=IHZwWFHWa-w&list=PLZHQObOWTQDNU6R1_67000Dx_ZCJB-3pi&index=2 >}}

:::

@@ -118,7 +154,7 @@ Grasping these foundational concepts paves the way to understanding more intrica

# Backpropagation

-{{< video >}}
+{{< video https://www.youtube.com/watch?v=Ilg3gGewQ5U&list=PLZHQObOWTQDNU6R1_67000Dx_ZCJB-3pi&index=3 >}}

:::

@@ -151,7 +187,7 @@ We've just scratched the surface of neural networks. Now, you'll get to try and

#### Convolutional Neural Networks (CNNs)

-CNNs are mainly used in image and video recognition tasks. This architecture employs convolutional layers that filter input data to identify features like edges, corners, and textures.
A typical CNN also includes pooling layers to reduce the spatial dimensions of the data and fully connected layers for classification. CNNs have proven highly effective in image recognition, object detection, and computer vision applications. +CNNs are mainly used in image and video recognition tasks. This architecture consists of two main parts: the convolutional base and the fully connected layers. In the convolutional base, convolutional layers filter input data to identify features like edges, corners, and textures. Following each convolutional layer, a pooling layer can be applied to reduce the spatial dimensions of the data, thereby decreasing computational load and concentrating the extracted features. Unlike MLPs, which treat input features as flat, independent entities, CNNs maintain the spatial relationships between pixels, making them particularly effective for image and video data. The extracted features from the convolutional base are then passed into the fully connected layers, similar to those used in MLPs, which perform classification based on the features extracted by the convolution layers. CNNs have proven highly effective in image recognition, object detection, and other computer vision applications. In embedded AI, CNNs are crucial for image and video recognition tasks, where real-time processing is often needed. They can be optimized for embedded systems using techniques like quantization and pruning to minimize memory usage and computational demands, enabling efficient object detection and facial recognition functionalities in devices with limited computational resources. @@ -193,18 +229,25 @@ These architectures serve specific purposes and excel in different domains, offe ### Traditional ML vs Deep Learning -To briefly highlight the differences, @tbl-mlvsdl illustrates the contrasting characteristics between traditional ML and deep learning: - -| Aspect | Traditional ML | Deep Learning | -|----------------------------|-----------------------------------------------------|--------------------------------------------------------| -| Data Requirements | Low to Moderate (efficient with smaller datasets) | High (requires large datasets for nuanced learning) | -| Model Complexity | Moderate (suitable for well-defined problems) | High (detects intricate patterns, suited for complex tasks) | -| Computational Resources | Low to Moderate (cost-effective, less resource-intensive) | High (demands substantial computational power and resources) | -| Deployment Speed | Fast (quicker training and deployment cycles) | Slow (prolonged training times, especially with larger datasets) | -| Interpretability | High (clear insights into decision pathways) | Low (complex layered structures, "black box" nature) | -| Maintenance | Easier (simple to update and maintain) | Complex (requires more efforts in maintenance and updates) | - -: Comparison of traditional machine learning and deep learning. {#tbl-mlvsdl} +Deep learning extends traditional machine learning by utilizing neural networks to discern patterns in data. In contrast, traditional machine learning relies on a set of established algorithms such as decision trees, k-nearest neighbors, and support vector machines, but does not involve neural networks. 
To briefly highlight the differences, @tbl-mlvsdl illustrates the contrasting characteristics between traditional ML and deep learning: + ++-------------------------------+-----------------------------------------------------------+--------------------------------------------------------------+ +| Aspect | Traditional ML | Deep Learning | ++:==============================+:==========================================================+:=============================================================+ +| Data Requirements | Low to Moderate (efficient with smaller datasets) | High (requires large datasets for nuanced learning) | ++-------------------------------+-----------------------------------------------------------+--------------------------------------------------------------+ +| Model Complexity | Moderate (suitable for well-defined problems) | High (detects intricate patterns, suited for complex tasks) | ++-------------------------------+-----------------------------------------------------------+--------------------------------------------------------------+ +| Computational Resources | Low to Moderate (cost-effective, less resource-intensive) | High (demands substantial computational power and resources) | ++-------------------------------+-----------------------------------------------------------+--------------------------------------------------------------+ +| Deployment Speed | Fast (quicker training and deployment cycles) | Slow (prolonged training times, esp. with larger datasets) | ++-------------------------------+-----------------------------------------------------------+--------------------------------------------------------------+ +| Interpretability | High (clear insights into decision pathways) | Low (complex layered structures, "black box" nature) | ++-------------------------------+-----------------------------------------------------------+--------------------------------------------------------------+ +| Maintenance | Easier (simple to update and maintain) | Complex (requires more efforts in maintenance and updates) | ++-------------------------------+-----------------------------------------------------------+--------------------------------------------------------------+ + +: Comparison of traditional machine learning and deep learning. {#tbl-mlvsdl .striped .hover} ### Choosing Traditional ML vs. DL @@ -228,7 +271,7 @@ To briefly highlight the differences, @tbl-mlvsdl illustrates the contrasting ch #### Regulatory Compliance -Regulatory compliance is crucial in various industries, requiring adherence to guidelines and best practices such as the GDPR in the EU. Traditional ML models, due to their inherent interpretability, often align better with these regulations, especially in sectors like finance and healthcare. +Regulatory compliance is crucial in various industries, requiring adherence to guidelines and best practices such as the General Data Protection Regulation (GDPR) in the EU. Traditional ML models, due to their inherent interpretability, often align better with these regulations, especially in sectors like finance and healthcare. 
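To illustrate why the interpretability of traditional ML can matter for compliance, the following hedged sketch trains a small decision tree with scikit-learn and prints its decision rules in a human-readable form; the dataset and tree depth are arbitrary choices for demonstration only.

``` python
# Minimal sketch: a traditional ML model whose decision logic can be printed
# and audited, one reason such models can be easier to justify under
# regulations that call for explainable decisions.
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier, export_text

iris = load_iris()
tree = DecisionTreeClassifier(max_depth=3, random_state=0)
tree.fit(iris.data, iris.target)

# The learned rules are human-readable, unlike the weight matrices of a deep network.
print(export_text(tree, feature_names=list(iris.feature_names)))
```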
#### Interpretability diff --git a/contents/dl_primer/images/png/nonlinear_patterns.png b/contents/dl_primer/images/png/nonlinear_patterns.png new file mode 100644 index 000000000..c063b9119 Binary files /dev/null and b/contents/dl_primer/images/png/nonlinear_patterns.png differ diff --git a/contents/dsp_spectral_features_block/dsp_spectral_features_block.qmd b/contents/dsp_spectral_features_block/dsp_spectral_features_block.qmd index dc1481a23..22f601d1e 100644 --- a/contents/dsp_spectral_features_block/dsp_spectral_features_block.qmd +++ b/contents/dsp_spectral_features_block/dsp_spectral_features_block.qmd @@ -4,7 +4,7 @@ bibliography: dsp_spectral_features_block.bib # DSP Spectral Features {.unnumbered} -![*DALL·E 3 Prompt: 1950s style cartoon illustration of a Latin male and female scientist in a vibration research room. The man is using a calculus ruler to examine ancient circuitry. The woman is at a computer with complex vibration graphs. The wooden table has boards with sensors, prominently an accelerometer. A classic, rounded-back computer shows the Arduino IDE with code for LED pin assignments and machine learning algorithms for movement detection. The Serial Monitor displays FFT, classification, wavelets, and DSPs. Vintage lamps, tools, and charts with FFT and Wavelets graphs complete the scene.*](images/jpg/dsp_ini.jpg){fig-align="center" width="6.5in"} +![*DALL·E 3 Prompt: 1950s style cartoon illustration of a Latin male and female scientist in a vibration research room. The man is using a calculus ruler to examine ancient circuitry. The woman is at a computer with complex vibration graphs. The wooden table has boards with sensors, prominently an accelerometer. A classic, rounded-back computer shows the Arduino IDE with code for LED pin assignments and machine learning algorithms for movement detection. The Serial Monitor displays FFT, classification, wavelets, and DSPs. Vintage lamps, tools, and charts with FFT and Wavelets graphs complete the scene.*](images/jpg/dsp_ini.jpg) ## Introduction @@ -36,7 +36,7 @@ Let's explore in more detail a typical TinyML Motion Classification project cove ## A TinyML Motion Classification project -![](images/jpg/spectral_block.jpeg){fig-align="center" width="6.5in"} +![](images/jpg/spectral_block.jpeg) In the hands-on project, *Motion Classification and Anomaly Detection*, we simulated mechanical stresses in transport, where our problem was to classify four classes of movement: @@ -47,11 +47,11 @@ In the hands-on project, *Motion Classification and Anomaly Detection*, we simul The accelerometers provided the data on the pallet (or container). -![](images/png/case_study.png){fig-align="center" width="6.5in"} +![](images/png/case_study.png) Below is one sample (raw data) of 10 seconds, captured with a sampling frequency of 50Hz: -![](images/png/data_sample.png){fig-align="center" width="6.5in"} +![](images/png/data_sample.png) > The result is similar when this analysis is done over another dataset with the same principle, using a different sampling frequency, 62.5Hz instead of 50Hz. @@ -61,11 +61,11 @@ The raw data captured by the accelerometer (a "time series" data) should be conv We should segment the data using a sliding window over the sample data for feature extraction. The project captured accelerometer data every 10 seconds with a sample rate of 62.5 Hz. A 2-second window captures 375 data points (3 axis x 2 seconds x 62.5 samples). The window is slid every 80ms, creating a larger dataset where each instance has 375 "raw features." 
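As a concrete illustration of this windowing arithmetic, the sketch below reproduces it in NumPy; the `sample` array is a synthetic placeholder for one 10-second capture, not data from the project.

``` python
# Minimal sketch of the sliding-window segmentation described above, assuming a
# (625, 3) array holding 10 s of accX/accY/accZ sampled at 62.5 Hz.
import numpy as np

fs = 62.5                      # sampling frequency in Hz
window = int(2 * fs)           # 2-second window -> 125 samples per axis
stride = int(0.080 * fs)       # 80 ms slide -> 5 samples

sample = np.random.randn(625, 3)   # placeholder for one captured sample

instances = []
for start in range(0, sample.shape[0] - window + 1, stride):
    segment = sample[start:start + window, :]      # shape (125, 3)
    instances.append(segment.flatten())            # 375 "raw features"

instances = np.array(instances)
print(instances.shape)   # (101, 375) for this capture length
```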
-![](images/png/v1.png){fig-align="center" width="6.5in"} +![](images/png/v1.png) On the Studio, the previous version (V1) of the **Spectral Analysis Block** extracted as time-domain features only the RMS, and for the frequency-domain, the peaks and frequency (using FFT) and the power characteristics (PSD) of the signal over time resulting in a fixed tabular dataset of 33 features (11 per each axis), -![](images/png/v1_features.png){fig-align="center" width="6.5in"} +![](images/png/v1_features.png) Those 33 features were the Input tensor of a Neural Network Classifier. @@ -122,11 +122,11 @@ axis = ['accX', 'accY', 'accZ'] n_sensors = len(axis) ``` -![](images/png/impulse.png){fig-align="center" width="5.6in"} +![](images/png/impulse.png) Selecting the *Raw Features* on the Studio Spectral Analysis tab, we can copy all 375 data points of a particular 2-second window to the clipboard. -![](images/png/features.png){fig-align="center" width="6.5in"} +![](images/png/features.png) Paste the data points to a new variable *data*: @@ -140,7 +140,7 @@ The total raw features are 375, but we will work with each axis individually, wh We aim to understand how Edge Impulse gets the processed features. -![](images/png/process_features.png){fig-align="center" width="4.57in"} +![](images/png/process_features.png) So, you should also past the processed features on a variable (to compare the calculated features in Python with the ones provided by the Studio) : @@ -182,7 +182,7 @@ sensors = [accX, accY, accZ] plot_data(sensors, axis, 'Raw Features') ``` -![](images/png/sample.png){fig-align="center" width="6.5in"} +![](images/png/sample.png) **Subtracting the mean** @@ -207,7 +207,7 @@ sensors = [accX, accY, accZ] plot_data(sensors, axis, 'Raw Features - Subctract the Mean') ``` -![](images/png/sample_no_mean.png){fig-align="center" width="6.5in"} +![](images/png/sample_no_mean.png) ## Time Domain Statistical features @@ -217,7 +217,7 @@ The RMS value of a set of values (or a continuous-time waveform) is the square r In the case of a set of n values {𝑥1, 𝑥2, ..., 𝑥𝑛}, the RMS is: -![](images/png/rms.png){fig-align="center"} +![](images/png/rms.png) > NOTE that the RMS value is different for the original raw data, and after subtracting the mean @@ -262,11 +262,11 @@ plt.suptitle('IMU Sensors distribution', fontsize=16, y=1.02) plt.show() ``` -![](images/png/skew.png){fig-align="center" width="6.5in"} +![](images/png/skew.png) [**Skewness**](https://en.wikipedia.org/wiki/Skewness) is a measure of the asymmetry of a distribution. This value can be positive or negative. -![](images/png/skew_2.png){fig-align="center" width="4.65in"} +![](images/png/skew_2.png) - A negative skew indicates that the tail is on the left side of the distribution, which extends towards more negative values. - A positive skew indicates that the tail is on the right side of the distribution, which extends towards more positive values. @@ -291,7 +291,7 @@ Compared with Edge Impulse result features: [**Kurtosis**](https://en.wikipedia.org/wiki/Kurtosis) is a measure of whether or not a distribution is heavy-tailed or light-tailed relative to a normal distribution. -![](images/png/kurto.png){fig-align="center"} +![](images/png/kurto.png) - The kurtosis of a normal distribution is zero. - If a given distribution has a negative kurtosis, it is said to be playkurtic, which means it tends to produce fewer and less extreme outliers than the normal distribution. 
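A small illustrative sketch can tie these time-domain statistics together; here `accX` is a stand-in for one mean-subtracted axis of a window, not the project's actual data.

``` python
# Minimal sketch gathering the time-domain statistics discussed above for one axis.
import numpy as np
from scipy.stats import skew, kurtosis

accX = np.random.randn(125)            # placeholder signal

rms = np.sqrt(np.mean(accX ** 2))      # root mean square
skewness = skew(accX)                  # asymmetry of the distribution
# scipy's default (Fisher) kurtosis is zero for a normal distribution,
# matching the description above.
kurt = kurtosis(accX)

print(f"RMS: {rms:.4f}  Skewness: {skewness:.4f}  Kurtosis: {kurt:.4f}")
```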
@@ -367,7 +367,7 @@ plt.box(False) plt.show() ``` -![](images/png/fft.png){fig-align="center" width="6.5in"} +![](images/png/fft.png) Besides the Power Spectrum, we can also include the skewness and kurtosis of the features in the frequency domain (should be available on a new version): @@ -447,7 +447,7 @@ Let's select Wavelet on the Spectral Features block in the same project: - Wavelet Decomposition Level: 1 - Wavelet: bior1.3 -![](images/png/fft_result.png){fig-align="center"} +![](images/png/fft_result.png) **The Wavelet Function** @@ -466,11 +466,11 @@ plt.box(False) plt.show() ``` -![](images/png/wav.png){fig-align="center" width="6.5in"} +![](images/png/wav.png) As we did before, let's copy and past the Processed Features: -![](images/png/wav_processed.png){fig-align="center" width="6.5in"} +![](images/png/wav_processed.png) ``` python features = [3.6251, 0.0615, 0.0615, -7.3517, -2.7641, 2.8462, 5.0924, ...] @@ -515,7 +515,7 @@ plt.box(False) plt.show() ``` -![](images/png/wavelet_input.png){fig-align="center" width="6.5in"} +![](images/png/wavelet_input.png) ### Feature Extraction @@ -610,7 +610,7 @@ all_feat_l1 = [item for sublist in all_feat_l1 for item in sublist] print(f"\nAll L1 Features = {len(all_feat_l1)}") ``` -![](images/png/wav_result.png){fig-align="center" width="3.58in"} +![](images/png/wav_result.png) ## Conclusion diff --git a/contents/efficient_ai/efficient_ai.qmd b/contents/efficient_ai/efficient_ai.qmd index 8457ffa64..49051dfd6 100644 --- a/contents/efficient_ai/efficient_ai.qmd +++ b/contents/efficient_ai/efficient_ai.qmd @@ -41,7 +41,7 @@ Training models can consume significant energy, sometimes comparable to the carb Efficiency takes on different connotations depending on where AI computations occur. Regarding efficiency, let's revisit and differentiate between Cloud, Edge, and TinyML. @fig-platforms provides a big-picture comparison of the three different platforms. -![Cloud, Mobile and TinyML. Credit: @schizas2022tinyml.](https://www.mdpi.com/futureinternet/futureinternet-14-00363/article_deploy/html/images/futureinternet-14-00363-g001-550.jpg){#fig-platforms} +![Cloud, Mobile and TinyML. Source: @schizas2022tinyml.](https://www.mdpi.com/futureinternet/futureinternet-14-00363/article_deploy/html/images/futureinternet-14-00363-g001-550.jpg){#fig-platforms} For cloud AI, traditional AI models often run in large-scale data centers equipped with powerful GPUs and TPUs [@barroso2019datacenter]. Here, efficiency pertains to optimizing computational resources, reducing costs, and ensuring timely data processing and results. However, relying on the cloud introduces latency, especially when dealing with large data streams that require uploading, processing, and downloading. @@ -82,7 +82,7 @@ Model compression methods are essential for bringing deep learning models to dev [Edge TPUs](https://cloud.google.com/edge-tpu) are a smaller, power-efficient version of Google's TPUs tailored for edge devices. They provide fast on-device ML inferencing for TensorFlow Lite models. Edge TPUs allow for low-latency, high-efficiency inference on edge devices like smartphones, IoT devices, and embedded systems. AI capabilities can be deployed in real-time applications without communicating with a central server, thus saving bandwidth and reducing latency. Consider the table in @fig-edge-tpu-perf. It shows the performance differences between running different models on CPUs versus a Coral USB accelerator. 
The Coral USB accelerator is an accessory by Google's Coral AI platform that lets developers connect Edge TPUs to Linux computers. Running inference on the Edge TPUs was 70 to 100 times faster than on CPUs. -![Accelerator vs CPU performance comparison. Credit: [TensorFlow Blog.](https://blog.tensorflow.org/2019/03/build-ai-that-works-offline-with-coral.html)](images/png/tflite_edge_tpu_perf.png){#fig-edge-tpu-perf} +![Accelerator vs CPU performance comparison. Source: [TensorFlow Blog.](https://blog.tensorflow.org/2019/03/build-ai-that-works-offline-with-coral.html)](images/png/tflite_edge_tpu_perf.png){#fig-edge-tpu-perf} **NN Accelerators:** Fixed-function neural network accelerators are hardware accelerators designed explicitly for neural network computations. They can be standalone chips or part of a larger system-on-chip (SoC) solution. By optimizing the hardware for the specific operations that neural networks require, such as matrix multiplications and convolutions, NN accelerators can achieve faster inference times and lower power consumption than general-purpose CPUs and GPUs. They are especially beneficial in TinyML devices with power or thermal constraints, such as smartwatches, micro-drones, or robotics. @@ -115,16 +115,36 @@ By retaining the 8-bit exponent of FP32, BF16 offers a similar range, which is c Efficient numerics is not just about reducing the bit-width of numbers but understanding the trade-offs between accuracy and efficiency. As machine learning models become more pervasive, especially in real-world, resource-constrained environments, the focus on efficient numerics will continue to grow. By thoughtfully selecting and leveraging the appropriate numeric precision, one can achieve robust model performance while optimizing for speed, memory, and energy. @tbl-precision summarizes these trade-offs. -| Precision | Pros | Cons | -|------------|-----------------------------------------------------------|--------------------------------------------------| -| **FP32** (Floating Point 32-bit) | Standard precision used in most deep learning frameworks.
High accuracy due to ample representational capacity.
Well-suited for training | High memory usage.
Slower inference times compared to quantized models.
Higher energy consumption. | -| **FP16** (Floating Point 16-bit) | Reduces memory usage compared to FP32.
Speeds up computations on hardware that supports FP16.
Often used in mixed-precision training to balance speed and accuracy. | Lower representational capacity compared to FP32.
Risk of numerical instability in some models or layers. | -| **INT8** (8-bit Integer) | Significantly reduced memory footprint compared to floating-point representations.
Faster inference if hardware supports INT8 computations.
Suitable for many post-training quantization scenarios. | Quantization can lead to some accuracy loss.
Requires careful calibration during quantization to minimize accuracy degradation. |
-| **INT4** (4-bit Integer) | Even lower memory usage than INT8. Further speedup potential for inference. | Higher risk of accuracy loss compared to INT8.
Calibration during quantization becomes more critical. | -| **Binary** | Minimal memory footprint (only 1 bit per parameter).
Extremely fast inference due to bitwise operations.
Power efficient. | Significant accuracy drop for many tasks.
Complex training dynamics due to extreme quantization. | -| **Ternary** | Low memory usage but slightly more than binary.
Offers a middle ground between representation and efficiency. | accuracy might still be lower than that of higher precision models.
Training dynamics can be complex. | - -: Comparing precision levels in deep learning. {#tbl-precision} ++------------------+--------------------------------------------------------------+--------------------------------------------------------------+ +| Precision | Pros | Cons | ++:=================+:=============================================================+:=============================================================+ +| FP32 (Floating | * Standard precision used in most deep learning frameworks. | * High memory usage. | +| Point 32-bit) | * High accuracy due to ample representational capacity. | * Slower inference times compared to quantized models. | +| | * Well-suited for training | * Higher energy consumption. | ++------------------+--------------------------------------------------------------+--------------------------------------------------------------+ +| FP16 (Floating | * Reduces memory usage compared to FP32. | * Lower representational capacity compared to FP32. | +| Point 16-bit) | * Speeds up computations on hardware that supports FP16. | * Risk of numerical instability in some models or layers. | +| | * Often used in mixed-precision training to balance speed | | +| | and accuracy. | | ++------------------+--------------------------------------------------------------+--------------------------------------------------------------+ +| INT8 (8-bit | * Significantly reduced memory footprint compared to | * Quantization can lead to some accuracy loss. | +| Integer) | floating-point representations. | * Requires careful calibration during quantization to | +| | * Faster inference if hardware supports INT8 computations. | minimize accuracy degradation. | +| | * Suitable for many post-training quantization scenarios. | | ++------------------+--------------------------------------------------------------+--------------------------------------------------------------+ +| INT4 (4-bit | * Even lower memory usage than INT8. | * Higher risk of accuracy loss compared to INT8. | +| Integer) | * Further speedup potential for inference. | * Calibration during quantization becomes more critical. | ++------------------+--------------------------------------------------------------+--------------------------------------------------------------+ +| Binary | * Minimal memory footprint (only 1 bit per parameter). | * Significant accuracy drop for many tasks. | +| | * Extremely fast inference due to bitwise operations. | * Complex training dynamics due to extreme quantization. | +| | * Power efficient. | | ++------------------+--------------------------------------------------------------+--------------------------------------------------------------+ +| Ternary | * Low memory usage but slightly more than binary. | * Accuracy might still be lower than that of higher | +| | * Offers a middle ground between representation and | precision models. | +| | efficiency. | * Training dynamics can be complex. | ++------------------+--------------------------------------------------------------+--------------------------------------------------------------+ + +: Comparing precision levels in deep learning. 
{#tbl-precision .striped .hover} ### Efficiency Benefits diff --git a/contents/frameworks/frameworks.qmd b/contents/frameworks/frameworks.qmd index 110bedf2f..cffb3acc7 100644 --- a/contents/frameworks/frameworks.qmd +++ b/contents/frameworks/frameworks.qmd @@ -40,25 +40,25 @@ Machine learning frameworks provide the tools and infrastructure to efficiently ML frameworks handle much of the complexity of model development through high-level APIs and domain-specific languages that allow practitioners to quickly construct models by combining pre-made components and abstractions. For example, frameworks like TensorFlow and PyTorch provide Python APIs to define neural network architectures using layers, optimizers, datasets, and more. This enables rapid iteration compared to coding every model detail from scratch. -A key capability framework offered is distributed training engines that can scale model training across clusters of GPUs and TPUs. This makes it feasible to train state-of-the-art models with billions or trillions of parameters on vast datasets. Frameworks also integrate with specialized hardware like NVIDIA GPUs to further accelerate training via optimizations like parallelization and efficient matrix operations. +A key capability offered by these frameworks is distributed training engines that can scale model training across clusters of GPUs and TPUs. This makes it feasible to train state-of-the-art models with billions or trillions of parameters on vast datasets. Frameworks also integrate with specialized hardware like NVIDIA GPUs to further accelerate training via optimizations like parallelization and efficient matrix operations. In addition, frameworks simplify deploying finished models into production through tools like [TensorFlow Serving](https://www.tensorflow.org/tfx/guide/serving) for scalable model serving and [TensorFlow Lite](https://www.tensorflow.org/lite) for optimization on mobile and edge devices. Other valuable capabilities include visualization, model optimization techniques like quantization and pruning, and monitoring metrics during training. -They were leading open-source frameworks like TensorFlow, PyTorch, and [MXNet](https://mxnet.apache.org/versions/1.9.1/), which power much of AI research and development today. Commercial offerings like [Amazon SageMaker](https://aws.amazon.com/pm/sagemaker/) and [Microsoft Azure Machine Learning](https://azure.microsoft.com/en-us/free/machine-learning/search/?ef_id=_k_CjwKCAjws9ipBhB1EiwAccEi1JVOThls797Sj3Li96_GYjoJQDx_EWaXNsDaEWeFbIaRkESUCkq64xoCSmwQAvD_BwE_k_&OCID=AIDcmm5edswduu_SEM__k_CjwKCAjws9ipBhB1EiwAccEi1JVOThls797Sj3Li96_GYjoJQDx_EWaXNsDaEWeFbIaRkESUCkq64xoCSmwQAvD_BwE_k_&gad=1&gclid=CjwKCAjws9ipBhB1EiwAccEi1JVOThls797Sj3Li96_GYjoJQDx_EWaXNsDaEWeFbIaRkESUCkq64xoCSmwQAvD_BwE) integrate these open source frameworks with proprietary capabilities and enterprise tools. +Leading open-source frameworks like TensorFlow, PyTorch, and [MXNet](https://mxnet.apache.org/versions/1.9.1/) power much of AI research and development today. 
Commercial offerings like [Amazon SageMaker](https://aws.amazon.com/pm/sagemaker/) and [Microsoft Azure Machine Learning](https://azure.microsoft.com/en-us/free/machine-learning/search/?ef_id=_k_CjwKCAjws9ipBhB1EiwAccEi1JVOThls797Sj3Li96_GYjoJQDx_EWaXNsDaEWeFbIaRkESUCkq64xoCSmwQAvD_BwE_k_&OCID=AIDcmm5edswduu_SEM__k_CjwKCAjws9ipBhB1EiwAccEi1JVOThls797Sj3Li96_GYjoJQDx_EWaXNsDaEWeFbIaRkESUCkq64xoCSmwQAvD_BwE_k_&gad=1&gclid=CjwKCAjws9ipBhB1EiwAccEi1JVOThls797Sj3Li96_GYjoJQDx_EWaXNsDaEWeFbIaRkESUCkq64xoCSmwQAvD_BwE) integrate these open source frameworks with proprietary capabilities and enterprise tools. Machine learning engineers and practitioners leverage these robust frameworks to focus on high-value tasks like model architecture, feature engineering, and hyperparameter tuning instead of infrastructure. The goal is to build and deploy performant models that solve real-world problems efficiently. -This chapter, we will explore today's leading cloud frameworks and how they have adapted models and tools specifically for embedded and edge deployment. We will compare programming models, supported hardware, optimization capabilities, and more to fully understand how frameworks enable scalable machine learning from the cloud to the edge. +In this chapter, we will explore today's leading cloud frameworks and how they have adapted models and tools specifically for embedded and edge deployment. We will compare programming models, supported hardware, optimization capabilities, and more to fully understand how frameworks enable scalable machine learning from the cloud to the edge. ## Framework Evolution -Machine learning frameworks have evolved significantly to meet the diverse needs of machine learning practitioners and advancements in AI techniques. A few decades ago, building and training machine learning models required extensive low-level coding and infrastructure. Machine learning frameworks have evolved considerably over the past decade to meet the expanding needs of practitioners and rapid advances in deep learning techniques. Insufficient data and computing power constrained early neural network research. Building and training machine learning models required extensive low-level coding and infrastructure. However, the release of large datasets like [ImageNet](https://www.image-net.org/) [@deng2009imagenet] and advancements in parallel GPU computing unlocked the potential for far deeper neural networks. +Machine learning frameworks have evolved significantly to meet the diverse needs of machine learning practitioners and advancements in AI techniques. A few decades ago, building and training machine learning models required extensive low-level coding and infrastructure. Alongside the need for low-level coding, early neural network research was constrained by insufficient data and computing power. However, machine learning frameworks have evolved considerably over the past decade to meet the expanding needs of practitioners and rapid advances in deep learning techniques. The release of large datasets like [ImageNet](https://www.image-net.org/) [@deng2009imagenet] and advancements in parallel GPU computing unlocked the potential for far deeper neural networks. -The first ML frameworks, [Theano](https://pypi.org/project/Theano/#:~:text=Theano) is a Python library, a similar interface to NumPy's.) by @al2016theano and [Caffe](https://caffe.berkeleyvision.org/) by @jia2014caffe, were developed by academic institutions (Montreal Institute for Learning Algorithms, Berkeley Vision and Learning Center). 
Amid growing interest in deep learning due to state-of-the-art performance of AlexNet @krizhevsky2012imagenet on the ImageNet dataset, private companies and individuals began developing ML frameworks, resulting in frameworks such as [Keras](https://keras.io/) by @chollet2018keras, [Chainer](https://chainer.org/) by @tokui2015chainer, TensorFlow from Google [@abadi2016tensorflow], [CNTK](https://learn.microsoft.com/en-us/cognitive-toolkit/) by Microsoft [@seide2016cntk], and PyTorch by Facebook [@paszke2019pytorch].
+The first ML frameworks, [Theano](https://pypi.org/project/Theano/#:~:text=Theano) by @al2016theano and [Caffe](https://caffe.berkeleyvision.org/) by @jia2014caffe, were developed by academic institutions. Theano was created by the Montreal Institute for Learning Algorithms, while Caffe was developed by the Berkeley Vision and Learning Center. Amid growing interest in deep learning due to state-of-the-art performance of AlexNet @krizhevsky2012imagenet on the ImageNet dataset, private companies and individuals began developing ML frameworks, resulting in frameworks such as [Keras](https://keras.io/) by @chollet2018keras, [Chainer](https://chainer.org/) by @tokui2015chainer, TensorFlow from Google [@abadi2016tensorflow], [CNTK](https://learn.microsoft.com/en-us/cognitive-toolkit/) by Microsoft [@seide2016cntk], and PyTorch by Facebook [@paszke2019pytorch].

Many of these ML frameworks can be divided into high-level vs. low-level frameworks and static vs. dynamic computational graph frameworks. High-level frameworks provide a higher level of abstraction than low-level frameworks. High-level frameworks have pre-built functions and modules for common ML tasks, such as creating, training, and evaluating common ML models, preprocessing data, engineering features, and visualizing data, which low-level frameworks do not have. Thus, high-level frameworks may be easier to use but are less customizable than low-level frameworks (i.e., users of low-level frameworks can define custom layers, loss functions, optimization algorithms, etc.). Examples of high-level frameworks include TensorFlow/Keras and PyTorch. Examples of low-level ML frameworks include TensorFlow with low-level APIs, Theano, Caffe, Chainer, and CNTK.

-Frameworks like Theano and Caffe used static computational graphs, which required rigidly defining the full model architecture upfront. Static graphs require upfront declaration and limit flexibility. Dynamic graphs are constructed on the fly for more iterative development. However, around 2016, frameworks began adopting dynamic graphs like PyTorch and TensorFlow 2.0, which can construct graphs on the fly. This provides greater flexibility for model development. We will discuss these concepts and details later in the AI Training section.
+Frameworks like Theano and Caffe used static computational graphs, which required defining the full model architecture upfront, thus limiting flexibility. In contrast, dynamic graphs are constructed on the fly for more iterative development. Around 2016, frameworks like PyTorch and TensorFlow 2.0 began adopting dynamic graphs, providing greater flexibility for model development. We will discuss these concepts and details later in the AI Training section.

The development of these frameworks facilitated an explosion in model size and complexity over time---from early multilayer perceptrons and convolutional networks to modern transformers with billions or trillions of parameters.
In 2016, ResNet models by @he2016deep achieved record ImageNet accuracy with over 150 layers and 25 million parameters. Then, in 2020, the GPT-3 language model from OpenAI [@brown2020language] pushed parameters to an astonishing 175 billion using model parallelism in frameworks to train across thousands of GPUs and TPUs. @@ -76,13 +76,13 @@ Each generation of frameworks unlocked new capabilities that powered advancement In recent years, the frameworks have converged. @fig-ml-framework shows that TensorFlow and PyTorch have become the overwhelmingly dominant ML frameworks, representing more than 95% of ML frameworks used in research and production. Keras was integrated into TensorFlow in 2019; Preferred Networks transitioned Chainer to PyTorch in 2019; and Microsoft stopped actively developing CNTK in 2022 to support PyTorch on Windows. -![Popularity of ML frameworks in the United States as measured by Google web searches. Credit: Google.](images/png/image6.png){#fig-ml-framework} +![Popularity of ML frameworks in the United States as measured by Google web searches. Source: Google.](images/png/image6.png){#fig-ml-framework} -However, a one-size-fits-all approach only works well across the spectrum from cloud to tiny edge devices. Different frameworks represent various philosophies around graph execution, declarative versus imperative APIs, and more. Declaratives define what the program should do, while imperatives focus on how it should be done step-by-step. For instance, TensorFlow uses graph execution and declarative-style modeling, while PyTorch adopts eager execution and imperative modeling for more Pythonic flexibility. Each approach carries tradeoffs, which we will discuss later in the Basic Components section. +A one-size-fits-all approach does not work well across the spectrum from cloud to tiny edge devices. Different frameworks represent various philosophies around graph execution, declarative versus imperative APIs, and more. Declaratives define what the program should do, while imperatives focus on how it should be done step-by-step. For instance, TensorFlow uses graph execution and declarative-style modeling, while PyTorch adopts eager execution and imperative modeling for more Pythonic flexibility. Each approach carries tradeoffs which we will discuss in @sec-pytorch_vs_tensorflow. -Today's advanced frameworks enable practitioners to develop and deploy increasingly complex models - a key driver of innovation in the AI field. However, they continue to evolve and expand their capabilities for the next generation of machine learning. To understand how these systems continue to evolve, we will dive deeper into TensorFlow as an example of how the framework grew in complexity over time. +Today's advanced frameworks enable practitioners to develop and deploy increasingly complex models - a key driver of innovation in the AI field. These frameworks continue to evolve and expand their capabilities for the next generation of machine learning. To understand how these systems continue to evolve, we will dive deeper into TensorFlow as an example of how the framework grew in complexity over time. -## DeepDive into TensorFlow +## Deep Dive into TensorFlow {#sec-deep_dive_into_tensorflow} TensorFlow was developed by the Google Brain team and was released as an open-source software library on November 9, 2015. It was designed for numerical computation using data flow graphs and has since become popular for a wide range of machine learning and deep learning applications. 
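The declarative (graph) versus imperative (eager) distinction mentioned above can be made concrete with a brief sketch; it is illustrative only, assumes both frameworks are installed, and the values are arbitrary.

``` python
# Minimal sketch: TensorFlow tracing a computation into a data flow graph via
# tf.function (declarative style) versus PyTorch executing the same computation
# eagerly, statement by statement (imperative style).
import tensorflow as tf
import torch

@tf.function  # traced into a graph before execution
def tf_affine(x, w, b):
    return tf.matmul(x, w) + b

x_tf = tf.constant([[1.0, 2.0]])
w_tf = tf.constant([[0.5], [0.25]])
b_tf = tf.constant([0.1])
print(tf_affine(x_tf, w_tf, b_tf))   # graph is built, then run

# PyTorch runs the equivalent operations eagerly, one at a time.
x_pt = torch.tensor([[1.0, 2.0]])
w_pt = torch.tensor([[0.5], [0.25]])
b_pt = torch.tensor([0.1])
print(x_pt @ w_pt + b_pt)
```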
@@ -90,33 +90,35 @@ TensorFlow is a training and inference framework that provides built-in function ### TF Ecosystem -1. [TensorFlow Core](https://www.tensorflow.org/tutorials): primary package that most developers engage with. It provides a comprehensive, flexible platform for defining, training, and deploying machine learning models. It includes tf—keras as its high-level API. +1. [TensorFlow Core](https://www.tensorflow.org/tutorials): primary package that most developers engage with. It provides a comprehensive, flexible platform for defining, training, and deploying machine learning models. It includes [tf.keras](https://www.tensorflow.org/guide/keras) as its high-level API. -2. [TensorFlow Lite] (): designed for deploying lightweight models on mobile, embedded, and edge devices. It offers tools to convert TensorFlow models to a more compact format suitable for limited-resource devices and provides optimized pre-trained models for mobile. +2. [TensorFlow Lite](): designed for deploying lightweight models on mobile, embedded, and edge devices. It offers tools to convert TensorFlow models to a more compact format suitable for limited-resource devices and provides optimized pre-trained models for mobile. -3. [TensorFlow.js](https://www.tensorflow.org/js): JavaScript library that allows training and deployment of machine learning models directly in the browser or on Node.js. It also provides tools for porting pre-trained TensorFlow models to the browser-friendly format. +3. [TensorFlow Lite Micro](https://www.tensorflow.org/lite/microcontrollers): designed for running machine learning models on microcontrollers with minimal resources. It operates without the need for operating system support, standard C or C++ libraries, or dynamic memory allocation, using only a few kilobytes of memory. -4. [TensorFlow on Edge Devices (Coral)](https://developers.googleblog.com/2019/03/introducing-coral-our-platform-for.html): platform of hardware components and software tools from Google that allows the execution of TensorFlow models on edge devices, leveraging Edge TPUs for acceleration. +4. [TensorFlow.js](https://www.tensorflow.org/js): JavaScript library that allows training and deployment of machine learning models directly in the browser or on Node.js. It also provides tools for porting pre-trained TensorFlow models to the browser-friendly format. -5. [TensorFlow Federated (TFF)](https://www.tensorflow.org/federated): framework for machine learning and other computations on decentralized data. TFF facilitates federated learning, allowing model training across many devices without centralizing the data. +5. [TensorFlow on Edge Devices (Coral)](https://developers.googleblog.com/2019/03/introducing-coral-our-platform-for.html): platform of hardware components and software tools from Google that allows the execution of TensorFlow models on edge devices, leveraging Edge TPUs for acceleration. -6. [TensorFlow Graphics](https://www.tensorflow.org/graphics): library for using TensorFlow to carry out graphics-related tasks, including 3D shapes and point clouds processing, using deep learning. +6. [TensorFlow Federated (TFF)](https://www.tensorflow.org/federated): framework for machine learning and other computations on decentralized data. TFF facilitates federated learning, allowing model training across many devices without centralizing the data. -7. 
[TensorFlow Hub](https://www.tensorflow.org/hub): repository of reusable machine learning model components to allow developers to reuse pre-trained model components, facilitating transfer learning and model composition +7. [TensorFlow Graphics](https://www.tensorflow.org/graphics): library for using TensorFlow to carry out graphics-related tasks, including 3D shapes and point clouds processing, using deep learning. -8. [TensorFlow Serving](https://www.tensorflow.org/tfx/guide/serving): framework designed for serving and deploying machine learning models for inference in production environments. It provides tools for versioning and dynamically updating deployed models without service interruption. +8. [TensorFlow Hub](https://www.tensorflow.org/hub): repository of reusable machine learning model components to allow developers to reuse pre-trained model components, facilitating transfer learning and model composition. -9. [TensorFlow Extended (TFX)](https://www.tensorflow.org/tfx): end-to-end platform designed to deploy and manage machine learning pipelines in production settings. TFX encompasses data validation, preprocessing, model training, validation, and serving components. +9. [TensorFlow Serving](https://www.tensorflow.org/tfx/guide/serving): framework designed for serving and deploying machine learning models for inference in production environments. It provides tools for versioning and dynamically updating deployed models without service interruption. -![Architecture overview of TensorFlow 2.0. Credit: [Tensorflow.](https://blog.tensorflow.org/2019/01/whats-coming-in-tensorflow-2-0.html)](images/png/tensorflow.png){#fig-tensorflow-architecture} +10. [TensorFlow Extended (TFX)](https://www.tensorflow.org/tfx): end-to-end platform designed to deploy and manage machine learning pipelines in production settings. TFX encompasses data validation, preprocessing, model training, validation, and serving components. + +![Architecture overview of TensorFlow 2.0. Source: [Tensorflow.](https://blog.tensorflow.org/2019/01/whats-coming-in-tensorflow-2-0.html)](images/png/tensorflow.png){#fig-tensorflow-architecture} TensorFlow was developed to address the limitations of DistBelief [@abadi2016tensorflow]---the framework in use at Google from 2011 to 2015---by providing flexibility along three axes: 1) defining new layers, 2) refining training algorithms, and 3) defining new training algorithms. To understand what limitations in DistBelief led to the development of TensorFlow, we will first give a brief overview of the Parameter Server Architecture that DistBelief employed [@dean2012large]. -The Parameter Server (PS) architecture is a popular design for distributing the training of machine learning models, especially deep neural networks, across multiple machines. The fundamental idea is to separate the storage and management of model parameters from the computation used to update these parameters: +The Parameter Server (PS) architecture is a popular design for distributing the training of machine learning models, especially deep neural networks, across multiple machines. The fundamental idea is to separate the storage and management of model parameters from the computation used to update these parameters. Typically, parameter servers handle the storage and management of model parameters, partitioning them across multiple servers. Worker processes perform the computational tasks, including data processing and computation of gradients, which are then sent back to the parameter servers for updating. 
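To make this division of labor concrete, here is a minimal, framework-free sketch of the parameter server pattern. The class and method names (`ParameterServer`, `Worker`, `pull`, `push`) are invented for illustration and do not correspond to DistBelief's actual interfaces; the storage and computation roles it mimics are described in more detail below.

```python
import numpy as np

class ParameterServer:
    """Stateful process that stores one shard of the model parameters."""

    def __init__(self, num_params):
        self.params = np.zeros(num_params)

    def pull(self):
        # Workers fetch the latest parameter values
        return self.params.copy()

    def push(self, gradients, lr=0.01):
        # Gradients sent back by workers update the stored parameters
        self.params -= lr * gradients


class Worker:
    """Stateless process that computes gradients on its share of the data."""

    def compute_gradients(self, params, data_batch):
        # Placeholder for a real forward/backward pass over data_batch
        return np.random.randn(len(params))


server = ParameterServer(num_params=4)
workers = [Worker() for _ in range(3)]

for step in range(10):
    current = server.pull()
    for worker in workers:
        grads = worker.compute_gradients(current, data_batch=None)
        server.push(grads)
```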
-**Storage:** The stateful parameter server processes handled the storage and management of model parameters. Given the large scale of models and the system's distributed nature, these parameters were sharded across multiple parameter servers. Each server maintained a portion of the model parameters, making it \"stateful\" as it had to maintain and manage this state across the training process. +**Storage:** The stateful parameter server processes handled the storage and management of model parameters. Given the large scale of models and the system's distributed nature, these parameters were sharded across multiple parameter servers. Each server maintained a portion of the model parameters, making it \"stateful\" as it had to maintain and manage this state across the training process. -**Computation:** The worker processes, which could be run in parallel, were stateless and purely computational. They processed data and computed gradients without maintaining any state or long-term memory [@li2014communication]. +**Computation:** The worker processes, which could be run in parallel, were stateless and purely computational. They processed data and computed gradients without maintaining any state or long-term memory [@li2014communication]. Workers did not retain information between different tasks. Instead, they periodically communicated with the parameter servers to retrieve the latest parameters and send back computed gradients. :::{#exr-tfc .callout-caution collapse=\"true\"}
As mentioned above, dataflow graphs' structure also makes them inherently good for parallelism, allowing for the processing of large datasets. ### Built-in Functionality & Keras @@ -174,43 +176,125 @@ Here, we'll learn how to use Keras, a high-level neural network API, for model d ### Limitations and Challenges -TensorFlow is one of the most popular deep learning frameworks but has criticisms and weaknesses, mostly focusing on usability and resource usage. While advantageous, the rapid pace of updates through its support from Google has sometimes led to backward compatibility issues, deprecated functions, and shifting documentation. Additionally, even with the Keras implementation, TensorFlow's syntax and learning curve can be difficult for new users. One major critique of TensorFlow is its high overhead and memory consumption due to the range of built-in libraries and support. Some of these concerns can be addressed using pared-down versions, but they can still be limited in resource-constrained environments. +TensorFlow is one of the most popular deep learning frameworks, but it has faced criticisms and weaknesses, primarily related to usability and resource usage. While advantageous, the rapid pace of updates through its support from Google has sometimes led to backward compatibility issues, deprecated functions, and shifting documentation. Additionally, even with the Keras implementation, TensorFlow's syntax and learning curve can be difficult for new users. Another major critique of TensorFlow is its high overhead and memory consumption due to the range of built-in libraries and support. While pared-down versions can address some of these concerns, they may still be limited in resource-constrained environments. -### PyTorch vs. TensorFlow +### PyTorch vs. TensorFlow {#sec-pytorch_vs_tensorflow} PyTorch and TensorFlow have established themselves as frontrunners in the industry. Both frameworks offer robust functionalities but differ in design philosophies, ease of use, ecosystem, and deployment capabilities. **Design Philosophy and Programming Paradigm:** PyTorch uses a dynamic computational graph termed eager execution. This makes it intuitive and facilitates debugging since operations are executed immediately and can be inspected on the fly. In comparison, earlier versions of TensorFlow were centered around a static computational graph, which required the graph's complete definition before execution. However, TensorFlow 2.0 introduced eager execution by default, making it more aligned with PyTorch. PyTorch's dynamic nature and Python-based approach have enabled its simplicity and flexibility, particularly for rapid prototyping. TensorFlow's static graph approach in its earlier versions had a steeper learning curve; the introduction of TensorFlow 2.0, with its Keras integration as the high-level API, has significantly simplified the development process. -**Deployment:** PyTorch is heavily favored in research environments; deploying PyTorch models in production settings was traditionally challenging. However, deployment has become more feasible with the introduction of TorchScript and the TorchServe tool. One of TensorFlow's strengths lies in its scalability and deployment capabilities, especially on embedded and mobile platforms with TensorFlow Lite. TensorFlow Serving and TensorFlow.js further facilitate deployment in various environments, thus giving it a broader reach in the ecosystem. 
+**Deployment:** PyTorch is heavily favored in research environments, but deploying PyTorch models in production settings has traditionally been challenging. However, deployment has become more feasible with the introduction of TorchScript, the TorchServe tool, and [PyTorch Mobile](https://pytorch.org/mobile/home/). TensorFlow stands out for its strong scalability and deployment capabilities, particularly on embedded and mobile platforms with TensorFlow Lite. TensorFlow Serving and TensorFlow.js further facilitate deployment in various environments, thus giving it a broader reach in the ecosystem. **Performance:** Both frameworks offer efficient hardware acceleration for their operations. However, TensorFlow has a slightly more robust optimization workflow, such as the XLA (Accelerated Linear Algebra) compiler, which can further boost performance. Its static computational graph was also advantageous for certain optimizations in the early versions. -**Ecosystem:** PyTorch has a growing ecosystem with tools like TorchServe for serving models and libraries like TorchVision, TorchText, and TorchAudio for specific domains. As we mentioned earlier, TensorFlow has a broad and mature ecosystem. TensorFlow Extended (TFX) provides an end-to-end platform for deploying production machine learning pipelines. Other tools and libraries include TensorFlow Lite, TensorFlow.js, TensorFlow Hub, and TensorFlow Serving. +**Ecosystem:** PyTorch has a growing ecosystem with tools like TorchServe for serving models and libraries like TorchVision, TorchText, and TorchAudio for specific domains. As we mentioned earlier, TensorFlow has a broad and mature ecosystem. TensorFlow Extended (TFX) provides an end-to-end platform for deploying production machine learning pipelines. Other tools and libraries include TensorFlow Lite, TensorFlow Lite Micro, TensorFlow.js, TensorFlow Hub, and TensorFlow Serving. @tbl-pytorch_vs_tf provides a comparative analysis: -| Feature/Aspect | PyTorch | TensorFlow | -|-----------------------------|------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------| -| Design Philosophy | Dynamic computational graph (eager execution) | Static computational graph (early versions); Eager execution in TensorFlow 2.0 | -| Deployment | Traditionally challenging; Improved with TorchScript & TorchServe | Scalable, especially on embedded platforms with TensorFlow Lite | -| Performance & Optimization | Efficient GPU acceleration | Robust optimization with XLA compiler | -| Ecosystem | TorchServe, TorchVision, TorchText, TorchAudio | TensorFlow Extended (TFX), TensorFlow Lite, TensorFlow.js, TensorFlow Hub, TensorFlow Serving | -| Ease of Use | Preferred for its Pythonic approach and rapid prototyping | Initially steep learning curve; Simplified with Keras in TensorFlow 2.0 | - -: Comparison of PyTorch and TensorFlow. 
{#tbl-pytorch_vs_tf} ++-------------------------------+--------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------+ +| Aspect | Pytorch | TensorFlow | ++:==============================+:===================================================================+:====================================================================================================================+ +| Design Philosophy | Dynamic computational graph (eager execution) | Static computational graph (early versions); Eager execution in TensorFlow 2.0 | ++-------------------------------+--------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------+ +| Deployment | Traditionally challenging; Improved with TorchScript & TorchServe | Scalable, especially on embedded platforms with TensorFlow Lite | ++-------------------------------+--------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------+ +| Performance & Optimization | Efficient GPU acceleration | Robust optimization with XLA compiler | ++-------------------------------+--------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------+ +| Ecosystem | TorchServe, TorchVision, TorchText, TorchAudio, PyTorch Mobile | TensorFlow Extended (TFX), TensorFlow Lite, TensorFlow Lite Micro TensorFlow.js, TensorFlow Hub, TensorFlow Serving | ++-------------------------------+--------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------+ +| Ease of Use | Preferred for its Pythonic approach and rapid prototyping | Initially steep learning curve; Simplified with Keras in TensorFlow 2.0 | ++-------------------------------+--------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------+ + +: Comparison of PyTorch and TensorFlow. {#tbl-pytorch_vs_tf .striped .hover} ## Basic Framework Components +Having introduced the popular machine learning frameworks and provided a high-level comparison, this section will introduce you to the core functionalities that form the fabric of these frameworks. It will cover the special structure called tensors, which these frameworks use to handle complex multi-dimensional data more easily. You will also learn how these frameworks represent different types of neural network architectures and their required operations through computational graphs. Additionally, you will see how they offer tools that make the development of machine learning models more abstract and efficient, such as data loaders, implemented loss optimization algorithms, efficient differentiation techniques, and the ability to accelerate your training process on hardware accelerators. + ### Tensor data structures To understand tensors, let us start from the familiar concepts in linear algebra. As demonstrated in @fig-tensor-data-structure, vectors can be represented as a stack of numbers in a 1-dimensional array. 
Matrices follow the same idea, and one can think of them as many vectors stacked on each other, making them 2 dimensional. Higher dimensional tensors work the same way. A 3-dimensional tensor is simply a set of matrices stacked on each other in another direction. Therefore, vectors and matrices can be considered special cases of tensors with 1D and 2D dimensions, respectively. ![Visualization of Tensor Data Structure.](images/png/image2.png){#fig-tensor-data-structure} -Defining formally, in machine learning, tensors are a multi-dimensional array of numbers. The number of dimensions defines the rank of the tensor. As a generalization of linear algebra, the study of tensors is called multilinear algebra. There are noticeable similarities between matrices and higher-ranked tensors. First, extending the definitions given in linear algebra to tensors, such as with eigenvalues, eigenvectors, and rank (in the linear algebra sense), is possible. Furthermore, with the way we have defined tensors, it is possible to turn higher dimensional tensors into matrices. This is critical in practice, as the multiplication of abstract representations of higher dimensional tensors is often completed by first converting them into matrices for multiplication. +Tensors offer a flexible structure that can represent data in higher dimensions. For instance, to represent image data, the pixels at each position of an image are structured as matrices. However, images are not represented by just one matrix of pixel values; they typically have three channels where each channel is a matrix containing pixel values that represent the intensity of red, green, or blue. Together, these channels create a colored image. Without tensors, storing all this information from multiple matrices can be complex. With tensors, it is easy to contain image data in a single 3-dimensional tensor, with each number representing a certain color value at a specific location in the image. + +![Visualization of colored image structure that can be easily stored as a 3D Tensor. Source: [Niklas Lang](https://towardsdatascience.com/what-are-tensors-in-machine-learning-5671814646ff)](images/png/color_channels_of_image.png){#fig-color-channels} + + +You don't have to stop there. If we wanted to store a series of images, we could use a 4-dimensional tensor, where the new dimension represents different images. This means you are storing multiple images, each having three matrices that represent the three color channels. This gives you an idea of the usefulness of tensors when dealing with multi-dimensional data efficiently. + +Tensors also have a unique attribute that enables frameworks to automatically compute gradients, simplifying the implementation of complex models and optimization algorithms. In machine learning, as discussed in [Chapter 3](../dl_primer/dl_primer.qmd#sec-backward_pass), backpropagation requires taking the derivative of equations. One of the key features of tensors in PyTorch and TensorFlow is their ability to track computations and calculate gradients. This is crucial for backpropagation in neural networks. For example, in PyTorch, you can use the `requires_grad` attribute, which allows you to automatically compute and store gradients during the backward pass, facilitating the optimization process. Similarly, in TensorFlow, `tf.GradientTape` records operations for automatic differentiation. + +Consider this simple mathematical equation that you want to differentiate.
Mathematically, you can compute the gradient in the following way: + +Given: +$$ +y = x^2 +$$ + +The derivative of $y$ with respect to $x$ is: +$$ +\frac{dy}{dx} = 2x +$$ + +When $x = 2$: +$$ +\frac{dy}{dx} = 2*2 = 4 +$$ + +The gradient of $y$ with respect to $x$, at $x = 2$, is 4. + +A powerful feature of tensors in PyTorch and TensorFlow is their ability to easily compute derivatives (gradients). Here are the corresponding code examples in PyTorch and TensorFlow: + +::: {.panel-tabset} + +### PyTorch + +```python +import torch + +# Create a tensor with gradient tracking +x = torch.tensor(2.0, requires_grad=True) + +# Define a simple function +y = x ** 2 + +# Compute the gradient +y.backward() + +# Print the gradient +print(x.grad) + +# Output +tensor(4.0) +``` + +### TensorFlow + +```python +import tensorflow as tf + +# Create a tensor with gradient tracking +x = tf.Variable(2.0) + +# Define a simple function +with tf.GradientTape() as tape: + y = x ** 2 -Tensors offer a flexible data structure that can represent data in higher dimensions. For example, to represent color image data, for each pixel value (in 2 dimensions), one needs the color values for red, green, and blue. With tensors, it is easy to contain image data in a single 3-dimensional tensor, with each number within it representing a certain color value in a certain location of the image. Extending even further, if we wanted to store a series of images, we could extend the dimensions such that the new dimension (to create a 4-dimensional tensor) represents our different images. This is exactly what the famous [MNIST](https://www.tensorflow.org/datasets/catalog/mnist) dataset does, loading a single 4-dimensional tensor when one calls to load the dataset, allowing a compact representation of all the data in one place. +# Compute the gradient +grad = tape.gradient(y, x) + +# Print the gradient +print(grad) + +# Output +tf.Tensor(4.0, shape=(), dtype=float32) +``` +:::: + +This automatic differentiation is a powerful feature of tensors in frameworks like PyTorch and TensorFlow, making it easier to implement and optimize complex machine learning models. ### Computational graphs @@ -218,13 +302,15 @@ Tensors offer a flexible data structure that can represent data in higher dimens Computational graphs are a key component of deep learning frameworks like TensorFlow and PyTorch. They allow us to express complex neural network architectures efficiently and differentiatedly. A computational graph consists of a directed acyclic graph (DAG) where each node represents an operation or variable, and edges represent data dependencies between them. +It's important to differentiate computational graphs from neural network diagrams, such as those for multilayer perceptrons (MLPs), which depict nodes and layers. Neural network diagrams, as depicted in [Chapter 3](../dl_primer/dl_primer.qmd), visualize the architecture and flow of data through nodes and layers, providing an intuitive understanding of the model's structure. In contrast, computational graphs provide a low-level representation of the underlying mathematical operations and data dependencies required to implement and train these networks. + For example, a node might represent a matrix multiplication operation, taking two input matrices (or tensors) and producing an output matrix (or tensor). To visualize this, consider the simple example in @fig-computational-graph. The directed acyclic graph above computes $z = x \times y$, where each variable is just numbers. 
![Basic example of a computational graph.](images/png/image1.png){#fig-computational-graph width="50%" height="auto" align="center"} -Underneath the hood, the computational graphs represent abstractions for common layers like convolutional, pooling, recurrent, and dense layers, with data including activations, weights, and biases represented in tensors. Convolutional layers form the backbone of CNN models for computer vision. They detect spatial patterns in input data through learned filters. Recurrent layers like LSTMs and GRUs enable sequential data processing for tasks like language translation. Attention layers are used in transformers to draw global context from the entire input. +Frameworks like TensorFlow and PyTorch create computational graphs to implement the architectures of neural networks that we typically represent with diagrams. When you define a neural network layer in code (e.g., a dense layer in TensorFlow), the framework constructs a computational graph that includes all the necessary operations (such as matrix multiplication, addition, and activation functions) and their data dependencies. This graph enables the framework to efficiently manage the flow of data, optimize the execution of operations, and automatically compute gradients for training. Underneath the hood, the computational graphs represent abstractions for common layers like convolutional, pooling, recurrent, and dense layers, with data including activations, weights, and biases represented in tensors. This representation allows for efficient computation, leveraging the structure of the graph to parallelize operations and apply optimizations. -Layers are higher-level abstractions that define computations on top of those tensors. For example, a Dense layer performs a matrix multiplication and addition between input/weight/bias tensors. Note that a layer operates on tensors as inputs and outputs; the layer is not a tensor. Some key differences: +Some common layers that computational graphs might implement include convolutional layers, attention layers, recurrent layers, and dense layers. Layers serve as higher-level abstractions that define specific computations on top of the basic operations represented in the graph. For example, a Dense layer performs matrix multiplication and addition between input, weight, and bias tensors. It is important to note that a layer operates on tensors as inputs and outputs; the layer itself is not a tensor. Some key differences between layers and tensors are: * Layers contain states like weights and biases. Tensors are stateless, just holding data. @@ -242,7 +328,7 @@ In addition, computational graphs include activation functions like ReLU, sigmoi In recent years, models like ResNets and MobileNets have emerged as popular architectures, with current frameworks pre-packaging these as computational graphs. Rather than worrying about the fine details, developers can utilize them as a starting point, customizing as needed by substituting layers. This simplifies and speeds up model development, avoiding reinventing architectures from scratch. Predefined models include well-tested, optimized implementations that ensure good performance. Their modular design also enables transferring learned features to new tasks via transfer learning. These predefined architectures provide high-performance building blocks to create robust models quickly. -These layer abstractions, activation functions, and predefined architectures the frameworks provide constitute a computational graph. 
When a user defines a layer in a framework (e.g., tf.keras.layers.Dense()), the framework configures computational graph nodes and edges to represent that layer. The layer parameters like weights and biases become variables in the graph. The layer computations become operation nodes (such as the x and y in the figure above). When you call an activation function like tf.nn.relu(), the framework adds a ReLU operation node to the graph. Predefined architectures are just pre-configured subgraphs that can be inserted into your model's graph. Thus, model definition via high-level abstractions creates a computational graph—the layers, activations, and architectures we use become graph nodes and edges. +These layer abstractions, activation functions, and predefined architectures the frameworks provide constitute a computational graph. When a user defines a layer in a framework (e.g., `tf.keras.layers.Dense()`), the framework configures computational graph nodes and edges to represent that layer. The layer parameters like weights and biases become variables in the graph. The layer computations become operation nodes (such as the x and y in the figure above). When you call an activation function like `tf.nn.relu()`, the framework adds a ReLU operation node to the graph. Predefined architectures are just pre-configured subgraphs that can be inserted into your model's graph. Thus, model definition via high-level abstractions creates a computational graph—the layers, activations, and architectures we use become graph nodes and edges. We implicitly construct a computational graph when defining a neural network architecture in a framework. The framework uses this graph to determine operations to run during training and inference. Computational graphs bring several advantages over raw code, and that's one of the core functionalities that is offered by a good ML framework: @@ -271,7 +357,11 @@ x = tf.placeholder(tf.float32) y = tf.matmul(x, weights) + biases ``` -The model is defined separately from execution, like building a blueprint. For TensorFlow 1. x, this is done using tf.Graph(). All ops and variables must be declared upfront. Subsequently, the graph is compiled and optimized before running. Execution is done later by feeding in tensor values. +In this example, x is a placeholder for input data, and y is the result of a matrix multiplication operation followed by an addition. The model is defined in this declaration phase, where all operations and variables must be specified upfront. + +Once the entire graph is defined, the framework compiles and optimizes it. This means that the computational steps are set in stone, and the framework can apply various optimizations to improve efficiency and performance. When you later execute the graph, you provide the actual input tensors, and the pre-defined operations are carried out in the optimized sequence. + +This approach is similar to building a blueprint where every detail is planned before construction begins. While this allows for powerful optimizations, it also means that any changes to the model require redefining the entire graph from scratch. **Dynamic graphs (define-by-run):** Unlike declaring (all) first and then executing, the graph is built dynamically as execution happens. There is no separate declaration phase - operations execute immediately as defined. This style is imperative and flexible, facilitating experimentation. 
@@ -284,9 +374,7 @@ y = torch.matmul(x, weights) + biases The above example does not have separate compile/build/run phases. Ops define and execute immediately. With dynamic graphs, the definition is intertwined with execution, providing a more intuitive, interactive workflow. However, the downside is that there is less potential for optimization since the framework only sees the graph as it is built. -Recently, however, the distinction has blurred as frameworks adopt both modes. TensorFlow 2.0 defaults to dynamic graph mode while letting users work with static graphs when needed. Dynamic declaration makes frameworks easier to use, while static models provide optimization benefits. The ideal framework offers both options. - -Static graph declaration provides optimization opportunities but less interactivity. While dynamic execution offers flexibility and ease of use, it may have performance overhead. Here is a table comparing the pros and cons of static vs dynamic execution graphs: +Recently, the distinction has blurred as frameworks adopt both modes. TensorFlow 2.0 defaults to dynamic graph mode while letting users work with static graphs when needed. Dynamic declaration offers flexibility and ease of use, making frameworks more user-friendly, while static graphs provide optimization benefits at the cost of interactivity. The ideal framework balances these approaches. Here is a table comparing the pros and cons of static vs dynamic execution graphs: | Execution Graph | Pros | Cons | | --- | --- | --- | @@ -299,7 +387,8 @@ Computational graphs can only be as good as the data they learn from and work on #### Data Loaders -These pipelines' cores are data loaders, which handle reading examples from storage formats like CSV files or image folders. Reading training examples from sources like files, databases, object storage, etc., is the job of the data loaders. Deep learning models require diverse data formats depending on the application. Among the popular formats is + +At the core of these pipelines are data loaders, which handle reading training examples from sources like files, databases, and object storage. Data loaders facilitate efficient data loading and preprocessing, crucial for deep learning models. For instance, TensorFlow's [tf.data](https://www.tensorflow.org/guide/data) dataloading pipeline is designed to manage this process. Depending on the application, deep learning models require diverse data formats such as CSV files or image folders. Some popular formats include: * CSV, a versatile, simple format often used for tabular data. @@ -311,8 +400,6 @@ These pipelines' cores are data loaders, which handle reading examples from stor * WAV/MP3: Prevalent formats for audio data. -For instance, `tf.data` is TensorFlows's dataloading pipeline: . - Data loaders batch examples to leverage vectorization support in hardware. Batching refers to grouping multiple data points for simultaneous processing, leveraging the vectorized computation capabilities of hardware like GPUs. While typical batch sizes range from 32 to 512 examples, the optimal size often depends on the data's memory footprint and the specific hardware constraints. Advanced loaders can stream virtually unlimited datasets from disk and cloud storage. They stream large datasets from disks or networks instead of fully loading them into memory, enabling unlimited dataset sizes. 
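Putting these pieces together, a minimal `tf.data` input pipeline might look like the following sketch. The in-memory NumPy arrays stand in for data that would normally be read from files or object storage, and the shuffling and prefetching calls correspond to the loader features discussed next.

```python
import numpy as np
import tensorflow as tf

# Stand-ins for features and labels that would normally be read from
# CSV/TFRecord files, a database, or object storage.
features = np.random.rand(1000, 32).astype("float32")
labels = np.random.randint(0, 2, size=(1000,)).astype("int32")

dataset = (
    tf.data.Dataset.from_tensor_slices((features, labels))
    .shuffle(buffer_size=1000)      # randomize example order each epoch
    .batch(64)                      # group examples for vectorized hardware
    .prefetch(tf.data.AUTOTUNE)     # overlap data preparation with training
)

for batch_features, batch_labels in dataset.take(1):
    print(batch_features.shape, batch_labels.shape)  # (64, 32) (64,)
```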
Data loaders can also shuffle data across epochs for randomization and preprocess features in parallel with model training to expedite the training process. Randomly shuffling the order of examples between training epochs reduces bias and improves generalization. @@ -321,33 +408,19 @@ Data loaders also support caching and prefetching strategies to optimize data de ### Data Augmentation -Besides loading, data augmentation expands datasets synthetically. Augmentations apply random transformations for images like flipping, cropping, rotating, altering color, adding noise, etc. For audio, common augmentations involve mixing clips with background noise or modulating speed/pitch/volume. - -Augmentations increase variation in the training data. Frameworks like TensorFlow and PyTorch simplify applying random augmentations each epoch by integrating them into the data pipeline. By programmatically increasing variation in the training data distribution, augmentations reduce Overfitting and improve model generalization. - -Many frameworks simplify integrating augmentations into the data pipeline, applying them on the fly each epoch. Together, performant data loaders and extensive augmentations enable practitioners to feed massive, varied datasets to neural networks efficiently. Hands-off data pipelines represent a significant improvement in usability and productivity. They allow developers to focus more on model architecture and less on data wrangling when training deep learning models. - -### Optimization Algorithms +Machine learning frameworks like TensorFlow and PyTorch provide tools to simplify and streamline the process of data augmentation, enhancing the efficiency of expanding datasets synthetically. These frameworks offer integrated functionalities to apply random transformations, such as flipping, cropping, rotating, altering color, and adding noise for images. For audio data, common augmentations involve mixing clips with background noise or modulating speed, pitch, and volume. -Training a neural network is fundamentally an iterative process that seeks to minimize a loss function. The goal is to fine-tune the model weights and parameters to produce predictions close to the true target labels. Machine learning frameworks have greatly streamlined this process by offering extensive support in three critical areas: loss functions, optimization algorithms, and regularization techniques. +By integrating augmentation tools into the data pipeline, frameworks enable these transformations to be applied on the fly during each training epoch. This approach increases the variation in the training data distribution, thereby reducing overfitting and improving model generalization. The use of performant data loaders in combination with extensive augmentation capabilities allows practitioners to efficiently feed massive, varied datasets to neural networks. -Loss Functions are useful to quantify the difference between the model's predictions and the true values. Different datasets require a different loss function to perform properly, as the loss function tells the computer the "objective" for it to aim. Commonly used loss functions are Mean Squared Error (MSE) for regression tasks and Cross-Entropy Loss for classification tasks. +These hands-off data pipelines represent a significant improvement in usability and productivity. They allow developers to focus more on model architecture and less on data wrangling when training deep learning models. 
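As a brief sketch of what such an integrated augmentation pipeline can look like, the example below uses Keras preprocessing layers (available as `tf.keras.layers` in recent TensorFlow releases) to apply random flips, rotations, and zooms to a batch of placeholder images; equivalent transforms exist in `torchvision.transforms` for PyTorch.

```python
import tensorflow as tf

# Augmentations expressed as Keras preprocessing layers. When included in a
# model or mapped over a tf.data pipeline, they run on the fly each epoch.
data_augmentation = tf.keras.Sequential([
    tf.keras.layers.RandomFlip("horizontal"),
    tf.keras.layers.RandomRotation(0.1),   # rotate up to ~10% of a full turn
    tf.keras.layers.RandomZoom(0.2),
])

# A batch of placeholder 32x32 RGB images
images = tf.random.uniform((8, 32, 32, 3))
augmented = data_augmentation(images, training=True)
print(augmented.shape)  # (8, 32, 32, 3)
```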
-To demonstrate some of the loss functions, imagine you have a set of inputs and the corresponding outputs, $Y_n$, that denote the output of $n$'th value. The inputs are fed into the model, and the model outputs a prediction, which we can call $\hat{Y_n}$. With the predicted value and the real value, we can, for example, use the MSE to calculate the loss function: +### Loss Functions and Optimization Algorithms -$$MSE = \frac{1}{N}\sum_{n=1}^{N}(Y_n - \hat{Y_n})^2$$ +Training a neural network is fundamentally an iterative process that seeks to minimize a loss function. The goal is to fine-tune the model weights and parameters to produce predictions close to the true target labels. Machine learning frameworks have greatly streamlined this process by offering loss functions and optimization algorithms. -If the problem is a classification problem, we do not want to use the MSE since the distance between the predicted value and the real value does not have significant meaning. For example, if one wants to recognize handwritten models, while 9 is further away from 2, it does not mean that the model is wrong in making the prediction. Therefore, we use the cross-entropy loss function, which is defined as: +Machine learning frameworks provide implemented loss functions that are needed for quantifying the difference between the model's predictions and the true values. Different datasets require a different loss function to perform properly, as the loss function tells the computer the "objective" for it to aim. Commonly used loss functions include Mean Squared Error (MSE) for regression tasks, Cross-Entropy Loss for classification tasks, and Kullback-Leibler (KL) Divergence for probabilistic models. For instance, TensorFlow's [tf.keras.losses](https://www.tensorflow.org/api_docs/python/tf/keras/losses) holds a suite of these commonly used loss functions. -$$Cross-Entropy = -\sum_{n=1}^{N}Y_n\log(\hat{Y_n})$$ - -Once a loss like the above is computed, we need methods to adjust the model's parameters to reduce this loss or error during the training process. To do so, current frameworks use a gradient-based approach, which computes how much changes tuning the weights in a certain way changes the value of the loss function. Knowing this gradient, the model moves in the direction that reduces the gradient. Many challenges are associated with this, primarily stemming from the fact that the optimization problem could not be more, making it very easy to solve. More details about this will come in the AI Training section. Modern frameworks come equipped with efficient implementations of several optimization algorithms, many of which are variants of gradient descent algorithms with stochastic methods and adaptive learning rates. More information with clear examples can be found in the AI Training section. - -Lastly, overly complex models tend to overfit, meaning they perform well on the training data but must generalize to new, unseen data (see Overfitting). To counteract this, regularization methods are employed to penalize model complexity and encourage it to learn simpler patterns. Dropout randomly sets a fraction of input units to 0 at each update during training, which helps prevent Overfitting. - -However, there are cases where the problem is more complex than the model can represent, which may result in underfitting. Therefore, choosing the right model architecture is also a critical step in the training process. Further heuristics and techniques are discussed in the AI Training section. 
- -Frameworks also efficiently implement gradient descent, Adagrad, Adadelta, and Adam. Adding regularization, such as dropout and L1/L2 penalties, prevents Overfitting during training. Batch normalization accelerates training by normalizing inputs to layers. +Optimization algorithms are used to efficiently find the set of model parameters that minimize the loss function, ensuring the model performs well on training data and generalizes to new data. Modern frameworks come equipped with efficient implementations of several optimization algorithms, many of which are variants of gradient descent with stochastic methods and adaptive learning rates. Some examples of these variants are Stochastic Gradient Descent, Adagrad, Adadelta, and Adam. Implementations of these variants are provided in [tf.keras.optimizers](https://www.tensorflow.org/api_docs/python/tf/keras/optimizers). More information with clear examples can be found in the AI Training section. ### Model Training Support @@ -417,12 +490,14 @@ On the other hand, [Tensor Processing Units](https://cloud.google.com/tpu/docs/i While TPUs can drastically reduce training times, they also have disadvantages. For example, many operations within the machine learning frameworks (primarily TensorFlow here since the TPU directly integrates with it) are not supported by TPUs. They also cannot support custom operations from the machine learning frameworks, and the network design must closely align with the hardware capabilities. -Today, NVIDIA GPUs dominate training, aided by software libraries like [CUDA](https://developer.nvidia.com/cuda-toolkit), [cuDNN](https://developer.nvidia.com/cudnn), and [TensorRT.](https://developer.nvidia.com/tensorrt#:~:text=NVIDIA) TensorRT-LLM is an,knowledge of C++ or CUDA.) Frameworks also include optimizations to maximize performance on these hardware types, like pruning unimportant connections and fusing layers. Combining these techniques with hardware acceleration provides greater efficiency. For inference, hardware is increasingly moving towards optimized ASICs and SoCs. Google's TPUs accelerate models in data centers. Apple, Qualcomm, and others now produce AI-focused mobile chips. The NVIDIA Jetson family targets autonomous robots. +Today, NVIDIA GPUs dominate training, aided by software libraries like [CUDA](https://developer.nvidia.com/cuda-toolkit), [cuDNN](https://developer.nvidia.com/cudnn), and [TensorRT](https://developer.nvidia.com/tensorrt). Frameworks also include optimizations to maximize performance on these hardware types, such as pruning unimportant connections and fusing layers. Combining these techniques with hardware acceleration provides greater efficiency. For inference, hardware is increasingly moving towards optimized ASICs and SoCs. Google's TPUs accelerate models in data centers, while Apple, Qualcomm, the NVIDIA Jetson family, and others now produce AI-focused mobile chips. -![Companies offering ML hardware accelerators.
Source: [Gradient Flow.](https://gradientflow.com/one-simple-chart-companies-that-offer-deep-neural-network-accelerators/)](images/png/hardware_accelerator.png){#fig-hardware-accelerator} ## Advanced Features {#sec-ai_frameworks-advanced} +Beyond providing the essential tools for training machine learning models, frameworks also offer advanced features. These features include distributing training across different hardware platforms, fine-tuning large pre-trained models with ease, and facilitating federated learning. Implementing these capabilities independently would be highly complex and resource-intensive, but frameworks simplify these processes, making advanced machine learning techniques more accessible. + ### Distributed training As machine learning models have become larger over the years, it has become essential for large models to utilize multiple computing nodes in the training process. This process, distributed learning, has allowed for higher training capabilities but has also imposed challenges in implementation. @@ -443,7 +518,7 @@ More information about model conversion in TensorFlow is linked [here](https://w ### AutoML, No-Code/Low-Code ML -In many cases, machine learning can have a relatively high barrier of entry compared to other fields. To successfully train and deploy models, one needs to have a critical understanding of a variety of disciplines, from data science (data processing, data cleaning), model structures (hyperparameter tuning, neural network architecture), hardware (acceleration, parallel processing), and more depending on the problem at hand. The complexity of these problems has led to the introduction of frameworks such as AutoML, which aims to make "Machine learning available for non-Machine Learning exports" and to "automate research in machine learning." They have constructed AutoWEKA, which aids in the complex process of hyperparameter selection, and Auto-sklearn and Auto-pytorch, an extension of AutoWEKA into the popular sklearn and PyTorch Libraries. +In many cases, machine learning can have a relatively high barrier of entry compared to other fields. To successfully train and deploy models, one needs to have a critical understanding of a variety of disciplines, from data science (data processing, data cleaning), model structures (hyperparameter tuning, neural network architecture), hardware (acceleration, parallel processing), and more depending on the problem at hand. The complexity of these problems has led to the introduction of frameworks such as AutoML, which aims to make "Machine learning available for non-Machine Learning experts" and to "automate research in machine learning." They have constructed AutoWEKA, which aids in the complex process of hyperparameter selection, and Auto-sklearn and Auto-pytorch, an extension of AutoWEKA into the popular sklearn and PyTorch Libraries. While these efforts to automate parts of machine learning tasks are underway, others have focused on making machine learning models easier by deploying no-code/low-code machine learning, utilizing a drag-and-drop interface with an easy-to-navigate user interface. Companies such as Apple, Google, and Amazon have already created these easy-to-use platforms to allow users to construct machine learning models that can integrate into their ecosystem. 
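For instance, a common entry point in TensorFlow is a distribution strategy that replicates the model across the available devices and averages gradients between them. The sketch below shows the single-machine, multi-GPU case; the model architecture and the commented-out dataset are placeholders.

```python
import tensorflow as tf

# Data-parallel training across the GPUs visible on a single machine.
# MirroredStrategy replicates the model on every device and averages
# gradients between replicas; MultiWorkerMirroredStrategy extends the
# same idea across multiple machines.
strategy = tf.distribute.MirroredStrategy()

with strategy.scope():
    # Variables created inside the scope are mirrored on every replica
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(128, activation="relu", input_shape=(32,)),
        tf.keras.layers.Dense(10, activation="softmax"),
    ])
    model.compile(
        optimizer="adam",
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )

# model.fit(train_dataset, epochs=5)  # batches are sharded across replicas
```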
@@ -453,25 +528,21 @@ These steps to remove barriers to entry continue to democratize machine learning #### Transfer Learning -Transfer learning is the practice of using knowledge gained from a pre-trained model to train and improve the performance of a model for a different task. For example, datasets trained on ImageNet datasets such as MobileNet and ResNet can help classify other image datasets. To do so, one may freeze the pre-trained model, utilizing it as a feature extractor to train a much smaller model built on top of the feature extraction. One can also fine-tune the entire model to fit the new task. +Transfer learning is the practice of using knowledge gained from a pre-trained model to train and improve the performance of a model for a different task. For example, models such as MobileNet and ResNet are trained on the ImageNet dataset. To do so, one may freeze the pre-trained model, utilizing it as a feature extractor to train a much smaller model built on top of the feature extraction. One can also fine-tune the entire model to fit the new task. Machine learning frameworks make it easy to load pre-trained models, freeze specific layers, and train custom layers on top. They simplify this process by providing intuitive APIs and easy access to large repositories of [pre-trained models](https://keras.io/api/applications/). -Transfer learning has challenges, such as the modified model's inability to conduct its original tasks after transfer learning. Papers such as ["Learning without Forgetting"](https://browse.arxiv.org/pdf/1606.09282.pdf) by @li2017learning aims to address these challenges and have been implemented in modern machine learning platforms. +Transfer learning has challenges, such as the modified model's inability to conduct its original tasks after transfer learning. Papers such as ["Learning without Forgetting"](https://browse.arxiv.org/pdf/1606.09282.pdf) by @li2017learning aims to address these challenges and have been implemented in modern machine learning platforms. #### Federated Learning -Consider the problem of labeling items in a photo from personal devices and moving the image data from the devices to a central server, where a single model will train Using the image data provided by the devices. However, this presents many potential challenges. First, with many devices, one needs a massive network infrastructure to move and store data from these devices to a central location. With the number of devices present today, this is often not feasible and very costly. Furthermore, privacy challenges like those of Photos central servers are associated with moving personal data. - -Federated learning by @mcmahan2023communicationefficient is a form of distributed computing that resolves these issues by distributing the models into personal devices for them to be trained on devices (@fig-federated-learning). Initially, a base global model is trained on a central server to be distributed to all devices. Using this base model, the devices individually compute the gradients and send them back to the central hub. Intuitively, this transfers model parameters instead of the data itself. This innovative approach allows the model to be trained with many different datasets (in our example, the set of images on personal devices) without transferring a large amount of potentially sensitive data. However, federated learning also comes with a series of challenges. 
+Federated learning by @mcmahan2023communicationefficient is a form of distributed computing that involves training models on personal devices rather than centralizing the data on a single server (@fig-federated-learning). Initially, a base global model is trained on a central server to be distributed to all devices. Using this base model, the devices individually compute the gradients and send them back to the central hub. Intuitively, this transfers model parameters instead of the data itself. Federated learning enhances privacy by keeping sensitive data on local devices and only sharing model updates with a central server. This method is particularly useful when dealing with sensitive data or when a large-scale infrastructure is impractical. -Data collected from devices may come with something other than suitable labels in many real-world situations. Users compound this issue; the primary data source can often be unreliable. This unreliability means that even when data is labeled, its accuracy or relevance is not guaranteed. Furthermore, each user's data is unique, resulting in a significant variance in the data generated by different users. This non-IID nature of data, coupled with the unbalanced data production where some users generate more data than others, can adversely impact the performance of the global model. Researchers have worked to compensate for this by adding a proximal term to balance the local and global model and adding a frozen [global hypersphere classifier](https://arxiv.org/abs/2207.09413). +![A centralized-server approach to federated learning. Source: [NVIDIA.](https://blogs.nvidia.com/blog/what-is-federated-learning/)](images/png/federated_learning.png){#fig-federated-learning} -Additional challenges are associated with federated learning. The number of mobile device owners can far exceed the average number of training samples on each device, leading to substantial communication overhead. This issue is particularly pronounced in the context of mobile networks, which are often used for such communication and can be unstable. This instability can result in delayed or failed transmission of model updates, thereby affecting the overall training process. +However, federated learning faces challenges such as ensuring data accuracy, managing non-IID (independent and identically distributed) data, dealing with unbalanced data production, and overcoming communication overhead and device heterogeneity. Privacy and security concerns, such as gradient inversion attacks, also pose significant challenges. -The heterogeneity of device resources is another hurdle. Devices participating in Federated Learning can have varying computational powers and memory capacities. This diversity makes it challenging to design efficient algorithms across all devices. Privacy and security issues are not a guarantee for federated learning. Techniques such as inversion gradient attacks can extract information about the training data from the model parameters. Despite these challenges, the many potential benefits continue to make it a popular research area. Open source programs such as [Flower](https://flower.dev/) have been developed to simplify implementing federated learning with various machine learning frameworks. +Machine learning frameworks simplify the implementation of federated learning by providing necessary tools and libraries. For example, [TensorFlow Federated (TFF)](https://www.tensorflow.org/federated) offers an open-source framework to support federated learning. 
TFF allows developers to simulate and implement federated learning algorithms, offering a federated core for low-level operations and high-level APIs for common federated tasks. It seamlessly integrates with TensorFlow, enabling the use of TensorFlow models and optimizers in a federated setting. TFF supports secure aggregation techniques to enhance privacy and allows for customization of federated learning algorithms. By leveraging these tools, developers can efficiently distribute training, fine-tune pre-trained models, and handle federated learning's inherent complexities. -@fig-federated-learning illustrates an example of federated learning. Consider a model used for medical predictions by diffrent hospitals. Given that medical data is extremely sensitive and must be kept private, it can't be transferred to a centralized server for training. Instead, each hospital would fine-tune/train the base model using its own private data, while only communicating non-sensitive information with the Federated Server, such as the learned parameters. - -![A centralized-server approach to federated learning. Credit: [NVIDIA.](https://blogs.nvidia.com/blog/what-is-federated-learning/)](images/png/federated_learning.png){#fig-federated-learning} +Other open source programs such as [Flower](https://flower.dev/) have also been developed to simplify implementing federated learning with various machine learning frameworks. ## Framework Specialization @@ -489,13 +560,17 @@ Edge AI frameworks are tailored to deploy AI models on IoT devices, smartphones, TinyML frameworks are specialized for deploying AI models on extremely resource-constrained devices, specifically microcontrollers and sensors within the IoT ecosystem. TinyML frameworks are designed for devices with limited resources, emphasizing minimal memory and power consumption. TinyML frameworks are specialized for use cases on resource-constrained IoT devices for predictive maintenance, gesture recognition, and environmental monitoring applications. Major TinyML frameworks include TensorFlow Lite Micro, uTensor, and ARM NN. They optimize complex models to fit within kilobytes of memory through techniques like quantization-aware training and reduced precision. TinyML allows intelligent sensing across battery-powered devices, enabling collaborative learning via federated learning. The choice of framework involves balancing model performance and computational constraints of the target platform, whether cloud, edge, or TinyML. 
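As a concrete illustration of the reduced-precision techniques mentioned above, the sketch below applies post-training integer quantization (a close cousin of quantization-aware training) with the TensorFlow Lite converter. The toy model and calibration data are placeholders, and converter options may differ slightly across TensorFlow versions.

``` python
import numpy as np
import tensorflow as tf

# A tiny stand-in model; in practice this would be the trained TinyML model.
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(8,)),
    tf.keras.layers.Dense(16, activation="relu"),
    tf.keras.layers.Dense(3, activation="softmax"),
])

# A handful of representative inputs to calibrate activation ranges.
rep_samples = np.random.rand(100, 8).astype("float32")

def representative_data_gen():
    for sample in rep_samples:
        yield [sample[None, :]]

converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]          # enable quantization
converter.representative_dataset = representative_data_gen
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.int8                      # int8 end to end
converter.inference_output_type = tf.int8

tflite_model = converter.convert()
with open("model_int8.tflite", "wb") as f:
    f.write(tflite_model)  # this flatbuffer is what TFLite Micro executes on-device
```

Because int8 weights take a quarter of the space of float32 weights, the resulting flatbuffer is roughly a quarter the size of its floating-point counterpart, which is what makes kilobyte-scale deployment plausible.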
@tbl-ml_frameworks compares the major AI frameworks across cloud, edge, and TinyML environments: -| Framework Type | Examples | Key Technologies | Use Cases | -|----------------|-----------------------------------|-------------------------------------------------------------------------|------------------------------------------------------| -| Cloud AI | TensorFlow, PyTorch, MXNet, Keras | GPUs, TPUs, distributed training, AutoML, MLOps | Cloud services, web apps, big data analytics | -| Edge AI | TensorFlow Lite, PyTorch Mobile, Core ML | Model optimization, compression, quantization, efficient NN architectures | Mobile apps, robots, autonomous systems, real-time processing | -| TinyML | TensorFlow Lite Micro, uTensor, ARM NN | Quantization-aware training, reduced precision, neural architecture search | IoT sensors, wearables, predictive maintenance, gesture recognition | ++----------------+-------------------------------------------+--------------------------------------------------------------------------------+---------------------------------------------------------------------+ +| Framework Type | Examples | Key Technologies | Use Cases | ++:===============+:==========================================+:===============================================================================+:====================================================================+ +| Cloud AI | TensorFlow, PyTorch, MXNet, Keras | GPUs, TPUs, distributed training, AutoML, MLOps | Cloud services, web apps, big data analytics | ++----------------+-------------------------------------------+--------------------------------------------------------------------------------+---------------------------------------------------------------------+ +| Edge AI | TensorFlow Lite, PyTorch Mobile, Core ML | Model optimization, compression, quantization, efficient NN architectures | Mobile apps, autonomous systems, real-time processing | ++----------------+-------------------------------------------+--------------------------------------------------------------------------------+---------------------------------------------------------------------+ +| TinyML | TensorFlow Lite Micro, uTensor, ARM NN | Quantization-aware training, reduced precision, neural architecture search | IoT sensors, wearables, predictive maintenance, gesture recognition | ++----------------+-------------------------------------------+--------------------------------------------------------------------------------+---------------------------------------------------------------------+ -: Comparison of framework types for Cloud AI, Edge AI, and TinyML. {#tbl-ml_frameworks} +: Comparison of framework types for Cloud AI, Edge AI, and TinyML. {#tbl-ml_frameworks .striped .hover} **Key differences:** @@ -577,19 +652,35 @@ Machine learning deployment on microcontrollers and other embedded devices often @tbl-compare_frameworks summarizes the key differences and similarities between these three specialized machine-learning inference frameworks for embedded systems and microcontrollers. 
-| Framework | TensorFlow Lite Micro | TinyEngine | CMSIS-NN | -|------------------------|:----------------------------:|:--------------------------------------:|:--------------------------------------:| -| **Approach** | Interpreter-based | Static compilation | Optimized neural network kernels | -| **Hardware Focus** | General embedded devices | Microcontrollers | ARM Cortex-M processors | -| **Arithmetic Support** | Floating point | Floating point, fixed point | Floating point, fixed point | -| **Model Support** | General neural network models| Models co-designed with TinyNAS | Common neural network layer types | -| **Code Footprint** | Larger due to inclusion of interpreter and ops | Small, includes only ops needed for model | Lightweight by design | -| **Latency** | Higher due to interpretation overhead | Very low due to compiled model | Low latency focus | -| **Memory Management** | Dynamically managed by interpreter | Model-level optimization | Tools for efficient allocation | -| **Optimization Approach** | Some code generation features | Specialized kernels, operator fusion | Architecture-specific assembly optimizations | -| **Key Benefits** | Flexibility, portability, ease of updating models | Maximizes performance, optimized memory usage | Hardware acceleration, standardized API, portability | - -: Comparison of frameworks: TensorFlow Lite Micro, TinyEngine, and CMSIS-NN {#tbl-compare_frameworks} ++------------------------+----------------------------+------------------------------------------+------------------------------------------+ +| Framework | TensorFlow Lite Micro | TinyEngine | CMSIS-NN | ++:=======================+:===========================+:=========================================+:=========================================+ +| Approach | Interpreter-based | Static compilation | Optimized neural network kernels | ++------------------------+----------------------------+------------------------------------------+------------------------------------------+ +| Hardware Focus | General embedded devices | Microcontrollers | ARM Cortex-M processors | ++------------------------+----------------------------+------------------------------------------+------------------------------------------+ +| Arithmetic Support | Floating point | Floating point, fixed point | Floating point, fixed point | ++------------------------+----------------------------+------------------------------------------+------------------------------------------+ +| Model Support | General neural network | Models co-designed with TinyNAS | Common neural network layer types | +| | models | | | ++------------------------+----------------------------+------------------------------------------+------------------------------------------+ +| Code Footprint | Larger due to inclusion | Small, includes only ops needed for | Lightweight by design | +| | of interpreter and ops | model | | ++------------------------+----------------------------+------------------------------------------+------------------------------------------+ +| Latency | Higher due to | Very low due to compiled model | Low latency focus | +| | interpretation overhead | | | ++------------------------+----------------------------+------------------------------------------+------------------------------------------+ +| Memory Management | Dynamically managed by | Model-level optimization | Tools for efficient allocation | +| | interpreter | | | 
++------------------------+----------------------------+------------------------------------------+------------------------------------------+ +| Optimization Approach | Some code generation | Specialized kernels, operator fusion | Architecture-specific assembly | +| | features | | optimizations | ++------------------------+----------------------------+------------------------------------------+------------------------------------------+ +| Key Benefits | Flexibility, portability, | Maximizes performance, optimized | Hardware acceleration, standardized | +| | ease of updating models | memory usage | API, portability | ++------------------------+----------------------------+------------------------------------------+------------------------------------------+ + +: Comparison of frameworks: TensorFlow Lite Micro, TinyEngine, and CMSIS-NN {#tbl-compare_frameworks .striped .hover} We will understand each of these in greater detail in the following sections. @@ -625,7 +716,7 @@ Conventional ML frameworks schedule memory per layer, trying to minimize usage f TinyEngine also specializes in the kernels for each layer through techniques like tiling, unrolling, and fusing operators. For example, it will generate unrolled compute kernels with the number of loops needed for a 3x3 or 5x5 convolution. These specialized kernels extract maximum performance from the microcontroller hardware. It uses optimized depthwise convolutions to minimize memory allocations by computing each channel's output in place over the input channel data. This technique exploits the channel-separable nature of depthwise convolutions to reduce peak memory size. -Like TFLite Micro, the compiled TinyEngine binary only includes ops needed for a specific model rather than all possible operations. This results in a very small binary footprint, keeping code size low for memory-constrained devices. +Like TFLite Micro, the compiled TinyEngine binary only includes operations needed for a specific model rather than all possible operations. This results in a very small binary footprint, keeping code size low for memory-constrained devices. One difference between TFLite Micro and TinyEngine is that the latter is co-designed with "TinyNAS," an architecture search method for microcontroller models similar to differential NAS for microcontrollers. TinyEngine's efficiency allows for exploring larger and more accurate models through NAS. It also provides feedback to TinyNAS on which models can fit within the hardware constraints. @@ -653,29 +744,27 @@ Through various custom techniques, such as static compilation, model-based sched Choosing the right machine learning framework for a given application requires carefully evaluating models, hardware, and software considerations. By analyzing these three aspects—models, hardware, and software—ML engineers can select the optimal framework and customize it as needed for efficient and performant on-device ML applications. The goal is to balance model complexity, hardware limitations, and software integration to design a tailored ML pipeline for embedded and edge devices. -![TensorFlow Framework Comparison - General. Credit: TensorFlow.](images/png/image4.png){#fig-tf-comparison width="100%" height="auto" align="center" caption="TensorFlow Framework Comparison - General"} +![TensorFlow Framework Comparison - General. 
Source: TensorFlow.](images/png/image4.png){#fig-tf-comparison width="100%" height="auto" align="center" caption="TensorFlow Framework Comparison - General"} ### Model -TensorFlow supports significantly more ops than TensorFlow Lite and TensorFlow Lite Micro as it is typically used for research or cloud deployment, which require a large number of and more flexibility with operators (see @fig-tf-comparison). TensorFlow Lite supports select ops for on-device training, whereas TensorFlow Micro does not. TensorFlow Lite also supports dynamic shapes and quantization-aware training, but TensorFlow Micro does not. In contrast, TensorFlow Lite and TensorFlow Micro offer native quantization tooling and support, where quantization refers to transforming an ML program into an approximated representation with available lower precision operations. +TensorFlow supports significantly more operations (ops) than TensorFlow Lite and TensorFlow Lite Micro as it is typically used for research or cloud deployment, which require a large number of and more flexibility with operators (see @fig-tf-comparison). TensorFlow Lite supports select ops for on-device training, whereas TensorFlow Micro does not. TensorFlow Lite also supports dynamic shapes and quantization-aware training, but TensorFlow Micro does not. In contrast, TensorFlow Lite and TensorFlow Micro offer native quantization tooling and support, where quantization refers to transforming an ML program into an approximated representation with available lower precision operations. ### Software -![TensorFlow Framework Comparison - Software. Credit: TensorFlow.](images/png/image5.png){#fig-tf-sw-comparison width="100%" height="auto" align="center" caption="TensorFlow Framework Comparison - Model"} +![TensorFlow Framework Comparison - Software. Source: TensorFlow.](images/png/image5.png){#fig-tf-sw-comparison width="100%" height="auto" align="center" caption="TensorFlow Framework Comparison - Model"} TensorFlow Lite Micro does not have OS support, while TensorFlow and TensorFlow Lite do, to reduce memory overhead, make startup times faster, and consume less energy (see @fig-tf-sw-comparison). TensorFlow Lite Micro can be used in conjunction with real-time operating systems (RTOS) like FreeRTOS, Zephyr, and Mbed OS. TensorFlow Lite and TensorFlow Lite Micro support model memory mapping, allowing models to be directly accessed from flash storage rather than loaded into RAM, whereas TensorFlow does not. TensorFlow and TensorFlow Lite support accelerator delegation to schedule code to different accelerators, whereas TensorFlow Lite Micro does not, as embedded systems tend to have a limited array of specialized accelerators. ### Hardware -![TensorFlow Framework Comparison - Hardware. Credit: TensorFlow.](images/png/image3.png){#fig-tf-hw-comparison width="100%" height="auto" align="center" caption="TensorFlow Framework Comparison - Hardware"} +![TensorFlow Framework Comparison - Hardware. Source: TensorFlow.](images/png/image3.png){#fig-tf-hw-comparison width="100%" height="auto" align="center" caption="TensorFlow Framework Comparison - Hardware"} TensorFlow Lite and TensorFlow Lite Micro have significantly smaller base binary sizes and memory footprints than TensorFlow (see @fig-tf-hw-comparison). For example, a typical TensorFlow Lite Micro binary is less than 200KB, whereas TensorFlow is much larger. This is due to the resource-constrained environments of embedded systems. TensorFlow supports x86, TPUs, and GPUs like NVIDIA, AMD, and Intel. 
TensorFlow Lite supports Arm Cortex-A and x86 processors commonly used on mobile phones and tablets. The latter is stripped of all the unnecessary training logic for on-device deployment. TensorFlow Lite Micro provides support for microcontroller-focused Arm Cortex M cores like M0, M3, M4, and M7, as well as DSPs like Hexagon and SHARC and MCUs like STM32, NXP Kinetis, Microchip AVR. -Selecting the appropriate AI framework is essential to ensure that embedded systems can efficiently execute AI models. Key factors to consider when choosing a machine learning framework are ease of use, community support, performance, scalability, integration with data engineering tools, and integration with model optimization tools. By understanding these factors, you can make informed decisions and maximize the potential of your machine-learning initiatives. - ### Other Factors -Several other key factors beyond models, hardware, and software should be considered when evaluating AI frameworks for embedded systems. +Selecting the appropriate AI framework is essential to ensure that embedded systems can efficiently execute AI models. Several key factors beyond models, hardware, and software should be considered when evaluating AI frameworks for embedded systems. Other key factors to consider when choosing a machine learning framework are performance, scalability, ease of use, integration with data engineering tools, integration with model optimization tools, and community support. By understanding these factors, you can make informed decisions and maximize the potential of your machine-learning initiatives. #### Performance @@ -707,7 +796,7 @@ Community support plays another essential factor. Frameworks with active and eng Currently, the ML system stack consists of four abstractions as shown in @fig-mlsys-stack, namely (1) computational graphs, (2) tensor programs, (3) libraries and runtimes, and (4) hardware primitives. -![Four abstractions in current ML system stacks. Credit: [TVM.](https://tvm.apache.org/2021/12/15/tvm-unity)](images/png/image8.png){#fig-mlsys-stack align="center" caption="Four Abstractions in Current ML System Stack"} +![Four abstractions in current ML system stacks. Source: [TVM.](https://tvm.apache.org/2021/12/15/tvm-unity)](images/png/image8.png){#fig-mlsys-stack align="center" caption="Four Abstractions in Current ML System Stack"} This has led to vertical (i.e., between abstraction levels) and horizontal (i.e., library-driven vs. compilation-driven approaches to tensor computation) boundaries, which hinder innovation for ML. Future work in ML frameworks can look toward breaking these boundaries. In December 2021, [Apache TVM](https://tvm.apache.org/2021/12/15/tvm-unity) Unity was proposed, which aimed to facilitate interactions between the different abstraction levels (as well as the people behind them, such as ML scientists, ML engineers, and hardware engineers) and co-optimize decisions in all four abstraction levels. @@ -719,23 +808,23 @@ As ML frameworks further develop, high-performance compilers and libraries will We can also use ML to improve ML frameworks in the future. 
Some current uses of ML for ML frameworks include: -* hyperparameter optimization using techniques such as Bayesian optimization, random search, and grid search +* Hyperparameter optimization using techniques such as Bayesian optimization, random search, and grid search -* neural architecture search (NAS) to automatically search for optimal network architectures +* Neural Architecture Search (NAS) to automatically search for optimal network architectures * AutoML, which as described in @sec-ai_frameworks-advanced, automates the ML pipeline. ## Conclusion -In summary, selecting the optimal framework requires thoroughly evaluating options against criteria like usability, community support, performance, hardware compatibility, and model conversion abilities. There is no universal best solution, as the right framework depends on the specific constraints and use case. +In summary, selecting the optimal machine learning framework requires a thorough evaluation of various options against criteria such as usability, community support, performance, hardware compatibility, and model conversion capabilities. There is no one-size-fits-all solution, as the right framework depends on specific constraints and use cases. -TensorFlow Lite Micro currently provides a strong starting point for extremely resource-constrained microcontroller-based platforms. Its comprehensive optimization tooling, such as quantization mapping and kernel optimizations, enables high performance on devices like Arm Cortex-M and RISC-V processors. The active developer community ensures accessible technical support. Seamless integration with TensorFlow for training and converting models makes the workflow cohesive. +We first introduced the necessity of machine learning frameworks like TensorFlow and PyTorch. These frameworks offer features such as tensors for handling multi-dimensional data, computational graphs for defining and optimizing model operations, and a suite of tools including loss functions, optimizers, and data loaders that streamline model development. -For platforms with more capable CPUs like Cortex-A, TensorFlow Lite for Microcontrollers expands possibilities. It provides greater flexibility for custom and advanced models beyond the core operators in TFLite Micro. However, this comes at the cost of a larger memory footprint. These frameworks are ideal for automotive systems, drones, and more powerful edge devices that can benefit from greater model sophistication. +Advanced features further enhance these frameworks' usability, enabling tasks like fine-tuning large pre-trained models and facilitating federated learning. These capabilities are critical for developing sophisticated machine learning models efficiently. -Frameworks specifically built for specialized hardware like CMSIS-NN on Cortex-M processors can further maximize performance but sacrifice portability. Integrated frameworks from processor vendors tailor the stack to their architectures, unlocking the full potential of their chips but locking you into their ecosystem. +Embedded AI frameworks, such as TensorFlow Lite Micro, provide specialized tools for deploying models on resource-constrained platforms. TensorFlow Lite Micro, for instance, offers comprehensive optimization tooling, including quantization mapping and kernel optimizations, to ensure high performance on microcontroller-based platforms like Arm Cortex-M and RISC-V processors. 
Frameworks specifically built for specialized hardware like CMSIS-NN on Cortex-M processors can further maximize performance but sacrifice portability. Integrated frameworks from processor vendors tailor the stack to their architectures, unlocking the full potential of their chips but locking you into their ecosystem. -Ultimately, choosing the right framework involves finding the best match between its capabilities and the requirements of the target platform. This requires balancing tradeoffs between performance needs, hardware constraints, model complexity, and other factors. Thoroughly assessing intended models and use cases and evaluating options against key metrics will guide developers in picking the ideal framework for their embedded ML application. +Ultimately, choosing the right framework involves finding the best match between its capabilities and the requirements of the target platform. This requires balancing trade-offs between performance needs, hardware constraints, model complexity, and other factors. Thoroughly assessing the intended models and use cases and evaluating options against key metrics will guide developers in selecting the ideal framework for their machine learning applications. ## Resources {#sec-ai-frameworks-resource} @@ -795,3 +884,6 @@ In addition to exercises, we offer a series of hands-on labs allowing students t * _Coming soon._ ::: + + + diff --git a/contents/frameworks/images/png/color_channels_of_image.png b/contents/frameworks/images/png/color_channels_of_image.png new file mode 100644 index 000000000..814523545 Binary files /dev/null and b/contents/frameworks/images/png/color_channels_of_image.png differ diff --git a/contents/hw_acceleration/hw_acceleration.qmd b/contents/hw_acceleration/hw_acceleration.qmd index 85e7f3e57..0ad2a8932 100644 --- a/contents/hw_acceleration/hw_acceleration.qmd +++ b/contents/hw_acceleration/hw_acceleration.qmd @@ -66,7 +66,7 @@ The concept of dark silicon emerged as a consequence of these constraints. "Dark This phenomenon meant that while chips had more transistors, not all could be operational simultaneously, limiting potential performance gains. This power crisis necessitated a shift to the accelerator era, with specialized hardware units tailored for specific tasks to maximize efficiency. The explosion in AI workloads further drove demand for customized accelerators. Enabling factors included new programming languages, software tools, and manufacturing advances. -![Microprocessor trends. Credit: [Karl Rupp](https://www.karlrupp.net/2018/02/42-years-of-microprocessor-trend-data/).](images/png/hwai_40yearsmicrotrenddata.png){#fig-moore-dennard} +![Microprocessor trends. Source: [Karl Rupp](https://www.karlrupp.net/2018/02/42-years-of-microprocessor-trend-data/).](images/png/hwai_40yearsmicrotrenddata.png){#fig-moore-dennard} Fundamentally, hardware accelerators are evaluated on performance, power, and silicon area (PPA)—the nature of the target application—whether memory-bound or compute-bound—heavily influences the design. For example, memory-bound workloads demand high bandwidth and low latency access, while compute-bound applications require maximal computational throughput. @@ -136,7 +136,7 @@ By structuring the analysis along this spectrum, we aim to illustrate the fundam The progression begins with the most specialized option, ASICs purpose-built for AI, to ground our understanding in the maximum possible optimizations before expanding to more generalizable architectures. 
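To make the memory-bound versus compute-bound distinction discussed earlier in this section concrete, the short roofline-style sketch below compares each kernel's arithmetic intensity (operations per byte of data moved) against an accelerator's compute-to-bandwidth ratio. The peak-throughput and bandwidth figures are illustrative assumptions, not the specifications of any particular chip.

``` python
# Roofline-style back-of-the-envelope check: a kernel is roughly
# memory-bound when its arithmetic intensity (FLOPs per byte moved)
# falls below the hardware's FLOPs-per-byte ratio, and compute-bound otherwise.

PEAK_FLOPS = 2e12    # 2 TFLOP/s peak compute (illustrative accelerator)
PEAK_BW = 100e9      # 100 GB/s DRAM bandwidth (illustrative)
ridge_point = PEAK_FLOPS / PEAK_BW   # 20 FLOPs per byte

def classify(name, flops, bytes_moved):
    intensity = flops / bytes_moved
    bound = "compute-bound" if intensity > ridge_point else "memory-bound"
    print(f"{name}: {intensity:.1f} FLOPs/byte -> {bound}")

# Elementwise vector add of n float32 values: 1 FLOP per element,
# 12 bytes moved per element (read a, read b, write c).
n = 1_000_000
classify("vector add", flops=n, bytes_moved=12 * n)

# Square matrix multiply (n x n): ~2*n^3 FLOPs over ~3*n^2*4 bytes.
n = 1024
classify("matmul", flops=2 * n**3, bytes_moved=3 * n * n * 4)
```

Kernels that land well below the ridge point benefit more from added memory bandwidth or data reuse than from extra compute units, which is exactly the kind of tradeoff that shapes where a design sits in the accelerator space.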
This structured approach aims to elucidate the accelerator design space. -![Design tradeoffs. Credit: @rayis2014.](images/png/tradeoffs.png){#fig-design-tradeoffs} +![Design tradeoffs. Source: @rayis2014.](images/png/tradeoffs.png){#fig-design-tradeoffs} ### Application-Specific Integrated Circuits (ASICs) @@ -231,7 +231,7 @@ FPGAs provide several benefits over GPUs and ASICs for accelerating machine lear The key advantage of FPGAs is the ability to reconfigure the underlying fabric to implement custom architectures optimized for different models, unlike fixed-function ASICs. For example, quant trading firms use FPGAs to accelerate their algorithms because they change frequently, and the low NRE cost of FPGAs is more viable than tapping out new ASICs. @fig-different-fpgas contains a table comparing three different FPGAs. -![Comparison of FPGAs. Credit: @gwennap_certus-nx_nodate.](images/png/fpga.png){#fig-different-fpgas} +![Comparison of FPGAs. Source: @gwennap_certus-nx_nodate.](images/png/fpga.png){#fig-different-fpgas} FPGAs comprise basic building blocks - configurable logic blocks, RAM blocks, and interconnects. Vendors provide a base amount of these resources, and engineers program the chips by compiling HDL code into bitstreams that rearrange the fabric into different configurations. This makes FPGAs adaptable as algorithms evolve. @@ -760,7 +760,7 @@ Wafer-scale AI takes an extremely integrated approach, manufacturing an entire s The wafer-scale approach also diverges from more modular system-on-chip designs that still have discrete components communicating by bus. Instead, wafer-scale AI enables full customization and tight integration of computation, memory, and interconnects across the entire die. -![Wafer-scale vs. GPU. Credit: [Cerebras](https://www.cerebras.net/product-chip/).](images/png/aimage1.png){#fig-wafer-scale} +![Wafer-scale vs. GPU. Source: [Cerebras](https://www.cerebras.net/product-chip/).](images/png/aimage1.png){#fig-wafer-scale} By designing the wafer as one integrated logic unit, data transfer between elements is minimized. This provides lower latency and power consumption than discrete system-on-chip or chiplet designs. While chiplets can offer flexibility by mixing and matching components, communication between chiplets is challenging. The monolithic nature of wafer-scale integration eliminates these inter-chip communication bottlenecks. @@ -772,7 +772,7 @@ However, the ultra-large-scale also poses difficulties for manufacturability and # Wafer-scale AI Chips -{{< video >}} +{{< video https://www.youtube.com/watch?v=Fcob512SJz0 >}} ::: @@ -782,7 +782,7 @@ Chiplet design refers to a semiconductor architecture in which a single integrat Chiplets are interconnected using advanced packaging techniques like high-density substrate interposers, 2.5D/3D stacking, and wafer-level packaging. This allows combining chiplets fabricated with different process nodes, specialized memories, and various optimized AI engines. -![Chiplet partitioning. Credit: @vivet2021intact.](images/png/aimage2.png){#fig-chiplet} +![Chiplet partitioning. Source: @vivet2021intact.](images/png/aimage2.png){#fig-chiplet} Some key advantages of using chiplets for AI include: @@ -803,13 +803,13 @@ The key objective of chiplets is finding the right balance between modular flexi Neuromorphic computing is an emerging field aiming to emulate the efficiency and robustness of biological neural systems for machine learning applications. 
A key difference from classical Von Neumann architectures is the merging of memory and processing in the same circuit [@schuman2022opportunities; @markovic2020physics; @furber2016large], as illustrated in @fig-neuromorphic. The structure of the brain inspires this integrated approach. A key advantage is the potential for orders of magnitude improvement in energy-efficient computation compared to conventional AI hardware. For example, estimates project 100x-1000x gains in energy efficiency versus current GPU-based systems for equivalent workloads. -![Comparison of the von Neumann architecture with the neuromorphic architecture. Credit: @schuman2022opportunities.](images/png/aimage3.png){#fig-neuromorphic} +![Comparison of the von Neumann architecture with the neuromorphic architecture. Source: @schuman2022opportunities.](images/png/aimage3.png){#fig-neuromorphic} Intel and IBM are leading commercial efforts in neuromorphic hardware. Intel's Loihi and Loihi 2 chips [@davies2018loihi; @davies2021advancing] offer programmable neuromorphic cores with on-chip learning. IBM's Northpole [@modha2023neural] device comprises over 100 million magnetic tunnel junction synapses and 68 billion transistors. These specialized chips deliver benefits like low power consumption for edge inference. Spiking neural networks (SNNs) [@maass1997networks] are computational models for neuromorphic hardware. Unlike deep neural networks communicating via continuous values, SNNs use discrete spikes that are more akin to biological neurons. This allows efficient event-based computation rather than constant processing. Additionally, SNNs consider the temporal and spatial characteristics of input data. This better mimics biological neural networks, where the timing of neuronal spikes plays an important role. However, training SNNs remains challenging due to the added temporal complexity. @fig-spiking provides an overview of the spiking methodology: (a) Diagram of a neuron; (b) Measuring an action potential propagated along the axon of a neuron. Only the action potential is detectable along the axon; (c) The neuron's spike is approximated with a binary representation; (d) Event-Driven Processing; (e) Active Pixel Sensor and Dynamic Vision Sensor. -![Neuromoprhic spiking. Credit: @eshraghian2023training.](images/png/aimage4.png){#fig-spiking} +![Neuromoprhic spiking. Source: @eshraghian2023training.](images/png/aimage4.png){#fig-spiking} You can also watch @vid-snn linked below for a more detailed explanation. @@ -817,7 +817,7 @@ You can also watch @vid-snn linked below for a more detailed explanation. # Neuromorphic Computing -{{< video >}} +{{< video https://www.youtube.com/watch?v=yihk_8XnCzg >}} ::: @@ -849,7 +849,7 @@ Flexible electronics refer to electronic circuits and devices fabricated on flex Flexible AI hardware can conform to curvy surfaces and operate efficiently with microwatt power budgets. Flexibility also enables rollable or foldable form factors to minimize device footprint and weight, ideal for small, portable smart devices and wearables incorporating TinyML. Another key advantage of flexible electronics compared to conventional technologies is lower manufacturing costs and simpler fabrication processes, which could democratize access to these technologies. While silicon masks and fabrication costs typically cost millions of dollars, flexible hardware typically costs only tens of cents to manufacture [@huang2010pseudo; @biggs2021natively]. 
The potential to fabricate flexible electronics directly onto plastic films using high-throughput printing and coating processes can reduce costs and improve manufacturability at scale versus rigid AI chips [@musk2019integrated]. -![Flexible device prototype. Credit: Jabil Circuit.](images/jpg/flexible-circuit.jpeg){#fig-flexible-device} +![Flexible device prototype. Source: Jabil Circuit.](images/jpg/flexible-circuit.jpeg){#fig-flexible-device} The field is enabled by advances in organic semiconductors and nanomaterials that can be deposited on thin, flexible films. However, fabrication remains challenging compared to mature silicon processes. Flexible circuits currently typically exhibit lower performance than rigid equivalents. Still, they promise to transform electronics into lightweight, bendable materials. @@ -1013,7 +1013,7 @@ Alternatively, you can watch @vid-arch for more details. # Architecture 2.0 -{{< video >}} +{{< video https://www.youtube.com/watch?v=F5Eieaz7u1I&ab_channel=OpenComputeProject >}} ::: diff --git a/contents/image_classification/image_classification.qmd b/contents/image_classification/image_classification.qmd index ad75fd026..04d3bc2ee 100644 --- a/contents/image_classification/image_classification.qmd +++ b/contents/image_classification/image_classification.qmd @@ -4,7 +4,7 @@ bibliography: image_classification.bib # CV on Nicla Vision {.unnumbered} -![*DALL·E 3 Prompt: Cartoon in a 1950s style featuring a compact electronic device with a camera module placed on a wooden table. The screen displays blue robots on one side and green periquitos on the other. LED lights on the device indicate classifications, while characters in retro clothing observe with interest.*](images/jpg/img_class_ini.jpg){fig-align="center" width="6.5in"} +![*DALL·E 3 Prompt: Cartoon in a 1950s style featuring a compact electronic device with a camera module placed on a wooden table. The screen displays blue robots on one side and green periquitos on the other. LED lights on the device indicate classifications, while characters in retro clothing observe with interest.*](images/jpg/img_class_ini.jpg) ## Introduction @@ -12,7 +12,7 @@ As we initiate our studies into embedded machine learning or TinyML, it's imposs More and more, we are facing an artificial intelligence (AI) revolution where, as stated by Gartner, **Edge AI** has a very high impact potential, and **it is for now**! -![](images/jpg/image2.jpg){fig-align="center" width="4.729166666666667in"} +![](images/jpg/image2.jpg) In the "bullseye" of the Radar is the *Edge Computer Vision*, and when we talk about Machine Learning (ML) applied to vision, the first thing that comes to mind is **Image Classification**, a kind of ML "Hello World"! @@ -26,7 +26,7 @@ At its core, computer vision aims to enable machines to interpret and make decis When discussing Computer Vision projects applied to embedded devices, the most common applications that come to mind are *Image Classification* and *Object Detection*. -![](images/jpg/image15.jpg){fig-align="center" width="6.5in"} +![](images/jpg/image15.jpg) Both models can be implemented on tiny devices like the Arduino Nicla Vision and used on real projects. In this chapter, we will cover Image Classification. @@ -34,7 +34,7 @@ Both models can be implemented on tiny devices like the Arduino Nicla Vision and The first step in any ML project is to define the goal. In this case, it is to detect and classify two specific objects present in one image. 
For this project, we will use two small toys: a *robot* and a small Brazilian parrot (named *Periquito*). Also, we will collect images of a *background* where those two objects are absent. -![](images/jpg/image36.jpg){fig-align="center" width="6.5in"} +![](images/jpg/image36.jpg) ## Data Collection @@ -44,23 +44,23 @@ Once you have defined your Machine Learning project goal, the next and most cruc First, create in your computer a folder where your data will be saved, for example, "data." Next, on the OpenMV IDE, go to `Tools > Dataset Editor` and select `New Dataset` to start the dataset collection: -![](images/png/image29.png){fig-align="center" width="6.291666666666667in"} +![](images/png/image29.png) The IDE will ask you to open the file where your data will be saved and choose the "data" folder that was created. Note that new icons will appear on the Left panel. -![](images/png/image46.png){fig-align="center" width="0.9583333333333334in"} +![](images/png/image46.png) Using the upper icon (1), enter with the first class name, for example, "periquito": -![](images/png/image22.png){fig-align="center" width="3.25in"} +![](images/png/image22.png) Running the `dataset_capture_script.py` and clicking on the camera icon (2), will start capturing images: -![](images/png/image43.png){fig-align="center" width="6.5in"} +![](images/png/image43.png) Repeat the same procedure with the other classes -![](images/jpg/image6.jpg){fig-align="center" width="6.5in"} +![](images/jpg/image6.jpg) > We suggest around 60 images from each category. Try to capture different angles, backgrounds, and light conditions. @@ -70,7 +70,7 @@ After capturing your dataset, close the Dataset Editor Tool on the `Tools > Data On your computer, you will end with a dataset that contains three classes: *periquito,* *robot*, and *background*. -![](images/png/image20.png){fig-align="center" width="6.5in"} +![](images/png/image20.png) You should return to *Edge Impulse Studio* and upload the dataset to your project. @@ -78,7 +78,7 @@ You should return to *Edge Impulse Studio* and upload the dataset to your projec We will use the Edge Impulse Studio for training our model. Enter your account credentials and create a new project: -![](images/png/image45.png){fig-align="center" width="6.5in"} +![](images/png/image45.png) > Here, you can clone a similar project: [NICLA-Vision_Image_Classification](https://studio.edgeimpulse.com/public/273858/latest). @@ -86,27 +86,27 @@ We will use the Edge Impulse Studio for training our model. Enter your account c Using the EI Studio (or *Studio*), we will go over four main steps to have our model ready for use on the Nicla Vision board: Dataset, Impulse, Tests, and Deploy (on the Edge Device, in this case, the NiclaV). -![](images/jpg/image41.jpg){fig-align="center" width="6.5in"} +![](images/jpg/image41.jpg) Regarding the Dataset, it is essential to point out that our Original Dataset, captured with the OpenMV IDE, will be split into *Training*, *Validation*, and *Test*. The Test Set will be divided from the beginning, and a part will reserved to be used only in the Test phase after training. The Validation Set will be used during training. 
-![](images/jpg/image7.jpg){fig-align="center" width="6.5in"} +![](images/jpg/image7.jpg) On Studio, go to the Data acquisition tab, and on the UPLOAD DATA section, upload the chosen categories files from your computer: -![](images/png/image39.png){fig-align="center" width="6.5in"} +![](images/png/image39.png) Leave to the Studio the splitting of the original dataset into *train and test* and choose the label about that specific data: -![](images/png/image30.png){fig-align="center" width="6.5in"} +![](images/png/image30.png) Repeat the procedure for all three classes. At the end, you should see your "raw data" in the Studio: -![](images/png/image11.png){fig-align="center" width="6.5in"} +![](images/png/image11.png) The Studio allows you to explore your data, showing a complete view of all the data in your project. You can clear, inspect, or change labels by clicking on individual data items. In our case, a very simple project, the data seems OK. -![](images/png/image44.png){fig-align="center" width="6.5in"} +![](images/png/image44.png) ## The Impulse Design @@ -116,35 +116,35 @@ In this phase, we should define how to: - Specify a Model, in this case, it will be the `Transfer Learning (Images)` to fine-tune a pre-trained MobileNet V2 image classification model on our data. This method performs well even with relatively small image datasets (around 150 images in our case). -![](images/jpg/image23.jpg){fig-align="center" width="6.5in"} +![](images/jpg/image23.jpg) Transfer Learning with MobileNet offers a streamlined approach to model training, which is especially beneficial for resource-constrained environments and projects with limited labeled data. MobileNet, known for its lightweight architecture, is a pre-trained model that has already learned valuable features from a large dataset (ImageNet). -![](images/jpg/image9.jpg){fig-align="center" width="6.5in"} +![](images/jpg/image9.jpg) By leveraging these learned features, you can train a new model for your specific task with fewer data and computational resources and yet achieve competitive accuracy. -![](images/jpg/image32.jpg){fig-align="center" width="6.5in"} +![](images/jpg/image32.jpg) This approach significantly reduces training time and computational cost, making it ideal for quick prototyping and deployment on embedded devices where efficiency is paramount. Go to the Impulse Design Tab and create the *impulse*, defining an image size of 96x96 and squashing them (squared form, without cropping). Select Image and Transfer Learning blocks. Save the Impulse. -![](images/png/image16.png){fig-align="center" width="6.5in"} +![](images/png/image16.png) ### Image Pre-Processing All the input QVGA/RGB565 images will be converted to 27,640 features (96x96x3). -![](images/png/image17.png){fig-align="center" width="6.5in"} +![](images/png/image17.png) Press \[Save parameters\] and Generate all features: -![](images/png/image5.png){fig-align="center" width="6.5in"} +![](images/png/image5.png) ### Model Design -In 2007, Google introduced [[MobileNetV1]{.underline}](https://research.googleblog.com/2017/06/mobilenets-open-source-models-for.html), a family of general-purpose computer vision neural networks designed with mobile devices in mind to support classification, detection, and more. MobileNets are small, low-latency, low-power models parameterized to meet the resource constraints of various use cases. in 2018, Google launched [MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/abs/1801.04381). 
+In 2007, Google introduced [MobileNetV1](https://research.googleblog.com/2017/06/mobilenets-open-source-models-for.html), a family of general-purpose computer vision neural networks designed with mobile devices in mind to support classification, detection, and more. MobileNets are small, low-latency, low-power models parameterized to meet the resource constraints of various use cases. in 2018, Google launched [MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/abs/1801.04381). MobileNet V1 and MobileNet V2 aim at mobile efficiency and embedded vision applications but differ in architectural complexity and performance. While both use depthwise separable convolutions to reduce the computational cost, MobileNet V2 introduces Inverted Residual Blocks and Linear Bottlenecks to enhance performance. These new features allow V2 to capture more complex features using fewer parameters, making it computationally more efficient and generally more accurate than its predecessor. Additionally, V2 employs a non-linear activation in the intermediate expansion layer. It still uses a linear activation for the bottleneck layer, a design choice found to preserve important information through the network. MobileNet V2 offers an optimized architecture for higher accuracy and efficiency and will be used in this project. @@ -152,11 +152,11 @@ Although the base MobileNet architecture is already tiny and has low latency, ma Edge Impulse Studio can use both MobileNetV1 (96x96 images) and V2 (96x96 or 160x160 images), with several different **α** values (from 0.05 to 1.0). For example, you will get the highest accuracy with V2, 160x160 images, and α=1.0. Of course, there is a trade-off. The higher the accuracy, the more memory (around 1.3MB RAM and 2.6MB ROM) will be needed to run the model, implying more latency. The smaller footprint will be obtained at the other extreme with MobileNetV1 and α=0.10 (around 53.2K RAM and 101K ROM). -![](images/jpg/image27.jpg){fig-align="center" width="6.5in"} +![](images/jpg/image27.jpg) We will use **MobileNetV2 96x96 0.1** for this project, with an estimated memory cost of 265.3 KB in RAM. This model should be OK for the Nicla Vision with 1MB of SRAM. On the Transfer Learning Tab, select this model: -![](images/png/image24.png){fig-align="center" width="6.5in"} +![](images/png/image24.png) ## Model Training @@ -188,33 +188,33 @@ Exposure to these variations during training can help prevent your model from ta The final layer of our model will have 12 neurons with a 15% dropout for overfitting prevention. Here is the Training result: -![](images/jpg/image31.jpg){fig-align="center" width="6.5in"} +![](images/jpg/image31.jpg) The result is excellent, with 77ms of latency, which should result in 13fps (frames per second) during inference. ## Model Testing -![](images/jpg/image10.jpg){fig-align="center" width="6.5in"} +![](images/jpg/image10.jpg) Now, you should take the data set aside at the start of the project and run the trained model using it as input: -![](images/png/image34.png){fig-align="center" width="3.1041666666666665in"} +![](images/png/image34.png) The result is, again, excellent. -![](images/png/image12.png){fig-align="center" width="6.5in"} +![](images/png/image12.png) ## Deploying the model At this point, we can deploy the trained model as.tflite and use the OpenMV IDE to run it using MicroPython, or we can deploy it as a C/C++ or an Arduino library. 
-![](images/jpg/image28.jpg){fig-align="center" width="6.5in"} +![](images/jpg/image28.jpg) ### Arduino Library First, Let's deploy it as an Arduino Library: -![](images/png/image48.png){fig-align="center" width="6.5in"} +![](images/png/image48.png) You should install the library as.zip on the Arduino IDE and run the sketch *nicla_vision_camera.ino* available in Examples under your library name. @@ -222,9 +222,9 @@ You should install the library as.zip on the Arduino IDE and run the sketch *nic The result is good, with 86ms of measured latency. -![](images/jpg/image25.jpg){fig-align="center" width="6.5in"} +![](images/jpg/image25.jpg) -Here is a short video showing the inference results: {{< video width="480" height="270" center >}} +Here is a short video showing the inference results: {{< video https://youtu.be/bZPZZJblU-o >}} ### OpenMV @@ -232,47 +232,47 @@ It is possible to deploy the trained model to be used with OpenMV in two ways: a Three files are generated as a library: the trained.tflite model, a list with labels, and a simple MicroPython script that can make inferences using the model. -![](images/png/image26.png){fig-align="center" width="6.5in"} +![](images/png/image26.png) Running this model as a *.tflite* directly in the Nicla was impossible. So, we can sacrifice the accuracy using a smaller model or deploy the model as an OpenMV Firmware (FW). Choosing FW, the Edge Impulse Studio generates optimized models, libraries, and frameworks needed to make the inference. Let's explore this option. Select `OpenMV Firmware` on the `Deploy Tab` and press `[Build]`. -![](images/png/image3.png){fig-align="center" width="6.5in"} +![](images/png/image3.png) On your computer, you will find a ZIP file. Open it: -![](images/png/image33.png){fig-align="center" width="6.5in"} +![](images/png/image33.png) Use the Bootloader tool on the OpenMV IDE to load the FW on your board: -![](images/jpg/image35.jpg){fig-align="center" width="6.5in"} +![](images/jpg/image35.jpg) Select the appropriate file (.bin for Nicla-Vision): -![](images/png/image8.png){fig-align="center" width="6.5in"} +![](images/png/image8.png) After the download is finished, press OK: -![](images/png/image40.png){fig-align="center" width="3.875in"} +![](images/png/image40.png) If a message says that the FW is outdated, DO NOT UPGRADE. Select \[NO\]. -![](images/png/image42.png){fig-align="center" width="4.572916666666667in"} +![](images/png/image42.png) Now, open the script **ei_image_classification.py** that was downloaded from the Studio and the.bin file for the Nicla. -![](images/png/image14.png){fig-align="center" width="6.5in"} +![](images/png/image14.png) Run it. Pointing the camera to the objects we want to classify, the inference result will be displayed on the Serial Terminal. -![](images/png/image37.png){fig-align="center" width="6.5in"} +![](images/png/image37.png) #### Changing the Code to add labels The code provided by Edge Impulse can be modified so that we can see, for test reasons, the inference result directly on the image displayed on the OpenMV IDE. 
-[[Upload the code from GitHub,]{.underline}](https://github.com/Mjrovai/Arduino_Nicla_Vision/blob/main/Micropython/nicla_image_classification.py) or modify it as below: +[Upload the code from GitHub,](https://github.com/Mjrovai/Arduino_Nicla_Vision/blob/main/Micropython/nicla_image_classification.py) or modify it as below: ``` python # Marcelo Rovai - NICLA Vision - Image Classification @@ -343,15 +343,15 @@ while(True): Here you can see the result: -![](images/jpg/image47.jpg){fig-align="center" width="6.5in"} +![](images/jpg/image47.jpg) Note that the latency (136 ms) is almost double of what we got directly with the Arduino IDE. This is because we are using the IDE as an interface and also the time to wait for the camera to be ready. If we start the clock just before the inference: -![](images/jpg/image13.jpg){fig-align="center" width="6.5in"} +![](images/jpg/image13.jpg) The latency will drop to only 71 ms. -![](images/jpg/image1.jpg){fig-align="center" width="3.5520833333333335in"} +![](images/jpg/image1.jpg) > The NiclaV runs about half as fast when connected to the IDE. The FPS should increase once disconnected. @@ -359,9 +359,9 @@ The latency will drop to only 71 ms. When working with embedded machine learning, we are looking for devices that can continually proceed with the inference and result, taking some action directly on the physical world and not displaying the result on a connected computer. To simulate this, we will light up a different LED for each possible inference result. -![](images/jpg/image38.jpg){fig-align="center" width="6.5in"} +![](images/jpg/image38.jpg) -To accomplish that, we should [[upload the code from GitHub]{.underline}](https://github.com/Mjrovai/Arduino_Nicla_Vision/blob/main/Micropython/nicla_image_classification_LED.py) or change the last code to include the LEDs: +To accomplish that, we should [upload the code from GitHub](https://github.com/Mjrovai/Arduino_Nicla_Vision/blob/main/Micropython/nicla_image_classification_LED.py) or change the last code to include the LEDs: ``` python # Marcelo Rovai - NICLA Vision - Image Classification with LEDs @@ -477,21 +477,21 @@ Now, each time that a class scores a result greater than 0.8, the correspondent Here is the result: -![](images/jpg/image18.jpg){fig-align="center" width="6.5in"} +![](images/jpg/image18.jpg) In more detail -![](images/jpg/image21.jpg){fig-align="center" width="6.5in"} +![](images/jpg/image21.jpg) ## Image Classification (non-official) Benchmark Several development boards can be used for embedded machine learning (TinyML), and the most common ones for Computer Vision applications (consuming low energy), are the ESP32 CAM, the Seeed XIAO ESP32S3 Sense, the Arduino Nicla Vison, and the Arduino Portenta. -![](images/jpg/image19.jpg){fig-align="center" width="6.5in"} +![](images/jpg/image19.jpg) Catching the opportunity, the same trained model was deployed on the ESP-CAM, the XIAO, and the Portenta (in this one, the model was trained again, using grayscaled images to be compatible with its camera). 
Here is the result, deploying the models as Arduino's Library: -![](images/jpg/image4.jpg){fig-align="center" width="6.5in"} +![](images/jpg/image4.jpg) ## Conclusion diff --git a/contents/introduction/introduction.qmd b/contents/introduction/introduction.qmd index 3ab8396f9..ee77a896f 100644 --- a/contents/introduction/introduction.qmd +++ b/contents/introduction/introduction.qmd @@ -12,7 +12,7 @@ In the early 1990s, [Mark Weiser](https://en.wikipedia.org/wiki/Mark_Weiser), a ![Ubiqutous computing.](images/png/21st_computer.png){#fig-ubiqutous width=50%} -In the vision of ubiquitous computing[@weiser1991computer], the integration of processors into everyday objects is just one aspect of a larger paradigm shift. The true essence of this vision lies in creating an intelligent environment that can anticipate our needs and act on our behalf, enhancing our experiences without requiring explicit commands. To achieve this level of pervasive intelligence, it is crucial to develop and deploy machine learning systems that span the entire ecosystem, from the cloud to the edge and even to the tiniest IoT devices. +In the vision of ubiquitous computing [@weiser1991computer], the integration of processors into everyday objects is just one aspect of a larger paradigm shift. The true essence of this vision lies in creating an intelligent environment that can anticipate our needs and act on our behalf, enhancing our experiences without requiring explicit commands. To achieve this level of pervasive intelligence, it is crucial to develop and deploy machine learning systems that span the entire ecosystem, from the cloud to the edge and even to the tiniest IoT devices. By distributing machine learning capabilities across the computing continuum, we can harness the strengths of each layer while mitigating their limitations. The cloud, with its vast computational resources and storage capacity, is ideal for training complex models on large datasets and performing resource-intensive tasks. Edge devices, such as gateways and smartphones, can process data locally, enabling faster response times, improved privacy, and reduced bandwidth requirements. Finally, the tiniest IoT devices, equipped with machine learning capabilities, can make quick decisions based on sensor data, enabling highly responsive and efficient systems. diff --git a/contents/labs/arduino/nicla_vision/image_classification/image_classification.qmd b/contents/labs/arduino/nicla_vision/image_classification/image_classification.qmd index 4f48d9b91..fd0bc8be1 100644 --- a/contents/labs/arduino/nicla_vision/image_classification/image_classification.qmd +++ b/contents/labs/arduino/nicla_vision/image_classification/image_classification.qmd @@ -4,7 +4,7 @@ bibliography: image_classification.bib # Image Classification {.unnumbered} -![*DALL·E 3 Prompt: Cartoon in a 1950s style featuring a compact electronic device with a camera module placed on a wooden table. The screen displays blue robots on one side and green periquitos on the other. LED lights on the device indicate classifications, while characters in retro clothing observe with interest.*](images/jpg/img_class_ini.jpg){fig-align="center" width="6.5in"} +![*DALL·E 3 Prompt: Cartoon in a 1950s style featuring a compact electronic device with a camera module placed on a wooden table. The screen displays blue robots on one side and green periquitos on the other. 
LED lights on the device indicate classifications, while characters in retro clothing observe with interest.*](images/jpg/img_class_ini.jpg) ## Introduction @@ -12,7 +12,7 @@ As we initiate our studies into embedded machine learning or TinyML, it's imposs More and more, we are facing an artificial intelligence (AI) revolution where, as stated by Gartner, **Edge AI** has a very high impact potential, and **it is for now**! -![](images/jpg/image2.jpg){fig-align="center" width="4.729166666666667in"} +![](images/jpg/image2.jpg) In the "bullseye" of the Radar is the *Edge Computer Vision*, and when we talk about Machine Learning (ML) applied to vision, the first thing that comes to mind is **Image Classification**, a kind of ML "Hello World"! @@ -26,7 +26,7 @@ At its core, computer vision aims to enable machines to interpret and make decis When discussing Computer Vision projects applied to embedded devices, the most common applications that come to mind are *Image Classification* and *Object Detection*. -![](images/jpg/image15.jpg){fig-align="center" width="6.5in"} +![](images/jpg/image15.jpg) Both models can be implemented on tiny devices like the Arduino Nicla Vision and used on real projects. In this chapter, we will cover Image Classification. @@ -34,7 +34,7 @@ Both models can be implemented on tiny devices like the Arduino Nicla Vision and The first step in any ML project is to define the goal. In this case, it is to detect and classify two specific objects present in one image. For this project, we will use two small toys: a *robot* and a small Brazilian parrot (named *Periquito*). Also, we will collect images of a *background* where those two objects are absent. -![](images/jpg/image36.jpg){fig-align="center" width="6.5in"} +![](images/jpg/image36.jpg) ## Data Collection @@ -44,23 +44,23 @@ Once you have defined your Machine Learning project goal, the next and most cruc First, create in your computer a folder where your data will be saved, for example, "data." Next, on the OpenMV IDE, go to `Tools > Dataset Editor` and select `New Dataset` to start the dataset collection: -![](images/png/image29.png){fig-align="center" width="6.291666666666667in"} +![](images/png/image29.png) The IDE will ask you to open the file where your data will be saved and choose the "data" folder that was created. Note that new icons will appear on the Left panel. -![](images/png/image46.png){fig-align="center" width="0.9583333333333334in"} +![](images/png/image46.png) Using the upper icon (1), enter with the first class name, for example, "periquito": -![](images/png/image22.png){fig-align="center" width="3.25in"} +![](images/png/image22.png) Running the `dataset_capture_script.py` and clicking on the camera icon (2), will start capturing images: -![](images/png/image43.png){fig-align="center" width="6.5in"} +![](images/png/image43.png) Repeat the same procedure with the other classes -![](images/jpg/image6.jpg){fig-align="center" width="6.5in"} +![](images/jpg/image6.jpg) > We suggest around 60 images from each category. Try to capture different angles, backgrounds, and light conditions. @@ -70,7 +70,7 @@ After capturing your dataset, close the Dataset Editor Tool on the `Tools > Data On your computer, you will end with a dataset that contains three classes: *periquito,* *robot*, and *background*. -![](images/png/image20.png){fig-align="center" width="6.5in"} +![](images/png/image20.png) You should return to *Edge Impulse Studio* and upload the dataset to your project. 
@@ -78,7 +78,7 @@ You should return to *Edge Impulse Studio* and upload the dataset to your projec We will use the Edge Impulse Studio for training our model. Enter your account credentials and create a new project: -![](images/png/image45.png){fig-align="center" width="6.5in"} +![](images/png/image45.png) > Here, you can clone a similar project: [NICLA-Vision_Image_Classification](https://studio.edgeimpulse.com/public/273858/latest). @@ -86,27 +86,27 @@ We will use the Edge Impulse Studio for training our model. Enter your account c Using the EI Studio (or *Studio*), we will go over four main steps to have our model ready for use on the Nicla Vision board: Dataset, Impulse, Tests, and Deploy (on the Edge Device, in this case, the NiclaV). -![](images/jpg/image41.jpg){fig-align="center" width="6.5in"} +![](images/jpg/image41.jpg) Regarding the Dataset, it is essential to point out that our Original Dataset, captured with the OpenMV IDE, will be split into *Training*, *Validation*, and *Test*. The Test Set will be divided from the beginning, and a part will be reserved to be used only in the Test phase after training. The Validation Set will be used during training. -![](images/jpg/image7.jpg){fig-align="center" width="6.5in"} +![](images/jpg/image7.jpg) On Studio, go to the Data acquisition tab, and on the UPLOAD DATA section, upload the files for the chosen categories from your computer: -![](images/png/image39.png){fig-align="center" width="6.5in"} +![](images/png/image39.png) Let the Studio split the original dataset into *train and test*, and choose the label for that specific data: -![](images/png/image30.png){fig-align="center" width="6.5in"} +![](images/png/image30.png) Repeat the procedure for all three classes. At the end, you should see your "raw data" in the Studio: -![](images/png/image11.png){fig-align="center" width="6.5in"} +![](images/png/image11.png) The Studio allows you to explore your data, showing a complete view of all the data in your project. You can clear, inspect, or change labels by clicking on individual data items. In our case, a very simple project, the data seems OK. -![](images/png/image44.png){fig-align="center" width="6.5in"} +![](images/png/image44.png) ## The Impulse Design @@ -116,35 +116,35 @@ In this phase, we should define how to: - Specify a Model; in this case, it will be `Transfer Learning (Images)` to fine-tune a pre-trained MobileNet V2 image classification model on our data. This method performs well even with relatively small image datasets (around 150 images in our case). -![](images/jpg/image23.jpg){fig-align="center" width="6.5in"} +![](images/jpg/image23.jpg) Transfer Learning with MobileNet offers a streamlined approach to model training, which is especially beneficial for resource-constrained environments and projects with limited labeled data. MobileNet, known for its lightweight architecture, is a pre-trained model that has already learned valuable features from a large dataset (ImageNet). -![](images/jpg/image9.jpg){fig-align="center" width="6.5in"} +![](images/jpg/image9.jpg) By leveraging these learned features, you can train a new model for your specific task with less data and fewer computational resources and still achieve competitive accuracy. -![](images/jpg/image32.jpg){fig-align="center" width="6.5in"} +![](images/jpg/image32.jpg) This approach significantly reduces training time and computational cost, making it ideal for quick prototyping and deployment on embedded devices where efficiency is paramount.
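If you are curious about what the `Transfer Learning (Images)` block does conceptually, the Keras sketch below illustrates the idea: a MobileNetV2 backbone pre-trained on ImageNet is frozen, and only a small classification head is trained on our three classes. This is just an illustration; the exact layers, α value, and training setup used by the Studio may differ, and `train_ds`/`val_ds` are placeholders for your image datasets.

``` python
# Conceptual transfer-learning sketch with a MobileNetV2 backbone (illustrative only)
import tensorflow as tf

NUM_CLASSES = 3  # periquito, robot, background

base = tf.keras.applications.MobileNetV2(
    input_shape=(96, 96, 3),
    alpha=0.35,            # smallest width multiplier with ImageNet weights in Keras
    include_top=False,
    weights="imagenet",
)
base.trainable = False     # keep the pre-trained features frozen

model = tf.keras.Sequential([
    base,
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dropout(0.1),
    tf.keras.layers.Dense(NUM_CLASSES, activation="softmax"),
])

model.compile(optimizer="adam",
              loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])
# model.fit(train_ds, validation_data=val_ds, epochs=20)  # train_ds / val_ds: your datasets
```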
Go to the Impulse Design Tab and create the *impulse*, defining an image size of 96x96 and squashing the images (squared form, without cropping). Select Image and Transfer Learning blocks. Save the Impulse. -![](images/png/image16.png){fig-align="center" width="6.5in"} +![](images/png/image16.png) ### Image Pre-Processing All the input QVGA/RGB565 images will be converted to 27,648 features (96x96x3). -![](images/png/image17.png){fig-align="center" width="6.5in"} +![](images/png/image17.png) Press \[Save parameters\] and Generate all features: -![](images/png/image5.png){fig-align="center" width="6.5in"} +![](images/png/image5.png) ### Model Design -In 2007, Google introduced [[MobileNetV1]{.underline}](https://research.googleblog.com/2017/06/mobilenets-open-source-models-for.html), a family of general-purpose computer vision neural networks designed with mobile devices in mind to support classification, detection, and more. MobileNets are small, low-latency, low-power models parameterized to meet the resource constraints of various use cases. in 2018, Google launched [MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/abs/1801.04381). +In 2017, Google introduced [MobileNetV1](https://research.googleblog.com/2017/06/mobilenets-open-source-models-for.html), a family of general-purpose computer vision neural networks designed with mobile devices in mind to support classification, detection, and more. MobileNets are small, low-latency, low-power models parameterized to meet the resource constraints of various use cases. In 2018, Google launched [MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/abs/1801.04381). MobileNet V1 and MobileNet V2 aim at mobile efficiency and embedded vision applications but differ in architectural complexity and performance. While both use depthwise separable convolutions to reduce the computational cost, MobileNet V2 introduces Inverted Residual Blocks and Linear Bottlenecks to enhance performance. These new features allow V2 to capture more complex features using fewer parameters, making it computationally more efficient and generally more accurate than its predecessor. Additionally, V2 employs a non-linear activation in the intermediate expansion layer. It still uses a linear activation for the bottleneck layer, a design choice found to preserve important information through the network. MobileNet V2 offers an optimized architecture for higher accuracy and efficiency and will be used in this project. @@ -152,11 +152,11 @@ Although the base MobileNet architecture is already tiny and has low latency, ma Edge Impulse Studio can use both MobileNetV1 (96x96 images) and V2 (96x96 or 160x160 images), with several different **α** values (from 0.05 to 1.0). For example, you will get the highest accuracy with V2, 160x160 images, and α=1.0. Of course, there is a trade-off. The higher the accuracy, the more memory (around 1.3MB RAM and 2.6MB ROM) will be needed to run the model, implying more latency. The smaller footprint will be obtained at the other extreme with MobileNetV1 and α=0.10 (around 53.2K RAM and 101K ROM). -![](images/jpg/image27.jpg){fig-align="center" width="6.5in"} +![](images/jpg/image27.jpg) We will use **MobileNetV2 96x96 0.1** for this project, with an estimated memory cost of 265.3 KB in RAM. This model should be OK for the Nicla Vision with 1MB of SRAM.
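To make this architectural difference more concrete, here is an illustrative Keras sketch of a MobileNetV2-style inverted residual block: a 1x1 expansion with ReLU6, a 3x3 depthwise convolution, and a linear 1x1 bottleneck projection, with a residual connection when shapes allow. It is a simplified study version, not the exact graph used by Edge Impulse.

``` python
# Simplified MobileNetV2-style inverted residual block (illustrative only)
import tensorflow as tf
from tensorflow.keras import layers

def inverted_residual(x, expansion=6, out_channels=16, stride=1):
    in_channels = x.shape[-1]
    y = layers.Conv2D(expansion * in_channels, 1, padding="same", use_bias=False)(x)  # expand
    y = layers.BatchNormalization()(y)
    y = layers.ReLU(6.0)(y)
    y = layers.DepthwiseConv2D(3, strides=stride, padding="same", use_bias=False)(y)  # depthwise
    y = layers.BatchNormalization()(y)
    y = layers.ReLU(6.0)(y)
    y = layers.Conv2D(out_channels, 1, padding="same", use_bias=False)(y)  # linear bottleneck
    y = layers.BatchNormalization()(y)
    if stride == 1 and in_channels == out_channels:
        y = layers.Add()([x, y])  # residual connection only when shapes match
    return y

inputs = tf.keras.Input(shape=(96, 96, 16))
block = tf.keras.Model(inputs, inverted_residual(inputs))
block.summary()
```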
On the Transfer Learning Tab, select this model: -![](images/png/image24.png){fig-align="center" width="6.5in"} +![](images/png/image24.png) ## Model Training @@ -188,33 +188,33 @@ Exposure to these variations during training can help prevent your model from ta The final layer of our model will have 12 neurons with a 15% dropout for overfitting prevention. Here is the Training result: -![](images/jpg/image31.jpg){fig-align="center" width="6.5in"} +![](images/jpg/image31.jpg) The result is excellent, with 77ms of latency, which should result in 13fps (frames per second) during inference. ## Model Testing -![](images/jpg/image10.jpg){fig-align="center" width="6.5in"} +![](images/jpg/image10.jpg) Now, you should take the data set aside at the start of the project and run the trained model using it as input: -![](images/png/image34.png){fig-align="center" width="3.1041666666666665in"} +![](images/png/image34.png) The result is, again, excellent. -![](images/png/image12.png){fig-align="center" width="6.5in"} +![](images/png/image12.png) ## Deploying the model At this point, we can deploy the trained model as.tflite and use the OpenMV IDE to run it using MicroPython, or we can deploy it as a C/C++ or an Arduino library. -![](images/jpg/image28.jpg){fig-align="center" width="6.5in"} +![](images/jpg/image28.jpg) ### Arduino Library First, Let's deploy it as an Arduino Library: -![](images/png/image48.png){fig-align="center" width="6.5in"} +![](images/png/image48.png) You should install the library as.zip on the Arduino IDE and run the sketch *nicla_vision_camera.ino* available in Examples under your library name. @@ -222,9 +222,9 @@ You should install the library as.zip on the Arduino IDE and run the sketch *nic The result is good, with 86ms of measured latency. -![](images/jpg/image25.jpg){fig-align="center" width="6.5in"} +![](images/jpg/image25.jpg) -Here is a short video showing the inference results: {{< video width="480" height="270" center >}} +Here is a short video showing the inference results: {{< video https://youtu.be/bZPZZJblU-o >}} ### OpenMV @@ -232,47 +232,47 @@ It is possible to deploy the trained model to be used with OpenMV in two ways: a Three files are generated as a library: the trained.tflite model, a list with labels, and a simple MicroPython script that can make inferences using the model. -![](images/png/image26.png){fig-align="center" width="6.5in"} +![](images/png/image26.png) Running this model as a *.tflite* directly in the Nicla was impossible. So, we can sacrifice the accuracy using a smaller model or deploy the model as an OpenMV Firmware (FW). Choosing FW, the Edge Impulse Studio generates optimized models, libraries, and frameworks needed to make the inference. Let's explore this option. Select `OpenMV Firmware` on the `Deploy Tab` and press `[Build]`. -![](images/png/image3.png){fig-align="center" width="6.5in"} +![](images/png/image3.png) On your computer, you will find a ZIP file. 
Open it: -![](images/png/image33.png){fig-align="center" width="6.5in"} +![](images/png/image33.png) Use the Bootloader tool on the OpenMV IDE to load the FW on your board: -![](images/jpg/image35.jpg){fig-align="center" width="6.5in"} +![](images/jpg/image35.jpg) Select the appropriate file (.bin for Nicla-Vision): -![](images/png/image8.png){fig-align="center" width="6.5in"} +![](images/png/image8.png) After the download is finished, press OK: -![](images/png/image40.png){fig-align="center" width="3.875in"} +![](images/png/image40.png) If a message says that the FW is outdated, DO NOT UPGRADE. Select \[NO\]. -![](images/png/image42.png){fig-align="center" width="4.572916666666667in"} +![](images/png/image42.png) Now, open the script **ei_image_classification.py** that was downloaded from the Studio and the.bin file for the Nicla. -![](images/png/image14.png){fig-align="center" width="6.5in"} +![](images/png/image14.png) Run it. Pointing the camera to the objects we want to classify, the inference result will be displayed on the Serial Terminal. -![](images/png/image37.png){fig-align="center" width="6.5in"} +![](images/png/image37.png) #### Changing the Code to add labels The code provided by Edge Impulse can be modified so that we can see, for test reasons, the inference result directly on the image displayed on the OpenMV IDE. -[[Upload the code from GitHub,]{.underline}](https://github.com/Mjrovai/Arduino_Nicla_Vision/blob/main/Micropython/nicla_image_classification.py) or modify it as below: +[Upload the code from GitHub,](https://github.com/Mjrovai/Arduino_Nicla_Vision/blob/main/Micropython/nicla_image_classification.py) or modify it as below: ``` python # Marcelo Rovai - NICLA Vision - Image Classification @@ -343,15 +343,15 @@ while(True): Here you can see the result: -![](images/jpg/image47.jpg){fig-align="center" width="6.5in"} +![](images/jpg/image47.jpg) Note that the latency (136 ms) is almost double of what we got directly with the Arduino IDE. This is because we are using the IDE as an interface and also the time to wait for the camera to be ready. If we start the clock just before the inference: -![](images/jpg/image13.jpg){fig-align="center" width="6.5in"} +![](images/jpg/image13.jpg) The latency will drop to only 71 ms. -![](images/jpg/image1.jpg){fig-align="center" width="3.5520833333333335in"} +![](images/jpg/image1.jpg) > The NiclaV runs about half as fast when connected to the IDE. The FPS should increase once disconnected. @@ -359,9 +359,9 @@ The latency will drop to only 71 ms. When working with embedded machine learning, we are looking for devices that can continually proceed with the inference and result, taking some action directly on the physical world and not displaying the result on a connected computer. To simulate this, we will light up a different LED for each possible inference result. 
-![](images/jpg/image38.jpg){fig-align="center" width="6.5in"} +![](images/jpg/image38.jpg) -To accomplish that, we should [[upload the code from GitHub]{.underline}](https://github.com/Mjrovai/Arduino_Nicla_Vision/blob/main/Micropython/nicla_image_classification_LED.py) or change the last code to include the LEDs: +To accomplish that, we should [upload the code from GitHub](https://github.com/Mjrovai/Arduino_Nicla_Vision/blob/main/Micropython/nicla_image_classification_LED.py) or change the last code to include the LEDs: ``` python # Marcelo Rovai - NICLA Vision - Image Classification with LEDs @@ -477,21 +477,21 @@ Now, each time that a class scores a result greater than 0.8, the correspondent Here is the result: -![](images/jpg/image18.jpg){fig-align="center" width="6.5in"} +![](images/jpg/image18.jpg) In more detail -![](images/jpg/image21.jpg){fig-align="center" width="6.5in"} +![](images/jpg/image21.jpg) ## Image Classification (non-official) Benchmark Several development boards can be used for embedded machine learning (TinyML), and the most common ones for Computer Vision applications (consuming low energy), are the ESP32 CAM, the Seeed XIAO ESP32S3 Sense, the Arduino Nicla Vison, and the Arduino Portenta. -![](images/jpg/image19.jpg){fig-align="center" width="6.5in"} +![](images/jpg/image19.jpg) Catching the opportunity, the same trained model was deployed on the ESP-CAM, the XIAO, and the Portenta (in this one, the model was trained again, using grayscaled images to be compatible with its camera). Here is the result, deploying the models as Arduino's Library: -![](images/jpg/image4.jpg){fig-align="center" width="6.5in"} +![](images/jpg/image4.jpg) ## Conclusion diff --git a/contents/labs/arduino/nicla_vision/kws/kws.qmd b/contents/labs/arduino/nicla_vision/kws/kws.qmd index 164169e1d..08fee3e4f 100644 --- a/contents/labs/arduino/nicla_vision/kws/kws.qmd +++ b/contents/labs/arduino/nicla_vision/kws/kws.qmd @@ -4,7 +4,7 @@ bibliography: kws.bib # Keyword Spotting (KWS) {.unnumbered} -![*DALL·E 3 Prompt: 1950s style cartoon scene set in a vintage audio research room. Two Afro-American female scientists are at the center. One holds a magnifying glass, closely examining ancient circuitry, while the other takes notes. On their wooden table, there are multiple boards with sensors, notably featuring a microphone. Behind these boards, a computer with a large, rounded back displays the Arduino IDE. The IDE showcases code for LED pin assignments and machine learning inference for voice command detection. A distinct window in the IDE, the Serial Monitor, reveals outputs indicating the spoken commands 'yes' and 'no'. The room ambiance is nostalgic with vintage lamps, classic audio analysis tools, and charts depicting FFT graphs and time-domain curves.*](images/jpg/nicla-kws.jpg){fig-align="center" width="6.5in"} +![*DALL·E 3 Prompt: 1950s style cartoon scene set in a vintage audio research room. Two Afro-American female scientists are at the center. One holds a magnifying glass, closely examining ancient circuitry, while the other takes notes. On their wooden table, there are multiple boards with sensors, notably featuring a microphone. Behind these boards, a computer with a large, rounded back displays the Arduino IDE. The IDE showcases code for LED pin assignments and machine learning inference for voice command detection. A distinct window in the IDE, the Serial Monitor, reveals outputs indicating the spoken commands 'yes' and 'no'. 
The room ambiance is nostalgic with vintage lamps, classic audio analysis tools, and charts depicting FFT graphs and time-domain curves.*](images/jpg/nicla-kws.jpg) ## Introduction @@ -18,11 +18,11 @@ Our model will be designed to recognize keywords that can trigger device wake-up As said, *voice assistants* on the market, like Google Home or Amazon Echo-Dot, only react to humans when they are "waked up" by particular keywords such as " Hey Google" on the first one and "Alexa" on the second. -![](images/png/hey_google.png){fig-align="center" width="6.5in"} +![](images/png/hey_google.png) In other words, recognizing voice commands is based on a multi-stage model or Cascade Detection. -![](images/jpg/pa_block.jpg){fig-align="center" width="6.5in"} +![](images/jpg/pa_block.jpg) **Stage 1:** A small microprocessor inside the Echo Dot or Google Home continuously listens, waiting for the keyword to be spotted, using a TinyML model at the edge (KWS application). @@ -30,7 +30,7 @@ In other words, recognizing voice commands is based on a multi-stage model or Ca The video below shows an example of a Google Assistant being programmed on a Raspberry Pi (Stage 2), with an Arduino Nano 33 BLE as the TinyML device (Stage 1). -{{< video width="480" height="270" center >}} +{{< video https://youtu.be/e_OPgcnsyvM >}} > To explore the above Google Assistant project, please see the tutorial: [Building an Intelligent Voice Assistant From Scratch](https://www.hackster.io/mjrobot/building-an-intelligent-voice-assistant-from-scratch-2199c3). @@ -40,7 +40,7 @@ In this KWS project, we will focus on Stage 1 (KWS or Keyword Spotting), where w The diagram below gives an idea of how the final KWS application should work (during inference): -![](images/jpg/KWS_PROJ_INF_BLK.jpg){fig-align="center" width="6.5in"} +![](images/jpg/KWS_PROJ_INF_BLK.jpg) Our KWS application will recognize four classes of sound: @@ -55,7 +55,7 @@ Our KWS application will recognize four classes of sound: The main component of the KWS application is its model. So, we must train such a model with our specific keywords, noise, and other words (the "unknown"): -![](images/jpg/KWS_PROJ_TRAIN_BLK.jpg){fig-align="center" width="6.5in"} +![](images/jpg/KWS_PROJ_TRAIN_BLK.jpg) ## Dataset @@ -70,15 +70,15 @@ You can download a small portion of the dataset from Edge Studio ([Keyword spott Initiate a new project at Edge Impulse Studio (EIS) and select the `Upload Existing Data` tool in the `Data Acquisition` section. Choose the files to be uploaded: -![](images/jpg/files.jpg){fig-align="center" width="6.5in"} +![](images/jpg/files.jpg) Define the Label, select `Automatically split between train and test,` and `Upload data` to the EIS. Repeat for all classes. -![](images/jpg/upload.jpg){fig-align="center" width="6.5in"} +![](images/jpg/upload.jpg) The dataset will now appear in the `Data acquisition` section. Note that the approximately 6,000 samples (1,500 for each class) are split into Train (4,800) and Test (1,200) sets. -![](images/jpg/dataset.jpg){fig-align="center" width="6.5in"} +![](images/jpg/dataset.jpg) ### Capturing additional Audio Data @@ -90,7 +90,7 @@ When we pronounce a keyword, the sound waves should be converted to audio data. So, any device that can generate audio data with this basic specification (16KHz/16bits) will work fine. As a *device*, we can use the NiclaV, a computer, or even your mobile phone. 
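Before uploading your recordings, it can help to confirm that they match this 16 kHz / 16-bit, mono specification. The small helper below is an illustrative check that uses only Python's standard `wave` module (the file paths passed on the command line are placeholders):

``` python
# Illustrative sanity check: does a WAV file match the 16 kHz / 16-bit mono spec?
import sys
import wave

def check_sample(path, expected_rate=16000, expected_width=2):
    with wave.open(path, "rb") as wf:
        rate = wf.getframerate()      # samples per second
        width = wf.getsampwidth()     # bytes per sample (2 bytes = 16 bits)
        channels = wf.getnchannels()
        seconds = wf.getnframes() / rate
    ok = rate == expected_rate and width == expected_width and channels == 1
    print(f"{path}: {rate} Hz, {8 * width}-bit, {channels} channel(s), "
          f"{seconds:.2f} s -> {'OK' if ok else 'CHECK'}")
    return ok

if __name__ == "__main__":
    for wav_path in sys.argv[1:]:
        check_sample(wav_path)
```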
-![](images/jpg/audio_capt.jpg){fig-align="center" width="6.5in"} +![](images/jpg/audio_capt.jpg) #### Using the NiclaV and the Edge Impulse Studio @@ -100,7 +100,7 @@ As we learned in the chapter *Setup Nicla Vision*, EIS officially supports the N - Open the zip file on your computer and select the uploader corresponding to your OS: -![](images/png/image17.png){fig-align="center" width="4.416666666666667in"} +![](images/png/image17.png) - Put the NiclaV in Boot Mode by pressing the reset button twice. @@ -110,13 +110,13 @@ Go to your project on EIS, and on the `Data Acquisition tab`, select `WebUSB`. A You can choose which sensor data to pick in the `Collect Data` section on the `Data Acquisition` tab. Select: `Built-in microphone`, define your `label` (for example, *yes*), the sampling `Frequency`\[16000Hz\], and the `Sample length (in milliseconds)`, for example \[10s\]. `Start sampling`. -![](images/jpg/ei_data_collection.jpg){fig-align="center" width="6.5in"} +![](images/jpg/ei_data_collection.jpg) Data on Pete's dataset have a length of 1s, but the recorded samples are 10s long and must be split into 1s samples. Click on `three dots` after the sample name and select `Split sample`. A window will pop up with the Split tool. -![](images/jpg/split.jpg){fig-align="center" width="6.5in"} +![](images/jpg/split.jpg) Once inside the tool, split the data into 1-second (1000 ms) records. If necessary, add or remove segments. This procedure should be repeated for all new samples. @@ -126,7 +126,7 @@ You can also use your PC or smartphone to capture audio data, using a sampling f Go to `Devices`, scan the `QR Code` using your phone, and click on the link. A data Collection app will appear in your browser. Select `Collecting Audio`, and define your `Label`, data capture `Length,` and `Category`. -![](images/jpg/phone.jpg){fig-align="center" width="6.5in"} +![](images/jpg/phone.jpg) Repeat the same procedure used with the NiclaV. @@ -138,7 +138,7 @@ Repeat the same procedure used with the NiclaV. ### Impulse Design -![](images/jpg/impulse.jpg){fig-align="center" width="6.5in"} +![](images/jpg/impulse.jpg) First, we will take the data points with a 1-second window, augmenting the data and sliding that window in 500ms intervals. Note that the option zero-pad data is set. It is essential to fill with 'zeros' samples smaller than 1 second (in some cases, some samples can result smaller than the 1000 ms window on the split tool to avoid noise and spikes). @@ -154,17 +154,17 @@ The following step is to create the features to be trained in the next phase: We could keep the default parameter values, but we will use the DSP `Autotune parameters` option. -![](images/jpg/ei_MFCC.jpg){fig-align="center" width="6.5in"} +![](images/jpg/ei_MFCC.jpg) We will take the `Raw features` (our 1-second, 16KHz sampled audio data) and use the MFCC processing block to calculate the `Processed features`. For every 16,000 raw features (16,000 x 1 second), we will get 637 processed features (13 x 49). -![](images/jpg/MFCC.jpg){fig-align="center" width="6.5in"} +![](images/jpg/MFCC.jpg) The result shows that we only used a small amount of memory to pre-process data (16KB) and a latency of 34ms, which is excellent. For example, on an Arduino Nano (Cortex-M4f \@ 64MHz), the same pre-process will take around 480ms. The parameters chosen, such as the `FFT length` \[512\], will significantly impact the latency. Now, let's `Save parameters` and move to the `Generated features` tab, where the actual features will be generated. 
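If you want to reproduce this pre-processing outside the Studio, the sketch below shows roughly how 13 MFCC coefficients can be computed with `librosa`. The parameters approximate the ones above, but the framing is not identical to Edge Impulse's DSP block, so the number of frames will differ slightly from 49 (the file name is a placeholder):

``` python
# Rough MFCC computation (illustrative); Edge Impulse's DSP block uses its own framing
import numpy as np
import librosa

SR = 16000                                               # 16 kHz sampling rate
y, _ = librosa.load("yes_sample.wav", sr=SR, mono=True)  # hypothetical 1-second sample
y = np.pad(y, (0, max(0, SR - len(y))))[:SR]             # zero-pad or trim to exactly 1 s

mfcc = librosa.feature.mfcc(
    y=y, sr=SR,
    n_mfcc=13,        # 13 cepstral coefficients, as in the Studio
    n_fft=512,        # FFT length, as chosen above
    hop_length=320,   # 20 ms stride (frame count will not match EI's 49 exactly)
)
print(mfcc.shape)     # roughly (13, 51) -> flattened, on the order of the ~637 processed features
```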
Using [UMAP](https://umap-learn.readthedocs.io/en/latest/), a dimensionality reduction technique, the `Feature explorer` shows how the features are distributed on a two-dimensional plot. -![](images/jpg/feat_expl.jpg){fig-align="center" width="5.9in"} +![](images/jpg/feat_expl.jpg) The result seems OK, with a visually clear separation between *yes* features (in red) and *no* features (in blue). The *unknown* features seem nearer to the *no* space than to the *yes* one. This suggests that the keyword *no* is more prone to false positives. @@ -176,17 +176,17 @@ To understand better how the raw sound is preprocessed, look at the *Feature Eng We will use a simple Convolutional Neural Network (CNN) model, tested with 1D and 2D convolutions. The basic architecture has two blocks of Convolution + MaxPooling (\[8\] and \[16\] filters, respectively) and a Dropout of \[0.25\] for the 1D and \[0.5\] for the 2D. For the last layer, after Flattening, we have \[4\] neurons, one for each class: -![](images/jpg/models_1d-2d.jpg){fig-align="center" width="6.5in"} +![](images/jpg/models_1d-2d.jpg) As hyper-parameters, we will have a `Learning Rate` of \[0.005\] and a model trained by \[100\] epochs. We will also include a data augmentation method based on [SpecAugment](https://arxiv.org/abs/1904.08779). We trained the 1D and the 2D models with the same hyperparameters. The 1D architecture had a better overall result (90.5% accuracy, compared with 88% for the 2D), so we will use the 1D. -![](images/jpg/train_result.jpg){fig-align="center" width="6.5in"} +![](images/jpg/train_result.jpg) > Using 1D convolutions is more efficient because they require fewer parameters than 2D convolutions, making them more suitable for resource-constrained environments. It is also interesting to pay attention to the 1D Confusion Matrix. The F1 Score for `yes` is 95%, and for `no`, 91%. That was expected, given what we saw in the Feature Explorer (`no` and `unknown` at close distance). To try to improve the result, you can closely inspect the results of the samples with errors. -![](images/jpg/train_errors.jpg){fig-align="center" width="6.5in"} +![](images/jpg/train_errors.jpg) Listen to the samples that went wrong. For example, for `yes`, most of the mistakes were related to a yes pronounced as "yeh". You can acquire additional samples and then retrain your model. @@ -194,13 +194,13 @@ Listen to the samples that went wrong. For example, for `yes`, most of the mista If you want to understand what is happening "under the hood," you can download the pre-processed dataset (`MFCC training data`) from the `Dashboard` tab and run this [Jupyter Notebook](https://github.com/Mjrovai/Arduino_Nicla_Vision/blob/main/KWS/KWS_CNN_training.ipynb), playing with the code or [\[Opening it In Colab\]](https://colab.research.google.com/github/Mjrovai/Arduino_Nicla_Vision/blob/main/KWS/KWS_CNN_training.ipynb). For example, you can analyze the accuracy per epoch: -![](images/jpg/train_graphs.jpg){fig-align="center" width="6.5in"} +![](images/jpg/train_graphs.jpg) ## Testing Testing the model with the data set aside for testing (Test Data), we got an accuracy of approximately 76%. -![](images/jpg/test.jpg){fig-align="center" width="6.5in"} +![](images/jpg/test.jpg) Inspecting the F1 score, we can see that for YES, we got 0.90, an excellent result since we expect to use this keyword as the primary "trigger" for our KWS project. The worst result (0.70) is for UNKNOWN, which is OK.
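For reference, the 1D architecture described above can be sketched in Keras as shown below. This is a simplified, illustrative version of what the Studio (and the linked notebook) builds; kernel sizes and other details may differ:

``` python
# Simplified Keras sketch of the 1D CNN described above (illustrative only)
import tensorflow as tf
from tensorflow.keras import layers

model = tf.keras.Sequential([
    layers.Reshape((49, 13), input_shape=(637,)),   # 637 MFCC features -> 49 frames x 13 coefficients
    layers.Conv1D(8, kernel_size=3, activation="relu"),
    layers.MaxPooling1D(pool_size=2),
    layers.Conv1D(16, kernel_size=3, activation="relu"),
    layers.MaxPooling1D(pool_size=2),
    layers.Dropout(0.25),
    layers.Flatten(),
    layers.Dense(4, activation="softmax"),          # yes, no, noise, unknown
])

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.005),
              loss="categorical_crossentropy",
              metrics=["accuracy"])
model.summary()
```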
@@ -214,21 +214,21 @@ We can proceed to the project's next step but also consider that it is possible The EIS will package all the needed libraries, preprocessing functions, and trained models, downloading them to your computer. Go to the `Deployment` section, select `Arduino Library`, and at the bottom, choose `Quantized (Int8)` and press `Build`. -![](images/jpg/deploy.jpg){fig-align="center" width="5.29in"} +![](images/jpg/deploy.jpg) When the `Build` button is selected, a zip file will be created and downloaded to your computer. On your Arduino IDE, go to the `Sketch` tab, select the option `Add .ZIP Library`, and Choose the .zip file downloaded by EIS: -![](images/jpg/install_zip.jpg){fig-align="center" width="6.5in"} +![](images/jpg/install_zip.jpg) Now, it is time for a real test. We will make inferences while completely disconnected from the EIS. Let's use the NiclaV code example created when we deployed the Arduino Library. In your Arduino IDE, go to the `File/Examples` tab, look for your project, and select `nicla-vision/nicla-vision_microphone` (or `nicla-vision_microphone_continuous`) -![](images/jpg/code_ide.jpg){fig-align="center" width="6.5in"} +![](images/jpg/code_ide.jpg) Press the reset button twice to put the NiclaV in boot mode, upload the sketch to your board, and test some real inferences: -![](images/jpg/yes_no.jpg){fig-align="center" width="6.5in"} +![](images/jpg/yes_no.jpg) ## Post-processing @@ -359,7 +359,7 @@ You can find the complete code on the [project's GitHub](https://github.com/Mjro Upload the sketch to your board and test some real inferences. The idea is that the Green LED will be ON whenever the keyword YES is detected, the Red will lit for a NO, and any other word will turn on the Blue LED. All the LEDs should be off if silence or background noise is present. Remember that the same procedure can "trigger" an external device to perform a desired action instead of turning on an LED, as we saw in the introduction. -{{< video width="480" height="270" center >}} +{{< video https://youtu.be/25Rd76OTXLY >}} ## Conclusion diff --git a/contents/labs/arduino/nicla_vision/motion_classification/motion_classification.qmd b/contents/labs/arduino/nicla_vision/motion_classification/motion_classification.qmd index b0c8f1f78..c7fe59ec1 100644 --- a/contents/labs/arduino/nicla_vision/motion_classification/motion_classification.qmd +++ b/contents/labs/arduino/nicla_vision/motion_classification/motion_classification.qmd @@ -4,7 +4,7 @@ bibliography: motion_classification.bib # Motion Classification and Anomaly Detection {.unnumbered} -![*DALL·E 3 Prompt: 1950s style cartoon illustration depicting a movement research room. In the center of the room, there's a simulated container used for transporting goods on trucks, boats, and forklifts. The container is detailed with rivets and markings typical of industrial cargo boxes. Around the container, the room is filled with vintage equipment, including an oscilloscope, various sensor arrays, and large paper rolls of recorded data. The walls are adorned with educational posters about transportation safety and logistics. The overall ambiance of the room is nostalgic and scientific, with a hint of industrial flair.*](images/jpg/movement_anomaly_ini.jpg){fig-align="center"} +![*DALL·E 3 Prompt: 1950s style cartoon illustration depicting a movement research room. In the center of the room, there's a simulated container used for transporting goods on trucks, boats, and forklifts. 
The container is detailed with rivets and markings typical of industrial cargo boxes. Around the container, the room is filled with vintage equipment, including an oscilloscope, various sensor arrays, and large paper rolls of recorded data. The walls are adorned with educational posters about transportation safety and logistics. The overall ambiance of the room is nostalgic and scientific, with a hint of industrial flair.*](images/jpg/movement_anomaly_ini.jpg) ## Introduction @@ -29,11 +29,11 @@ By the end of this tutorial, you'll have a working prototype that can classify d For this project, we will use an accelerometer. As discussed in the Hands-On Tutorial, *Setup Nicla Vision*, the Nicla Vision Board has an onboard **6-axis IMU:** 3D gyroscope and 3D accelerometer, the [LSM6DSOX](https://www.st.com/resource/en/datasheet/lsm6dsox.pdf). Let's verify if the [LSM6DSOX IMU library](https://github.com/arduino-libraries/Arduino_LSM6DSOX) is installed. If not, install it. -![](images/jpg/imu_ide.jpg){fig-align="center" width="6.5in"} +![](images/jpg/imu_ide.jpg) Next, go to `Examples > Arduino_LSM6DSOX > SimpleAccelerometer` and run the accelerometer test. You can check if it works by opening the IDE Serial Monitor or Plotter. The values are in g (earth gravity), with a default range of +/- 4g: -![](images/jpg/imu_test.jpg){fig-align="center" width="6.5in"} +![](images/jpg/imu_test.jpg) ### Defining the Sampling frequency: @@ -106,7 +106,7 @@ void loop() { Uploading the sketch and inspecting the Serial Monitor, we can see that we are capturing 50 samples per second. -![](images/jpg/sampling.jpg){fig-align="center" width="6.5in"} +![](images/jpg/sampling.jpg) > Note that with the Nicla board resting on a table (with the camera facing down), the z-axis measures around 9.8m/s$^2$, the expected earth acceleration. @@ -119,11 +119,11 @@ We will simulate container (or better package) transportation through different 3. Vertical Movement via Fork-**Lift** 4. Stationary **(Idle**) period in a Warehouse -![](images/jpg/classes.jpg){fig-align="center" width="6.5in"} +![](images/jpg/classes.jpg) From the above images, we can define for our simulation that primarily horizontal movements (x or y axis) should be associated with the "Terrestrial class," Vertical movements (z-axis) with the "Lift Class," no activity with the "Idle class," and movement on all three axes to [Maritime class.](https://www.containerhandbuch.de/chb_e/stra/index.html?/chb_e/stra/stra_02_03_03.htm) -![](images/jpg/classes_mov_def.jpg){fig-align="center" width="6.5in"} +![](images/jpg/classes_mov_def.jpg) ## Data Collection @@ -146,7 +146,7 @@ Please create a new project on the Edge Impulse Studio (EIS) and connect the Nic 2. Upload a sketch for data capture (the one discussed previously in this tutorial). 3. 
Use the [CLI Data Forwarder](https://docs.edgeimpulse.com/docs/edge-impulse-cli/cli-data-forwarder) to capture data from the Nicla's accelerometer and send it to the Studio, as shown in this diagram: -![](images/jpg/data-forw.jpg){fig-align="center" width="5.25in"} +![](images/jpg/data-forw.jpg) Start the [CLI Data Forwarder](https://docs.edgeimpulse.com/docs/edge-impulse-cli/cli-data-forwarder) on your terminal, entering (if it is the first time) the following command: @@ -156,11 +156,11 @@ $ edge-impulse-data-forwarder --clean Next, enter your EI credentials and choose your project, variables (for example, *accX*, *accY*, and *accZ*), and device name (for example, *NiclaV*): -![](images/jpg/term.jpg){fig-align="center" width="6.5in"} +![](images/jpg/term.jpg) Go to the `Devices` section on your EI Project and verify if the device is connected (the dot should be green): -![](images/jpg/device.jpg){fig-align="center" width="6.5in"} +![](images/jpg/device.jpg) > You can clone the project developed for this hands-on: [NICLA Vision Movement Classification](https://studio.edgeimpulse.com/public/302078/latest). @@ -168,11 +168,11 @@ Go to the `Devices` section on your EI Project and verify if the device is conne On the `Data Acquisition` section, you should see that your board `[NiclaV]` is connected. The sensor is available: `[sensor with 3 axes (accX, accY, accZ)]` with a sampling frequency of `[50Hz]`. The Studio suggests a sample length of `[10000]` ms (10s). The last thing left is defining the sample label. Let's start with `[terrestrial]`: -![](images/jpg/collect_data.jpg){fig-align="center" width="5.64in"} +![](images/jpg/collect_data.jpg) **Terrestrial** (pallets in a Truck or Train), moving horizontally. Press `[Start Sample]` and move your device horizontally, keeping one direction over your table. After 10 s, your data will be uploaded to the studio. Here is how the sample was collected: -![](images/jpg/terrestrial_result.jpg){fig-align="center" width="6.5in"} +![](images/jpg/terrestrial_result.jpg) As expected, the movement was captured mainly in the Y-axis (green). In the blue, we see the Z axis, around -10 m/s$^2$ (the Nicla has the camera facing up). @@ -180,23 +180,23 @@ As discussed before, we should capture data from all four Transportation Classes **Maritime** (pallets on boats in an angry ocean). The movement is captured on all three axes: -![](images/jpg/maritime_result.jpg){fig-align="center" width="6.5in"} +![](images/jpg/maritime_result.jpg) **Lift** (Pallets being handled vertically by a Forklift). Movement captured only in the Z-axis: -![](images/jpg/lift_result.jpg){fig-align="center" width="6.5in"} +![](images/jpg/lift_result.jpg) **Idle** (Pallets in a warehouse). No movement detected by the accelerometer: -![](images/jpg/idle_result.jpg){fig-align="center" width="6.5in"} +![](images/jpg/idle_result.jpg) You can capture, for example, 2 minutes (twelve samples of 10 seconds) for each of the four classes (a total of 8 minutes of data). Using the `three dots` menu after each one of the samples, select 2 of them, reserving them for the Test set. Alternatively, you can use the automatic `Train/Test Split tool` in the `Danger Zone` of the `Dashboard` tab.
Below, you can see the resulting dataset: -![](images/jpg/dataset.jpg){fig-align="center" width="6.5in"} +![](images/jpg/dataset.jpg) Once you have captured your dataset, you can explore it in more detail using the [Data Explorer](https://docs.edgeimpulse.com/docs/edge-impulse-studio/data-acquisition/data-explorer), a visual tool to find outliers or mislabeled data (helping to correct them). The data explorer first tries to extract meaningful features from your data (by applying signal processing and neural network embeddings) and then uses a dimensionality reduction algorithm such as [PCA](https://en.wikipedia.org/wiki/Principal_component_analysis) or [t-SNE](https://en.wikipedia.org/wiki/T-distributed_stochastic_neighbor_embedding) to map these features to a 2D space. This gives you a one-look overview of your complete dataset. -![](images/jpg/data_explorer.jpg){fig-align="center" width="6.5in"} +![](images/jpg/data_explorer.jpg) In our case, the dataset seems OK (good separation). But the PCA shows we can have issues between maritime (green) and lift (orange). This is expected, once on a boat, sometimes the movement can be only "vertical". @@ -204,15 +204,15 @@ In our case, the dataset seems OK (good separation). But the PCA shows we can ha The next step is the definition of our Impulse, which takes the raw data and uses signal processing to extract features, passing them as the input tensor of a *learning block* to classify new data. Go to `Impulse Design` and `Create Impulse`. The Studio will suggest the basic design. Let's also add a second *Learning Block* for `Anomaly Detection`. -![](images/jpg/impulse.jpg){fig-align="center" width="6.5in"} +![](images/jpg/impulse.jpg) This second model uses a K-means model. If we imagine that we could have our known classes as clusters, any sample that could not fit on that could be an outlier, an anomaly such as a container rolling out of a ship on the ocean or falling from a Forklift. -![](images/jpg/anomaly_detect.jpg){fig-align="center" width="6.5in"} +![](images/jpg/anomaly_detect.jpg) The sampling frequency should be automatically captured, if not, enter it: `[50]`Hz. The Studio suggests a *Window Size* of 2 seconds (`[2000]` ms) with a *sliding window* of `[20]`ms. What we are defining in this step is that we will pre-process the captured data (Time-Seres data), creating a tabular dataset features) that will be the input for a Neural Networks Classifier (DNN) and an Anomaly Detection model (K-Means), as shown below: -![](images/jpg/impulse-block.jpg){fig-align="center" width="6.5in"} +![](images/jpg/impulse-block.jpg) Let's dig into those steps and parameters to understand better what we are doing here. @@ -224,7 +224,7 @@ Raw accelerometer data can be noisy and contain errors or irrelevant information > With a sampling rate (SR) of 50Hz and a window size of 2 seconds, we will get 100 samples per axis, or 300 in total (3 axis x 2 seconds x 50 samples). We will slide this window every 200ms, creating a larger dataset where each instance has 300 raw features. -![](images/jpg/pre-process.jpg){fig-align="center" width="6.5in"} +![](images/jpg/pre-process.jpg) Once the data is preprocessed and segmented, you can extract features that describe the motion's characteristics. Some typical features extracted from accelerometer data include: @@ -262,7 +262,7 @@ So, for an FFT length of 32 points, the resulting output of the Spectral Analysi Once we understand what the pre-processing does, it is time to finish the job. 
So, let's take the raw data (time-series type) and convert it to tabular data. For that, go to the `Spectral Features` section on the `Parameters` tab, define the main parameters as discussed in the previous section (`[FFT]` with `[32]` points), and select `[Save Parameters]`: -![](images/jpg/Parameters_definition.jpg){fig-align="center" width="6.5in"} +![](images/jpg/Parameters_definition.jpg) At the top menu, select the `Generate Features` option and the `Generate Features` button. Each 2-second data window will be converted into one data point of 63 features. @@ -270,21 +270,21 @@ At the top menu, select the `Generate Features` option and the `Generate Feature The visualization makes it possible to verify that after the feature generation, the classes keep their excellent separation, which indicates that the classifier should work well. Optionally, you can analyze how important each one of the features is for one class compared with others. -![](images/jpg/feature_generation.jpg){fig-align="center" width="6.5in"} +![](images/jpg/feature_generation.jpg) ## Models Training Our classifier will be a Dense Neural Network (DNN) that will have 63 neurons on its input layer, two hidden layers with 20 and 10 neurons, and an output layer with four neurons (one for each class), as shown here: -![](images/jpg/model.jpg){fig-align="center" width="6.5in"} +![](images/jpg/model.jpg) As hyperparameters, we will use a Learning Rate of `[0.005]`, a Batch size of `[32]`, and `[20]`% of data for validation for `[30]` epochs. After training, we can see that the accuracy is 98.5%. The cost of memory and latency is meager. -![](images/jpg/train.jpg){fig-align="center" width="6.5in"} +![](images/jpg/train.jpg) For Anomaly Detection, we will choose the suggested features that are precisely the most important ones in the Feature Extraction, plus the accZ RMS. The number of clusters will be `[32]`, as suggested by the Studio: -![](images/jpg/anom_det_train.jpg){fig-align="center" width="6.5in"} +![](images/jpg/anom_det_train.jpg) ## Testing We can verify how our model will behave with unknown data using 20% of the data @@ -292,7 +292,7 @@ We can verify how our model will behave with unknown data using 20% of the data The default minimum threshold for a result to be considered uncertain is `[0.6]` for classification and `[0.3]` for anomaly. Since we have four classes (their outputs should sum to 1.0), you can also set a lower threshold for a class to be considered valid (for example, 0.4). You can `Set confidence thresholds` on the `three dots` menu, beside the `Classify all` button. -![](images/jpg/model_testing.jpg){fig-align="center" width="6.5in"} +![](images/jpg/model_testing.jpg) You can also perform Live Classification with your device (which should still be connected to the Studio). @@ -302,7 +302,7 @@ You can also perform Live Classification with your device (which should still be It is time to deploy the preprocessing block and the trained model to the Nicla. The Studio will package all the needed libraries, preprocessing functions, and trained models, downloading them to your computer. You should select the option `Arduino Library`, and at the bottom, you can choose `Quantized (Int8)` or `Unoptimized (float32)` and `[Build]`. A Zip file will be created and downloaded to your computer. -![](images/jpg/deploy.jpg){fig-align="center" width="6.5in"} +![](images/jpg/deploy.jpg) On your Arduino IDE, go to the `Sketch` tab, select `Add .ZIP Library`, and choose the .zip file downloaded by the Studio. A message will appear in the IDE Terminal: `Library installed`.
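For those who prefer to see these blocks as code, the sketch below approximates the two learning blocks described above: the small DNN classifier (63 inputs, hidden layers of 20 and 10 neurons, 4 outputs) and a K-means-based anomaly score computed as the distance to the nearest of the 32 cluster centers. It is illustrative only; the Studio generates its own implementation, and the training features used here are random placeholders.

``` python
# Illustrative sketches of the classifier and the K-means anomaly detector (not EI's code)
import numpy as np
import tensorflow as tf
from sklearn.cluster import KMeans

# 1) Dense classifier: 63 spectral features -> 20 -> 10 -> 4 classes
classifier = tf.keras.Sequential([
    tf.keras.layers.Dense(20, activation="relu", input_shape=(63,)),
    tf.keras.layers.Dense(10, activation="relu"),
    tf.keras.layers.Dense(4, activation="softmax"),   # idle, lift, maritime, terrestrial
])
classifier.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.005),
                   loss="categorical_crossentropy", metrics=["accuracy"])
# classifier.fit(X_features, y_labels, batch_size=32, epochs=30, validation_split=0.2)

# 2) K-means anomaly detection: distance to the nearest of 32 cluster centers
X_features = np.random.rand(500, 63).astype("float32")   # placeholder for the real features
kmeans = KMeans(n_clusters=32, n_init=10, random_state=0).fit(X_features)

def anomaly_score(x):
    distances = np.linalg.norm(kmeans.cluster_centers_ - x, axis=1)
    return distances.min()   # far from every known cluster -> likely an anomaly

print(anomaly_score(np.random.rand(63)))
```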
@@ -312,7 +312,7 @@ Now, it is time for a real test. We will make inferences wholly disconnected fro In your Arduino IDE, go to the `File/Examples` tab and look for your project, and on examples, select `Nicla_vision_fusion`: -![](images/jpg/inference.jpg){fig-align="center" width="6.5in"} +![](images/jpg/inference.jpg) Note that the code created by Edge Impulse considers a *sensor fusion* approach where the IMU (Accelerometer and Gyroscope) and the ToF are used. At the beginning of the code, you have the libraries related to our project, IMU and ToF: @@ -333,17 +333,17 @@ Now you should try different movements with your board (similar to those done du - **Idle and lift classes:** -![](images/jpg/inference_1.jpg){fig-align="center" width="6.5in"} +![](images/jpg/inference_1.jpg) - **maritime and terrestrial:** -![](images/jpg/inference_2.jpg){fig-align="center" width="6.5in"} +![](images/jpg/inference_2.jpg) Note that in all situations above, the value of the `anomaly score` was smaller than 0.0. Try a new movement that was not part of the original dataset, for example, "rolling" the Nicla, facing the camera upside-down, as a container falling from a boat or even a boat accident: - **anomaly detection:** -![](images/jpg/anomaly-boat.jpg){fig-align="center" width="6.5in"} +![](images/jpg/anomaly-boat.jpg) In this case, the anomaly is much bigger, over 1.00 @@ -407,7 +407,7 @@ Before we finish, consider that Movement Classification and Object Detection can For real applications, as some described before, we can add a case to our device, and Eoin Jordan, from Edge Impulse, developed a great wearable and machine health case for the Nicla range of boards. It works with a 10mm magnet, 2M screws, and a 16mm strap for human and machine health use case scenarios. Here is the link: [Arduino Nicla Voice and Vision Wearable Case](https://www.thingiverse.com/thing:5923305). -![](images/jpg/case.jpg){fig-align="center" width="6.5in"} +![](images/jpg/case.jpg) The applications for motion classification and anomaly detection are extensive, and the Arduino Nicla Vision is well-suited for scenarios where low power consumption and edge processing are advantageous. Its small form factor and efficiency in processing make it an ideal choice for deploying portable and remote applications where real-time processing is crucial and connectivity may be limited. diff --git a/contents/labs/arduino/nicla_vision/nicla_vision.qmd b/contents/labs/arduino/nicla_vision/nicla_vision.qmd index d1d72132e..c19e7b118 100644 --- a/contents/labs/arduino/nicla_vision/nicla_vision.qmd +++ b/contents/labs/arduino/nicla_vision/nicla_vision.qmd @@ -2,7 +2,7 @@ These labs provide a unique opportunity to gain practical experience with machine learning (ML) systems. Unlike working with large models requiring data center-scale resources, these exercises allow you to directly interact with hardware and software using TinyML. This hands-on approach gives you a tangible understanding of the challenges and opportunities in deploying AI, albeit at a tiny scale. However, the principles are largely the same as what you would encounter when working with larger systems. -![Nicla Vision. Credit: Arduino](./images/jpg/nicla_vision_quarter.jpeg) +![Nicla Vision. 
Source: Arduino](./images/jpg/nicla_vision_quarter.jpeg) ## Pre-requisites @@ -17,7 +17,7 @@ These labs provide a unique opportunity to gain practical experience with machin ## Exercises | **Modality** | **Task** | **Description** | **Link** | -|--------------|--------------|-----------------|----------| +|:--------------|:--------------|:-----------------|:----------| | Vision | Image Classification | Learn to classify images | [Link](./image_classification/image_classification.qmd) | | Vision | Object Detection | Implement object detection | [Link](./object_detection/object_detection.qmd) | | Sound | Keyword Spotting | Explore voice recognition systems | [Link](./kws/kws.qmd) | diff --git a/contents/labs/arduino/nicla_vision/object_detection/object_detection.qmd b/contents/labs/arduino/nicla_vision/object_detection/object_detection.qmd index 4ec9bfad5..e143d048b 100644 --- a/contents/labs/arduino/nicla_vision/object_detection/object_detection.qmd +++ b/contents/labs/arduino/nicla_vision/object_detection/object_detection.qmd @@ -4,27 +4,27 @@ bibliography: object_detection.bib # Object Detection {.unnumbered} -![*DALL·E 3 Prompt: Cartoon in the style of the 1940s or 1950s showcasing a spacious industrial warehouse interior. A conveyor belt is prominently featured, carrying a mixture of toy wheels and boxes. The wheels are distinguishable with their bright yellow centers and black tires. The boxes are white cubes painted with alternating black and white patterns. At the end of the moving conveyor stands a retro-styled robot, equipped with tools and sensors, diligently classifying and counting the arriving wheels and boxes. The overall aesthetic is reminiscent of mid-century animation with bold lines and a classic color palette.*](images/jpg/obj_det_ini.jpg){fig-align="center" width="6.5in"} +![*DALL·E 3 Prompt: Cartoon in the style of the 1940s or 1950s showcasing a spacious industrial warehouse interior. A conveyor belt is prominently featured, carrying a mixture of toy wheels and boxes. The wheels are distinguishable with their bright yellow centers and black tires. The boxes are white cubes painted with alternating black and white patterns. At the end of the moving conveyor stands a retro-styled robot, equipped with tools and sensors, diligently classifying and counting the arriving wheels and boxes. The overall aesthetic is reminiscent of mid-century animation with bold lines and a classic color palette.*](images/jpg/obj_det_ini.jpg) ## Introduction This is a continuation of **CV on Nicla Vision**, now exploring **Object Detection** on microcontrollers. -![](images/jpg/cv_obj_detect.jpg){fig-align="center" width="6.5in"} +![](images/jpg/cv_obj_detect.jpg) ### Object Detection versus Image Classification The main task with Image Classification models is to produce a list of the most probable object categories present on an image, for example, to identify a tabby cat just after his dinner: -![](images/png/img_1.png){fig-align="center"} +![](images/png/img_1.png) But what happens when the cat jumps near the wine glass? The model still only recognizes the predominant category on the image, the tabby cat: -![](images/png/img_2.png){fig-align="center"} +![](images/png/img_2.png) And what happens if there is not a dominant category on the image? -![](images/png/img_3.png){fig-align="center"} +![](images/png/img_3.png) The model identifies the above image completely wrong as an "ashcan," possibly due to the color tonalities. 
@@ -34,7 +34,7 @@ To solve this issue, we need another type of model, where not only **multiple ca As we can imagine, such models are much more complicated and bigger, for example, the **MobileNetV2 SSD FPN-Lite 320x320, trained with the COCO dataset.** This pre-trained object detection model is designed to locate up to 10 objects within an image, outputting a bounding box for each object detected. The below image is the result of such a model running on a Raspberry Pi: -![](images/png/img_4.png){fig-align="center" width="6.5in"} +![](images/png/img_4.png) Those models used for Object detection (such as the MobileNet SSD or YOLO) usually have several MB in size, which is OK for use with Raspberry Pi but unsuitable for use with embedded devices, where the RAM usually is lower than 1M Bytes. @@ -48,7 +48,7 @@ In this Hands-On exercise, we will explore using FOMO with Object Detection, not All Machine Learning projects need to start with a detailed goal. Let's assume we are in an industrial facility and must sort and count **wheels** and special **boxes**. -![](images/jpg/proj_goal.jpg){fig-align="center" width="6.5in"} +![](images/jpg/proj_goal.jpg) In other words, we should perform a multi-label classification, where each image can have three classes: @@ -60,7 +60,7 @@ In other words, we should perform a multi-label classification, where each image Here are some not labeled image samples that we should use to detect the objects (wheels and boxes): -![](images/jpg/samples.jpg){fig-align="center" width="6.5in"} +![](images/jpg/samples.jpg) We are interested in which object is in the image, its location (centroid), and how many we can find on it. The object's size is not detected with FOMO, as with MobileNet SSD or YOLO, where the Bounding Box is one of the model outputs. @@ -74,7 +74,7 @@ We can use the Edge Impulse Studio, the OpenMV IDE, your phone, or other devices First, create in your computer a folder where your data will be saved, for example, "data." Next, on the OpenMV IDE, go to Tools \> Dataset Editor and select New Dataset to start the dataset collection: -![](images/jpg/data_folder.jpg){fig-align="center" width="6.5in"} +![](images/jpg/data_folder.jpg) Edge impulse suggests that the objects should be of similar size and not overlapping for better performance. This is OK in an industrial facility, where the camera should be fixed, keeping the same distance from the objects to be detected. Despite that, we will also try with mixed sizes and positions to see the result. @@ -82,7 +82,7 @@ Edge impulse suggests that the objects should be of similar size and not overlap Connect the Nicla Vision to the OpenMV IDE and run the `dataset_capture_script.py`. Clicking on the Capture Image button will start capturing images: -![](images/jpg/img_5.jpg){fig-align="center" width="6.5in"} +![](images/jpg/img_5.jpg) We suggest around 50 images mixing the objects and varying the number of each appearing on the scene. Try to capture different angles, backgrounds, and light conditions. @@ -96,23 +96,23 @@ After capturing your dataset, close the Dataset Editor Tool on the `Tools > Data Go to [Edge Impulse Studio,](https://www.edgeimpulse.com/) enter your credentials at **Login** (or create an account), and start a new project. -![](images/png/img_6.png){fig-align="center" width="6.5in"} +![](images/png/img_6.png) > Here, you can clone the project developed for this hands-on: [NICLA_Vision_Object_Detection](https://studio.edgeimpulse.com/public/292737/latest). 
On your Project Dashboard, go down and on **Project info** and select **Bounding boxes (object detection)** and Nicla Vision as your Target Device: -![](images/png/img_7.png){fig-align="center" width="6.5in"} +![](images/png/img_7.png) ### Uploading the unlabeled data On Studio, go to the `Data acquisition` tab, and on the `UPLOAD DATA` section, upload from your computer files captured. -![](images/png/img_8.png){fig-align="center" width="6.5in"} +![](images/png/img_8.png) > You can leave for the Studio to split your data automatically between Train and Test or do it manually. -![](images/png/img_9.png){fig-align="center" width="6.5in"} +![](images/png/img_9.png) All the not labeled images (51) were uploaded but they still need to be labeled appropriately before using them as a dataset in the project. The Studio has a tool for that purpose, which you can find in the link `Labeling queue (51)`. @@ -131,19 +131,19 @@ Ordinary objects can quickly be identified and labeled using an existing library Starting with the first image of your unlabeled data, use your mouse to drag a box around an object to add a label. Then click **Save labels** to advance to the next item. -![](images/png/img_10.png){fig-align="center" width="6.5in"} +![](images/png/img_10.png) Continue with this process until the queue is empty. At the end, all images should have the objects labeled as those samples below: -![](images/jpg/img_11.jpg){fig-align="center" width="6.5in"} +![](images/jpg/img_11.jpg) Next, review the labeled samples on the `Data acquisition` tab. If one of the labels was wrong, you can edit it using the *`three dots`* menu after the sample name: -![](images/png/img_12.png){fig-align="center" width="6.5in"} +![](images/png/img_12.png) You will be guided to replace the wrong label, correcting the dataset. -![](images/jpg/img_13.jpg){fig-align="center" width="6.5in"} +![](images/jpg/img_13.jpg) ## The Impulse Design @@ -153,17 +153,17 @@ In this phase, you should define how to: - **Design a Model,** in this case, "Object Detection." -![](images/png/img_14.png){fig-align="center" width="6.5in"} +![](images/png/img_14.png) ### Preprocessing all dataset In this section, select **Color depth** as `Grayscale`, which is suitable for use with FOMO models and Save `parameters`. -![](images/png/img_15.png){fig-align="center" width="6.5in"} +![](images/png/img_15.png) The Studio moves automatically to the next section, `Generate features`, where all samples will be pre-processed, resulting in a dataset with individual 96x96x1 images or 9,216 features. -![](images/png/img_16.png){fig-align="center" width="6.5in"} +![](images/png/img_16.png) The feature explorer shows that all samples evidence a good separation after the feature generation. @@ -179,11 +179,11 @@ FOMO is an innovative machine learning model for object detection, which can use FOMO takes the image in grayscale and divides it into blocks of pixels using a factor of 8. For the input of 96x96, the grid would be 12x12 (96/8=12). Next, FOMO will run a classifier through each pixel block to calculate the probability that there is a box or a wheel in each of them and, subsequently, determine the regions which have the highest probability of containing the object (If a pixel block has no objects, it will be classified as *background*). From the overlap of the final region, the FOMO provides the coordinates (related to the image dimensions) of the centroid of this region. 
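To make the centroid idea concrete, the sketch below converts a hypothetical 12x12 grid of per-class probabilities (the FOMO output resolution for a 96x96 input) into a centroid in pixel coordinates. The real FOMO post-processing also merges neighboring cells into regions, so treat this as a simplified picture only:

``` python
# Simplified FOMO-style post-processing (illustrative): from a 12x12 probability grid to a centroid
import numpy as np

GRID, CELL = 12, 8        # 96x96 input / 8 = 12x12 grid; each cell covers 8x8 pixels
THRESHOLD = 0.8           # confidence threshold to accept a detection

# Hypothetical output for one class (e.g., "wheel"): probability of the object per grid cell
heatmap = np.zeros((GRID, GRID))
heatmap[4:6, 7:9] = 0.95  # pretend the model is confident in a 2x2 block of cells

mask = heatmap > THRESHOLD
if mask.any():
    rows, cols = np.nonzero(mask)
    weights = heatmap[rows, cols]
    # Probability-weighted centroid of the active cells, mapped back to pixel coordinates
    cy = (rows * CELL + CELL / 2) @ weights / weights.sum()
    cx = (cols * CELL + CELL / 2) @ weights / weights.sum()
    print(f"centroid at (x={cx:.1f}, y={cy:.1f}) in the 96x96 image")
else:
    print("no object detected")
```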
-![](images/png/img_17.png){fig-align="center" width="6.5in"} +![](images/png/img_17.png) For training, we should select a pre-trained model. Let's use the **`FOMO (Faster Objects, More Objects) MobileNetV2 0.35`\`.** This model uses around 250KB RAM and 80KB of ROM (Flash), which suits well with our board since it has 1MB of RAM and ROM. -![](images/png/img_18.png){fig-align="center" width="6.5in"} +![](images/png/img_18.png) Regarding the training hyper-parameters, the model will be trained with: @@ -197,7 +197,7 @@ As a result, the model ends with practically 1.00 in the F1 score, with a simila > Note that FOMO automatically added a 3rd label background to the two previously defined (*box* and *wheel*). -![](images/png/img_19.png){fig-align="center" width="6.5in"} +![](images/png/img_19.png) > In object detection tasks, accuracy is generally not the primary [evaluation metric](https://learnopencv.com/mean-average-precision-map-object-detection-model-evaluation-metric/). Object detection involves classifying objects and providing bounding boxes around them, making it a more complex problem than simple classification. The issue is that we do not have the bounding box, only the centroids. In short, using accuracy as a metric could be misleading and may not provide a complete understanding of how well the model is performing. Because of that, we will use the F1 score. @@ -209,7 +209,7 @@ Since Edge Impulse officially supports the Nicla Vision, let's connect it to the - Open the zip file on your computer and select the uploader related to your OS: -![](images/png/image17.png){fig-align="center"} +![](images/png/image17.png) - Put the Nicla-Vision on Boot Mode, pressing the reset button twice. @@ -217,11 +217,11 @@ Since Edge Impulse officially supports the Nicla Vision, let's connect it to the Go to `Live classification` section at EI Studio, and using *webUSB,* connect your Nicla Vision: -![](images/png/img_20.png){fig-align="center" width="6.5in"} +![](images/png/img_20.png) Once connected, you can use the Nicla to capture actual images to be tested by the trained model on Edge Impulse Studio. -![](images/png/img_21.png){fig-align="center" width="6.5in"} +![](images/png/img_21.png) One thing to be noted is that the model can produce false positives and negatives. This can be minimized by defining a proper `Confidence Threshold` (use the `Three dots` menu for the set-up). Try with 0.8 or more. @@ -229,19 +229,19 @@ One thing to be noted is that the model can produce false positives and negative Select OpenMV Firmware on the Deploy Tab and press \[Build\]. -![](images/png/img_22.png){fig-align="center" width="6.5in"} +![](images/png/img_22.png) When you try to connect the Nicla with the OpenMV IDE again, it will try to update its FW. Choose the option `Load a specific firmware` instead. -![](images/png/img_24.png){fig-align="center"} +![](images/png/img_24.png) You will find a ZIP file on your computer from the Studio. Open it: -![](images/png/img_23.png){fig-align="center" width="6.5in"} +![](images/png/img_23.png) Load the .bin file to your board: -![](images/png/img_25.png){fig-align="center" width="6.5in"} +![](images/png/img_25.png) After the download is finished, a pop-up message will be displayed. `Press OK`, and open the script **ei_object_detection.py** downloaded from the Studio. 
@@ -290,17 +290,17 @@ colors = [ # Add more colors if you are detecting more than 7 types of classes a

Keep the remaining code as it is and press the `green Play button` to run the code:

-![](images/png/img_26.png){fig-align="center" width="6.5in"}
+![](images/png/img_26.png)

In the camera view, we can see the objects with their centroids marked by fixed 12-pixel circles (each circle has a distinct color, depending on its class). On the Serial Terminal, the model shows the labels detected and their position on the image window (240x240).

> Be aware that the coordinate origin is in the upper left corner.

-![](images/jpg/img_27.jpg){fig-align="center" width="624"}
+![](images/jpg/img_27.jpg)

Note that the frame rate is around 8 fps (similar to what we got with the Image Classification project). This happens because FOMO is cleverly built on top of a CNN classification model, not on an object detection model like the SSD MobileNet. For example, when running a MobileNetV2 SSD FPN-Lite 320x320 model on a Raspberry Pi 4, the latency is around 5 times higher (around 1.5 fps).

-Here is a short video showing the inference results: {{< video width="480" height="270" center >}}
+Here is a short video showing the inference results: {{< video https://youtu.be/JbpoqRp3BbM >}}

## Conclusion

@@ -310,7 +310,7 @@ FOMO is a significant leap in the image processing space, as Louis Moreau and Ma

Multiple possibilities exist for exploring object detection (and, more precisely, counting the objects detected) on embedded devices, for example, exploring the Nicla doing sensor fusion (camera + microphone) and object detection. This can be very useful in projects involving bees, for example.

-![](images/jpg/img_28.jpg){fig-align="center" width="624"}
+![](images/jpg/img_28.jpg)

## Resources

diff --git a/contents/labs/arduino/nicla_vision/setup/setup.qmd b/contents/labs/arduino/nicla_vision/setup/setup.qmd
index 2c5514123..096daa9ca 100644
--- a/contents/labs/arduino/nicla_vision/setup/setup.qmd
+++ b/contents/labs/arduino/nicla_vision/setup/setup.qmd
@@ -4,13 +4,13 @@ bibliography: setup.bib

# Setup {.unnumbered}

-![*DALL·E 3 Prompt: Illustration reminiscent of a 1950s cartoon where the Arduino NICLA VISION board, equipped with a variety of sensors including a camera, is the focal point on an old-fashioned desk. In the background, a computer screen with rounded edges displays the Arduino IDE. The code seen is related to LED configurations and machine learning voice command detection. Outputs on the Serial Monitor explicitly display the words 'yes' and 'no'.*](images/jpg/nicla_sys_ini.jpg){fig-align="center" width="6.5in"}
+![*DALL·E 3 Prompt: Illustration reminiscent of a 1950s cartoon where the Arduino NICLA VISION board, equipped with a variety of sensors including a camera, is the focal point on an old-fashioned desk. In the background, a computer screen with rounded edges displays the Arduino IDE. The code seen is related to LED configurations and machine learning voice command detection. Outputs on the Serial Monitor explicitly display the words 'yes' and 'no'.*](images/jpg/nicla_sys_ini.jpg)

## Introduction

The [Arduino Nicla Vision](https://docs.arduino.cc/hardware/nicla-vision) (sometimes called *NiclaV*) is a development board that includes two processors that can run tasks in parallel.
It is part of a family of development boards with the same form factor but designed for specific tasks, such as the [Nicla Sense ME](https://www.bosch-sensortec.com/software-tools/tools/arduino-nicla-sense-me/) and the [Nicla Voice](https://store-usa.arduino.cc/products/nicla-voice?_gl=1*l3abc6*_ga*MTQ3NzE4Mjk4Mi4xNjQwMDIwOTk5*_ga_NEXN8H46L5*MTY5NjM0Mzk1My4xMDIuMS4xNjk2MzQ0MjQ1LjAuMC4w). The *Niclas* can efficiently run processes created with TensorFlow Lite. For example, one of the cores of the NiclaV runs a computer vision algorithm on the fly (inference), while the other executes low-level operations like controlling a motor and communicating or acting as a user interface. The onboard wireless module allows the management of WiFi and Bluetooth Low Energy (BLE) connectivity simultaneously. -![](images/jpg/image29.jpg){fig-align="center" width="6.5in"} +![](images/jpg/image29.jpg) ## Hardware @@ -26,7 +26,7 @@ The central processor is the dual-core [STM32H747,](https://content.arduino.cc/a - TensorFlow Lite -![](images/jpg/image22.jpg){fig-align="center" width="6.5in"} +![](images/jpg/image22.jpg) ### Memory @@ -46,11 +46,11 @@ Memory is crucial for embedded machine learning projects. The NiclaV board can h Start connecting the board (*microUSB*) to your computer: -![](images/jpg/image14.jpg){fig-align="center" width="6.5in"} +![](images/jpg/image14.jpg) Install the Mbed OS core for Nicla boards in the Arduino IDE. Having the IDE open, navigate to `Tools > Board > Board Manager`, look for Arduino Nicla Vision on the search window, and install the board. -![](images/jpg/image2.jpg){fig-align="center" width="6.5in"} +![](images/jpg/image2.jpg) Next, go to `Tools > Board > Arduino Mbed OS Nicla Boards` and select `Arduino Nicla Vision`. Having your board connected to the USB, you should see the Nicla on Port and select it. @@ -60,7 +60,7 @@ Next, go to `Tools > Board > Arduino Mbed OS Nicla Boards` and select `Arduino N On Arduino IDE, go to `Examples > PDM > PDMSerialPlotter`, open and run the sketch. Open the Plotter and see the audio representation from the microphone: -![](images/png/image9.png){fig-align="center" width="6.5in"} +![](images/png/image9.png) > Vary the frequency of the sound you generate and confirm that the mic is working correctly. @@ -68,25 +68,25 @@ On Arduino IDE, go to `Examples > PDM > PDMSerialPlotter`, open and run the sket Before testing the IMU, it will be necessary to install the LSM6DSOX library. For that, go to Library Manager and look for LSM6DSOX. Install the library provided by Arduino: -![](images/jpg/image19.jpg){fig-align="center" width="6.5in"} +![](images/jpg/image19.jpg) Next, go to `Examples > Arduino_LSM6DSOX > SimpleAccelerometer` and run the accelerometer test (you can also run Gyro and board temperature): -![](images/png/image28.png){fig-align="center" width="6.5in"} +![](images/png/image28.png) ### Testing the ToF (Time of Flight) Sensor As we did with IMU, it is necessary to install the VL53L1X ToF library. For that, go to Library Manager and look for VL53L1X. Install the library provided by Pololu: -![](images/jpg/image15.jpg){fig-align="center" width="6.5in"} +![](images/jpg/image15.jpg) Next, run the sketch [proximity_detection.ino](https://github.com/Mjrovai/Arduino_Nicla_Vision/blob/main/Arduino-IDE/proximity_detection/proximity_detection.ino): -![](images/png/image12.png){fig-align="center" width="6.5in"} +![](images/png/image12.png) On the Serial Monitor, you will see the distance from the camera to an object in front of it (max of 4m). 
-![](images/jpg/image13.jpg){fig-align="center" width="6.5in"}
+![](images/jpg/image13.jpg)

### Testing the Camera

@@ -100,11 +100,11 @@ OpenMV IDE is the premier integrated development environment with OpenMV Cameras

Go to the [OpenMV IDE page](https://openmv.io/pages/download), download the correct version for your Operating System, and follow the instructions for its installation on your computer.

-![](images/png/image21.png){fig-align="center" width="6.5in"}
+![](images/png/image21.png)

The IDE should open, defaulting to the helloworld_1.py code on its Code Area. If not, you can open it from `Files > Examples > HelloWorld > helloworld.py`

-![](images/png/image7.png){fig-align="center" width="6.5in"}
+![](images/png/image7.png)

Any messages sent through a serial connection (using print() or error messages) will be displayed on the **Serial Terminal** during run time. The image captured by a camera will be displayed in the **Camera Viewer** Area (or Frame Buffer) and in the Histogram area, immediately below the Camera Viewer.

@@ -112,33 +112,33 @@ Any messages sent through a serial connection (using print() or error messages)

After updating the bootloader, put the Nicla Vision in bootloader mode by double-pressing the reset button on the board. The built-in green LED will start fading in and out. Now return to the OpenMV IDE and click on the connect icon (Left ToolBar):

-![](images/jpg/image23.jpg){fig-align="center" width="4.010416666666667in"}
+![](images/jpg/image23.jpg)

A pop-up will tell you that a board in DFU mode was detected and ask how you would like to proceed. First, select `Install the latest release firmware (vX.Y.Z)`. This action will install the latest OpenMV firmware on the Nicla Vision.

-![](images/png/image10.png){fig-align="center" width="6.5in"}
+![](images/png/image10.png)

You can leave the option `Erase internal file system` unselected and click `[OK]`.

Nicla's green LED will start flashing while the OpenMV firmware is uploaded to the board, and a terminal window will then open, showing the flashing progress.

-![](images/png/image5.png){fig-align="center" width="4.854166666666667in"}
+![](images/png/image5.png)

Wait until the green LED stops flashing and fading. When the process ends, you will see a message saying, "DFU firmware update complete!". Press `[OK]`.

-![](images/png/image1.png){fig-align="center" width="3.875in"}
+![](images/png/image1.png)

A green play button appears on the Tool Bar when the Nicla Vision connects.

-![](images/jpg/image18.jpg){fig-align="center" width="4.791666666666667in"}
+![](images/jpg/image18.jpg)

Also, note that a drive named "NO NAME" will appear on your computer:

-![](images/png/image3.png){fig-align="center" width="6.447916666666667in"}
+![](images/png/image3.png)

Every time you press the `[RESET]` button on the board, it automatically executes the *main.py* script stored on it. You can load the [main.py](https://github.com/Mjrovai/Arduino_Nicla_Vision/blob/main/Micropython/main.py) code on the IDE (`File > Open File...`).

-![](images/png/image16.png){fig-align="center" width="4.239583333333333in"}
+![](images/png/image16.png)

> This code is the "Blink" code, confirming that the HW is OK.

@@ -146,7 +146,7 @@ For testing the camera, let's run *helloword_1.py*. For that, select the script

When you click the green play button, the MicroPython script (*helloworld.py*) on the Code Area will be uploaded and run on the Nicla Vision. On the Camera Viewer, you will start to see the video streaming. 
The Serial Monitor will show us the FPS (Frames per second), which should be around 14fps. -![](images/png/image6.png){fig-align="center" width="6.5in"} +![](images/png/image6.png) Here is the [helloworld.py](http://helloworld.py/) script: @@ -193,7 +193,7 @@ Edge Impulse officially supports the Nicla Vision. So, for starting, please crea - Open the zip file on your computer and select the uploader corresponding to your OS: -![](images/png/image17.png){fig-align="center" width="4.416666666666667in"} +![](images/png/image17.png) - Put the Nicla-Vision on Boot Mode, pressing the reset button twice. @@ -201,19 +201,19 @@ Edge Impulse officially supports the Nicla Vision. So, for starting, please crea Go to your project on the Studio, and on the `Data Acquisition tab`, select `WebUSB` (1). A window will pop up; choose the option that shows that the `Nicla is paired` (2) and press `[Connect]` (3). -![](images/png/image27.png){fig-align="center" width="6.5in"} +![](images/png/image27.png) In the *Collect Data* section on the `Data Acquisition` tab, you can choose which sensor data to pick. -![](images/png/image25.png){fig-align="center" width="6.5in"} +![](images/png/image25.png) For example. `IMU data`: -![](images/png/image8.png){fig-align="center" width="6.5in"} +![](images/png/image8.png) Or Image (`Camera`): -![](images/png/image4.png){fig-align="center" width="6.5in"} +![](images/png/image4.png) And so on. You can also test an external sensor connected to the `ADC` (Nicla pin 0) and the other onboard sensors, such as the microphone and the ToF. @@ -225,15 +225,15 @@ The shield has 14 Grove connectors: five single analog inputs (A0-A5), one doubl > Note that all 17 Nicla Vision pins will be connected to the Shield Groves, but some Grove connections remain disconnected. -![](images/jpg/image20.jpg){fig-align="center" width="6.5in"} +![](images/jpg/image20.jpg) This shield is MKR compatible and can be used with the Nicla Vision and Portenta. -![](images/jpg/image26.jpg){fig-align="center" width="4.34375in"} +![](images/jpg/image26.jpg) For example, suppose that on a TinyML project, you want to send inference results using a LoRaWAN device and add information about local luminosity. Often, with offline operations, a local low-power display such as an OLED is advised. This setup can be seen here: -![](images/jpg/image11.jpg){fig-align="center" width="6.5in"} +![](images/jpg/image11.jpg) The [Grove Light Sensor](https://wiki.seeedstudio.com/Grove-Light_Sensor/) would be connected to one of the single Analog pins (A0/PC4), the [LoRaWAN device](https://wiki.seeedstudio.com/Grove_LoRa_E5_New_Version/) to the UART, and the [OLED](https://arduino.cl/producto/display-oled-grove/) to the I2C connector. @@ -260,7 +260,7 @@ while(True): To verify that the UART is working, you should, for example, connect another device as the Arduino UNO, displaying "Hello Word" on the Serial Monitor. Here is the [code](https://github.com/Mjrovai/Arduino_Nicla_Vision/blob/main/Arduino-IDE/teste_uart_UNO/teste_uart_UNO.ino). -![](images/jpg/image24.jpg){fig-align="center" width="2.8125in"} +![](images/jpg/image24.jpg) Below is the *Hello World code* to be used with the I2C OLED. The MicroPython SSD1306 OLED driver (ssd1306.py), created by Adafruit, should also be uploaded to the Nicla (the ssd1306.py script can be found in [GitHub](https://github.com/Mjrovai/Arduino_Nicla_Vision/blob/main/Micropython/ssd1306.py)). 
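The OLED "Hello World" code itself falls between the hunks shown in this diff. For reference, a minimal MicroPython sketch using the standard `ssd1306` driver typically looks like the one below; the I2C bus number, display geometry, and address are assumptions that may need adjusting for your wiring:

``` python
from machine import I2C
import ssd1306  # the Adafruit-derived driver uploaded to the board as ssd1306.py

i2c = I2C(1)                              # assumption: I2C bus 1 (check the Nicla pinout)
oled = ssd1306.SSD1306_I2C(128, 64, i2c)  # assumption: 128x64 display at the default 0x3C address

oled.fill(0)                              # clear the display buffer
oled.text("Hello World", 0, 0)            # draw text at x=0, y=0
oled.show()                               # push the buffer to the display
```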
@@ -302,7 +302,7 @@ while (True): The ADC can be used for other sensor variables, such as [Temperature](https://wiki.seeedstudio.com/Grove-Temperature_Sensor_V1.2/). -> Note that the above scripts ([[downloaded from Github]{.underline}](https://github.com/Mjrovai/Arduino_Nicla_Vision/tree/main/Micropython)) introduce only how to connect external devices with the Nicla Vision board using MicroPython. +> Note that the above scripts ([downloaded from Github](https://github.com/Mjrovai/Arduino_Nicla_Vision/tree/main/Micropython)) introduce only how to connect external devices with the Nicla Vision board using MicroPython. ## Conclusion diff --git a/contents/labs/seeed/xiao_esp32s3/image_classification/image_classification.qmd b/contents/labs/seeed/xiao_esp32s3/image_classification/image_classification.qmd index f61270959..7eb7b921d 100644 --- a/contents/labs/seeed/xiao_esp32s3/image_classification/image_classification.qmd +++ b/contents/labs/seeed/xiao_esp32s3/image_classification/image_classification.qmd @@ -1,6 +1,6 @@ # Image Classification {.unnumbered} -![*Image by Marcelo Rovai*](./images/png/ini.png){fig-align="center" width="6.5in"} +![*Image by Marcelo Rovai*](./images/png/ini.png) ## Introduction @@ -214,7 +214,7 @@ Upload the code to your XIAO ESP32S3 Sense, and you should be OK to start classi ## Testing the Model (Inference) -![](./images/png/inf_banana.jpg){fig-align="center" width="6.5in"} +![](./images/png/inf_banana.jpg) Getting a photo with the camera, the classification result will appear on the Serial Monitor: diff --git a/contents/labs/seeed/xiao_esp32s3/kws/kws.qmd b/contents/labs/seeed/xiao_esp32s3/kws/kws.qmd index 5b2feafd1..624108936 100644 --- a/contents/labs/seeed/xiao_esp32s3/kws/kws.qmd +++ b/contents/labs/seeed/xiao_esp32s3/kws/kws.qmd @@ -1,7 +1,7 @@ # Keyword Spotting (KWS) {.unnumbered} -![*Image by Marcelo Rovai*](images/jpeg/kws_ini.jpg){fig-align="center" width="6.5in"} +![*Image by Marcelo Rovai*](images/jpeg/kws_ini.jpg) ## Introduction diff --git a/contents/labs/seeed/xiao_esp32s3/motion_classification/motion_classification.qmd b/contents/labs/seeed/xiao_esp32s3/motion_classification/motion_classification.qmd index 7e3478c33..a5396b67b 100644 --- a/contents/labs/seeed/xiao_esp32s3/motion_classification/motion_classification.qmd +++ b/contents/labs/seeed/xiao_esp32s3/motion_classification/motion_classification.qmd @@ -1,6 +1,6 @@ # Motion Classification and Anomaly Detection {.unnumbered} -![*DALL·E prompt - 1950s style cartoon illustration set in a vintage audio lab. Scientists, dressed in classic attire with white lab coats, are intently analyzing audio data on large chalkboards. The boards display intricate FFT (Fast Fourier Transform) graphs and time-domain curves. Antique audio equipment is scattered around, but the data representations are clear and detailed, indicating their focus on audio analysis.*](./images/jpeg/ini.jpg){fig-align="center" width="6.5in"} +![*DALL·E prompt - 1950s style cartoon illustration set in a vintage audio lab. Scientists, dressed in classic attire with white lab coats, are intently analyzing audio data on large chalkboards. The boards display intricate FFT (Fast Fourier Transform) graphs and time-domain curves. 
Antique audio equipment is scattered around, but the data representations are clear and detailed, indicating their focus on audio analysis.*](./images/jpeg/ini.jpg) ## Introduction diff --git a/contents/labs/seeed/xiao_esp32s3/object_detection/object_detection.qmd b/contents/labs/seeed/xiao_esp32s3/object_detection/object_detection.qmd index 71fb62a52..7f9ef588c 100644 --- a/contents/labs/seeed/xiao_esp32s3/object_detection/object_detection.qmd +++ b/contents/labs/seeed/xiao_esp32s3/object_detection/object_detection.qmd @@ -1,6 +1,6 @@ # Object Detection {.unnumbered} -![*DALL·E prompt - Cartoon styled after 1950s animations, showing a detailed board with sensors, particularly a camera, on a table with patterned cloth. Behind the board, a computer with a large back showcases the Arduino IDE. The IDE's content hints at LED pin assignments and machine learning inference for detecting spoken commands. The Serial Monitor, in a distinct window, reveals outputs for the commands 'yes' and 'no'.*](./images/png/obj_detec_ini.png){fig-align="center" width="6.5in"} +![*DALL·E prompt - Cartoon styled after 1950s animations, showing a detailed board with sensors, particularly a camera, on a table with patterned cloth. Behind the board, a computer with a large back showcases the Arduino IDE. The IDE's content hints at LED pin assignments and machine learning inference for detecting spoken commands. The Serial Monitor, in a distinct window, reveals outputs for the commands 'yes' and 'no'.*](./images/png/obj_detec_ini.png) ## Introduction diff --git a/contents/labs/seeed/xiao_esp32s3/setup/setup.qmd b/contents/labs/seeed/xiao_esp32s3/setup/setup.qmd index 8d9b225b1..4cee6bac0 100644 --- a/contents/labs/seeed/xiao_esp32s3/setup/setup.qmd +++ b/contents/labs/seeed/xiao_esp32s3/setup/setup.qmd @@ -1,12 +1,12 @@ # Setup {.unnumbered} -![*DALL·E prompt - 1950s cartoon-style drawing of a XIAO ESP32S3 board with a distinctive camera module, as shown in the image provided. The board is placed on a classic lab table with various sensors, including a microphone. Behind the board, a vintage computer screen displays the Arduino IDE in muted colors, with code focusing on LED pin setups and machine learning inference for voice commands. The Serial Monitor on the IDE showcases outputs detecting voice commands like 'yes' and 'no'. The scene merges the retro charm of mid-century labs with modern electronics.*](./images/jpeg/xiao_setup.jpg){fig-align="center" width="6.5in"} +![*DALL·E prompt - 1950s cartoon-style drawing of a XIAO ESP32S3 board with a distinctive camera module, as shown in the image provided. The board is placed on a classic lab table with various sensors, including a microphone. Behind the board, a vintage computer screen displays the Arduino IDE in muted colors, with code focusing on LED pin setups and machine learning inference for voice commands. The Serial Monitor on the IDE showcases outputs detecting voice commands like 'yes' and 'no'. The scene merges the retro charm of mid-century labs with modern electronics.*](./images/jpeg/xiao_setup.jpg) ## Introduction The [XIAO ESP32S3 Sense](https://www.seeedstudio.com/XIAO-ESP32S3-Sense-p-5639.html) is Seeed Studio's affordable development board, which integrates a camera sensor, digital microphone, and SD card support. Combining embedded ML computing power and photography capability, this development board is a great tool to start with TinyML (intelligent voice and vision AI). 
-![](./images/png/xiao.png){fig-align="center" width="6.5in"} +![](./images/png/xiao.png) **XIAO ESP32S3 Sense Main Features** @@ -17,11 +17,11 @@ The [XIAO ESP32S3 Sense](https://www.seeedstudio.com/XIAO-ESP32S3-Sense-p-5639.h - **Outstanding RF performance**: Support 2.4GHz Wi-Fi and BLE dual wireless communication, support 100m+ remote communication when connected with U.FL antenna - **Thumb-sized Compact Design**: 21 x 17.5mm, adopting the classic form factor of XIAO, suitable for space-limited projects like wearable devices -![](./images/png/xiao_pins.png){fig-align="center" width="6.5in"} +![](./images/png/xiao_pins.png) Below is the general board pinout: -![](./images/png/xiao_esp32c3_sense_pin-out.png){fig-align="center" width="6.5in"} +![](./images/png/xiao_esp32c3_sense_pin-out.png) > For more details, please refer to the Seeed Studio WiKi page:
> diff --git a/contents/labs/seeed/xiao_esp32s3/xiao_esp32s3.qmd b/contents/labs/seeed/xiao_esp32s3/xiao_esp32s3.qmd index 461b829c4..0c768d8aa 100644 --- a/contents/labs/seeed/xiao_esp32s3/xiao_esp32s3.qmd +++ b/contents/labs/seeed/xiao_esp32s3/xiao_esp32s3.qmd @@ -2,7 +2,7 @@ These labs provide a unique opportunity to gain practical experience with machine learning (ML) systems. Unlike working with large models requiring data center-scale resources, these exercises allow you to directly interact with hardware and software using TinyML. This hands-on approach gives you a tangible understanding of the challenges and opportunities in deploying AI, albeit at a tiny scale. However, the principles are largely the same as what you would encounter when working with larger systems. -![XIAO ESP32S3 Sense. Credit: SEEED Studio](./images/jpeg/xiao_esp32s3_decked.jpeg) +![XIAO ESP32S3 Sense. Source: SEEED Studio](./images/jpeg/xiao_esp32s3_decked.jpeg) ## Pre-requisites diff --git a/contents/labs/shared/dsp_spectral_features_block/dsp_spectral_features_block.qmd b/contents/labs/shared/dsp_spectral_features_block/dsp_spectral_features_block.qmd index f0f5cc767..d97841074 100644 --- a/contents/labs/shared/dsp_spectral_features_block/dsp_spectral_features_block.qmd +++ b/contents/labs/shared/dsp_spectral_features_block/dsp_spectral_features_block.qmd @@ -4,7 +4,7 @@ bibliography: dsp_spectral_features_block.bib # DSP Spectral Features {.unnumbered} -![*DALL·E 3 Prompt: 1950s style cartoon illustration of a Latin male and female scientist in a vibration research room. The man is using a calculus ruler to examine ancient circuitry. The woman is at a computer with complex vibration graphs. The wooden table has boards with sensors, prominently an accelerometer. A classic, rounded-back computer shows the Arduino IDE with code for LED pin assignments and machine learning algorithms for movement detection. The Serial Monitor displays FFT, classification, wavelets, and DSPs. Vintage lamps, tools, and charts with FFT and Wavelets graphs complete the scene.*](images/jpg/dsp_ini.jpg){fig-align="center" width="6.5in"} +![*DALL·E 3 Prompt: 1950s style cartoon illustration of a Latin male and female scientist in a vibration research room. The man is using a calculus ruler to examine ancient circuitry. The woman is at a computer with complex vibration graphs. The wooden table has boards with sensors, prominently an accelerometer. A classic, rounded-back computer shows the Arduino IDE with code for LED pin assignments and machine learning algorithms for movement detection. The Serial Monitor displays FFT, classification, wavelets, and DSPs. Vintage lamps, tools, and charts with FFT and Wavelets graphs complete the scene.*](images/jpg/dsp_ini.jpg) ## Introduction @@ -36,7 +36,7 @@ Let's explore in more detail a typical TinyML Motion Classification project cove ## A TinyML Motion Classification project -![](images/jpg/spectral_block.jpeg){fig-align="center" width="6.5in"} +![](images/jpg/spectral_block.jpeg) In the hands-on project, *Motion Classification and Anomaly Detection*, we simulated mechanical stresses in transport, where our problem was to classify four classes of movement: @@ -47,11 +47,11 @@ In the hands-on project, *Motion Classification and Anomaly Detection*, we simul The accelerometers provided the data on the pallet (or container). 
-![](images/png/case_study.png){fig-align="center" width="6.5in"}
+![](images/png/case_study.png)

Below is one sample (raw data) of 10 seconds, captured with a sampling frequency of 50Hz:

-![](images/png/data_sample.png){fig-align="center" width="6.5in"}
+![](images/png/data_sample.png)

> The result is similar when this analysis is done over another dataset with the same principle, using a different sampling frequency, 62.5Hz instead of 50Hz.

@@ -61,11 +61,11 @@ The raw data captured by the accelerometer (a "time series" data) should be conv

We should segment the data using a sliding window over the sample data for feature extraction. The project captured accelerometer data every 10 seconds with a sample rate of 62.5 Hz. A 2-second window captures 375 data points (3 axes x 2 seconds x 62.5 samples). The window is slid every 80ms, creating a larger dataset where each instance has 375 "raw features."

-![](images/png/v1.png){fig-align="center" width="6.5in"}
+![](images/png/v1.png)

On the Studio, the previous version (V1) of the **Spectral Analysis Block** extracted only the RMS as a time-domain feature and, for the frequency domain, the peaks and frequency (using FFT) and the power characteristics (PSD) of the signal over time, resulting in a fixed tabular dataset of 33 features (11 per axis).

-![](images/png/v1_features.png){fig-align="center" width="6.5in"}
+![](images/png/v1_features.png)

Those 33 features were the Input tensor of a Neural Network Classifier.

@@ -122,11 +122,11 @@ axis = ['accX', 'accY', 'accZ']

n_sensors = len(axis)
```

-![](images/png/impulse.png){fig-align="center" width="5.6in"}
+![](images/png/impulse.png)

Selecting the *Raw Features* on the Studio Spectral Analysis tab, we can copy all 375 data points of a particular 2-second window to the clipboard.

-![](images/png/features.png){fig-align="center" width="6.5in"}
+![](images/png/features.png)

Paste the data points into a new variable *data*:

@@ -140,7 +140,7 @@ The total raw features are 375, but we will work with each axis individually, wh

We aim to understand how Edge Impulse gets the processed features.

-![](images/png/process_features.png){fig-align="center" width="4.57in"}
+![](images/png/process_features.png)

So, you should also paste the processed features into a variable (to compare the features calculated in Python with the ones provided by the Studio):

@@ -182,7 +182,7 @@ sensors = [accX, accY, accZ] 
plot_data(sensors, axis, 'Raw Features')
```

-![](images/png/sample.png){fig-align="center" width="6.5in"}
+![](images/png/sample.png)

**Subtracting the mean**

@@ -207,7 +207,7 @@ sensors = [accX, accY, accZ] 
plot_data(sensors, axis, 'Raw Features - Subtract the Mean')
```

-![](images/png/sample_no_mean.png){fig-align="center" width="6.5in"}
+![](images/png/sample_no_mean.png)

## Time Domain Statistical features

@@ -217,7 +217,7 @@ The RMS value of a set of values (or a continuous-time waveform) is the square r

In the case of a set of $n$ values $\{x_1, x_2, ..., x_n\}$, the RMS is:

-![](images/png/rms.png){fig-align="center"}
+![](images/png/rms.png)

> NOTE that the RMS value is different for the original raw data and after subtracting the mean.

@@ -262,11 +262,11 @@ plt.suptitle('IMU Sensors distribution', fontsize=16, y=1.02) 
plt.show()
```

-![](images/png/skew.png){fig-align="center" width="6.5in"}
+![](images/png/skew.png)

[**Skewness**](https://en.wikipedia.org/wiki/Skewness) is a measure of the asymmetry of a distribution. This value can be positive or negative.
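For reference, the time-domain statistics discussed here (RMS and skewness, plus the kurtosis covered just below) can be reproduced with NumPy/SciPy in a few lines. This is only a sketch under the assumption that the 375 raw features pasted from the Studio are interleaved per axis (X, Y, Z, X, Y, Z, ...); adjust the slicing if your layout differs:

``` python
import numpy as np
from scipy import stats

# Placeholder: replace with the 375 values pasted from the Studio (the `data` variable above).
data = np.random.randn(375)

accX = np.array(data)[0::3]          # every 3rd value starting at index 0 -> X axis (125 samples)
accX = accX - accX.mean()            # subtract the mean, as done above

rms = np.sqrt(np.mean(accX ** 2))    # time-domain RMS
skew = stats.skew(accX)              # asymmetry of the distribution
kurt = stats.kurtosis(accX)          # excess kurtosis (0 for a normal distribution)

print(f"RMS: {rms:.4f}  Skewness: {skew:.4f}  Kurtosis: {kurt:.4f}")
```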
-![](images/png/skew_2.png){fig-align="center" width="4.65in"}
+![](images/png/skew_2.png)

- A negative skew indicates that the tail is on the left side of the distribution, which extends towards more negative values.
- A positive skew indicates that the tail is on the right side of the distribution, which extends towards more positive values.

@@ -291,7 +291,7 @@ Compared with Edge Impulse result features:

[**Kurtosis**](https://en.wikipedia.org/wiki/Kurtosis) is a measure of whether or not a distribution is heavy-tailed or light-tailed relative to a normal distribution.

-![](images/png/kurto.png){fig-align="center"}
+![](images/png/kurto.png)

- The kurtosis of a normal distribution is zero.
- If a given distribution has a negative kurtosis, it is said to be platykurtic, which means it tends to produce fewer and less extreme outliers than the normal distribution.

@@ -367,7 +367,7 @@ plt.box(False) 
plt.show()
```

-![](images/png/fft.png){fig-align="center" width="6.5in"}
+![](images/png/fft.png)

Besides the Power Spectrum, we can also include the skewness and kurtosis of the features in the frequency domain (which should be available in a new version):

@@ -447,7 +447,7 @@ Let's select Wavelet on the Spectral Features block in the same project:

- Wavelet Decomposition Level: 1
- Wavelet: bior1.3

-![](images/png/fft_result.png){fig-align="center"}
+![](images/png/fft_result.png)

**The Wavelet Function**

@@ -466,11 +466,11 @@ plt.box(False) 
plt.show()
```

-![](images/png/wav.png){fig-align="center" width="6.5in"}
+![](images/png/wav.png)

As we did before, let's copy and paste the Processed Features:

-![](images/png/wav_processed.png){fig-align="center" width="6.5in"}
+![](images/png/wav_processed.png)

``` python
features = [3.6251, 0.0615, 0.0615, -7.3517, -2.7641, 2.8462, 5.0924, ...]
@@ -515,7 +515,7 @@ plt.box(False) 
plt.show()
```

-![](images/png/wavelet_input.png){fig-align="center" width="6.5in"}
+![](images/png/wavelet_input.png)

### Feature Extraction

@@ -610,7 +610,7 @@ all_feat_l1 = [item for sublist in all_feat_l1 for item in sublist] 
print(f"\nAll L1 Features = {len(all_feat_l1)}")
```

-![](images/png/wav_result.png){fig-align="center" width="3.58in"}
+![](images/png/wav_result.png)

## Conclusion

diff --git a/contents/labs/shared/kws_feature_eng/kws_feature_eng.qmd b/contents/labs/shared/kws_feature_eng/kws_feature_eng.qmd
index 58267a9ab..4bc78f98d 100644
--- a/contents/labs/shared/kws_feature_eng/kws_feature_eng.qmd
+++ b/contents/labs/shared/kws_feature_eng/kws_feature_eng.qmd
@@ -4,7 +4,7 @@ bibliography: kws_feature_eng.bib

# KWS Feature Engineering {.unnumbered}

-![*DALL·E 3 Prompt: 1950s style cartoon scene set in an audio research room. Two scientists, one holding a magnifying glass and the other taking notes, examine large charts pinned to the wall. These charts depict FFT graphs and time curves related to audio data analysis. The room has a retro ambiance, with wooden tables, vintage lamps, and classic audio analysis tools.*](images/jpg/kws_under_the_hood_ini.jpg){fig-align="center" width="6.5in"}
+![*DALL·E 3 Prompt: 1950s style cartoon scene set in an audio research room. Two scientists, one holding a magnifying glass and the other taking notes, examine large charts pinned to the wall. These charts depict FFT graphs and time curves related to audio data analysis. 
The room has a retro ambiance, with wooden tables, vintage lamps, and classic audio analysis tools.*](images/jpg/kws_under_the_hood_ini.jpg) ## Introduction @@ -20,7 +20,7 @@ The most common TinyML application is Keyword Spotting (KWS), a subset of the br Here a typical KWS Process using MFCC Feature Converter: -![](images/jpg/kws_diagram.jpg){fig-align="center" width="7.29in"} +![](images/jpg/kws_diagram.jpg) #### Applications of KWS @@ -50,7 +50,7 @@ Understanding the basic properties of audio signals is crucial for effective fea The image below shows the words `YES` and `NO` with typical representations in the Time (Raw Audio) and Frequency domains: -![](images/jpg/time_vs_freq.jpg){fig-align="center" width="6.5in"} +![](images/jpg/time_vs_freq.jpg) ### Why Not Raw Audio? @@ -80,7 +80,7 @@ For these reasons, feature extraction techniques such as Mel-frequency Cepstral The image below shows the words `YES` and `NO` in their MFCC representation: -![](images/jpg/yes_no_mfcc.jpg){fig-align="center" width="6.5in"} +![](images/jpg/yes_no_mfcc.jpg) > This [video](https://youtu.be/SJo7vPgRlBQ?si=KSgzmDg8DtSVqzXp) explains the Mel Frequency Cepstral Coefficients (MFCC) and how to compute them. @@ -105,21 +105,21 @@ The computation of Mel-frequency Cepstral Coefficients (MFCCs) involves several - **Windowing:** Each frame is then windowed to minimize the discontinuities at the frame boundaries. A commonly used window function is the Hamming window. Windowing prepares the signal for a Fourier transform by minimizing the edge effects. The image below shows three frames (10, 20, and 30) and the time samples after windowing (note that the frame length and frame stride are 20 ms): -![](images/jpg/frame_wind.jpg){fig-align="center" width="6.5in"} +![](images/jpg/frame_wind.jpg) - **Fast Fourier Transform (FFT)** The Fast Fourier Transform (FFT) is applied to each windowed frame to convert it from the time domain to the frequency domain. The FFT gives us a complex-valued representation that includes both magnitude and phase information. However, for MFCCs, only the magnitude is used to calculate the Power Spectrum. The power spectrum is the square of the magnitude spectrum and measures the energy present at each frequency component. > The power spectrum $P(f)$ of a signal $x(t)$ is defined as $P(f) = |X(f)|^2$, where $X(f)$ is the Fourier Transform of $x(t)$. By squaring the magnitude of the Fourier Transform, we emphasize *stronger* frequencies over *weaker* ones, thereby capturing more relevant spectral characteristics of the audio signal. This is important in applications like audio classification, speech recognition, and Keyword Spotting (KWS), where the focus is on identifying distinct frequency patterns that characterize different classes of audio or phonemes in speech. -![](images/jpg/frame_to_fft.jpg){fig-align="center" width="6.5in"} +![](images/jpg/frame_to_fft.jpg) - **Mel Filter Banks:** The frequency domain is then mapped to the [Mel scale](https://en.wikipedia.org/wiki/Mel_scale), which approximates the human ear's response to different frequencies. The idea is to extract more features (more filter banks) in the lower frequencies and less in the high frequencies. Thus, it performs well on sounds distinguished by the human ear. Typically, 20 to 40 triangular filters extract the Mel-frequency energies. These energies are then log-transformed to convert multiplicative factors into additive ones, making them more suitable for further processing. 
-![](images/jpg/melbank-1_00.hires.jpg){fig-align="center" width="6.5in"} +![](images/jpg/melbank-1_00.hires.jpg) - **Discrete Cosine Transform (DCT):** The last step is to apply the [Discrete Cosine Transform (DCT)](https://en.wikipedia.org/wiki/Discrete_cosine_transform) to the log Mel energies. The DCT helps to decorrelate the energies, effectively compressing the data and retaining only the most discriminative features. Usually, the first 12-13 DCT coefficients are retained, forming the final MFCC feature vector. -![](images/jpg/mfcc_final.jpg){fig-align="center" width="6.5in"} +![](images/jpg/mfcc_final.jpg) ## Hands-On using Python diff --git a/contents/labs/shared/shared.qmd b/contents/labs/shared/shared.qmd index 1a35c2ee4..1a32a7500 100644 --- a/contents/labs/shared/shared.qmd +++ b/contents/labs/shared/shared.qmd @@ -5,7 +5,7 @@ The labs in this section cover topics and techniques that are applicable across By exploring these shared labs, you'll gain a deeper understanding of the common challenges and solutions in embedded machine learning. The knowledge and skills acquired here will be valuable regardless of the specific hardware you work with in the future. | Exercise | Nicla Vision | XIAO ESP32S3 | -|------------------------------|--------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------| +|:------------------------------|:--------------------------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------| | KWS Feature Engineering | ✔ [Link](./kws_feature_eng/kws_feature_eng.qmd) | ✔ [Link](./kws_feature_eng/kws_feature_eng.qmd) | | DSP Spectral Features Block | ✔ [Link](./dsp_spectral_features_block/dsp_spectral_features_block.qmd) | ✔ [Link](./dsp_spectral_features_block/dsp_spectral_features_block.qmd) | diff --git a/contents/ml_systems/ml_systems.bib b/contents/ml_systems/ml_systems.bib index 941d43fbc..f70ad8c56 100644 --- a/contents/ml_systems/ml_systems.bib +++ b/contents/ml_systems/ml_systems.bib @@ -10,7 +10,7 @@ @misc{armcomfuture @article{lin2023tiny, author = {Lin, Ji and Zhu, Ligeng and Chen, Wei-Ming and Wang, Wei-Chen and Han, Song}, - title = {Tiny Machine Learning: {Progress} and Futures {[Feature]}}, + title = {Tiny Machine Learning: Progress and Futures Feature}, journal = {IEEE Circuits Syst. Mag.}, volume = {23}, number = {3}, diff --git a/contents/ml_systems/ml_systems.qmd b/contents/ml_systems/ml_systems.qmd index 497653baa..fe16d5bbf 100644 --- a/contents/ml_systems/ml_systems.qmd +++ b/contents/ml_systems/ml_systems.qmd @@ -28,17 +28,17 @@ As this chapter progresses, we will explore embedded systems' complex and fascin ::: -## Machine Learning Systems +## Introductions ML is rapidly evolving, with new paradigms reshaping how models are developed, trained, and deployed. One such paradigm is embedded machine learning, which is experiencing significant innovation driven by the proliferation of smart sensors, edge devices, and microcontrollers. Embedded machine learning refers to the integration of machine learning algorithms into the hardware of a device, enabling real-time data processing and analysis without relying on cloud connectivity. 
This chapter explores the landscape of embedded machine learning, covering the key approaches of Cloud ML, Edge ML, and TinyML (@fig-cloud-edge-tinyml-comparison). -![Cloud vs. Edge vs. TinyML: The Spectrum of Distributed Intelligence. Credit: ABI Research -- TinyML.](images/png/cloud-edge-tiny.png){#fig-cloud-edge-tinyml-comparison} +![Cloud vs. Edge vs. TinyML: The Spectrum of Distributed Intelligence. Source: ABI Research -- TinyML.](images/png/cloud-edge-tiny.png){#fig-cloud-edge-tinyml-comparison} ML began with Cloud ML, where powerful servers in the cloud were used to train and run large ML models. However, as the need for real-time, low-latency processing grew, Edge ML emerged, bringing inference capabilities closer to the data source on edge devices such as smartphones. The latest development in this progression is TinyML, which enables ML models to run on extremely resource-constrained microcontrollers and small embedded systems. TinyML allows for on-device inference without relying on connectivity to the cloud or edge, opening up new possibilities for intelligent, battery-operated devices. @fig-vMLsizes shows the key differences between Cloud ML, Edge ML, and TinyML in terms of hardware, latency, connectivity, power requirements, and model complexity. This significant disparity in available resources poses challenges when attempting to deploy deep learning models on microcontrollers, as these models often require substantial memory and storage. For instance, widely used deep learning models such as ResNet-50 exceed the resource limits of microcontrollers by a factor of around 100, while more efficient models like MobileNet-V2 still surpass these constraints by a factor of approximately 20. Even when quantized to use 8-bit integers (int8) for reduced memory usage, MobileNetV2 requires more than 5 times the memory typically available on a microcontroller, making it difficult to fit the model on these tiny devices. -![From cloud GPUs to microcontrollers: Navigating the memory and storage landscape across computing devices. Credit: @lin2023tiny.](./images/jpg/cloud_mobile_tiny_sizes.jpg){#fig-vMLsizes} +![From cloud GPUs to microcontrollers: Navigating the memory and storage landscape across computing devices. Source: [@lin2023tiny]](./images/jpg/cloud_mobile_tiny_sizes.jpg){#fig-vMLsizes} ## Cloud ML @@ -72,7 +72,7 @@ By leveraging the pay-as-you-go pricing model offered by cloud service providers Cloud ML has revolutionized the way machine learning is approached, making it more accessible, scalable, and efficient. It has opened up new possibilities for organizations to harness the power of machine learning without the need for significant investments in hardware and infrastructure. -![Cloud TPU data center at Google. Credit: [Google.](https://blog.google/technology/ai/google-gemini-ai/#scalable-efficient)](images/png/cloud_ml_tpu.png){#fig-cloudml-example} +![Cloud TPU data center at Google. Source: [Google.](https://blog.google/technology/ai/google-gemini-ai/#scalable-efficient)](images/png/cloud_ml_tpu.png){#fig-cloudml-example} ### Benefits @@ -166,7 +166,7 @@ In Edge ML, data processing happens in a decentralized fashion. Instead of sendi Local data storage and computation are key features of Edge ML. This setup ensures that data can be stored and analyzed directly on the devices, thereby maintaining the privacy of the data and reducing the need for constant internet connectivity. 
Moreover, this often leads to more efficient computation, as data doesn't have to travel long distances, and computations are performed with a more nuanced understanding of the local context, which can sometimes result in more insightful analyses. -![Edge ML Examples. Credit: Edge Impulse.](images/jpg/edge_ml_iot.jpg){#fig-edgeml-example} +![Edge ML Examples. Source: Edge Impulse.](images/jpg/edge_ml_iot.jpg){#fig-edgeml-example} ### Benefits @@ -230,7 +230,7 @@ In TinyML, the focus is on on-device machine learning. This means that machine l TinyML excels in low-power and resource-constrained settings. These environments require highly optimized solutions that function within the available resources. TinyML meets this need through specialized algorithms and models designed to deliver decent performance while consuming minimal energy, thus ensuring extended operational periods, even in battery-powered devices. -![Examples of TinyML device kits. Credit: [Widening Access to Applied Machine Learning with TinyML.](https://arxiv.org/pdf/2106.04008.pdf)](images/jpg/tiny_ml.jpg){#fig-tinyml-example} +![Examples of TinyML device kits. Source: [Widening Access to Applied Machine Learning with TinyML.](https://arxiv.org/pdf/2106.04008.pdf)](images/jpg/tiny_ml.jpg){#fig-tinyml-example} :::{#exr-tinyml .callout-caution collapse="true"} @@ -294,21 +294,33 @@ In summary, TinyML serves as a trailblazer in the evolution of machine learning, Up to this point, we've explored each of the different ML variants individually. Now, let's bring them all together for a comprehensive view. @tbl-big_vs_tiny offers a comparative analysis of Cloud ML, Edge ML, and TinyML based on various features and aspects. This comparison aims to provide a clear perspective on the unique advantages and distinguishing factors, aiding in making informed decisions based on the specific needs and constraints of a given application or project. 
-| Feature/Aspect | Cloud ML | Edge ML | TinyML | -|--------------------------|--------------------------------------------------------|------------------------------------------------------|------------------------------------------------------| -| **Processing Location** | Centralized servers (Data Centers) | Local devices (closer to data sources) | On-device (microcontrollers, embedded systems) | -| **Latency** | High (Depends on internet connectivity) | Moderate (Reduced latency compared to Cloud ML) | Low (Immediate processing without network delay) | -| **Data Privacy** | Moderate (Data transmitted over networks) | High (Data remains on local networks) | Very High (Data processed on-device, not transmitted) | -| **Computational Power** | High (Utilizes powerful data center infrastructure) | Moderate (Utilizes local device capabilities) | Low (Limited to the power of the embedded system) | -| **Energy Consumption** | High (Data centers consume significant energy) | Moderate (Less than data centers, more than TinyML) | Low (Highly energy-efficient, designed for low power) | -| **Scalability** | High (Easy to scale with additional server resources) | Moderate (Depends on local device capabilities) | Low (Limited by the hardware resources of the device) | -| **Cost** | High (Recurring costs for server usage, maintenance) | Variable (Depends on the complexity of local setup) | Low (Primarily upfront costs for hardware components) | -| **Connectivity Dependence**| High (Requires stable internet connectivity) | Low (Can operate with intermittent connectivity) | Very Low (Can operate without any network connectivity)| -| **Real-time Processing** | Moderate (Can be affected by network latency) | High (Capable of real-time processing locally) | Very High (Immediate processing with minimal latency) | -| **Application Examples** | Big Data Analysis, Virtual Assistants | Autonomous Vehicles, Smart Homes | Wearables, Sensor Networks | -| **Development Complexity** | Moderate to High (Requires knowledge in cloud computing) | Moderate (Requires knowledge in local network setup) | Moderate to High (Requires expertise in embedded systems)| - -: Comparison of feature aspects across Cloud ML, Edge ML, and TinyML. 
{#tbl-big_vs_tiny} ++--------------------------+---------------------------------------------------------+---------------------------------------------------------+----------------------------------------------------------+ +| Aspect | Cloud ML | Edge ML | TinyML | ++:=========================+:========================================================+:========================================================+:=========================================================+ +| Processing Location | Centralized servers (Data Centers) | Local devices (closer to data sources) | On-device (microcontrollers, embedded systems) | ++--------------------------+---------------------------------------------------------+---------------------------------------------------------+----------------------------------------------------------+ +| Latency | High (Depends on internet connectivity) | Moderate (Reduced latency compared to Cloud ML) | Low (Immediate processing without network delay) | ++--------------------------+---------------------------------------------------------+---------------------------------------------------------+----------------------------------------------------------+ +| Data Privacy | Moderate (Data transmitted over networks) | High (Data remains on local networks) | Very High (Data processed on-device, not transmitted) | ++--------------------------+---------------------------------------------------------+---------------------------------------------------------+----------------------------------------------------------+ +| Computational Power | High (Utilizes powerful data center infrastructure) | Moderate (Utilizes local device capabilities) | Low (Limited to the power of the embedded system) | ++--------------------------+---------------------------------------------------------+---------------------------------------------------------+----------------------------------------------------------+ +| Energy Consumption | High (Data centers consume significant energy) | Moderate (Less than data centers, more than TinyML) | Low (Highly energy-efficient, designed for low power) | ++--------------------------+---------------------------------------------------------+---------------------------------------------------------+----------------------------------------------------------+ +| Scalability | High (Easy to scale with additional server resources) | Moderate (Depends on local device capabilities) | Low (Limited by the hardware resources of the device) | ++--------------------------+---------------------------------------------------------+---------------------------------------------------------+----------------------------------------------------------+ +| Cost | High (Recurring costs for server usage, maintenance) | Variable (Depends on the complexity of local setup) | Low (Primarily upfront costs for hardware components) | ++--------------------------+---------------------------------------------------------+---------------------------------------------------------+----------------------------------------------------------+ +| Connectivity | High (Requires stable internet connectivity) | Low (Can operate with intermittent connectivity) | Very Low (Can operate without any network connectivity) | ++--------------------------+---------------------------------------------------------+---------------------------------------------------------+----------------------------------------------------------+ +| Real-time Processing | Moderate (Can be affected by network latency) | High (Capable 
of real-time processing locally) | Very High (Immediate processing with minimal latency) | ++--------------------------+---------------------------------------------------------+---------------------------------------------------------+----------------------------------------------------------+ +| Application Examples | Big Data Analysis, Virtual Assistants | Autonomous Vehicles, Smart Homes | Wearables, Sensor Networks | ++--------------------------+---------------------------------------------------------+---------------------------------------------------------+----------------------------------------------------------+ +| Complexity | Moderate to High (Requires knowledge in cloud computing)| Moderate (Requires knowledge in local network setup) | Moderate to High (Requires expertise in embedded systems)| ++--------------------------+---------------------------------------------------------+---------------------------------------------------------+----------------------------------------------------------+ + +: Comparison of feature aspects across Cloud ML, Edge ML, and TinyML. {#tbl-big_vs_tiny .hover .striped} ## Conclusion diff --git a/contents/ondevice_learning/ondevice_learning.qmd b/contents/ondevice_learning/ondevice_learning.qmd index 7922620aa..969b3c71f 100644 --- a/contents/ondevice_learning/ondevice_learning.qmd +++ b/contents/ondevice_learning/ondevice_learning.qmd @@ -36,7 +36,7 @@ An example of On-Device Learning can be seen in a smart thermostat that adapts t Another example is in predictive text on smartphones. As users type, the phone learns from the user's language patterns and suggests words or phrases that are likely to be used next. This learning happens directly on the device, and the model updates in real-time as more data is collected. A widely used real-world example of on-device learning is Gboard. On an Android phone, Gboard learns from typing and dictation patterns to enhance the experience for all users. On-device learning is also called federated learning. @fig-federated-cycle shows the cycle of federated learning on mobile devices: A. the device learns from user patterns; B. local model updates are communicated to the cloud; C. the cloud server updates the global model and sends the new model to all the devices. -![Federated learning cycle. Credit: [Google Research.](https://ai.googleblog.com/2017/04/federated-learning-collaborative.html)](images/png/ondevice_intro.png){#fig-federated-cycle} +![Federated learning cycle. Source: [Google Research.](https://ai.googleblog.com/2017/04/federated-learning-collaborative.html)](images/png/ondevice_intro.png){#fig-federated-cycle} ## Advantages and Limitations @@ -195,7 +195,7 @@ A specific algorithmic technique is Quantization-Aware Scaling (QAS), which impr As we discussed in the Model Optimizations chapter, quantization is the process of mapping a continuous range of values to a discrete set of values. In the context of neural networks, quantization often involves reducing the precision of the weights and activations from 32-bit floating point to lower-precision formats such as 8-bit integers. This reduction in precision can significantly reduce the computational cost and memory footprint of the model, making it suitable for deployment on low-precision hardware. @fig-float-int-quantization is an example of float-to-integer quantization. -![Float to integer qunatization. 
Credit: [Nvidia.](https://developer-blogs.nvidia.com/wp-content/uploads/2021/07/qat-training-precision.png)](images/png/ondevice_quantization_matrix.png){#fig-float-int-quantization} +![Float to integer qunatization. Source: [Nvidia.](https://developer-blogs.nvidia.com/wp-content/uploads/2021/07/qat-training-precision.png)](images/png/ondevice_quantization_matrix.png){#fig-float-int-quantization} However, the quantization process can also introduce quantization errors that can degrade the model's performance. Quantization-aware scaling is a technique that aims to minimize these errors by adjusting the scale factors used in the quantization process. @@ -237,7 +237,7 @@ Transfer learning is an ML technique in which a model developed for a particular @fig-transfer-learning-apps includes some intuitive examples of transfer learning from the real world. For instance, if you can ride a bicycle, you know how to balance yourself on two-wheel vehicles. Then, it would be easier for you to learn how to ride a motorcycle than it would be for someone who cannot ride a bicycle. -![Transferring knowledge between tasks. Credit: @zhuang2021comprehensive.](images/png/ondevice_transfer_learning_apps.png){#fig-transfer-learning-apps} +![Transferring knowledge between tasks. Source: @zhuang2021comprehensive.](images/png/ondevice_transfer_learning_apps.png){#fig-transfer-learning-apps} Let's take the example of a smart sensor application that uses on-device AI to recognize objects in images captured by the device. Traditionally, this would require sending the image data to a server, where a large neural network model processes the data and sends back the results. With on-device AI, the model is stored and runs directly on-device, eliminating the need to send data to a server. @@ -356,15 +356,25 @@ By leveraging these different types of transfer learning, practitioners can choo @tbl-tltypes presents a matrix that outlines in a bit more detail the similarities and differences between the types of transfer learning: -| | Inductive Transfer Learning | Transductive Transfer Learning | Unsupervised Transfer Learning | -|------------------------------|-----------------------------|--------------------------------|---------------------------------| -| **Labeled Data for Target Task** | Required | Not Required | Not Required | -| **Source Task** | Can be different | Same | Same or Different | -| **Target Task** | Can be different | Same | Can be different | -| **Objective** | Improve target task performance with source data | Transfer knowledge from source to target domain | Leverage source task to improve target task performance without labeled data | -| **Example** | ImageNet to bird classification | Sentiment analysis in different languages | Topic modeling for different text data | - -: Comparison of transfer learning types. 
{#tbl-tltypes} ++--------------------------------+-----------------------------+--------------------------------+---------------------------------+ +| Aspect | Inductive Transfer Learning | Transductive Transfer Learning | Unsupervised Transfer Learning | ++:===============================+:============================+:===============================+:================================+ +| Labeled Data for Target Task | Required | Not Required | Not Required | ++--------------------------------+-----------------------------+--------------------------------+---------------------------------+ +| Source Task | Can be different | Same | Same or Different | ++--------------------------------+-----------------------------+--------------------------------+---------------------------------+ +| Target Task | Can be different | Same | Can be different | ++--------------------------------+-----------------------------+--------------------------------+---------------------------------+ +| Objective | Improve target task | Transfer knowledge from | Leverage source task to | +| | performance with source | source to target domain | improve target task | +| | data | | performance without | +| | | | labeled data | ++--------------------------------+-----------------------------+--------------------------------+---------------------------------+ +| Example | ImageNet to bird | Sentiment analysis in | Topic modeling for | +| | classification | different languages | different text data | ++--------------------------------+-----------------------------+--------------------------------+---------------------------------+ + +: Comparison of transfer learning types. {#tbl-tltypes .striped .hover} ### Constraints and Considerations @@ -398,7 +408,7 @@ Learn more about transfer learning in @vid-tl below. # Transfer Learning -{{< video >}} +{{< video https://www.youtube.com/watch?v=FQM13HkEfBk >}} ::: @@ -416,7 +426,7 @@ w_{t+1} \rightarrow w_t - \eta \sum_{k=1}^{K} \frac{n_k}{n}g_k $$ This summarizes the basic algorithm for federated learning on the right. For each round of training, the server takes a random set of client devices and calls each client to train on its local batch using the most recent server-side weights. Those weights are then returned to the server, where they are collected individually and averaged to update the global model weights. -![Google's Proposed FederatedAverage Algorithm. Credit: McMahan et al. ([2017](https://arxiv.org/abs/1602.05629)).](images/png/ondevice_fed_averaging.png){#fig-federated-avg-algo} +![Google's Proposed FederatedAverage Algorithm. Source: McMahan et al. ([2017](https://arxiv.org/abs/1602.05629)).](images/png/ondevice_fed_averaging.png){#fig-federated-avg-algo} With this proposed structure, there are a few key vectors for further optimizing federated learning. We will outline each in the following subsections. @@ -426,7 +436,7 @@ With this proposed structure, there are a few key vectors for further optimizing # Transfer Learning -{{< video >}} +{{< video https://www.youtube.com/watch?v=zqv1eELa7fs >}} ::: @@ -446,7 +456,7 @@ There are many methods for selectively sharing updates. The general principle is However, we cannot just reduce communication by sending pieces of those gradients from each client to the server because the gradients are part of an entire update required to improve the model. 
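Before moving on to those architectural approaches, the weighted averaging step in the FederatedAveraging update shown earlier can be made concrete with a short sketch. The snippet below is purely illustrative: the client example counts, gradients, and learning rate are made-up values rather than anything from the original algorithm description, and a production system would aggregate full model updates instead of small NumPy arrays.

```python
# Minimal sketch of the server-side FederatedAveraging update:
#   w_{t+1} = w_t - eta * sum_k (n_k / n) * g_k
# All numbers below are illustrative assumptions.
import numpy as np

eta = 0.1                                  # server learning rate (eta)
w_t = np.array([0.5, -1.2, 0.3])           # current global weights w_t

n_k = np.array([100, 300, 600])            # examples held by each of K = 3 clients
n = n_k.sum()                              # total examples across participating clients

# Each client returns a full gradient computed on its local data; faked here.
g_k = [
    np.array([0.2, 0.1, -0.3]),
    np.array([0.1, -0.2, 0.0]),
    np.array([-0.1, 0.3, 0.2]),
]

# The server weights each client's contribution by its share of the data, sums,
# and applies a single gradient step to the global model.
weighted_grad = sum((nk / n) * gk for nk, gk in zip(n_k, g_k))
w_next = w_t - eta * weighted_grad
print(w_next)
```

Note how each client's contribution is a complete update over the whole parameter vector, which is why simply dropping pieces of the gradient does not reduce communication without hurting the model.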
Instead, you need to architecturally design the model such that each client trains only a small portion of the broader model, reducing the total communication while still gaining the benefit of training on client data. A paper [@shi2022data] from the University of Sheffield applies this concept to a CNN by splitting the global model into two parts: an upper and a lower part, as shown in @chen2023learning. -![Split model architecture for selective sharing. Credit: Shi et al., ([2022](https://doi.org/10.1145/3517207.3526980)).](images/png/ondevice_split_model.png){#fig-split-model} +![Split model architecture for selective sharing. Source: Shi et al., ([2022](https://doi.org/10.1145/3517207.3526980)).](images/png/ondevice_split_model.png){#fig-split-model} The lower part is designed to focus on generic features in the dataset, while the upper part, trained on those generic features, is designed to be more sensitive to the activation maps. This means that the lower part of the model is trained through standard federated averaging across all of the clients. Meanwhile, the upper part of the model is trained entirely on the server side from the activation maps generated by the clients. This approach drastically reduces communication for the model while still making the network robust to various types of input found in the data on the client devices. @@ -480,11 +490,11 @@ A primary example of a deployed federated learning system is Google's Keyboard, NWP will anticipate the next word the user tries to type based on the previous one. SC gives inline suggestions to speed up the typing based on each character. OTF will re-rank the proposed next words based on the active typing process. All three of these models need to run quickly on the edge, and federated learning can accelerate training on the users' data. However, uploading every word a user typed to the cloud for training would be a massive privacy violation. Therefore, federated learning emphasizes differential privacy, which protects the user while enabling a better user experience. -![Google G Board Features. Credit: Zheng et al., ([2023](https://arxiv.org/abs/2305.18465)).](images/png/ondevice_gboard_example.png){#fig-gboard-features} +![Google G Board Features. Source: Zheng et al., ([2023](https://arxiv.org/abs/2305.18465)).](images/png/ondevice_gboard_example.png){#fig-gboard-features} To accomplish this goal, Google employed its algorithm DP-FTRL, which provides a formal guarantee that trained models will not memorize specific user data or identities. The algorithm system design is shown in @fig-differential-privacy. DP-FTRL, combined with secure aggregation, encrypts model updates and provides an optimal balance of privacy and utility. Furthermore, adaptive clipping is applied in the aggregation process to limit the impact of individual users on the global model (step 3 in @fig-differential-privacy). By combining all these techniques, Google can continuously refine its keyboard while preserving user privacy in a formally provable way. -![Differential Privacy in G Board. Credit: Zheng et al., ([2023](https://arxiv.org/abs/2305.18465)).](images/png/ondevice_gboard_approach.png){#fig-differential-privacy} +![Differential Privacy in G Board. 
Source: Zheng et al., ([2023](https://arxiv.org/abs/2305.18465)).](images/png/ondevice_gboard_approach.png){#fig-differential-privacy} :::{#exr-flg .callout-caution collapse="true"} @@ -561,17 +571,29 @@ To mitigate these risks, several defenses can be employed: By understanding the potential risks and implementing these defenses, we can help secure on-device training at the endpoint/edge and mitigate the impact of adversarial attacks. Most people easily confuse data poisoning and adversarial attacks. So @tbl-attacks compares data poisoning and adversarial attacks: -| Aspect | Data Poisoning | Adversarial Attacks | -|--------------------|-----------------------------------------|-------------------------------------------| -| **Timing** | Training phase | Inference phase | -| **Target** | Training data | Input data | -| **Goal** | Negatively affect model's performance | Cause incorrect predictions | -| **Method** | Insert malicious examples into training data, often with incorrect labels | Add carefully crafted noise to input data | -| **Example** | Adding images of cats labeled as dogs to a dataset used for training an image classification model | Adding a small amount of noise to an image in a way that causes a face recognition system to misidentify a person | -| **Potential Effects** | Model learns incorrect patterns and makes incorrect predictions | Immediate and potentially dangerous incorrect predictions | -| **Applications Affected** | Any ML model | Autonomous vehicles, security systems, etc | - -: Comparison of data poisoning and adversarial attacks. {#tbl-attacks} ++-------------------------+----------------------------------------------+-----------------------------------------------+ +| Aspect | Data Poisoning | Adversarial Attacks | ++:========================+:=============================================+:==============================================+ +| Timing | Training phase | Inference phase | ++-------------------------+----------------------------------------------+-----------------------------------------------+ +| Target | Training data | Input data | ++-------------------------+----------------------------------------------+-----------------------------------------------+ +| Goal | Negatively affect model's performance | Cause incorrect predictions | ++-------------------------+----------------------------------------------+-----------------------------------------------+ +| Method | Insert malicious examples into training | Add carefully crafted noise to input data | +| | data, often with incorrect labels | | ++-------------------------+----------------------------------------------+-----------------------------------------------+ +| Example | Adding images of cats labeled as dogs | Adding a small amount of noise to an image | +| | to a dataset used for training an image | in a way that causes a face recognition | +| | classification model | system to misidentify a person | ++-------------------------+----------------------------------------------+-----------------------------------------------+ +| Potential Effects | Model learns incorrect patterns and makes | Immediate and potentially dangerous | +| | incorrect predictions | incorrect predictions | ++-------------------------+----------------------------------------------+-----------------------------------------------+ +| Applications Affected | Any ML model | Autonomous vehicles, security systems, etc. 
| ++-------------------------+----------------------------------------------+-----------------------------------------------+ + +: Comparison of data poisoning and adversarial attacks. {#tbl-attacks .striped .hover} ### Model Inversion @@ -660,7 +682,7 @@ Tiny Transfer Learning (TinyTL) enables memory-efficient on-device training thro To reduce this memory overhead, TinyTL freezes the majority of the weights so they do not need to be updated during training. This eliminates the need to store intermediate activations for frozen parts of the network. TinyTL only finetunes the bias terms, which are much smaller than the weights. An overview of TinyTL workflow is shown in @fig-tinytl-workflow. -![TinyTL workflow. Credit: @cai2020tinytl.)](images/png/ondevice_transfer_tinytl.png){#fig-tinytl-workflow} +![TinyTL workflow. Source: @cai2020tinytl.)](images/png/ondevice_transfer_tinytl.png){#fig-tinytl-workflow} Freezing weights apply to fully connected layers as well as convolutional and normalization layers. However, only adapting the biases limits the model's ability to learn and adapt to new data. @@ -674,7 +696,7 @@ TinyTrain significantly reduces the time required for on-device training by sele Based on the user data, memory, and computing available on the device, TinyTrain dynamically chooses which neural network layers to update during training. This layer selection is optimized to reduce computation and memory usage while maintaining high accuracy. -![TinyTrain workflow. Credit: @kwon2023tinytrain.](images/png/ondevice_pretraining.png){#fig-tiny-train} +![TinyTrain workflow. Source: @kwon2023tinytrain.](images/png/ondevice_pretraining.png){#fig-tiny-train} More specifically, TinyTrain first does offline pretraining of the model. During pretraining, it not only trains the model on the task data but also meta-trains the model. Meta-training means training the model on metadata about the training process itself. This meta-learning improves the model's ability to adapt accurately even when limited data is available for the target task. diff --git a/contents/ops/ops.qmd b/contents/ops/ops.qmd index c12f1dcb5..64e260b34 100644 --- a/contents/ops/ops.qmd +++ b/contents/ops/ops.qmd @@ -79,15 +79,29 @@ While DevOps and MLOps share similarities in their goals and principles, they di @tbl-mlops compares and summarizes them side by side. -| Aspect | DevOps | MLOps | -|----------------------|----------------------------------|--------------------------------------| -| **Objective** | Streamlining software development and operations processes | Optimizing the lifecycle of machine learning models | -| **Methodology** | Continuous Integration and Continuous Delivery (CI/CD) for software development | Similar to CI/CD but focuses on machine learning workflows | -| **Primary Tools** | Version control (Git), CI/CD tools (Jenkins, Travis CI), Configuration management (Ansible, Puppet) | Data versioning tools, Model training and deployment tools, CI/CD pipelines tailored for ML | -| **Primary Concerns** | Code integration, Testing, Release management, Automation, Infrastructure as code | Data management, Model versioning, Experiment tracking, Model deployment, Scalability of ML workflows | -| **Typical Outcomes** | Faster and more reliable software releases, Improved collaboration between development and operations teams | Efficient management and deployment of machine learning models, Enhanced collaboration between data scientists and engineers | - -: Comparison of DevOps and MLOps. 
{#tbl-mlops} ++----------------------+--------------------------------------------+-------------------------------------------------------+ +| Aspect | DevOps | MLOps | ++======================+============================================+=======================================================+ +| Objective | Streamlining software development | Optimizing the lifecycle of machine learning models | +| | and operations processes | | ++----------------------+--------------------------------------------+-------------------------------------------------------+ +| Methodology | Continuous Integration and Continuous | Similar to CI/CD but focuses on machine learning | +| | Delivery (CI/CD) for software development | workflows | ++----------------------+--------------------------------------------+-------------------------------------------------------+ +| Primary Tools | Version control (Git), CI/CD tools | Data versioning tools, Model training and deployment | +| | (Jenkins, Travis CI), Configuration | tools, CI/CD pipelines tailored for ML | +| | management (Ansible, Puppet) | | ++----------------------+--------------------------------------------+-------------------------------------------------------+ +| Primary Concerns | Code integration, Testing, Release | Data management, Model versioning, Experiment | +| | management, Automation, Infrastructure | tracking, Model deployment, Scalability of ML | +| | as code | workflows | ++----------------------+--------------------------------------------+-------------------------------------------------------+ +| Typical Outcomes | Faster and more reliable software releases,| Efficient management and deployment of machine | +| | Improved collaboration between development | learning models, Enhanced collaboration between | +| | and operations teams | data scientists and engineers | ++----------------------+--------------------------------------------+-------------------------------------------------------+ + +: Comparison of DevOps and MLOps. {#tbl-mlops .striped .hover} Learn more about ML Lifecycles through a case study featuring speech recognition in @vid-mlops. @@ -95,7 +109,7 @@ Learn more about ML Lifecycles through a case study featuring speech recognition # MLOps -{{< video >}} +{{< video https://www.youtube.com/watch?v=YJsRD_hU4tc&list=PLkDaE6sCZn6GMoA0wbpJLi3t34Gd8l0aK&index=3 >}} ::: @@ -125,7 +139,7 @@ In an industrial predictive maintenance use case, sensor data is ingested from d # Data Pipelines -{{< video >}} +{{< video https://www.youtube.com/watch?v=gz-44N3MMOA&list=PLkDaE6sCZn6GMoA0wbpJLi3t34Gd8l0aK&index=33 >}} ::: @@ -137,7 +151,7 @@ CI/CD pipelines orchestrate key steps, including checking out new code changes, @fig-ci-cd illustrates a CI/CD pipeline specifically tailored for MLOps. The process starts with a dataset and feature repository (on the left), which feeds into a dataset ingestion stage. Post-ingestion, the data undergoes validation to ensure its quality before being transformed for training. Parallel to this, a retraining trigger can initiate the pipeline based on specified criteria. The data then passes through a model training/tuning phase within a data processing engine, followed by model evaluation and validation. Once validated, the model is registered and stored in a machine learning metadata and artifact repository. 
The final stage involves deploying the trained model back into the dataset and feature repository, thereby creating a cyclical process for continuous improvement and deployment of machine learning models. -![MLOps CI/CD diagram. Credit: HarvardX.](images/png/cicd_pipelines.png){#fig-ci-cd} +![MLOps CI/CD diagram. Source: HarvardX.](images/png/cicd_pipelines.png){#fig-ci-cd} For example, when a data scientist checks improvements to an image classification model into a [GitHub](https://github.com/) repository, this actively triggers a Jenkins CI/CD pipeline. The pipeline reruns data transformations and model training on the latest data, tracking experiments with [MLflow](https://mlflow.org/). After automated validation testing, teams deploy the model container to a [Kubernetes](https://kubernetes.io/) staging cluster for further QA. Once approved, Jenkins facilitates a phased rollout of the model to production with [canary deployments](https://kubernetes.io/docs/concepts/cluster-administration/manage-deployment/#canary-deployments) to catch any issues. If anomalies are detected, the pipeline enables teams to roll back to the previous model version gracefully. @@ -233,7 +247,7 @@ Watch the video below to learn more about monitoring. # Model Monitoring -{{< video >}} +{{< video https://www.youtube.com/watch?v=hq_XyP9y0xg&list=PLkDaE6sCZn6GMoA0wbpJLi3t34Gd8l0aK&index=7 >}} ::: @@ -267,7 +281,7 @@ Enabling transparency, traceability, and communication via MLOps empowers teams # Deployment Challenges -{{< video >}} +{{< video https://www.youtube.com/watch?v=UyEtTyeahus&list=PLkDaE6sCZn6GMoA0wbpJLi3t34Gd8l0aK&index=5 >}} ::: @@ -277,7 +291,7 @@ Technical debt is increasingly pressing for ML systems (see Figure 14.2). This m @fig-technical-debt illustrates the various components contributing to ML systems' hidden technical debt. It shows the interconnected nature of configuration, data collection, and feature extraction, which is foundational to the ML codebase. The box sizes indicate the proportion of the entire system represented by each component. In industry ML systems, the code for the model algorithm makes up only a tiny fraction (see the small black box in the middle compared to all the other large boxes). The complexity of ML systems and the fast-paced nature of the industry make it very easy to accumulate technical debt. -![ML system components. Credit: @sculley2015hidden](images/png/hidden_debt.png){#fig-technical-debt} +![ML system components. Source: @sculley2015hidden](images/png/hidden_debt.png){#fig-technical-debt} ### Model Boundary Erosion @@ -291,7 +305,7 @@ Tight coupling between ML model components makes isolating changes difficult. Mo The flowchart in @fig-correction-cascades-flowchart depicts the concept of correction cascades in the ML workflow, from problem statement to model deployment. The arcs represent the potential iterative corrections needed at each workflow stage, with different colors corresponding to distinct issues such as interacting with physical world brittleness, inadequate application-domain expertise, conflicting reward systems, and poor cross-organizational documentation. The red arrows indicate the impact of cascades, which can lead to significant revisions in the model development process. In contrast, the dotted red line represents the drastic measure of abandoning the process to restart. 
This visual emphasizes the complex, interconnected nature of ML system development and the importance of addressing these issues early in the development cycle to mitigate their amplifying effects downstream. -![Correction cascades flowchart. Credit: @sculley2015hidden.](images/png/data_cascades.png){#fig-correction-cascades-flowchart} +![Correction cascades flowchart. Source: @sculley2015hidden.](images/png/data_cascades.png){#fig-correction-cascades-flowchart} Building models sequentially creates risky dependencies where later models rely on earlier ones. For example, taking an existing model and fine-tuning it for a new use case seems efficient. However, this bakes in assumptions from the original model that may eventually need correction. @@ -304,7 +318,7 @@ While fine-tuning can be efficient, modifying foundational components later beco @fig-data-cascades-debt depicts the concept of correction cascades in the ML workflow, from problem statement to model deployment. The arcs represent the potential iterative corrections needed at each stage of the workflow, with different colors corresponding to distinct issues such as interacting with physical world brittleness, inadequate application-domain expertise, conflicting reward systems, and poor cross-organizational documentation. The red arrows indicate the impact of cascades, which can lead to significant revisions in the model development process. In contrast, the dotted red line represents the drastic measure of abandoning the process to restart. This visual emphasizes the complex, interconnected nature of ML system development and the importance of addressing these issues early in the development cycle to mitigate their amplifying effects downstream. -![Data cascades. Credit: @sambasivan2021.](images/png/data_cascades.png){#fig-data-cascades-debt} +![Data cascades. Source: @sambasivan2021.](images/png/data_cascades.png){#fig-data-cascades-debt} ### Undeclared Consumers @@ -553,7 +567,7 @@ Strategies like transfer learning become essential to mitigate data scarcity and @fig-transfer-learning-mlops illustrates the concept of transfer learning in model training within an MLOps framework. It showcases a neural network where the initial layers (W_{A1} to W_{A4}), which are responsible for general feature extraction, are frozen (indicated by the green dashed line), meaning their weights are not updated during training. This reuse of pre-trained layers accelerates learning by utilizing knowledge gained from previous tasks. The latter layers (W_{A5} to W_{A7}), depicted beyond the blue dashed line, are finetuned for the specific task at hand, focusing on task-specific feature learning. This approach allows the model to adapt to the new task using fewer resources and potentially achieve higher performance on specialized tasks by reusing the general features learned from a broader dataset. -![Transfer learning in MLOps. Credit: HarvardX.](images/png/transfer_learning.png){#fig-transfer-learning-mlops} +![Transfer learning in MLOps. Source: HarvardX.](images/png/transfer_learning.png){#fig-transfer-learning-mlops} For example, a smart home assistant may pre-train an audio recognition model on public YouTube clips, which helps bootstrap with general knowledge. It then transfers learning to a small sample of home data to classify customized appliances and events, specializing in the model. The model transforms into a lightweight neural network optimized for microphone-enabled devices across the home. 
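To make the freeze-and-fine-tune pattern above more concrete, here is a minimal, PyTorch-style sketch. The layer sizes, the split between frozen and trainable layers, and the random stand-in data are illustrative assumptions rather than the actual smart home assistant model or its audio features.

```python
# Minimal sketch: freeze general-purpose layers, fine-tune task-specific ones.
import torch
import torch.nn as nn

# Hypothetical pretrained network: early layers extract general features,
# later layers specialize to the new on-device task.
backbone = nn.Sequential(            # analogous to the frozen W_A1..W_A4 layers
    nn.Linear(64, 128), nn.ReLU(),
    nn.Linear(128, 128), nn.ReLU(),
)
head = nn.Sequential(                # analogous to the fine-tuned W_A5..W_A7 layers
    nn.Linear(128, 32), nn.ReLU(),
    nn.Linear(32, 5),                # e.g., 5 customized appliance/event classes
)
model = nn.Sequential(backbone, head)

# Freeze the general feature extractor so its weights are not updated.
for param in backbone.parameters():
    param.requires_grad = False

# Only the unfrozen head parameters are handed to the optimizer.
optimizer = torch.optim.SGD(
    [p for p in model.parameters() if p.requires_grad], lr=1e-3
)
loss_fn = nn.CrossEntropyLoss()

# One illustrative update step on a small batch of locally collected data.
x = torch.randn(8, 64)               # stand-in for on-device audio/sensor features
y = torch.randint(0, 5, (8,))        # stand-in labels
optimizer.zero_grad()
loss = loss_fn(model(x), y)
loss.backward()
optimizer.step()
```

Because only the small head is trained, the memory and compute cost of adaptation stays low, which is what makes this pattern attractive in the MLOps settings described here.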
@@ -577,7 +591,7 @@ Model delivery relies on physical interfaces like USB or UART serial connections @fig-model-lifecycle presents an overview of Model Lifecycle Management in an MLOps context, illustrating the flow from development (top left) to deployment and monitoring (bottom right). The process begins with ML Development, where code and configurations are version-controlled. Data and model management are central to the process, involving datasets and feature repositories. Continuous training, model conversion, and model registry are key stages in the operationalization of training. The model deployment includes serving the model and managing serving logs. Alerting mechanisms are in place to flag issues, which feed into continuous monitoring to ensure model performance and reliability over time. This integrated approach ensures that models are developed and maintained effectively throughout their lifecycle. -![Model lifecycle management. Credit: HarvardX.](images/png/mlops_flow.png){#fig-model-lifecycle} +![Model lifecycle management. Source: HarvardX.](images/png/mlops_flow.png){#fig-model-lifecycle} ### Development and Operations Integration @@ -808,7 +822,7 @@ The ClinAIOps framework, shown in @fig-clinaiops, provides these mechanisms thro These feedback loops, which we will discuss below, help maintain clinician responsibility and control over treatment plans by reviewing AI suggestions before they impact patients. They help dynamically customize AI model behavior and outputs to each patient's changing health status. They help improve model accuracy and clinical utility over time by learning from clinician and patient responses. They facilitate shared decision-making and personalized care during patient-clinician interactions. They enable rapid optimization of therapies based on frequent patient data that clinicians cannot manually analyze. -![ClinAIOps cycle. Credit: @chen2023framework.](images/png/clinaiops.png){#fig-clinaiops} +![ClinAIOps cycle. Source: @chen2023framework.](images/png/clinaiops.png){#fig-clinaiops} ##### Patient-AI Loop @@ -847,7 +861,7 @@ In the Clinician-AI loop, the provider would receive summaries of the patient's In the Patient-Clinician loop, shown in @fig-interactive-loop, the in-person visits would focus less on collecting data or basic medication adjustments. Instead, the clinician could interpret high-level trends and patterns in the patient's continuous monitoring data and have focused discussions about diet, exercise, stress management, and other lifestyle changes to improve their blood pressure control holistically. The frequency of appointments could be dynamically optimized based on the patient's stability rather than following a fixed calendar. Since the clinician would not need to review all the granular data, they could concentrate on delivering personalized care and recommendations during visits. With continuous monitoring and AI-assisted optimization of medications between visits, the clinician-patient relationship focuses on overall wellness goals and becomes more impactful. This proactive and tailored data-driven approach can help avoid hypertension complications like stroke, heart failure, and other threats to patient health and well-being. -![ClinAIOps interactive loop. Credit: @chen2023framework.](images/png/clinaiops_loops.png){#fig-interactive-loop} +![ClinAIOps interactive loop. Source: @chen2023framework.](images/png/clinaiops_loops.png){#fig-interactive-loop} #### MLOps vs. 
ClinAIOps @@ -867,16 +881,25 @@ The hypertension case clearly shows the need to look beyond training and deployi @tbl-clinical_ops compares them. This table highlights how, when MLOps is implemented, we need to consider more than just ML models. -| | Traditional MLOps | ClinAIOps | -|-|-------------------|------------------| -| Focus | ML model development and deployment | Coordinating human and AI decision-making | -| Stakeholders | Data scientists, IT engineers | Patients, clinicians, AI developers | -| Feedback loops | Model retraining, monitoring | Patient-AI, clinician-AI, patient-clinician | -| Objective | Operationalize ML deployments | Optimize patient health outcomes | -| Processes | Automated pipelines and infrastructure | Integrates clinical workflows and oversight | -| Data considerations | Building training datasets | Privacy, ethics, protected health information | -| Model validation | Testing model performance metrics | Clinical evaluation of recommendations | -| Implementation | Focuses on technical integration | Aligns incentives of human stakeholders | ++------------------------+---------------------------------------------+----------------------------------------------+ +| | Traditional MLOps | ClinAIOps | ++:=======================+:============================================+:=============================================+ +| Focus | ML model development and deployment | Coordinating human and AI decision-making | ++------------------------+---------------------------------------------+----------------------------------------------+ +| Stakeholders | Data scientists, IT engineers | Patients, clinicians, AI developers | ++------------------------+---------------------------------------------+----------------------------------------------+ +| Feedback loops | Model retraining, monitoring | Patient-AI, clinician-AI, patient-clinician | ++------------------------+---------------------------------------------+----------------------------------------------+ +| Objective | Operationalize ML deployments | Optimize patient health outcomes | ++------------------------+---------------------------------------------+----------------------------------------------+ +| Processes | Automated pipelines and infrastructure | Integrates clinical workflows and oversight | ++------------------------+---------------------------------------------+----------------------------------------------+ +| Data considerations | Building training datasets | Privacy, ethics, protected health information| ++------------------------+---------------------------------------------+----------------------------------------------+ +| Model validation | Testing model performance metrics | Clinical evaluation of recommendations | ++------------------------+---------------------------------------------+----------------------------------------------+ +| Implementation | Focuses on technical integration | Aligns incentives of human stakeholders | ++------------------------+---------------------------------------------+----------------------------------------------+ : Comparison of MLOps versus AI operations for clinical use. 
{#tbl-clinical_ops} @@ -975,3 +998,4 @@ In addition to exercises, we also offer a series of hands-on labs that allow stu * _Coming soon._ ::: + diff --git a/contents/optimizations/optimizations.bib index 635a7d2f1..fb0af302f 100644 --- a/contents/optimizations/optimizations.bib +++ b/contents/optimizations/optimizations.bib @@ -1,6 +1,31 @@ %comment{This file was created with betterbib v5.0.11.} +@article{tran2022pruning, + author = {Tran, Cuong and Fioretto, Ferdinando and Kim, Jung-Eun and Naidu, Rakshit}, + title = {Pruning has a disparate impact on model accuracy}, + journal = {Adv Neural Inf Process Syst}, + volume = {35}, + pages = {17652--17664}, + year = {2022}, +} + +@inproceedings{rachwan2022winning, + author = {Rachwan, John and Z\"ugner, Daniel and Charpentier, Bertrand and Geisler, Simon and Ayle, Morgane and G\"unnemann, Stephan}, + title = {Winning the lottery ahead of time: {Efficient} early network pruning}, + booktitle = {International Conference on Machine Learning}, + pages = {18293--18309}, + year = {2022}, + organization = {PMLR}, +} + +@article{lubana2020gradient, + author = {Lubana, Ekdeep Singh and Dick, Robert P}, + title = {A gradient flow framework for analyzing network pruning}, + journal = {arXiv preprint arXiv:2009.11839}, + year = {2020}, +} + @inproceedings{yao2021hawq, author = {Yao, Zhewei and Dong, Zhen and Zheng, Zhangcheng and Gholami, Amir and Yu, Jiali and Tan, Eric and Wang, Leyuan and Huang, Qijing and Wang, Yida and Mahoney, Michael and others}, title = {Hawq-v3: {Dyadic} neural network quantization}, @@ -546,12 +571,19 @@ @article{annette2020 } @article{alexnet2012, - author = {Krizhevsky, Alex and Sutskever, Ilya and Hinton, Geoffrey E}, + author = {Krizhevsky, Alex and Sutskever, Ilya and Hinton, Geoffrey E.}, editor = {Pereira, F. and Burges, C.J. and Bottou, L. and Weinberger, K.Q.}, booktitle = {Advances in Neural Information Processing Systems}, - publisher = {Curran Associates, Inc.}, - title = {{ImageNet} Classification with Deep Convolutional Neural Networks}, - url = {https://proceedings.neurips.cc/paper_files/paper/2012/file/c399862d3b9d6b76c8436e924a68c45b-Paper.pdf}, - volume = {25}, - year = {2012}, + publisher = {Association for Computing Machinery (ACM)}, + title = {{ImageNet} classification with deep convolutional neural networks}, + url = {https://doi.org/10.1145/3065386}, + volume = {60}, + year = {2017}, + doi = {10.1145/3065386}, + number = {6}, + source = {Crossref}, + journal = {Commun. ACM}, + issn = {0001-0782, 1557-7317}, + pages = {84--90}, + month = may, } diff --git a/contents/optimizations/optimizations.qmd index 822e1060f..ca1823644 100644 --- a/contents/optimizations/optimizations.qmd +++ b/contents/optimizations/optimizations.qmd @@ -102,7 +102,7 @@ There are several techniques for assigning these importance scores: * Activation-based pruning tracks activation values for neurons/filters over a validation dataset. Consistently low activation values suggest less relevance, warranting removal. * Taylor expansion approximates the change in loss function from removing a given weight. Weights with negligible impact on loss are prime candidates for pruning. -The idea is to measure, either directly or indirectly, the contribution of each component to the model's output. Structures with minimal influence according to the defined criteria are pruned first.
This enables selective, optimized pruning that maximally compresses models while preserving predictive capacity. In general, it is important to evaluate the impact of removing particular structures on the model's output. +The idea is to measure, either directly or indirectly, the contribution of each component to the model's output. Structures with minimal influence according to the defined criteria are pruned first. This enables selective, optimized pruning that maximally compresses models while preserving predictive capacity. In general, it is important to evaluate the impact of removing particular structures on the model's output, with recent works such as [@rachwan2022winning] and [@lubana2020gradient] investigating combinations of techniques like magnitude-based pruning and gradient-based pruning. ##### Selecting a pruning strategy @@ -149,21 +149,29 @@ Unstructured pruning, while offering the potential for significant model size re @tbl-pruning_methods provides a concise comparison between structured and unstructured pruning. In this table, aspects related to the nature and architecture of the pruned model (Definition, Model Regularity, and Compression Level) are grouped together, followed by aspects related to computational considerations (Computational Efficiency and Hardware Compatibility), and ending with aspects related to the implementation and adaptation of the pruned model (Implementation Complexity and Fine-Tuning Complexity). Both pruning strategies offer unique advantages and challenges, as shown in @tbl-pruning_methods, and the selection between them should be influenced by specific project and deployment requirements. -| **Aspect** | **Structured Pruning** | **Unstructured Pruning** | -|------------------------------|------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------| -| **Definition** | Pruning entire structures (e.g., neurons, channels, layers) within the network | Pruning individual weights or neurons, resulting in sparse matrices or non-regular network structures | -| **Model Regularity** | Maintains a regular, structured network architecture | Results in irregular, sparse network architectures | -| **Compression Level** | May offer limited model compression compared to unstructured pruning | Can achieve higher model compression due to fine-grained pruning | -| **Computational Efficiency** | Typically more computationally efficient due to maintaining regular structures | Can be computationally inefficient due to sparse weight matrices, unless specialized hardware/software is used | -| **Hardware Compatibility** | Generally better compatible with various hardware due to regular structures | May require hardware that efficiently handles sparse computations to realize benefits | -| **Implementation Complexity**| Often simpler to implement and manage due to maintaining network structure | Can be complex to manage and compute due to sparse representations | -| **Fine-Tuning Complexity** | May require less complex fine-tuning strategies post-pruning | Might necessitate more complex retraining or fine-tuning strategies post-pruning | - -: Comparison of structured versus unstructured pruning. 
{#tbl-pruning_methods} ++------------------------------+------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------+ +| Aspect | Structured Pruning | Unstructured Pruning | ++:=============================+:===========================================================================================================+:=================================================================================================================+ +| Definition | Pruning entire structures (e.g., neurons, channels, layers) within the network | Pruning individual weights or neurons, resulting in sparse matrices or non-regular network structures | ++------------------------------+------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------+ +| Model Regularity | Maintains a regular, structured network architecture | Results in irregular, sparse network architectures | ++------------------------------+------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------+ +| Compression Level | May offer limited model compression compared to unstructured pruning | Can achieve higher model compression due to fine-grained pruning | ++------------------------------+------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------+ +| Computational Efficiency | Typically more computationally efficient due to maintaining regular structures | Can be computationally inefficient due to sparse weight matrices, unless specialized hardware/software is used | ++------------------------------+------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------+ +| Hardware Compatibility | Generally better compatible with various hardware due to regular structures | May require hardware that efficiently handles sparse computations to realize benefits | ++------------------------------+------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------+ +| Implementation Complexity | Often simpler to implement and manage due to maintaining network structure | Can be complex to manage and compute due to sparse representations | ++------------------------------+------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------+ +| Fine-Tuning Complexity | May require less complex fine-tuning strategies post-pruning | Might necessitate more complex retraining or fine-tuning strategies post-pruning | 
++------------------------------+------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------+ + +: Comparison of structured versus unstructured pruning. {#tbl-pruning_methods .striped .hover} In @fig-structured-unstructured we have exapmles that illustrate the differences between unstructured and structured pruning. Observe that unstructured pruning can lead to models that no longer obey high-level structural guaruntees of their original unpruned counterparts: the left network is no longer a fully connected network after pruning. Structured pruning on the other hand maintains those invariants: in the middle, the fully connected network is pruned in a way that the pruned network is still fully connected; likewise, the CNN maintains its convolutional structure, albeit with fewer filters. -![Unstructured vs structured pruning. Credit: @qi2021efficient.](images/png/modeloptimization_pruning_comparison.png){#fig-structured-unstructured} +![Unstructured vs structured pruning. Source: @qi2021efficient.](images/png/modeloptimization_pruning_comparison.png){#fig-structured-unstructured} #### Lottery Ticket Hypothesis @@ -223,7 +231,7 @@ Unstructured pruning might introduce additional complexity in implementing pruni ##### Legal and Ethical Considerations -Last but not least, adherence to legal and ethical guidelines is paramount, especially in domains with significant consequences. Both pruning methods must undergo rigorous validation, testing, and potentially certification processes to ensure compliance with relevant regulations and standards. This is especially important in use cases like medical AI applications or autonomous driving where quality drops due to pruning like optimizations can be life threatening. +Last but not least, adherence to legal and ethical guidelines is important, especially in domains with significant consequences. Pruning methods must undergo rigorous validation, testing, and potentially certification processes to ensure compliance with relevant regulations and standards, though arguably at this time no such formal standards and best practices exist that are vetted and validated by 3rd party entities. This is particularly crucial in high-stakes applications like medical AI and autonomous driving, where quality drops due to pruning-like optimizations can be life-threatening. Moreover, ethical considerations extend beyond safety to fairness and equality; recent work by [@tran2022pruning] has revealed that pruning can disproportionately impact people of color, underscoring the need for comprehensive ethical evaluation in the pruning process. ![Sparse weight matrix.](images/jpg/modeloptimization_sprase_matrix.jpeg){#fig-sparse-matrix} @@ -255,7 +263,7 @@ Another core concept is "temperature scaling" in the softmax function. It plays These components, when adeptly configured and harmonized, enable the student model to assimilate the teacher model's knowledge, crafting a pathway towards efficient and robust smaller models that retain the predictive prowess of their larger counterparts. @fig-knowledge-distillation visualizes the training procedure of knowledge distillation. Note how the logits or soft labels of the teacher model are used to provide a distillation loss for the student model to learn from. -![Knowledge distillation training process. 
Credit: @intellabs2023knowledge.](images/png/modeloptimization_knowledge_distillation.png){#fig-knowledge-distillation} +![Knowledge distillation training process. Source: @intellabs2023knowledge.](images/png/modeloptimization_knowledge_distillation.png){#fig-knowledge-distillation} ##### Challenges @@ -277,7 +285,7 @@ The main advantage of low-rank matrix factorization lies in its ability to reduc @fig-matrix-factorization illustrates the decrease in parameterization enabled by low-rank matrix factorization. Observe how the matrix $M$ can be approximated by the product of matrices $L_k$ and $R_k^T$. For intuition, most fully connected layers in networks are stored as a projection matrix $M$, which requires $m \times n$ parameter to be loaded on computation. However, by decomposing and approximating it as the product of two lower rank matrices, we thus only need to store $m \times k + k\times n$ parameters in terms of storage while incurring an additional compute cost of the matrix multiplication. So long as $k < n/2$, this factorization has fewer parameters total to store while adding a computation of runtime $O(mkn)$ [@gu2023deep]. -![Low matrix factorization. Credit: [The Clever Machine.](https://dustinstansbury.github.io/theclevermachine/svd-data-compression)](images/png/modeloptimization_low_rank_matrix_factorization.png){#fig-matrix-factorization} +![Low matrix factorization. Source: [The Clever Machine.](https://dustinstansbury.github.io/theclevermachine/svd-data-compression)](images/png/modeloptimization_low_rank_matrix_factorization.png){#fig-matrix-factorization} ##### Challenges @@ -297,7 +305,7 @@ Similar to low-rank matrix factorization, more complex models may store weights The work of Tamara G. Kolda and Brett W. Bader, ["Tensor Decompositions and Applications"](https://epubs.siam.org/doi/abs/10.1137/07070111X) (2009), stands out as a seminal paper in the field of tensor decompositions. The authors provide a comprehensive overview of various tensor decomposition methods, exploring their mathematical underpinnings, algorithms, and a wide array of applications, ranging from signal processing to data mining. Of course, the reason we are discussing it is because it has huge potential for system performance improvements, particularly in the space of TinyML, where throughput and memory footprint savings are crucial to feasibility of deployments. -![Tensor decomposition. Credit: @xinyu.](images/png/modeloptimization_tensor_decomposition.png){#fig-tensor-decomposition} +![Tensor decomposition. Source: @xinyu.](images/png/modeloptimization_tensor_decomposition.png){#fig-tensor-decomposition} :::{#exr-mc .callout-caution collapse="true"} @@ -319,7 +327,7 @@ As covered in previous sections, edge devices are constrained specifically with One edge friendly architecture design is depthwise separable convolutions. Commonly used in deep learning for image processing, it consists of two distinct steps: the first is the depthwise convolution, where each input channel is convolved independently with its own set of learnable filters, as show in @fig-depthwise-convolution. This step reduces computational complexity by a significant margin compared to standard convolutions, as it drastically reduces the number of parameters and computations involved. The second step is the pointwise convolution, which combines the output of the depthwise convolution channels through a 1x1 convolution, creating inter-channel interactions. This approach offers several advantages. 
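To ground the two-step structure just described, the short sketch below compares the parameter count of a standard convolution with that of a depthwise separable one. The channel counts and kernel size are illustrative assumptions, not values taken from any specific architecture discussed here.

```python
# Minimal sketch contrasting a standard convolution with a depthwise separable one.
import torch.nn as nn

in_ch, out_ch, k = 64, 128, 3

# Standard convolution: each of the 128 filters spans all 64 input channels.
standard = nn.Conv2d(in_ch, out_ch, kernel_size=k, padding=1)

# Depthwise separable convolution: per-channel k x k filters (groups=in_ch),
# followed by a 1x1 pointwise convolution that mixes information across channels.
depthwise_separable = nn.Sequential(
    nn.Conv2d(in_ch, in_ch, kernel_size=k, padding=1, groups=in_ch),
    nn.Conv2d(in_ch, out_ch, kernel_size=1),
)

def param_count(module):
    return sum(p.numel() for p in module.parameters())

print(param_count(standard))             # 128*64*3*3 + 128 = 73,856 parameters
print(param_count(depthwise_separable))  # 576 + 64 + 8,192 + 128 = 8,960 parameters
```

In this configuration the separable version uses roughly an eighth of the parameters, which is where the size and latency benefits summarized next come from.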
Pros include reduced model size, faster inference times, and often better generalization due to fewer parameters, making it suitable for mobile and embedded applications. However, depthwise separable convolutions may not capture complex spatial interactions as effectively as standard convolutions and might require more depth (layers) to achieve the same level of representational power, potentially leading to longer training times. Nonetheless, their efficiency in terms of parameters and computation makes them a popular choice in modern convolutional neural network architectures. -![Depthwise separable convolutions. Credit: @hegde2023introduction.](images/png/modeloptimization_depthwise_separable_convolution.png){#fig-depthwise-convolution} +![Depthwise separable convolutions. Source: @hegde2023introduction.](images/png/modeloptimization_depthwise_separable_convolution.png){#fig-depthwise-convolution} #### Example Model Architectures @@ -401,7 +409,7 @@ Precision, delineating the exactness with which a number is represented, bifurca **Integer:** Integer representations are made using 8, 4, and 2 bits. They are often used during the inference phase of neural networks, where the weights and activations of the model are quantized to these lower precisions. Integer representations are deterministic and offer significant speed and memory advantages over floating-point representations. For many inference tasks, especially on edge devices, the slight loss in accuracy due to quantization is often acceptable given the efficiency gains. An extreme form of integer numerics is for binary neural networks (BNNs), where weights and activations are constrained to one of two values: either +1 or -1. | **Precision** | **Pros** | **Cons** | -|---------------------------------------|------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------| +|:---------------------------------------|:------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------| | **FP32** (Floating Point 32-bit) | Standard precision used in most deep learning frameworks.
High accuracy due to ample representational capacity.
Well-suited for training. | High memory usage.
Slower inference times compared to quantized models.
Higher energy consumption. | | **FP16** (Floating Point 16-bit) | Reduces memory usage compared to FP32.
Speeds up computations on hardware that supports FP16.
Often used in mixed-precision training to balance speed and accuracy. | Lower representational capacity compared to FP32.
Risk of numerical instability in some models or layers. | | **INT8** (8-bit Integer) | Significantly reduced memory footprint compared to floating-point representations.
Faster inference if hardware supports INT8 computations.
Suitable for many post-training quantization scenarios. | Quantization can lead to some accuracy loss.
Requires careful calibration during quantization to minimize accuracy degradation. | @@ -454,13 +462,13 @@ The numeric representation casts a significant impact on the storage and computa Numerical precision directly impacts computational complexity, influencing the time and resources required to perform arithmetic operations. For example, operations using Float64 generally consume more computational resources than their Float32 or Float16 counterparts (see @fig-quantized-energy). In the realm of ML, where models might need to process millions of operations (e.g., multiplications and additions in matrix operations during forward and backward passes), even minor differences in the computational complexity per operation can aggregate into a substantial impact on training and inference times. As shown in @fig-models-speeds, quantized models can be many times faster than their unquantized versions. -![Energy use by quantized operations. Credit: Mark Horowitz, Stanford University.](images/png/efficientnumerics_horowitz.png){#fig-quantized-energy} +![Energy use by quantized operations. Source: Mark Horowitz, Stanford University.](images/png/efficientnumerics_horowitz.png){#fig-quantized-energy} ![Speed of three different models in normal and quantized form.](images/png/efficientnumerics_int8vsfloat.png){#fig-models-speeds} In addition to pure runtimes, there is also a concern over energy efficiency. Not all numerical computations are created equal from the underlying hardware standpoint. Some numerical operations are more energy efficient than others. For example, @fig-operations-energy-comparison below shows that integer addition is much more energy efficient than integer multiplication. -![Energy use by quantized operations. Credit: @isscc2014computings.](images/png/efficientnumerics_100x.png){#fig-operations-energy-comparison} +![Energy use by quantized operations. Source: @isscc2014computings.](images/png/efficientnumerics_100x.png){#fig-operations-energy-comparison} #### Hardware Compatibility @@ -537,7 +545,7 @@ In the quantized version shown in @fig-quantized-sine-wave, the continuous sine Returning to the context of Machine Learning (ML), quantization refers to the process of constraining the possible values that numerical parameters (such as weights and biases) can take to a discrete set, thereby reducing the precision of the parameters and consequently, the model's memory footprint. When properly implemented, quantization can reduce model size by up to 4x and improve inference latency and throughput by up to 2-3x. @fig-quantized-models-size illustrates the impact that quantization has on different models' sizes: for example, an Image Classification model like ResNet-v2 can be compressed from 180MB down to 45MB with 8-bit quantization. There is typically less than 1% loss in model accuracy from well tuned quantization. Accuracy can often be recovered by re-training the quantized model with quantization-aware training techniques. Therefore, this technique has emerged to be very important in deploying ML models to resource-constrained environments, such as mobile devices, IoT devices, and edge computing platforms, where computational resources (memory and processing power) are limited. -![Effect of quantization on model sizes. Credit: HarvardX.](images/png/efficientnumerics_reducedmodelsize.png){#fig-quantized-models-size} +![Effect of quantization on model sizes. 
Source: HarvardX.](images/png/efficientnumerics_reducedmodelsize.png){#fig-quantized-models-size} There are several dimensions to quantization such as uniformity, stochasticity (or determinism), symmetry, granularity (across layers/channels/groups or even within channels), range calibration considerations (static vs dynamic), and fine-tuning methods (QAT, PTQ, ZSQ). We examine these below. @@ -575,7 +583,7 @@ $$ Furthermore, learnable quantizers can be jointly trained with model parameters, and the quantization steps/levels are generally trained with iterative optimization or gradient descent. Additionally, clustering has been used to alleviate information loss from quantization. While capable of capturing higher levels of detail, non-uniform quantization schemes can be difficult to deploy efficiently on general computation hardware, making it less-preferred to methods which use uniform quantization. -![Quantization uniformity. Credit: @gholami2021survey.](images/png/efficientnumerics_uniformnonuniform.png){#fig-quantization-uniformity} +![Quantization uniformity. Source: @gholami2021survey.](images/png/efficientnumerics_uniformnonuniform.png){#fig-quantization-uniformity} #### Stochastic Quantization @@ -599,7 +607,7 @@ There are many calibration methods but a few commonly used include: * Entropy: Use KL divergence to minimize information loss between the original floating-point values and values that could be represented by the quantized format. This is the default method used by TensorRT. * Percentile: Set the range to a percentile of the distribution of absolute values seen during calibration. For example, 99% calibration would clip 1% of the largest magnitude values. -![Input activations to layer 3 in ResNet50. Credit: @@wu2020integer.](images/png/efficientnumerics_calibrationcopy.png){#fig-resnet-activations-histogram} +![Input activations to layer 3 in ResNet50. Source: @@wu2020integer.](images/png/efficientnumerics_calibrationcopy.png){#fig-resnet-activations-histogram} Importantly, the quality of calibration can make a difference between a quantized model that retains most of its accuracy and one that degrades significantly. Hence, it's an essential step in the quantization process. When choosing a calibration range, there are two types: symmetric and asymmetric. @@ -614,13 +622,13 @@ Symmetric clipping ranges are the most widely adopted in practice as they have t Asymmetric quantization maps real values to an asymmetrical clipping range that isn't necessarily centered around 0, as shown in @fig-quantization-symmetry on the right. It involves choosing a range [$\alpha$, $\beta$] where $\alpha \neq -\beta$. For example, selecting a range based on the minimum and maximum real values, or where $\alpha = r_{min}$ and $\beta = r_{max}$, creates an asymmetric range. Typically, asymmetric quantization produces tighter clipping ranges compared to symmetric quantization, which is important when target weights and activations are imbalanced, e.g., the activation after the ReLU always has non-negative values. Despite producing tighter clipping ranges, asymmetric quantization is less preferred to symmetric quantization as it doesn't always zero out the real value zero. -![Quantization (a)symmetry. Credit: @gholami2021survey.](images/png/efficientnumerics_symmetry.png){#fig-quantization-symmetry} +![Quantization (a)symmetry. 
Source: @gholami2021survey.](images/png/efficientnumerics_symmetry.png){#fig-quantization-symmetry} #### Granularity Upon deciding the type of clipping range, it is essential to tighten the range to allow a model to retain as much of its accuracy as possible. We'll be taking a look at convolutional neural networks as our way of exploring methods that fine tune the granularity of clipping ranges for quantization. The input activation of a layer in our CNN undergoes convolution with multiple convolutional filters. Every convolutional filter can possess a unique range of values. Notice how in @fig-quantization-granularity, the range for Filter1 is much smaller than that for Filter 3. Consequently, one distinguishing feature of quantization approaches is the precision with which the clipping range [α,β] is determined for the weights. -![Quantization granularity: variable ranges. Credit: @gholami2021survey.](images/png/efficientnumerics_granularity.png){#fig-quantization-granularity} +![Quantization granularity: variable ranges. Source: @gholami2021survey.](images/png/efficientnumerics_granularity.png){#fig-quantization-granularity} 1. Layerwise Quantization: This approach determines the clipping range by considering all of the weights in the convolutional filters of a layer. Then, the same clipping range is used for all convolutional filters. It's the simplest to implement, and, as such, it often results in sub-optimal accuracy due the wide variety of differing ranges between filters. For example, a convolutional kernel with a narrower range of parameters loses its quantization resolution due to another kernel in the same layer having a wider range. 2. Groupwise Quantization: This approach groups different channels inside a layer to calculate the clipping range. This method can be helpful when the distribution of parameters across a single convolution/activation varies a lot. In practice, this method was useful in Q-BERT [@sheng2019qbert] for quantizing Transformer [@vaswani2017attention] models that consist of fully-connected attention layers. The downside with this approach comes with the extra cost of accounting for different scaling factors. @@ -648,23 +656,23 @@ The two prevailing techniques for quantizing models are Post Training Quantizati In PTQ, a pretrained model undergoes a calibration process, as shown in @fig-PTQ-diagram. Calibration involves using a separate dataset known as calibration data, a specific subset of the training data reserved for quantization to help find the appropriate clipping ranges and scaling factors. -![Post-Training Quantization and calibration. Credit: @gholami2021survey.](images/png/efficientnumerics_PTQ.png){#fig-PTQ-diagram} +![Post-Training Quantization and calibration. Source: @gholami2021survey.](images/png/efficientnumerics_PTQ.png){#fig-PTQ-diagram} **Quantization-Aware Training** - Quantization-aware training (QAT) is a fine-tuning of the PTQ model. The model is trained aware of quantization, allowing it to adjust for quantization effects. This produces better accuracy with quantized inference. Quantizing a trained neural network model with methods such as PTQ introduces perturbations that can deviate the model from its original convergence point. 
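Before looking at how QAT compensates for this, it helps to see what the calibration step described above actually produces: a scale factor (and, for an asymmetric range, a zero point) for each tensor or channel. The sketch below derives both mappings from a calibrated clipping range and compares their reconstruction error on non-negative (post-ReLU-like) values; it is an illustration in plain NumPy with hypothetical names, not any framework's implementation.

```python
import numpy as np

def symmetric_int8(x):
    # Symmetric range [-beta, beta]: real zero maps exactly to integer zero.
    beta = np.abs(x).max()
    scale = beta / 127.0
    q = np.clip(np.round(x / scale), -127, 127).astype(np.int8)
    return q, scale

def asymmetric_int8(x):
    # Asymmetric range [alpha, beta]: needs a zero point as well as a scale.
    alpha, beta = float(x.min()), float(x.max())
    scale = (beta - alpha) / 255.0
    zero_point = int(round(-alpha / scale))
    q = np.clip(np.round(x / scale) + zero_point, 0, 255).astype(np.uint8)
    return q, scale, zero_point

# Hypothetical activations: all non-negative, as after a ReLU.
activations = np.abs(np.random.randn(1024).astype(np.float32))
q_sym, s_sym = symmetric_int8(activations)
q_asym, s_asym, zp = asymmetric_int8(activations)

# Dequantize and compare the worst-case reconstruction error of each mapping.
err_sym = np.abs(activations - q_sym.astype(np.float32) * s_sym).max()
err_asym = np.abs(activations - (q_asym.astype(np.float32) - zp) * s_asym).max()
print(f"max error, symmetric: {err_sym:.4f}  asymmetric: {err_asym:.4f}")
```

On non-negative data the asymmetric mapping uses all 256 levels and therefore shows a smaller error, matching the ReLU example above. Per-channel granularity simply repeats the same computation along the output-channel axis, and in practice the clipping range comes from calibration data rather than from the weights alone.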
For instance, Krishnamoorthi showed that even with per-channel quantization, networks like MobileNet do not reach baseline accuracy with int8 Post Training Quantization (PTQ) and require Quantization-Aware Training (QAT) [[@krishnamoorthi2018quantizing]](https://arxiv.org/abs/1806.08342).To address this, QAT retrains the model with quantized parameters, employing forward and backward passes in floating point but quantizing parameters after each gradient update. Handling the non-differentiable quantization operator is crucial; a widely used method is the Straight Through Estimator (STE), approximating the rounding operation as an identity function. While other methods and variations exist, STE remains the most commonly used due to its practical effectiveness. In QAT, a pretrained model is quantized and then finetuned using training data to adjust parameters and recover accuracy degradation, as shown in @fig-QAT-diagram. The calibration process is often conducted in parallel with the finetuning process for QAT. -![Quantization-Aware Training. Credit: @gholami2021survey.](images/png/efficientnumerics_QAT.png){#fig-QAT-diagram} +![Quantization-Aware Training. Source: @gholami2021survey.](images/png/efficientnumerics_QAT.png){#fig-QAT-diagram} Quantization-Aware Training serves as a natural extension of Post-Training Quantization. Following the initial quantization performed by PTQ, QAT is used to further refine and fine-tune the quantized parameters - see how in @fig-QAT-PTQ-relation, the PTQ model undergoes an additional step, QAT. It involves a retraining process where the model is exposed to additional training iterations using the original data. This dynamic training approach allows the model to adapt and adjust its parameters, compensating for the performance degradation caused by quantization. -![PTQ and QAT. Credit: @ultimate.](images/png/efficientnumerics_PTQQAT.png){#fig-QAT-PTQ-relation} +![PTQ and QAT. Source: @ultimate.](images/png/efficientnumerics_PTQQAT.png){#fig-QAT-PTQ-relation} @fig-quantization-methods-summary shows the relative accuracy of different models after PTQ and QAT. In almost all cases, QAT yields a better accuracy than PTQ. Consider for example EfficientNet b0. After PTQ, the accuracy drops from 76.85% to 72.06%. But when we apply QAT, the accuracy rebounds to 76.95% (with even a slight improvement over the original accuracy). -![Relative accuracies of PTQ and QAT. Credit: @wu2020integer.](images/png/efficientnumerics_PTQQATsummary.png){#fig-quantization-methods-summary} +![Relative accuracies of PTQ and QAT. 
Source: @wu2020integer.](images/png/efficientnumerics_PTQQATsummary.png){#fig-quantization-methods-summary} -| **Feature/Technique** | **Post Training Quantization** | **Quantization-Aware Training** | **Dynamic Quantization** | -|------------------------------|------------------------------|------------------------------|------------------------------| +| **Aspect** | **Post Training Quantization** | **Quantization-Aware Training** | **Dynamic Quantization** | +|:------------------------------|:------------------------------|:------------------------------|:------------------------------| | **Pros** | | | | | Simplicity | ✓ | ✗ | ✗ | | Accuracy Preservation | ✗ | ✓ | ✓ | @@ -683,7 +691,7 @@ Quantization-Aware Training serves as a natural extension of Post-Training Quant Weight Quantization: Involves converting the continuous or high-precision weights of a model to lower-precision, such as converting Float32 weights to quantized INT8 (integer) weights - in @fig-weight-activations-quantization, weight quantization is taking place in the second step (red squares) when we multiply the inputs. This reduces the model size, thereby reducing the memory required to store the model and the computational resources needed to perform inference. For example, consider a weight matrix in a neural network layer with Float32 weights as [0.215, -1.432, 0.902, ...]. Through weight quantization, these might be mapped to INT8 values like [27, -183, 115, ...], significantly reducing the memory required to store them. -![Weight and activation quantization. Credit: HarvardX.](images/png/efficientnumerics_weightsactivations.png){#fig-weight-activations-quantization} +![Weight and activation quantization. Source: HarvardX.](images/png/efficientnumerics_weightsactivations.png){#fig-weight-activations-quantization} Activation Quantization: Involves quantizing the activation values (outputs of layers) during model inference. This can reduce the computational resources required during inference, but it introduces additional challenges in maintaining model accuracy due to the reduced precision of intermediate computations. For example, in a convolutional neural network (CNN), the activation maps (feature maps) produced by convolutional layers, originally in Float32, might be quantized to INT8 during inference to accelerate computation, especially on hardware optimized for integer arithmetic. Additionally, recent work has explored the use of Activation-aware Weight Quantization for LLM compression and acceleration, which involves protecting only 1% of the most important salient weights by observing the activations not weights [[@lin2023awq]](https://arxiv.org/pdf/2306.00978.pdf). @@ -693,11 +701,11 @@ Quantization invariably introduces a trade-off between model size/performance an Model Size: A model with weights represented as Float32 being quantized to INT8 can theoretically reduce the model size by a factor of 4, enabling it to be deployed on devices with limited memory. The model size of large language models is developing at a faster pace than the GPU memory in recent years, leading to a big gap between the supply and demand for memory. @fig-model-size-pace illustrates the recent trend of the widening gap between model size (red line) and acceleartor memory (yellow line). Quantization and model compression techniques can help bridge the gap -![Model size vs. accelerator memory. Credit: @xiao2022smoothquant.](images/png/efficientnumerics_modelsizes.png){#fig-model-size-pace} +![Model size vs. accelerator memory. 
Source: @xiao2022smoothquant.](images/png/efficientnumerics_modelsizes.png){#fig-model-size-pace} Inference Speed: Quantization can also accelerate inference, as lower-precision arithmetic is computationally less expensive. For example, certain hardware accelerators, like Google's Edge TPU, are optimized for INT8 arithmetic and can perform inference significantly faster with INT8 quantized models compared to their floating-point counterparts. The reduction in memory from quantization helps reduce the amount of data transmission, saving up memory and speeding the process. @fig-nvidia-turing compares the increase in throughput and the reduction in bandwidth memory for different data type on the NVIDIA Turing GPU. -![Benefits of lower precision data types. Credit: @wu2020integer.](images/png/efficientnumerics_benefitsofprecision.png){#fig-nvidia-turing} +![Benefits of lower precision data types. Source: @wu2020integer.](images/png/efficientnumerics_benefitsofprecision.png){#fig-nvidia-turing} Accuracy: The reduction in numerical precision post-quantization can lead to a degradation in model accuracy, which might be acceptable in certain applications (e.g., image classification) but not in others (e.g., medical diagnosis). Therefore, post-quantization, the model typically requires re-calibration or fine-tuning to mitigate accuracy loss. Furthermore, recent work has explored the use of [Activation-aware Weight Quantization [@lin2023awq]](https://arxiv.org/pdf/2306.00978.pdf) which is based on the observation that protecting only 1% of salient weights can greatly reduce quantization error. @@ -705,7 +713,7 @@ Accuracy: The reduction in numerical precision post-quantization can lead to a d Pruning and quantization work well together, and it's been found that pruning doesn't hinder quantization. In fact, pruning can help reduce quantization error. Intuitively, this is due to pruning reducing the number of weights to quantize, thereby reducing the accumulated error from quantization. For example, an unpruned AlexNet has 60 million weights to quantize whereas a pruned AlexNet only has 6.7 million weights to quantize. This significant drop in weights helps reduce the error between quantizing the unpruned AlexNet vs. the pruned AlexNet. Furthermore, recent work has found that quantization-aware pruning generates more computationally efficient models than either pruning or quantization alone; It typically performs similar to or better in terms of computational efficiency compared to other neural architecture search techniques like Bayesian optimization [[@hawks2021psandqs]](https://arxiv.org/pdf/2102.11289.pdf). -![Accuracy vs. compression rate under different compression methods. Credit: @han2015deep.](images/png/efficientnumerics_qp1.png){#fig-compression-methods} +![Accuracy vs. compression rate under different compression methods. Source: @han2015deep.](images/png/efficientnumerics_qp1.png){#fig-compression-methods} ### Edge-aware Quantization @@ -766,7 +774,7 @@ First, TinyNAS generate multiple search spaces by varying the input resolution o Then, TinyNAS performs a search operation on the chosen space to find the optimal architecture for the specific constraints of the microcontroller. [@lin2020mcunet] -![Search spaces accuracy. Credit: @lin2020mcunet.](images/png/modeloptimization_TinyNAS.png){#fig-search-space-flops} +![Search spaces accuracy. 
Source: @lin2020mcunet.](images/png/modeloptimization_TinyNAS.png){#fig-search-space-flops} #### Topology-Aware NAS @@ -806,7 +814,7 @@ This is one example of Algorithm-Hardware Co-design. CiM is a computing paradigm Through algorithm-hardware co-design, the algorithms can be optimized to leverage the unique characteristics of CiM architectures, and conversely, the CiM hardware can be customized or configured to better support the computational requirements and characteristics of the algorithms. This is achieved by using the analog properties of memory cells, such as addition and multiplication in DRAM. [@zhou2021analognets] -![CiM for keyword spotting. Credit: @zhou2021analognets.](images/png/modeloptimization_CiM.png){#fig-computing-memory} +![CiM for keyword spotting. Source: @zhou2021analognets.](images/png/modeloptimization_CiM.png){#fig-computing-memory} ### Memory Access Optimization @@ -820,7 +828,7 @@ Pruning is a fundamental approach to compress models to make them compatible wit Optimization Frameworks have been introduced to exploit the specific capabilities of the hardware to accelerate the software. One example of such a framework is hls4ml - @fig-hls4ml-workflow provides an overview of the framework's workflow. This open-source software-hardware co-design workflow aids in interpreting and translating machine learning algorithms for implementation with both FPGA and ASIC technologies. Features such as network optimization, new Python APIs, quantization-aware pruning, and end-to-end FPGA workflows are embedded into the hls4ml framework, leveraging parallel processing units, memory hierarchies, and specialized instruction sets to optimize models for edge hardware. Moreover, hls4ml is capable of translating machine learning algorithms directly into FPGA firmware. -![hls4ml framework workflow. Credit: @fahim2021hls4ml.](images/png/modeloptimization_hls4ml.png){#fig-hls4ml-workflow} +![hls4ml framework workflow. Source: @fahim2021hls4ml.](images/png/modeloptimization_hls4ml.png){#fig-hls4ml-workflow} One other framework for FPGAs that focuses on a holistic approach is CFU Playground [@prakash2022cfu] @@ -828,14 +836,14 @@ One other framework for FPGAs that focuses on a holistic approach is CFU Playgro In a contrasting approach, hardware can be custom-designed around software requirements to optimize the performance for a specific application. This paradigm creates specialized hardware to better adapt to the specifics of the software, thus reducing computational overhead and improving operational efficiency. One example of this approach is a voice-recognition application by [@kwon2021hardwaresoftware]. The paper proposes a structure wherein preprocessing operations, traditionally handled by software, are allocated to custom-designed hardware. This technique was achieved by introducing resistor-transistor logic to an inter-integrated circuit sound module for windowing and audio raw data acquisition in the voice-recognition application. Consequently, this offloading of preprocessing operations led to a reduction in computational load on the software, showcasing a practical application of building hardware around software to enhance the efficiency and performance. -![Delegating data processing to an FPGA. Credit: @kwon2021hardwaresoftware.](images/png/modeloptimization_preprocessor.png){#fig-fpga-preprocessing} +![Delegating data processing to an FPGA. 
Source: @kwon2021hardwaresoftware.](images/png/modeloptimization_preprocessor.png){#fig-fpga-preprocessing} #### SplitNets SplitNets were introduced in the context of Head-Mounted systems. They distribute the Deep Neural Networks (DNNs) workload among camera sensors and an aggregator. This is particularly compelling the in context of TinyML. The SplitNet framework is a split-aware NAS to find the optimal neural network architecture to achieve good accuracy, split the model among the sensors and the aggregator, and minimize the communication between the sensors and the aggregator. @fig-splitnet-performance demonstrates how SplitNets (in red) achieves higher accuracy for lower latency (running on ImageNet) than different approaches, such as running the DNN on-sensor (All-on-sensor; in green) or on mobile (All-on-aggregator; in blue). Minimal communication is important in TinyML where memory is highly constrained, this way the sensors conduct some of the processing on their chips and then they send only the necessary information to the aggregator. When testing on ImageNet, SplitNets were able to reduce the latency by one order of magnitude on head-mounted devices. This can be helpful when the sensor has its own chip. [@dong2022splitnets] -![SplitNets vs other approaches. Credit: @dong2022splitnets.](images/png/modeloptimization_SplitNets.png){#fig-splitnet-performance} +![SplitNets vs other approaches. Source: @dong2022splitnets.](images/png/modeloptimization_SplitNets.png){#fig-splitnet-performance} #### Hardware Specific Data Augmentation @@ -867,7 +875,7 @@ Automated optimization tools provided by frameworks can analyze models and autom * [Pruning](https://www.tensorflow.org/model_optimization/guide/pruning/pruning_with_keras) - Automatically removes unnecessary connections in a model based on analysis of weight importance. Can prune entire filters in convolutional layers or attention heads in transformers. Handles iterative re-training to recover any accuracy loss. * [GraphOptimizer](https://www.tensorflow.org/guide/graph_optimization) - Applies graph optimizations like operator fusion to consolidate operations and reduce execution latency, especially for inference. In @fig-graph-optimizer, you can see the original (Source Graph) on the left, and how its operations are transformed (consolidated) on the right. Notice how Block1 in Source Graph has 3 separate steps (Convolution, BiasAdd, and Activation), which are then consolidated together in Block1 on Optimized Graph. -![GraphOptimizer. Credit: @annette2020.](./images/png/source_opt.png){#fig-graph-optimizer} +![GraphOptimizer. Source: @annette2020.](./images/png/source_opt.png){#fig-graph-optimizer} These automated modules only require the user to provide the original floating point model, and handle the end-to-end optimization pipeline including any re-training to regain accuracy. Other frameworks like PyTorch also offer increasing automation support, for example through torch.quantization.quantize\_dynamic. Automated optimization makes efficient ML accessible to practitioners without optimization expertise. @@ -897,7 +905,7 @@ Implementing model optimization techniques without visibility into the effects o For example, consider sparsity optimizations. Sparsity visualization tools can provide critical insights into pruned models by mapping out exactly which weights have been removed. For example, sparsity heat maps can use color gradients to indicate the percentage of weights pruned in each layer of a neural network. 
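The quantity behind such a heat map is simply the fraction of exactly-zero weights in each layer. A minimal sketch of how it might be computed (illustrative PyTorch; the model and the zeroing threshold are hypothetical stand-ins for a genuinely pruned network):

```python
import torch
import torch.nn as nn

def layer_sparsity(model: nn.Module):
    # Fraction of exactly-zero weights per layer; pruning is assumed to
    # have zeroed the removed weights in place.
    stats = {}
    for name, module in model.named_modules():
        if isinstance(module, (nn.Conv2d, nn.Linear)):
            w = module.weight.detach()
            stats[name] = (w == 0).float().mean().item()
    return stats

# Hypothetical toy model; the masked assignment crudely stands in for pruning.
model = nn.Sequential(nn.Conv2d(3, 16, 3), nn.ReLU(),
                      nn.Flatten(), nn.Linear(16 * 30 * 30, 10))
with torch.no_grad():
    model[0].weight[model[0].weight.abs() < 0.05] = 0.0

for name, s in layer_sparsity(model).items():
    print(f"{name}: {100 * s:.1f}% pruned")
```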
Layers with higher percentages pruned appear darker (see @fig-sprase-heat-map). This identifies which layers have been simplified the most by pruning ([Souza 2020](https://www.numenta.com/blog/2020/10/30/case-for-sparsity-in-neural-networks-part-2-dynamic-sparsity/)). -![Sparse network heat map. Credit: [Numenta](https://www.numenta.com/blog/2020/10/30/case-for-sparsity-in-neural-networks-part-2-dynamic-sparsity/).](https://www.numenta.com/wp-content/uploads/2020/10/Picture1.png){#fig-sprase-heat-map} +![Sparse network heat map. Source: [Numenta](https://www.numenta.com/blog/2020/10/30/case-for-sparsity-in-neural-networks-part-2-dynamic-sparsity/).](https://www.numenta.com/wp-content/uploads/2020/10/Picture1.png){#fig-sprase-heat-map} Trend plots can also track sparsity over successive pruning rounds - they may show initial rapid pruning followed by more gradual incremental increases. Tracking the current global sparsity along with statistics like average, minimum, and maximum sparsity per-layer in tables or plots provides an overview of the model composition. For a sample convolutional network, these tools could reveal that the first convolution layer is pruned 20% while the final classifier layer is pruned 70% given its redundancy. The global model sparsity may increase from 10% after initial pruning to 40% after five rounds. @@ -909,11 +917,11 @@ Sparsity visualization turns pruning into a transparent technique instead of a b Converting models to lower numeric precisions through quantization introduces errors that can impact model accuracy if not properly tracked and addressed. Visualizing quantization error distributions provides valuable insights into the effects of reduced precision numerics applied to different parts of a model. For this, histograms of the quantization errors for weights and activations can be generated. These histograms can reveal the shape of the error distribution - whether they resemble a Gaussian distribution or contain significant outliers and spikes. @fig-quantization-error shows the distributions of different quantization methods. Large outliers may indicate issues with particular layers handling the quantization. Comparing the histograms across layers highlights any problem areas standing out with abnormally high errors. -![Quantization errors. Credit: @kuzmin2022fp8.](images/png/modeloptimization_quant_hist.png){#fig-quantization-error} +![Quantization errors. Source: @kuzmin2022fp8.](images/png/modeloptimization_quant_hist.png){#fig-quantization-error} Activation visualizations are also important to detect overflow issues. By color mapping the activations before and after quantization, any values pushed outside the intended ranges become visible. This reveals saturation and truncation issues that could skew the information flowing through the model. Detecting these errors allows recalibrating activations to prevent loss of information ([Mandal 2022](https://medium.com/exemplifyml-ai/visualizing-neural-network-activation-a27caa451ff)). @fig-color-mapping is a color mapping of the AlexNet convolutional kernels. -![Color mapping of activations. Credit: @alexnet2012.](https://compsci697l.github.io/assets/cnnvis/filt1.jpeg){#fig-color-mapping} +![Color mapping of activations. Source: @alexnet2012.](https://compsci697l.github.io/assets/cnnvis/filt1.jpeg){#fig-color-mapping} Other techniques, such as tracking the overall mean square quantization error at each step of the quantization-aware training process identifies fluctuations and divergences. 
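The metric tracked here is straightforward to compute: quantize each weight tensor, dequantize it, and summarize the elementwise error; histogramming those errors yields distributions of the kind shown above. A minimal sketch (illustrative NumPy; the layer names and shapes are hypothetical):

```python
import numpy as np

def int8_roundtrip(w):
    # Symmetric int8 quantize -> dequantize, returning the reconstruction.
    scale = np.abs(w).max() / 127.0
    q = np.clip(np.round(w / scale), -127, 127)
    return q * scale

# Hypothetical per-layer weight tensors
layers = {"conv1": np.random.randn(3, 3, 16), "fc": np.random.randn(256, 10)}

for name, w in layers.items():
    err = w - int8_roundtrip(w)
    print(f"{name}: MSE={np.mean(err**2):.2e}, max |err|={np.abs(err).max():.2e}")
    hist, edges = np.histogram(err, bins=50)   # data for a per-layer error histogram
```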
Sudden spikes in the tracking plot may indicate points where quantization is disrupting the model training. Monitoring this metric builds intuition on model behavior under quantization. Together these techniques turn quantization into a transparent process. The empirical insights enable practitioners to properly assess quantization effects. They pinpoint areas of the model architecture or training process to recalibrate based on observed quantization issues. This helps achieve numerically stable and accurate quantized models. @@ -929,7 +937,7 @@ TensorFlow Lite - TensorFlow's platform to convert models to a lightweight forma ONNX Runtime - Performs model conversion and inference for models in the open ONNX model format. Provides optimized kernels, supports hardware accelerators like GPUs, and cross-platform deployment from cloud to edge. Allows framework-agnostic deployment. @fig-interop is an ONNX interoperability map, including major popular frameworks. -![Interoperablily of ONNX. Credit: [TowardsDataScience](https://towardsdatascience.com/onnx-preventing-framework-lock-in-9a798fb34c92).](https://miro.medium.com/v2/resize:fit:1400/1*3N6uPaLNEYDjtWBW1vdNoQ.jpeg){#fig-interop} +![Interoperablily of ONNX. Source: [TowardsDataScience](https://towardsdatascience.com/onnx-preventing-framework-lock-in-9a798fb34c92).](https://miro.medium.com/v2/resize:fit:1400/1*3N6uPaLNEYDjtWBW1vdNoQ.jpeg){#fig-interop} PyTorch Mobile - Enables PyTorch models to be run on iOS and Android by converting to mobile-optimized representations. Provides efficient mobile implementations of ops like convolution and special functions optimized for mobile hardware. @@ -1009,3 +1017,4 @@ In addition to exercises, we also offer a series of hands-on labs that allow stu * *Coming soon.* ::: + diff --git a/contents/privacy_security/privacy_security.qmd b/contents/privacy_security/privacy_security.qmd index eb079e2f3..b85b7b4e4 100644 --- a/contents/privacy_security/privacy_security.qmd +++ b/contents/privacy_security/privacy_security.qmd @@ -88,7 +88,7 @@ The Jeep Cherokee hack was a groundbreaking event demonstrating the risks inhere # Jeep Cherokee Hack -{{< video }}> +{{< video https://www.youtube.com/watch?v=MK0SrxBC1xs&ab_channel=WIRED >}} ::: @@ -100,9 +100,9 @@ The Mirai botnet involved the infection of networked devices such as digital cam :::{#vid-mirai .callout-important} -# Jeep Cherokee Hack +# Mirai Botnet -{{< video >}} +{{< video https://www.youtube.com/watch?v=1pywzRTJDaY >}} ::: @@ -235,7 +235,7 @@ On the other hand, this tool can be used maliciously and can affect legitimate a @fig-poisoning demonstrates the effects of different levels of data poisoning (50 samples, 100 samples, and 300 samples of poisoned images) on generating images in different categories. Notice how the images start deforming and deviating from the desired category. For example, after 300 poison samples, a car prompt generates a cow. -![Data poisoning. Credit: @shan2023prompt.](images/png/image14.png){#fig-poisoning} +![Data poisoning. 
Source: @shan2023prompt.](images/png/image14.png){#fig-poisoning} ### Adversarial Attacks @@ -275,15 +275,23 @@ Advancing to side-channel attacks next will show the increasing complexity, as t @tbl-threat_types overview table summarizing the topics: -| Threat Type | Description | Relevance to Embedded ML Hardware Security | -| ----------------------- | -------------------------------------------------------------------------------------------------- | ------------------------------------------------ | -| Hardware Bugs | Intrinsic flaws in hardware designs that can compromise system integrity. | Foundation of hardware vulnerability. | -| Physical Attacks | Direct exploitation of hardware through physical access or manipulation. | Basic and overt threat model. | -| Fault-injection Attacks | Induction of faults to cause errors in hardware operation, leading to potential system compromise. | Systematic manipulation leading to failure. | -| Side-Channel Attacks | Exploitation of leaked information from hardware operation to extract sensitive data. | Indirect attack via environmental observation. | -| Leaky Interfaces | Vulnerabilities arising from interfaces that expose data unintentionally. | Data exposure through communication channels. | -| Counterfeit Hardware | Use of unauthorized hardware components that may have security flaws. | Compounded vulnerability issues. | -| Supply Chain Risks | Risks introduced through the hardware lifecycle, from production to deployment. | Cumulative and multifaceted security challenges. | ++-------------------------+--------------------------------------------------------------------------------------------------+------------------------------------------------+ +| Threat Type | Description | Relevance to Embedded ML Hardware Security | ++:========================+:=================================================================================================+:===============================================+ +| Hardware Bugs | Intrinsic flaws in hardware designs that can compromise system integrity. | Foundation of hardware vulnerability. | ++-------------------------+--------------------------------------------------------------------------------------------------+------------------------------------------------+ +| Physical Attacks | Direct exploitation of hardware through physical access or manipulation. | Basic and overt threat model. | ++-------------------------+--------------------------------------------------------------------------------------------------+------------------------------------------------+ +| Fault-injection Attacks | Induction of faults to cause errors in hardware operation, leading to potential system crashes. | Systematic manipulation leading to failure. | ++-------------------------+--------------------------------------------------------------------------------------------------+------------------------------------------------+ +| Side-Channel Attacks | Exploitation of leaked information from hardware operation to extract sensitive data. | Indirect attack via environmental observation. | ++-------------------------+--------------------------------------------------------------------------------------------------+------------------------------------------------+ +| Leaky Interfaces | Vulnerabilities arising from interfaces that expose data unintentionally. | Data exposure through communication channels. 
| ++-------------------------+--------------------------------------------------------------------------------------------------+------------------------------------------------+ +| Counterfeit Hardware | Use of unauthorized hardware components that may have security flaws. | Compounded vulnerability issues. | ++-------------------------+--------------------------------------------------------------------------------------------------+------------------------------------------------+ +| Supply Chain Risks | Risks introduced through the hardware lifecycle, from production to deployment. | Cumulative & multifaceted security challenges. | ++-------------------------+--------------------------------------------------------------------------------------------------+------------------------------------------------+ : Threat types on hardware security. {#tbl-threat_types} @@ -331,7 +339,7 @@ For ML systems, consequences include impaired model accuracy, denial of service, For example, in [@breier2018deeplaser], the authors successfully injected a fault attack into a deep neural network deployed on a microcontroller. They used a laser to heat specific transistors, forcing them to switch states. In one instance, they used this method to attack a ReLU activation function, resulting in the function always outputting a value of 0, regardless of the input. In the assembly code in @fig-injection, the attack caused the executing program to always skip the `jmp` end instruction on line 6. This means that `HiddenLayerOutput[i]` is always set to 0, overwriting any values written to it on lines 4 and 5. As a result, the targeted neurons are rendered inactive, resulting in misclassifications. -![Fault-injection demonstrated with assembly code. Credit: @breier2018deeplaser.](images/png/image3.png){#fig-injection} +![Fault-injection demonstrated with assembly code. Source: @breier2018deeplaser.](images/png/image3.png){#fig-injection} An attacker's strategy could be to infer information about the activation functions using side-channel attacks (discussed next). Then, the attacker could attempt to target multiple activation function computations by randomly injecting faults into the layers as close to the output layer as possible, increasing the likelihood and impact of the attack. @@ -359,15 +367,15 @@ Below is a simplified visualization of how analyzing the power consumption patte First, consider the power analysis of the device's operations after entering a correct password in the first picture in @fig-encryption. The dense blue graph outputs the encryption device's voltage measurement. What matters here is the comparison between the different analysis charts rather than the specific details of what is going on in each scenario. -![Power analysis of an encryption device with a correct password. Credit: [Colin O'Flynn.](https://www.youtube.com/watch?v=2iDLfuEBcs8)](images/png/image5.png){#fig-encryption} +![Power analysis of an encryption device with a correct password. Source: [Colin O'Flynn.](https://www.youtube.com/watch?v=2iDLfuEBcs8)](images/png/image5.png){#fig-encryption} Let's look at the power analysis chart when we enter an incorrect password in @fig-encryption2. The first three bytes of the password are correct. As a result, we can see that the voltage patterns are very similar or identical between the two charts, up to and including the fourth byte. After the device processes the fourth byte, it determines a mismatch between the secret key and the attempted input. 
We notice a change in the pattern at the transition point between the fourth and fifth bytes: the voltage has gone up (the current has gone down) because the device has stopped processing the rest of the input. -![Power analysis of an encryption device with a (partially) wrong password. Credit: [Colin O'Flynn.](https://www.youtube.com/watch?v=2iDLfuEBcs8)](images/png/image16.png){#fig-encryption2} +![Power analysis of an encryption device with a (partially) wrong password. Source: [Colin O'Flynn.](https://www.youtube.com/watch?v=2iDLfuEBcs8)](images/png/image16.png){#fig-encryption2} @fig-encryption3 describes another chart of a completely wrong password. After the device finishes processing the first byte, it determines that it is incorrect and stops further processing - the voltage goes up and the current down. -![Power analysis of an encryption device with a wrong password. Credit: [Colin O'Flynn.](https://www.youtube.com/watch?v=2iDLfuEBcs8)](images/png/image15.png){#fig-encryption3} +![Power analysis of an encryption device with a wrong password. Source: [Colin O'Flynn.](https://www.youtube.com/watch?v=2iDLfuEBcs8)](images/png/image15.png){#fig-encryption3} The example above shows how we can infer information about the encryption process and the secret key by analyzing different inputs and trying to 'eavesdrop' on the device's operations on each input byte. For a more detailed explanation, watch @vid-powerattack below. @@ -375,7 +383,7 @@ The example above shows how we can infer information about the encryption proces # Power Attack -{{< video >}} +{{< video https://www.youtube.com/watch?v=2iDLfuEBcs8 >}} ::: @@ -505,7 +513,7 @@ Here are some examples of TEEs that provide hardware-based security for sensitiv @fig-enclave is a diagram demonstrating a secure enclave isolated from the main processor to provide an extra layer of security. The secure enclave has a boot ROM to establish a hardware root of trust, an AES engine for efficient and secure cryptographic operations, and protected memory. It also has a mechanism to store information securely on attached storage separate from the NAND flash storage used by the application processor and operating system. This design keeps sensitive user data secure even when the Application Processor kernel becomes compromised. -![System-on-chip secure enclave. Credit: [Apple.](https://support.apple.com/guide/security/secure-enclave-sec59b0b31ff/web)](images/png/image1.png){#fig-enclave} +![System-on-chip secure enclave. Source: [Apple.](https://support.apple.com/guide/security/secure-enclave-sec59b0b31ff/web)](images/png/image1.png){#fig-enclave} #### Tradeoffs @@ -551,7 +559,7 @@ Secure Boot helps protect embedded ML hardware in several ways: TEEs benefit from Secure Boot in multiple ways. @fig-secure-boot illustrates a flow diagram of a trusted embedded system. For instance, during initial validation, Secure Boot ensures that the code running inside the TEE is the correct and untampered version approved by the device manufacturer. It can ensure resilience against tampering by verifying the digital signatures of the firmware and other critical components; Secure Boot prevents unauthorized modifications that could undermine the TEE's security properties. Secure Boot establishes a foundation of trust upon which the TEE can securely operate, enabling secure operations such as cryptographic key management, secure processing, and sensitive data handling. -![Secure Boot flow. 
Credit: @Rashmi2018Secure.](images/png/image4.png){#fig-secure-boot} +![Secure Boot flow. Source: @Rashmi2018Secure.](images/png/image4.png){#fig-secure-boot} #### Case Study: Apple's Face ID @@ -631,7 +639,7 @@ HSMs involve several tradeoffs for embedded ML. These tradeoffs are similar to T #### About -Physical Unclonable Functions (PUFs) provide a hardware-intrinsic means for cryptographic key generation and device authentication by harnessing the inherent manufacturing variability in semiconductor components. During fabrication, random physical factors such as doping variations, line edge roughness, and dielectric thickness result in microscale differences between semiconductors, even when produced from the same masks. These create detectable timing and power variances that act as a \"fingerprint" unique to each chip. PUFs exploit this phenomenon by incorporating integrated circuits to amplify minute timing or power differences into measurable digital outputs. +Physical Unclonable Functions (PUFs) provide a hardware-intrinsic means for cryptographic key generation and device authentication by harnessing the inherent manufacturing variability in semiconductor components. During fabrication, random physical factors such as doping variations, line edge roughness, and dielectric thickness result in microscale differences between semiconductors, even when produced from the same masks. These create detectable timing and power variances that act as a "fingerprint" unique to each chip. PUFs exploit this phenomenon by incorporating integrated circuits to amplify minute timing or power differences into measurable digital outputs. When stimulated with an input challenge, the PUF circuit produces an output response based on the device's intrinsic physical characteristics. Due to their physical uniqueness, the same challenge will yield a different response on other devices. This challenge-response mechanism can be used to generate keys securely and identifiers tied to the specific hardware, perform device authentication, or securely store secrets. For example, a key derived from a PUF will only work on that device and cannot be cloned or extracted even with physical access or full reverse engineering [@Gao2020Physical]. @@ -651,11 +659,11 @@ PUFs enable all this security through their challenge-response behavior's inhere #### Mechanics -The working principle behind PUFs, shown in @fig-pfu, involves generating a \"challenge-response" pair, where a specific input (the challenge) to the PUF circuit results in an output (the response) that is determined by the unique physical properties of that circuit. This process can be likened to a fingerprinting mechanism for electronic devices. Devices that utilize ML for processing sensor data can employ PUFs to secure communication between devices and prevent the execution of ML models on counterfeit hardware. +The working principle behind PUFs, shown in @fig-pfu, involves generating a "challenge-response" pair, where a specific input (the challenge) to the PUF circuit results in an output (the response) that is determined by the unique physical properties of that circuit. This process can be likened to a fingerprinting mechanism for electronic devices. Devices that utilize ML for processing sensor data can employ PUFs to secure communication between devices and prevent the execution of ML models on counterfeit hardware. 
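The challenge-response flow can be sketched as a simple enrollment-then-verification protocol. The code below only *simulates* a PUF: a per-device random seed stands in for manufacturing variation, and a few response bits are flipped to mimic measurement noise. It is a toy illustration with hypothetical names, not a description of real PUF circuitry.

```python
import hashlib
import random

class SimulatedPUF:
    """Toy stand-in for a silicon PUF: each 'device' has hidden randomness
    (here a seed) that deterministically maps challenges to responses."""
    def __init__(self, device_seed: int):
        self._seed = device_seed          # models intrinsic process variation

    def respond(self, challenge: bytes, noise_bits: int = 0) -> int:
        digest = hashlib.sha256(self._seed.to_bytes(8, "big") + challenge).digest()
        response = int.from_bytes(digest[:8], "big")
        # Real PUF responses are slightly noisy; optionally flip a few bits.
        for _ in range(noise_bits):
            response ^= 1 << random.randrange(64)
        return response

def enroll(puf, challenges):
    # The verifier records challenge-response pairs (CRPs) at manufacturing time.
    return {c: puf.respond(c) for c in challenges}

def authenticate(puf, crp_table, challenge, max_bit_errors=4):
    expected = crp_table[challenge]
    observed = puf.respond(challenge, noise_bits=2)
    # Accept if the Hamming distance is within the noise tolerance.
    return bin(expected ^ observed).count("1") <= max_bit_errors

genuine = SimulatedPUF(device_seed=0xC0FFEE)
clone = SimulatedPUF(device_seed=0xDEADBEEF)   # different silicon, different fingerprint
challenges = [bytes([i]) for i in range(16)]
table = enroll(genuine, challenges)
print("genuine device accepted:", authenticate(genuine, table, challenges[3]))
print("cloned device accepted: ", authenticate(clone, table, challenges[3]))
```

Because the cloned device has different intrinsic randomness, its responses differ in roughly half of their bits and verification fails, while the genuine device's slightly noisy responses still fall within the Hamming-distance tolerance.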
@fig-pfu illustrates an overview of the PUF basics: a) PUF can be thought of as a unique fingerprint for each piece of hardware; b) an Optical PUF is a special plastic token that is illuminated, creating a unique speckle pattern that is then recorded; c) in an APUF (Arbiter PUF), challenge bits select different paths, and a judge decides which one is faster, giving a response of '1' or '0'; d) in an SRAM PUF, the response is determined by the mismatch in the threshold voltage of transistors, where certain conditions lead to a preferred response of '1'. Each of these methods uses specific characteristics of the hardware to create a unique identifier. -![PUF basics. Credit: @Gao2020Physical.](images/png/image2.png){#fig-pfu} +![PUF basics. Source: @Gao2020Physical.](images/png/image2.png){#fig-pfu} #### Challenges @@ -759,11 +767,11 @@ Privacy and security concerns have also risen with the public use of generative While ChatGPT has instituted protections to prevent people from accessing private and ethically questionable information, several individuals have successfully bypassed these protections through prompt injection and other security attacks. As demonstrated in @fig-role-play, users can bypass ChatGPT protections to mimic the tone of a "deceased grandmother" to learn how to bypass a web application firewall [@Gupta2023ChatGPT]. -![Grandma role play to bypass safety restrictions. Credit: @Gupta2023ChatGPT.](images/png/image6.png){#fig-role-play} +![Grandma role play to bypass safety restrictions. Source: @Gupta2023ChatGPT.](images/png/image6.png){#fig-role-play} Further, users have also successfully used reverse psychology to manipulate ChatGPT and access information initially prohibited by the model. In @fig-role-play2, a user is initially prevented from learning about piracy websites through ChatGPT but can bypass these restrictions using reverse psychology. -![Reverse psychology to bypass safety restrictions. Credit: @Gupta2023ChatGPT.](images/png/image10.png){#fig-role-play2} +![Reverse psychology to bypass safety restrictions. Source: @Gupta2023ChatGPT.](images/png/image10.png){#fig-role-play2} The ease at which security attacks can manipulate ChatGPT is concerning, given the private information it was trained upon without consent. Further research on data privacy in LLMs and generative AI should focus on preventing the model from being so naive to prompt injection attacks. @@ -797,7 +805,7 @@ While the Laplace distribution is common, other distributions like Gaussian can To illustrate the tradeoff of Privacy and accuracy in ($\epsilon$, $\delta$)-differential Privacy, the following graphs in @fig-tradeoffs show the results on accuracy for different noise levels on the MNIST dataset, a large dataset of handwritten digits [@abadi2016deep]. The delta value (black line; right y-axis) denotes the level of privacy relaxation (a high value means Privacy is less stringent). As Privacy becomes more relaxed, the accuracy of the model increases. -![Privacy-accuracy tradeoff. Credit: @abadi2016deep.](images/png/image8.png){#fig-tradeoffs} +![Privacy-accuracy tradeoff. Source: @abadi2016deep.](images/png/image8.png){#fig-tradeoffs} The key points to remember about differential Privacy are the following: @@ -857,7 +865,7 @@ Federated Learning (FL) is a type of machine learning in which a model is built FL aims to train machine learning models across decentralized networks of devices or systems while keeping all training data localized. 
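At the center of this process is an aggregation step that combines the clients' locally computed parameters, most commonly by weighted averaging (federated averaging, discussed below). A minimal sketch of that step only (illustrative NumPy; the client updates and names are hypothetical):

```python
import numpy as np

def federated_average(client_weights, client_sizes):
    """Weighted average of client model parameters (the FedAvg aggregation
    step). Each client's update is weighted by its local dataset size."""
    total = sum(client_sizes)
    num_layers = len(client_weights[0])
    return [
        sum(w[i] * (n / total) for w, n in zip(client_weights, client_sizes))
        for i in range(num_layers)
    ]

# Hypothetical updates from three clients, each a list of layer tensors
rng = np.random.default_rng(0)
clients = [[rng.normal(size=(4, 4)), rng.normal(size=4)] for _ in range(3)]
sizes = [1200, 300, 800]                     # local dataset sizes
global_model = federated_average(clients, sizes)
print(global_model[0].shape, global_model[1].shape)   # (4, 4) (4,)
```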
@fig-fl-lifecycle illustrates this process: each participating device leverages its local data to calculate model updates, which are then aggregated to build an improved global model. However, the raw training data is never directly shared, transferred, or compiled. This privacy-preserving approach allows for the joint development of ML models without centralizing the potentially sensitive training data in one place. -![Federated Learning lifecycle. Credit: @jin2020towards.](images/png/image7.png){#fig-fl-lifecycle} +![Federated Learning lifecycle. Source: @jin2020towards.](images/png/image7.png){#fig-fl-lifecycle} One of the most common model aggregation algorithms is Federated Averaging (FedAvg), where the global model is created by averaging all of the parameters from local parameters. While FedAvg works well with independent and identically distributed data (IID), alternate algorithms like Federated Proximal (FedProx) are crucial in real-world applications where data is often non-IID. FedProx is designed for the FL process when there is significant heterogeneity in the client updates due to diverse data distributions across devices, computational capabilities, or varied amounts of data. @@ -917,7 +925,7 @@ Machine unlearning is a fairly new process that describes how the influence of a Some researchers demonstrate a real-life example of machine unlearning approaches applied to SOTA machine learning models through training an LLM, LLaMA2-7b, to unlearn any references to Harry Potter [@eldan2023whos]. Though this model took 184K GPU hours to pre-train, it only took 1 GPU hour of fine-tuning to erase the model's ability to generate or recall Harry Potter-related content without noticeably compromising the accuracy of generating content unrelated to Harry Potter. @fig-hp-prompts demonstrates how the model output changes before (Llama-7b-chat-hf column) and after (Finetuned Llama-b column) unlearning has occurred. -![Llama unlearning Harry Potter. Credit: @eldan2023whos.](images/png/image13.png){#fig-hp-prompts} +![Llama unlearning Harry Potter. Source: @eldan2023whos.](images/png/image13.png){#fig-hp-prompts} #### Other Uses @@ -1051,7 +1059,7 @@ Researchers can freely share this synthetic data and collaborate on modeling wit * **Generative Adversarial Networks (GANs):** GANs are an AI algorithm used in unsupervised learning where two neural networks compete against each other in a game. @fig-gans is an overview of the GAN system. The generator network (big red box) is responsible for producing the synthetic data, and the discriminator network (yellow box) evaluates the authenticity of the data by distinguishing between fake data created by the generator network and the real data. The generator and discriminator networks learn and update their parameters based on the results. The discriminator acts as a metric on how similar the fake and real data are to one another. It is highly effective at generating realistic data and is a popular approach for generating synthetic data. -![Flowchart of GANs. Credit: @rosa2021.](images/png/image9.png){#fig-gans} +![Flowchart of GANs. Source: @rosa2021.](images/png/image9.png){#fig-gans} * **Variational Autoencoders (VAEs):** VAEs are neural networks capable of learning complex probability distributions and balancing data generation quality and computational efficiency. They encode data into a latent space where they learn the distribution to decode the data back. 
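The adversarial game that drives a GAN can be sketched as a minimal training loop: the discriminator is trained to separate real from generated samples, and the generator is trained to fool it. The snippet below is only an illustration on toy two-dimensional data (PyTorch; all dimensions and names are hypothetical), not a production GAN.

```python
import torch
import torch.nn as nn

latent_dim, data_dim, batch = 16, 2, 64
generator = nn.Sequential(nn.Linear(latent_dim, 32), nn.ReLU(), nn.Linear(32, data_dim))
discriminator = nn.Sequential(nn.Linear(data_dim, 32), nn.ReLU(), nn.Linear(32, 1))
g_opt = torch.optim.Adam(generator.parameters(), lr=1e-3)
d_opt = torch.optim.Adam(discriminator.parameters(), lr=1e-3)
bce = nn.BCEWithLogitsLoss()

for step in range(200):
    real = torch.randn(batch, data_dim) * 0.5 + 2.0      # toy "real" distribution
    fake = generator(torch.randn(batch, latent_dim))

    # Discriminator step: push real samples toward label 1, fakes toward 0.
    d_loss = (bce(discriminator(real), torch.ones(batch, 1))
              + bce(discriminator(fake.detach()), torch.zeros(batch, 1)))
    d_opt.zero_grad()
    d_loss.backward()
    d_opt.step()

    # Generator step: try to make the discriminator label fakes as real.
    g_loss = bce(discriminator(fake), torch.ones(batch, 1))
    g_opt.zero_grad()
    g_loss.backward()
    g_opt.step()

print("generated sample mean:", generator(torch.randn(1000, latent_dim)).mean().item())
```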
@@ -1150,3 +1158,4 @@ In addition to exercises, we offer a series of hands-on labs allowing students t * _Coming soon._ ::: + diff --git a/contents/responsible_ai/responsible_ai.qmd b/contents/responsible_ai/responsible_ai.qmd index 931d730be..586d6fd8b 100644 --- a/contents/responsible_ai/responsible_ai.qmd +++ b/contents/responsible_ai/responsible_ai.qmd @@ -90,7 +90,7 @@ Adversarial attacks that subtly alter input data can also fool ML models and cau # Fake Obama -{{< video >}} +{{< video https://www.youtube.com/watch?v=AmUC4m6w1wo&ab_channel=BBCNews >}} ::: @@ -231,7 +231,7 @@ Recent incidents have demonstrated how AI models can memorize sensitive user dat Imagine if a smart speaker uses our conversations to improve the quality of service to end users who genuinely want it. Still, others could violate privacy by trying to extract what the speaker "remembers." @fig-diffusion-model-example below shows how diffusion models can memorize and generate individual training examples [@carlini2023extracting]. -![Diffusion models memorizing samples from training data. Credit: @carlini2023extracting.](images/png/diffusion_memorization.png){#fig-diffusion-model-example} +![Diffusion models memorizing samples from training data. Source: @carlini2023extracting.](images/png/diffusion_memorization.png){#fig-diffusion-model-example} Adversaries can use these memorization capabilities and train models to detect if specific training data influenced a target model. For example, membership inference attacks train a secondary model that learns to detect a change in the target model's outputs when making inferences over data it was trained on versus not trained on [@shokri2017membership]. @@ -261,7 +261,7 @@ Deep neural networks demonstrate an almost paradoxical dual nature - human-like @fig-adversarial-example includes an example of a small meaningless perturbation that changes a model prediction. This fragility has real-world impacts: lack of robustness undermines trust in deploying models for high-stakes applications like self-driving cars or medical diagnosis. Moreover, the vulnerability leads to security threats: attackers can deliberately craft adversarial examples that are perceptually indistinguishable from normal data but cause model failures. -![Perturbation effect on prediction. Credit: [Microsoft.](https://www.microsoft.com/en-us/research/blog/adversarial-robustness-as-a-prior-for-better-transfer-learning/)](images/png/adversarial_robustness.png){#fig-adversarial-example} +![Perturbation effect on prediction. Source: [Microsoft.](https://www.microsoft.com/en-us/research/blog/adversarial-robustness-as-a-prior-for-better-transfer-learning/)](images/png/adversarial_robustness.png){#fig-adversarial-example} For instance, past work shows successful attacks that trick models for tasks like NSFW detection [@bhagoji2018practical], ad-blocking [@tramer2019adversarial], and speech recognition [@carlini2016hidden]. While errors in these domains already pose security risks, the problem extends beyond IT security. Recently, adversarial robustness has been proposed as an additional performance metric by approximating worst-case behavior. @@ -331,7 +331,7 @@ To ensure that models keep up to date with such changes in the real world, devel While innovation and regulation are often seen as having competing interests, many countries have found it necessary to provide oversight as AI systems expand into more sectors. 
As illustrated in @fig-human-centered-ai, this oversight has become crucial as these systems continue permeating various industries and impacting people's lives (see [Human-Centered AI, Chapter 8 "Government Interventions and Regulations"](https://academic-oup-com.ezp-prod1.hul.harvard.edu/book/41126/chapter/350465542). -![How various groups impact human-centered AI. Credit: @schneiderman2020.](images/png/human_centered_ai.png){#fig-human-centered-ai} +![How various groups impact human-centered AI. Source: @schneiderman2020.](images/png/human_centered_ai.png){#fig-human-centered-ai} Among these are: @@ -502,3 +502,5 @@ In addition to exercises, we offer a series of hands-on labs allowing students t * _Coming soon._ ::: + + diff --git a/contents/robust_ai/robust_ai.qmd b/contents/robust_ai/robust_ai.qmd index 7a54cf5cd..6dda29223 100644 --- a/contents/robust_ai/robust_ai.qmd +++ b/contents/robust_ai/robust_ai.qmd @@ -68,7 +68,7 @@ In February 2017, Amazon Web Services (AWS) experienced [a significant outage](h In another example [@dixit2021silent], Facebook encountered a silent data corruption (SDC) issue within its distributed querying infrastructure, as shown in [@fig-sdc-example](#8owvod923jax). Facebook's infrastructure includes a querying system that fetches and executes SQL and SQL-like queries across multiple datasets using frameworks like Presto, Hive, and Spark. One of the applications that utilized this querying infrastructure was a compression application to reduce the footprint of data stores. In this compression application, files were compressed when not being read and decompressed when a read request was made. Before decompression, the file size was checked to ensure it was greater than zero, indicating a valid compressed file with contents. -![Silent data corruption in database applications (Credit: [Facebook](https://arxiv.org/pdf/2102.11245))](./images/png/sdc_example.png){#fig-sdc-example} +![Silent data corruption in database applications. Source: [Facebook](https://arxiv.org/pdf/2102.11245)](./images/png/sdc_example.png){#fig-sdc-example} However, in one instance, when the file size was being computed for a valid non-zero-sized file, the decompression algorithm invoked a power function from the Scala library. Unexpectedly, the Scala function returned a zero size value for the file despite having a known non-zero decompressed size. As a result, the decompression was not performed, and the file was not written to the output database. This issue manifested sporadically, with some occurrences of the same file size computation returning the correct non-zero value. @@ -76,7 +76,7 @@ The impact of this silent data corruption was significant, leading to missing fi This case study illustrates how silent data corruption can propagate through multiple layers of an application stack, leading to data loss and application failures in a large-scale distributed system. The intermittent nature of the issue and the lack of explicit error messages made it particularly challenging to diagnose and resolve. But this is not restricted to just Meta, even other companies such as Google that operate AI hypercomputers face this challenge. @fig-sdc-jeffdean [Jeff Dean](https://en.wikipedia.org/wiki/Jeff_Dean), Chief Scientist at Google DeepMind and Google Research, discusses SDCS and their impact on ML systems. -![Silent data corruption (SDC) errors are a major issue for AI hypercomputers. 
(Credit: [Jeff Dean](https://en.wikipedia.org/wiki/Jeff_Dean) at [MLSys 2024](https://mlsys.org/), Keynote (Google))](./images/jpg/sdc-google-jeff-dean.jpeg){#fig-sdc-jeffdean} +![Silent data corruption (SDC) errors are a major issue for AI hypercomputers. Source: [Jeff Dean](https://en.wikipedia.org/wiki/Jeff_Dean) at [MLSys 2024](https://mlsys.org/), Keynote (Google)](./images/jpg/sdc-google-jeff-dean.jpeg){#fig-sdc-jeffdean} ### Edge @@ -84,7 +84,7 @@ Regarding examples of faults and errors in edge ML systems, one area that has ga In May 2016, a fatal accident occurred when a Tesla Model S operating on Autopilot crashed into a white semi-trailer truck crossing the highway. The Autopilot system, which relied on computer vision and machine learning algorithms, failed to recognize the white trailer against a bright sky background. The driver, who was reportedly watching a movie when the crash, did not intervene in time, and the vehicle collided with the trailer at full speed. This incident raised concerns about the limitations of AI-based perception systems and the need for robust failsafe mechanisms in autonomous vehicles. It also highlighted the importance of driver awareness and the need for clear guidelines on using semi-autonomous driving features, as shown in [@fig-tesla-example](#tckwqf2ctxw). -![Tesla in the fatal California crash was on Autopilot (Credit: [BBC News](https://www.bbc.com/news/world-us-canada-43604440))](./images/jpg/tesla_example.jpg){#fig-tesla-example} +![Tesla in the fatal California crash was on Autopilot. Source: [BBC News](https://www.bbc.com/news/world-us-canada-43604440)](./images/jpg/tesla_example.jpg){#fig-tesla-example} In March 2018, an Uber self-driving test vehicle struck and killed a pedestrian crossing the street in Tempe, Arizona. The incident was caused by a software flaw in the vehicle's object recognition system, which failed to identify the pedestrians appropriately to avoid them as obstacles. The safety driver, who was supposed to monitor the vehicle's operation and intervene if necessary, was found distracted during the crash. [This incident](https://money.cnn.com/2018/03/19/technology/uber-autonomous-car-fatal-crash/index.html?iid=EL) led to widespread scrutiny of Uber's self-driving program and raised questions about the readiness of autonomous vehicle technology for public roads. It also emphasized the need for rigorous testing, validation, and safety measures in developing and deploying AI-based self-driving systems. @@ -96,7 +96,7 @@ Embedded systems, which often operate in resource-constrained environments and s Let's consider a few examples, starting with outer space exploration. NASA's Mars Polar Lander mission in 1999 suffered [a catastrophic failure](https://spaceref.com/uncategorized/nasa-reveals-probable-cause-of-mars-polar-lander-and-deep-space-2-mission-failures/) due to a software error in the touchdown detection system ([@fig-nasa-example](#e3z8hq3qpwn4)). The spacecraft's onboard software mistakenly interpreted the noise from the deployment of its landing legs as a sign that it had touched down on the Martian surface. As a result, the spacecraft prematurely shut down its engines, causing it to crash into the surface. This incident highlights the critical importance of robust software design and extensive testing in embedded systems, especially those operating in remote and unforgiving environments. 
As AI capabilities are integrated into future space missions, ensuring these systems' reliability and fault tolerance will be paramount to mission success.
-![NASA's Failed Mars Polar Lander mission in 1999 cost over \$200M (Credit: [SlashGear](https://www.slashgear.com/1094840/nasas-failed-mars-missions-that-cost-over-200-million/))](./images/png/nasa_example.png){#fig-nasa-example}
+![NASA's failed Mars Polar Lander mission in 1999 cost over \$200M. Source: [SlashGear](https://www.slashgear.com/1094840/nasas-failed-mars-missions-that-cost-over-200-million/)](./images/png/nasa_example.png){#fig-nasa-example}
Back on Earth, in 2015, a Boeing 787 Dreamliner experienced a complete electrical shutdown during a flight due to a software bug in its generator control units. The bug caused the generator control units to enter a failsafe mode, cutting power to the aircraft's electrical systems and forcing an emergency landing. [This incident](https://www.engineering.com/story/vzrxw) underscores the potential for software faults to have severe consequences in complex embedded systems like aircraft. As AI technologies are increasingly applied in aviation, such as in autonomous flight systems and predictive maintenance, ensuring the robustness and reliability of these systems will be critical to passenger safety.
@@ -136,7 +136,7 @@ All of these transient faults are characterized by their short duration and non-
Transient faults can be attributed to various external factors. One common cause is cosmic rays, high-energy particles originating from outer space. When these particles strike sensitive areas of the hardware, such as memory cells or transistors, they can induce charge disturbances that alter the stored or transmitted data. This is illustrated in [@fig-transient-fault](#9jd0z5evi3fa). Another cause of transient faults is [electromagnetic interference (EMI)](https://www.trentonsystems.com/en-us/resource-hub/blog/what-is-electromagnetic-interference) from nearby devices or power fluctuations. EMI can couple with the circuits and cause voltage spikes or glitches that temporarily disrupt the normal operation of the hardware.
-![Mechanism of Hardware Transient Fault Occurrence (Credit: [NTT](https://group.ntt/en/newsrelease/2018/11/22/181122a.html))](./images/png/transient_fault.png){#fig-transient-fault}
+![Mechanism of Hardware Transient Fault Occurrence. Source: [NTT](https://group.ntt/en/newsrelease/2018/11/22/181122a.html)](./images/png/transient_fault.png){#fig-transient-fault}
#### Mechanisms of Transient Faults
@@ -148,7 +148,7 @@ A common example of a transient fault is a bit flip in the main memory. If an im
In ML systems, transient faults can have significant implications during the training phase [@he2023understanding]. ML training involves iterative computations and updates to model parameters based on large datasets. If a transient fault occurs in the memory storing the model weights or gradients, it can lead to incorrect updates and compromise the convergence and accuracy of the training process. @fig-sdc-training-fault shows a real-world example from Google's production fleet where an SDC anomaly caused a significant difference in the gradient norm.
-![SDC in ML training phase results in anomalies in the gradient norm. (Credit: Jeff Dean, MLSys 2024 Keynote (Google))](./images/jpg/google_sdc_jeff_dean_anomaly.jpg){#fig-sdc-training-fault}
+![SDC in ML training phase results in anomalies in the gradient norm. 
Source: Jeff Dean, MLSys 2024 Keynote (Google)](./images/jpg/google_sdc_jeff_dean_anomaly.jpg){#fig-sdc-training-fault} For example, a bit flip in the weight matrix of a neural network can cause the model to learn incorrect patterns or associations, leading to degraded performance [@wan2021analyzing]. Transient faults in the data pipeline, such as corruption of training samples or labels, can also introduce noise and affect the quality of the learned model. @@ -174,7 +174,7 @@ The FDIV bug occurred due to an error in the lookup table used by the division u Although the error was small, it could compound over many division operations, leading to significant inaccuracies in mathematical calculations. The impact of the FDIV bug was significant, especially for applications that relied heavily on precise floating-point division, such as scientific simulations, financial calculations, and computer-aided design. The bug led to incorrect results, which could have severe consequences in fields like finance or engineering. -![Intel Pentium processor with the FDIV permanent fault. The triangular regions are where erroneous calculations occurred. (Credit: [Byte Magazine](https://www.halfhill.com/byte/1995-3_truth.html))](./images/png/permanent_fault.png){#fig-permanent-fault} +![Intel Pentium processor with the FDIV permanent fault. The triangular regions are where erroneous calculations occurred. Source: [Byte Magazine](https://www.halfhill.com/byte/1995-3_truth.html)](./images/png/permanent_fault.png){#fig-permanent-fault} The Intel FDIV bug is a cautionary tale for the potential impact of permanent faults on ML systems. In the context of ML, permanent faults in hardware components can lead to incorrect computations, affecting the accuracy and reliability of the models. For example, if an ML system relies on a processor with a faulty floating-point unit, similar to the Intel FDIV bug, it could introduce errors in the calculations performed during training or inference. @@ -192,7 +192,7 @@ On the other hand, [wear-out mechanisms](https://semiengineering.com/what-causes Permanent faults can manifest through various mechanisms, depending on the nature and location of the fault. Stuck-at faults [@seong2010safer] are common permanent faults where a signal or memory cell remains fixed at a particular value (either 0 or 1) regardless of the inputs, as illustrated in [@fig-stuck-fault](#ahtmh1s1mxgf). -![Stuck-at Fault Model in Digital Circuits (Credit: [Accendo Reliability](https://accendoreliability.com/digital-circuits-stuck-fault-model/))](./images/png/stuck_fault.png){#fig-stuck-fault} +![Stuck-at Fault Model in Digital Circuits. Source: [Accendo Reliability](https://accendoreliability.com/digital-circuits-stuck-fault-model/)](./images/png/stuck_fault.png){#fig-stuck-fault} Stuck-at faults can occur in logic gates, memory cells, or interconnects, causing incorrect computations or data corruption. Another mechanism is device failures, where a component, such as a transistor or a memory cell, completely ceases to function. This can be due to manufacturing defects or severe wear-out. Bridging faults occur when two or more signal lines are unintentionally connected, causing short circuits or incorrect logic behavior. @@ -214,7 +214,7 @@ Designing ML systems with fault tolerance in mind is crucial to ensure their rel Intermittent faults are hardware faults that occur sporadically and unpredictably in a system. 
An example is illustrated in [@fig-intermittent-fault](#kix.1c0l0udn3cp7), where cracks in the material can introduce increased resistance in circuitry. These faults are particularly challenging to detect and diagnose because they appear and disappear intermittently, making it difficult to reproduce and isolate the root cause. Intermittent faults can lead to system instability, data corruption, and performance degradation. -![Increased resistance due to an intermittent fault -- crack between copper bump and package solder (Credit: [Constantinescu](https://ieeexplore.ieee.org/document/4925824))](./images/png/intermittent_fault.png){#fig-intermittent-fault} +![Increased resistance due to an intermittent fault -- crack between copper bump and package solder. Source: [Constantinescu](https://ieeexplore.ieee.org/document/4925824)](./images/png/intermittent_fault.png){#fig-intermittent-fault} #### Definition and Characteristics @@ -230,7 +230,7 @@ Manufacturing defects or process variations can also introduce intermittent faul Environmental factors, such as temperature fluctuations, humidity, or vibrations, can trigger intermittent faults by altering the electrical characteristics of the components. Loose or degraded connections, such as those in connectors or printed circuit boards, can cause intermittent faults. -![Residue induced intermittent fault in a DRAM chip (Credit: [Hynix Semiconductor](https://ieeexplore.ieee.org/document/4925824))](./images/png/intermittent_fault_dram.png){#fig-intermittent-fault-dram} +![Residue induced intermittent fault in a DRAM chip. Source: [Hynix Semiconductor](https://ieeexplore.ieee.org/document/4925824)](./images/png/intermittent_fault_dram.png){#fig-intermittent-fault-dram} #### Mechanisms of Intermittent Faults @@ -264,7 +264,7 @@ During the BIST process, predefined test patterns are applied to the processor's **Error detection codes:** Error detection codes are widely used to detect data storage and transmission errors [@hamming1950error]. These codes add redundant bits to the original data, allowing the detection of bit errors. Example: Parity checks are a simple form of error detection code shown in [@fig-parity](#kix.2vxlbeehnemj). In a single-bit parity scheme, an extra bit is appended to each data word, making the number of 1s in the word even (even parity) or odd (odd parity). -![Parity bit example (Credit: [Computer Hope](https://www.computerhope.com/jargon/p/paritybi.htm))](./images/png/parity.png){#fig-parity} +![Parity bit example. Source: [Computer Hope](https://www.computerhope.com/jargon/p/paritybi.htm)](./images/png/parity.png){#fig-parity} When reading the data, the parity is checked, and if it doesn't match the expected value, an error is detected. More advanced error detection codes, such as cyclic redundancy checks (CRC), calculate a checksum based on the data and append it to the message. The checksum is recalculated at the receiving end and compared with the transmitted checksum to detect errors. Error-correcting code (ECC) memory modules, commonly used in servers and critical systems, employ advanced error detection and correction codes to detect and correct single-bit or multi-bit errors in memory. @@ -274,7 +274,7 @@ In a TMR system, three identical instances of a hardware component, such as a pr Tesla's self-driving computers employ a redundant hardware architecture to ensure the safety and reliability of critical functions, such as perception, decision-making, and vehicle control, as shown in [@fig-tesla-dmr](#kix.nsc1yczcug9r). 
One key component of this architecture is using dual modular redundancy (DMR) in the car's onboard computer systems. -![Tesla full self-driving computer with dual redundant SoCs (Credit: [Tesla](https://old.hotchips.org/hc31/HC31_2.3_Tesla_Hotchips_ppt_Final_0817.pdf))](./images/png/tesla_dmr.png){#fig-tesla-dmr} +![Tesla full self-driving computer with dual redundant SoCs. Source: [Tesla](https://old.hotchips.org/hc31/HC31_2.3_Tesla_Hotchips_ppt_Final_0817.pdf)](./images/png/tesla_dmr.png){#fig-tesla-dmr} In Tesla's DMR implementation, two identical hardware units, often called "redundant computers" or "redundant control units," perform the same computations in parallel [@bannon2019computer]. Each unit independently processes sensor data, executes perception and decision-making algorithms, and generates control commands for the vehicle's actuators (e.g., steering, acceleration, and braking). @@ -292,11 +292,11 @@ The use of DMR in Tesla's self-driving computer highlights the importance of har Google employs redundant hot spares to deal with SDC issues within its data centers, thereby enhancing the reliability of critical functions. As illustrated in [@fig-sdc-controller], during the normal training phase, multiple synchronous training workers function flawlessly. However, if a worker becomes defective and causes SDC, an SDC checker automatically identifies the issues. Upon detecting the SDC, the SDC checker moves the training to a hot spare and sends the defective machine for repair. This redundancy safeguards the continuity and reliability of ML training, effectively minimizing downtime and preserving data integrity. -![Google employs hot spare cores to transparently handle SDCs in the data center. (Credit: Jeff Dean, MLSys 2024 Keynote (Google))](./images/jpg/sdc_controller_google.jpg){#fig-sdc-controller} +![Google employs hot spare cores to transparently handle SDCs in the data center. Source: Jeff Dean, MLSys 2024 Keynote (Google)](./images/jpg/sdc_controller_google.jpg){#fig-sdc-controller} **Watchdog timers:** Watchdog timers are hardware components that monitor the execution of critical tasks or processes [@pont2002using]. They are commonly used to detect and recover from software or hardware faults that cause a system to become unresponsive or stuck in an infinite loop. In an embedded system, a watchdog timer can be configured to monitor the execution of the main control loop, as illustrated in [@fig-watchdog](#3l259jcz0lli). The software periodically resets the watchdog timer to indicate that it functions correctly. Suppose the software fails to reset the timer within a specified time limit (timeout period). In that case, the watchdog timer assumes that the system has encountered a fault and triggers a predefined recovery action, such as resetting the system or switching to a backup component. Watchdog timers are widely used in automotive electronics, industrial control systems, and other safety-critical applications to ensure the timely detection and recovery from faults. -![Watchdog timer example in detecting MCU faults (Credit: [Ablic](https://www.ablic.com/en/semicon/products/automotive/automotive-watchdog-timer/intro/))](./images/png/watchdog.png){#fig-watchdog} +![Watchdog timer example in detecting MCU faults. 
Source: [Ablic](https://www.ablic.com/en/semicon/products/automotive/automotive-watchdog-timer/intro/)](./images/png/watchdog.png){#fig-watchdog} ##### Software-level fault detection @@ -306,19 +306,19 @@ Software-level fault detection techniques rely on software algorithms and monito Anomaly detection algorithms can be applied to the model's predictions or intermediate layer activations, such as statistical outlier detection or machine learning-based approaches (e.g., One-Class SVM or Autoencoders) [@chandola2009anomaly]. [@fig-ad](#a0u8fu59ui0r) shows example of anomaly detection. Suppose the monitoring system detects a significant deviation from the expected patterns, such as a sudden drop in classification accuracy or out-of-distribution samples. In that case, it can raise an alert indicating a potential fault in the model or the input data pipeline. This early detection allows for timely intervention and fault mitigation strategies to be applied. -![Examples of anomaly detection. (a) Fully supervised anomaly detection, (b) normal-only anomaly detection, (c, d, e) semi-supervised anomaly detection, (f) unsupervised anomaly detection (Credit: [Google](https://www.google.com/url?sa=i&url=http%3A%2F%2Fresearch.google%2Fblog%2Funsupervised-and-semi-supervised-anomaly-detection-with-data-centric-ml%2F&psig=AOvVaw1p9owe13lxfZogUHTZnxrj&ust=1714877457779000&source=images&cd=vfe&opi=89978449&ved=0CBIQjRxqFwoTCIjMmMP-8oUDFQAAAAAdAAAAABAE))](./images/png/ad.png){#fig-ad} +![Examples of anomaly detection. (a) Fully supervised anomaly detection, (b) normal-only anomaly detection, (c, d, e) semi-supervised anomaly detection, (f) unsupervised anomaly detection. Source: [Google](https://www.google.com/url?sa=i&url=http%3A%2F%2Fresearch.google%2Fblog%2Funsupervised-and-semi-supervised-anomaly-detection-with-data-centric-ml%2F&psig=AOvVaw1p9owe13lxfZogUHTZnxrj&ust=1714877457779000&source=images&cd=vfe&opi=89978449&ved=0CBIQjRxqFwoTCIjMmMP-8oUDFQAAAAAdAAAAABAE)](./images/png/ad.png){#fig-ad} **Consistency checks and data validation:** Consistency checks and data validation techniques ensure data integrity and correctness at different processing stages in an ML system [@lindholm2019data]. These checks help detect data corruption, inconsistencies, or errors that may propagate and affect the system's behavior. Example: In a distributed ML system where multiple nodes collaborate to train a model, consistency checks can be implemented to validate the integrity of the shared model parameters. Each node can compute a checksum or hash of the model parameters before and after the training iteration, as shown in @fig-ad. Any inconsistencies or data corruption can be detected by comparing the checksums across nodes. Additionally, range checks can be applied to the input data and model outputs to ensure they fall within expected bounds. For instance, if an autonomous vehicle's perception system detects an object with unrealistic dimensions or velocities, it can indicate a fault in the sensor data or the perception algorithms [@wan2023vpp]. **Heartbeat and timeout mechanisms:** Heartbeat mechanisms and timeouts are commonly used to detect faults in distributed systems and ensure the liveness and responsiveness of components [@kawazoe1997heartbeat]. These are quite similar to the watchdog timers found in hardware. 
For example, in a distributed ML system, where multiple nodes collaborate to perform tasks such as data preprocessing, model training, or inference, heartbeat mechanisms can be implemented to monitor the health and availability of each node. Each node periodically sends a heartbeat message to a central coordinator or its peer nodes, indicating its status and availability. Suppose a node fails to send a heartbeat within a specified timeout period, as shown in [@fig-heartbeat](#ojufkz2g56e). In that case, it is considered faulty, and appropriate actions can be taken, such as redistributing the workload or initiating a failover mechanism. Timeouts can also be used to detect and handle hanging or unresponsive components. For example, if a data loading process exceeds a predefined timeout threshold, it may indicate a fault in the data pipeline, and the system can take corrective measures. -![Heartbeat messages in distributed systems (Credit: [GeeksforGeeks](https://www.geeksforgeeks.org/what-are-heartbeat-messages/))](./images/png/heartbeat.png){#fig-heartbeat} +![Heartbeat messages in distributed systems. Source: [GeeksforGeeks](https://www.geeksforgeeks.org/what-are-heartbeat-messages/)](./images/png/heartbeat.png){#fig-heartbeat} - + **Software-implemented fault tolerance (SIFT) techniques:** SIFT techniques introduce redundancy and fault detection mechanisms at the software level to enhance the reliability and fault tolerance of the system [@reis2005swift]. Example: N-version programming is a SIFT technique where multiple functionally equivalent software component versions are developed independently by different teams. This can be applied to critical components such as the model inference engine in an ML system. Multiple versions of the inference engine can be executed in parallel, and their outputs can be compared for consistency. It is considered the correct result if most versions produce the same output. If there is a discrepancy, it indicates a potential fault in one or more versions, and appropriate error-handling mechanisms can be triggered. Another example is using software-based error correction codes, such as Reed-Solomon codes [@plank1997tutorial], to detect and correct errors in data storage or transmission, as shown in [@fig-Reed-Solomon](#kjmtegsny44z). These codes add redundancy to the data, enabling detecting and correcting certain errors and enhancing the system's fault tolerance. -![n-bits representation of the Reed-Solomon codes (Credit: [GeeksforGeeks](https://www.geeksforgeeks.org/what-is-reed-solomon-code/))](./images/png/Reed-Solomon.png){#fig-Reed-Solomon} +![n-bits representation of the Reed-Solomon codes. Source: [GeeksforGeeks](https://www.geeksforgeeks.org/what-is-reed-solomon-code/)](./images/png/Reed-Solomon.png){#fig-Reed-Solomon} :::{#exr-ad .callout-caution collapse="true"} @@ -333,17 +333,38 @@ In this Colab, play the role of an AI fault detective! You'll build an autoencod @tbl-fault_types provides an extensive comparative analysis of transient, permanent, and intermittent faults. It outlines the primary characteristics or dimensions that distinguish these fault types. Here, we summarize the relevant dimensions we examined and explore the nuances that differentiate transient, permanent, and intermittent faults in greater detail. 
-| Dimension | Transient Faults | Permanent Faults | Intermittent Faults | -|-----------|------------------|------------------|---------------------| -| Duration | Short-lived, temporary | Persistent, remains until repair or replacement | Sporadic, appears and disappears intermittently | -| Persistence | Disappears after the fault condition passes | Consistently present until addressed | Recurs irregularly, not always present | -| Causes | External factors (e.g., electromagnetic interference, cosmic rays) | Hardware defects, physical damage, wear-out | Unstable hardware conditions, loose connections, aging components | -| Manifestation | Bit flips, glitches, temporary data corruption | Stuck-at faults, broken components, complete device failures | Occasional bit flips, intermittent signal issues, sporadic malfunctions | -| Impact on ML Systems | Introduces temporary errors or noise in computations | Causes consistent errors or failures, affecting reliability | Leads to sporadic and unpredictable errors, challenging to diagnose and mitigate | -| Detection | Error detection codes, comparison with expected values | Built-in self-tests, error detection codes, consistency checks | Monitoring for anomalies, analyzing error patterns and correlations | -| Mitigation | Error correction codes, redundancy, checkpoint and restart | Hardware repair or replacement, component redundancy, failover mechanisms | Robust design, environmental control, runtime monitoring, fault-tolerant techniques | - -: Comparison of transient, permanent, and intermittent faults. {#tbl-fault_types} ++----------------+-----------------------------+-----------------------------+--------------------------------------------------+ +| Dimension | Transient Faults | Permanent Faults | Intermittent Faults | ++:===============+:============================+:============================+:=================================================+ +| Duration | Short-lived, temporary | Persistent, remains until | Sporadic, appears and disappears intermittently | +| | | repair or replacement | | ++----------------+-----------------------------+-----------------------------+--------------------------------------------------+ +| Persistence | Disappears after the fault | Consistently present until | Recurs irregularly, not always present | +| | condition passes | addressed | | ++----------------+-----------------------------+-----------------------------+--------------------------------------------------+ +| Causes | External factors (e.g., | Hardware defects, physical | Unstable hardware conditions, loose connections, | +| | electromagnetic interference| damage, wear-out | aging components | +| | cosmic rays) | | | ++----------------+-----------------------------+-----------------------------+--------------------------------------------------+ +| Manifestation | Bit flips, glitches, | Stuck-at faults, broken | Occasional bit flips, intermittent signal issues,| +| | temporary data corruption | components, complete device | sporadic malfunctions | +| | | failures | | ++----------------+-----------------------------+-----------------------------+--------------------------------------------------+ +| Impact on ML | Introduces temporary errors | Causes consistent errors or | Leads to sporadic and unpredictable errors, | +| Systems | or noise in computations | failures, affecting | challenging to diagnose and mitigate | +| | | reliability | | 
++----------------+-----------------------------+-----------------------------+--------------------------------------------------+ +| Detection | Error detection codes, | Built-in self-tests, error | Monitoring for anomalies, analyzing error | +| | comparison with expected | detection codes, consistency| patterns and correlations | +| | values | checks | | ++----------------+-----------------------------+-----------------------------+--------------------------------------------------+ +| Mitigation | Error correction codes, | Hardware repair or | Robust design, environmental control, runtime | +| | redundancy, checkpoint and | replacement, component | monitoring, fault-tolerant techniques | +| | restart | redundancy, failover | | +| | | mechanisms | | ++----------------+-----------------------------+-----------------------------+--------------------------------------------------+ + +: Comparison of transient, permanent, and intermittent faults. {#tbl-fault_types .striped .hover} ## ML Model Robustness @@ -351,9 +372,9 @@ In this Colab, play the role of an AI fault detective! You'll build an autoencod #### Definition and Characteristics -Adversarial attacks aim to trick models into making incorrect predictions by providing them with specially crafted, deceptive inputs (called adversarial examples) [@parrish2023adversarial]. By adding slight perturbations to input data, adversaries can \"hack\" a model's pattern recognition and deceive it. These are sophisticated techniques where slight, often imperceptible alterations to input data can trick an ML model into making a wrong prediction, as shown in [@fig-adversarial-attack-noise-example]. +Adversarial attacks aim to trick models into making incorrect predictions by providing them with specially crafted, deceptive inputs (called adversarial examples) [@parrish2023adversarial]. By adding slight perturbations to input data, adversaries can "hack" a model's pattern recognition and deceive it. These are sophisticated techniques where slight, often imperceptible alterations to input data can trick an ML model into making a wrong prediction, as shown in [@fig-adversarial-attack-noise-example]. -![A small adversarial noise added to the original image can make the neural network classify the image as a Guacamole instead of an Egyptian cat (Credit: [Sutanto](https://www.mdpi.com/2079-9292/10/1/52))](./images/png/adversarial_attack_detection.png){#fig-adversarial-attack-noise-example} +![A small adversarial noise added to the original image can make the neural network classify the image as a Guacamole instead of an Egyptian cat. Source: [Sutanto](https://www.mdpi.com/2079-9292/10/1/52)](./images/png/adversarial_attack_detection.png){#fig-adversarial-attack-noise-example} One can generate prompts that lead to unsafe images in text-to-image models like DALLE [@ramesh2021zero] or Stable Diffusion [@rombach2022highresolution]. For example, by altering the pixel values of an image, attackers can deceive a facial recognition system into identifying a face as a different person. @@ -371,11 +392,11 @@ The landscape of machine learning models is complex and broad, especially given * **Generative Adversarial Networks (GANs)** are deep learning models that consist of two networks competing against each other: a generator and a discriminator [@goodfellow2020generative]. The generator tries to synthesize realistic data while the discriminator evaluates whether they are real or fake. GANs can be used to craft adversarial examples. 
The generator network is trained to produce inputs that the target model misclassifies. These GAN-generated images can then attack a target classifier or detection model. The generator and the target model are engaged in a competitive process, with the generator continually improving its ability to create deceptive examples and the target model enhancing its resistance to such examples. GANs provide a powerful framework for crafting complex and diverse adversarial inputs, illustrating the adaptability of generative models in the adversarial landscape. -* **Transfer Learning Adversarial Attacks** exploit the knowledge transferred from a pre-trained model to a target model, creating adversarial examples that can deceive both models. These attacks pose a growing concern, particularly when adversaries have knowledge of the feature extractor but lack access to the classification head (the part or layer responsible for making the final classifications). Referred to as \"headless attacks,\" these transferable adversarial strategies leverage the expressive capabilities of feature extractors to craft perturbations while being oblivious to the label space or training data. The existence of such attacks underscores the importance of developing robust defenses for transfer learning applications, especially since pre-trained models are commonly used [@ahmed2020headless]. +* **Transfer Learning Adversarial Attacks** exploit the knowledge transferred from a pre-trained model to a target model, creating adversarial examples that can deceive both models. These attacks pose a growing concern, particularly when adversaries have knowledge of the feature extractor but lack access to the classification head (the part or layer responsible for making the final classifications). Referred to as "headless attacks," these transferable adversarial strategies leverage the expressive capabilities of feature extractors to craft perturbations while being oblivious to the label space or training data. The existence of such attacks underscores the importance of developing robust defenses for transfer learning applications, especially since pre-trained models are commonly used [@ahmed2020headless]. #### Mechanisms of Adversarial Attacks -![Gradient-Based Attacks (Credit: [Ivezic](https://defence.ai/ai-security/gradient-based-attacks/))](./images/png/gradient_attack.png){#fig-gradient-attack} +![Gradient-Based Attacks. Source: [Ivezic](https://defence.ai/ai-security/gradient-based-attacks/)](./images/png/gradient_attack.png){#fig-gradient-attack} **Gradient-based Attacks** @@ -401,16 +422,22 @@ Physical-world attacks bring adversarial examples into the realm of real-world s @tbl-attack_types a concise overview of the different categories of adversarial attacks, including gradient-based attacks (FGSM, PGD, JSMA), optimization-based attacks (C&W, EAD), transfer-based attacks, and physical-world attacks (adversarial patches and objects). Each attack is briefly described, highlighting its key characteristics and mechanisms. -| Attack Category | Attack Name | Description | -|-----------------------|-------------------------------------|-----------------------------------------------------------------------------------------------------------------| -| Gradient-based | Fast Gradient Sign Method (FGSM) | Perturbs input data by adding small noise in the gradient direction to maximize prediction error. 
| -| | Projected Gradient Descent (PGD) | Extends FGSM by iteratively applying the gradient update step for more refined adversarial examples. | -| | Jacobian-based Saliency Map Attack (JSMA) | Identifies influential input features and perturbs them to create adversarial examples. | -| Optimization-based | Carlini and Wagner (C&W) Attack | Finds the smallest perturbation that causes misclassification while maintaining perceptual similarity. | -| | Elastic Net Attack to DNNs (EAD) | Incorporates elastic net regularization to generate adversarial examples with sparse perturbations. | -| Transfer-based | Transferability-based Attacks | Exploits the transferability of adversarial examples across different models, enabling black-box attacks. | -| Physical-world | Adversarial Patches | Small, carefully designed patches placed on objects to fool object detection or classification models. | -| | Adversarial Objects | Physical objects (e.g., 3D-printed sculptures, modified road signs) crafted to deceive ML systems in real-world scenarios. | ++-----------------------+---------------------------------------------+------------------------------------------------------------------------------------------------------------+ +| Attack Category | Attack Name | Description | ++:======================+:============================================+:===========================================================================================================+ +| Gradient-based | Fast Gradient Sign Method (FGSM) | Perturbs input data by adding small noise in the gradient direction to maximize prediction error. | +| | Projected Gradient Descent (PGD) | Extends FGSM by iteratively applying the gradient update step for more refined adversarial examples. | +| | Jacobian-based Saliency Map Attack (JSMA) | Identifies influential input features and perturbs them to create adversarial examples. | ++-----------------------+---------------------------------------------+------------------------------------------------------------------------------------------------------------+ +| Optimization-based | Carlini and Wagner (C&W) Attack | Finds the smallest perturbation that causes misclassification while maintaining perceptual similarity. | +| | Elastic Net Attack to DNNs (EAD) | Incorporates elastic net regularization to generate adversarial examples with sparse perturbations. | ++-----------------------+---------------------------------------------+------------------------------------------------------------------------------------------------------------+ +| Transfer-based | Transferability-based Attacks | Exploits the transferability of adversarial examples across different models, enabling black-box attacks. | ++-----------------------+---------------------------------------------+------------------------------------------------------------------------------------------------------------+ +| Physical-world | Adversarial Patches | Small, carefully designed patches placed on objects to fool object detection or classification models. | +| | Adversarial Objects | Physical objects (e.g., 3D-printed sculptures, modified road signs) crafted to deceive ML systems in | +| | | real-world scenarios. | ++-----------------------+---------------------------------------------+------------------------------------------------------------------------------------------------------------+ : Different attack types on ML models. 
{#tbl-attack_types}
@@ -428,9 +455,9 @@ One striking example of the impact of adversarial attacks was demonstrated by re
This demonstration shed light on the alarming potential of simple adversarial stickers to trick ML systems into misreading critical road signs. The implications of such attacks in the real world are significant, particularly in the context of autonomous vehicles. If deployed on actual roads, these adversarial stickers could cause self-driving cars to misinterpret stop signs as speed limits, leading to dangerous situations, as shown in [@fig-graffiti]. Researchers warned that this could result in rolling stops or unintended acceleration into intersections, endangering public safety.
-![Adversarial example generation applied to GoogLeNet (Szegedy et al., 2014a) on ImageNet (Credit: [Goodfellow](https://arxiv.org/abs/1412.6572))](./images/png/adversarial_googlenet.png){#fig-adversarial-googlenet}
+![Adversarial example generation applied to GoogLeNet (Szegedy et al., 2014a) on ImageNet. Source: [Goodfellow](https://arxiv.org/abs/1412.6572)](./images/png/adversarial_googlenet.png){#fig-adversarial-googlenet}
-![Graffiti on a stop sign tricked a self-driving car into thinking it was a 45 mph speed limit sign (Credit: [Eykholt](https://arxiv.org/abs/1707.08945))](./images/png/graffiti.png){#fig-graffiti}
+![Graffiti on a stop sign tricked a self-driving car into thinking it was a 45 mph speed limit sign. Source: [Eykholt](https://arxiv.org/abs/1707.08945)](./images/png/graffiti.png){#fig-graffiti}
The case study of the adversarial stickers on stop signs provides a concrete illustration of how adversarial examples exploit the way ML models recognize patterns. By subtly manipulating the input data in ways that are invisible to humans, attackers can induce incorrect predictions and create serious risks, especially in safety-critical applications like autonomous vehicles. The attack's simplicity highlights the vulnerability of ML models to even minor changes in the input, emphasizing the need for robust defenses against such threats.
@@ -463,7 +490,7 @@ Think you can outsmart an AI? In this Colab, learn how to trick image classifica
Data poisoning is an attack where the training data is tampered with, leading to a compromised model [@biggio2012poisoning], as shown in [@fig-poisoning-example]. Attackers can modify existing training examples, insert new malicious data points, or influence the data collection process. The poisoned data is labeled in such a way as to skew the model's learned behavior. This can be particularly damaging in applications where ML models make automated decisions based on learned patterns. Beyond training sets, poisoning test and validation data can allow adversaries to artificially boost reported model performance.
-![NightShade's poisoning effects on Stable Diffusion (Credit: [TOMÉ](https://telefonicatech.com/en/blog/attacks-on-artificial-intelligence-iii-data-poisoning))](./images/png/poisoning_example.png){#fig-poisoning-example}
+![NightShade's poisoning effects on Stable Diffusion. Source: [TOMÉ](https://telefonicatech.com/en/blog/attacks-on-artificial-intelligence-iii-data-poisoning)](./images/png/poisoning_example.png){#fig-poisoning-example}
The process usually involves the following steps:
@@ -508,7 +535,7 @@ Data poisoning attacks can be carried out through various mechanisms, exploiting
Each of these mechanisms presents unique challenges and requires different mitigation strategies.
For example, detecting label manipulation may involve analyzing the distribution of labels and identifying anomalies [@zhou2018learning], while preventing feature manipulation may require secure data preprocessing and anomaly detection techniques [@carta2020local]. Defending against insider threats may involve strict access control policies and monitoring of data access patterns. Moreover, the effectiveness of data poisoning attacks often depends on the attacker's knowledge of the ML system, including the model architecture, training algorithms, and data distribution. Attackers may use adversarial machine learning or data synthesis techniques to craft samples that are more likely to bypass detection and achieve their malicious objectives. -![Garbage In -- Garbage Out (Credit: [Information Matters](https://informationmatters.net/data-poisoning-ai/))](./images/png/distribution_shift_example.png){#fig-distribution-shift-example} +![Garbage In -- Garbage Out. Source: [Information Matters](https://informationmatters.net/data-poisoning-ai/)](./images/png/distribution_shift_example.png){#fig-distribution-shift-example} **Modifying training data labels:** One of the most straightforward mechanisms of data poisoning is modifying the training data labels. In this approach, the attacker selectively changes the labels of a subset of the training samples to mislead the model's learning process as shown in [@fig-distribution-shift-example]. For example, in a binary classification task, the attacker might flip the labels of some positive samples to negative, or vice versa. By introducing such label noise, the attacker aims to degrade the model's performance or cause it to make incorrect predictions for specific target instances. @@ -520,7 +547,7 @@ Each of these mechanisms presents unique challenges and requires different mitig **Manipulating data at the source (e.g., sensor data):** In some cases, attackers can manipulate the data at its source, such as sensor data or input devices. By tampering with the sensors or manipulating the environment in which data is collected, attackers can introduce poisoned samples or bias the data distribution. For instance, in a self-driving car scenario, an attacker might manipulate the sensors or the environment to feed misleading information into the training data, compromising the model's ability to make safe and reliable decisions. -![Data Poisoning Attack (Credit: [Sikandar](https://www.researchgate.net/publication/366883200_A_Detailed_Survey_on_Federated_Learning_Attacks_and_Defenses))](./images/png/poisoning_attack_example.png){#fig-poisoning-attack-example} +![Data Poisoning Attack. Source: [Sikandar](https://www.researchgate.net/publication/366883200_A_Detailed_Survey_on_Federated_Learning_Attacks_and_Defenses)](./images/png/poisoning_attack_example.png){#fig-poisoning-attack-example} **Poisoning data in online learning scenarios:** Data poisoning attacks can also target ML systems that employ online learning, where the model is continuously updated with new data in real time. In such scenarios, an attacker can gradually inject poisoned samples over time, slowly manipulating the model's behavior. Online learning systems are particularly vulnerable to data poisoning because they adapt to new data without extensive validation, making it easier for attackers to introduce malicious samples, as shown in [@fig-poisoning-attack-example]. 
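To see the simplest of these mechanisms in action, the sketch below simulates label flipping on a toy binary classifier and measures its effect on held-out accuracy. The synthetic dataset, the flip fractions, and the logistic regression model are illustrative assumptions, not a reconstruction of any specific attack discussed above.

```python
# Illustrative sketch: label-flipping data poisoning on a toy classifier.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

rng = np.random.default_rng(0)
X, y = make_classification(n_samples=2000, n_features=20, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

def flip_labels(labels, fraction):
    """Return a copy of the labels with a random subset flipped (binary 0 <-> 1)."""
    poisoned = labels.copy()
    idx = rng.choice(len(labels), size=int(fraction * len(labels)), replace=False)
    poisoned[idx] = 1 - poisoned[idx]
    return poisoned

for fraction in (0.0, 0.1, 0.3):
    model = LogisticRegression(max_iter=1000).fit(X_train, flip_labels(y_train, fraction))
    acc = accuracy_score(y_test, model.predict(X_test))
    print(f"flipped {fraction:.0%} of training labels -> test accuracy {acc:.3f}")
```

Even modest flip fractions typically produce a measurable drop in held-out accuracy, which is one reason monitoring label distributions and validation metrics over time is a useful first line of defense, particularly in online learning settings where poisoned samples can arrive gradually.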
@@ -558,19 +585,19 @@ This case highlights how data poisoning can degrade model accuracy and reliabili ##### Case Study 2 -![Samples of dirty-label poison data regarding mismatched text/image pairs (Credit: [Shan](https://arxiv.org/pdf/2310.13828))](./images/png/dirty_label_example.png){#fig-dirty-label-example} +![Samples of dirty-label poison data regarding mismatched text/image pairs. Source: [Shan](https://arxiv.org/pdf/2310.13828)](./images/png/dirty_label_example.png){#fig-dirty-label-example} Interestingly enough, data poisoning attacks are not always malicious [@shan2023prompt]. Nightshade, a tool developed by a team led by Professor Ben Zhao at the University of Chicago, utilizes data poisoning to help artists protect their art against scraping and copyright violations by generative AI models. Artists can use the tool to make subtle modifications to their images before uploading them online, as shown in [@fig-dirty-label-example]. While these changes are indiscernible to the human eye, they can significantly disrupt the performance of generative AI models when incorporated into the training data. Generative models can be manipulated to generate hallucinations and weird images. For example, with only 300 poisoned images, the University of Chicago researchers could trick the latest Stable Diffusion model into generating images of dogs that look like cats or images of cows when prompted for cars. -As the number of poisoned images on the internet increases, the performance of the models that use scraped data will deteriorate exponentially. First, the poisoned data is hard to detect and requires manual elimination. Second, the \"poison\" spreads quickly to other labels because generative models rely on connections between words and concepts as they generate images. So a poisoned image of a \"car\" could spread into generated images associated with words like \"truck\," \"train\," \" bus\," etc. +As the number of poisoned images on the internet increases, the performance of the models that use scraped data will deteriorate exponentially. First, the poisoned data is hard to detect and requires manual elimination. Second, the "poison" spreads quickly to other labels because generative models rely on connections between words and concepts as they generate images. So a poisoned image of a "car" could spread into generated images associated with words like "truck\," "train\," " bus\," etc. On the other hand, this tool can be used maliciously and can affect legitimate applications of the generative models. This shows the very challenging and novel nature of machine learning attacks. [@fig-poisoning] demonstrates the effects of different levels of data poisoning (50 samples, 100 samples, and 300 samples of poisoned images) on generating images in different categories. Notice how the images start deforming and deviating from the desired category. For example, after 300 poison samples, a car prompt generates a cow. -![Data poisoning (Credit: @shan2023prompt)](images/png/image14.png){#fig-poisoning} +![Data poisoning. Source: @shan2023prompt)](images/png/image14.png){#fig-poisoning} :::{#exr-pa .callout-caution collapse="true"} @@ -587,7 +614,7 @@ Get ready to explore the dark side of AI security! In this Colab, you'll learn a Distribution shift refers to the phenomenon where the data distribution encountered by an ML model during deployment (inference) differs from the distribution it was trained on, as shown in [@fig-distribution-shift]. 
This is not so much an attack as it is that the model's robustness will vary over time. In other words, the data's statistical properties, patterns, or underlying assumptions can change between the training and test phases. -![The curly brackets enclose the distribution shift between the environments. Here, z stands for the spurious feature, and y stands for label class (Credit: [Xin](https://www.researchgate.net/publication/366423741_On_the_Connection_between_Invariant_Learning_and_Adversarial_Training_for_Out-of-Distribution_Generalization))](./images/png/distribution_shift.png){#fig-distribution-shift} +![The curly brackets enclose the distribution shift between the environments. Here, z stands for the spurious feature, and y stands for label class. Source: [Xin](https://www.researchgate.net/publication/366423741_On_the_Connection_between_Invariant_Learning_and_Adversarial_Training_for_Out-of-Distribution_Generalization)](./images/png/distribution_shift.png){#fig-distribution-shift} The key characteristics of distribution shift include: @@ -599,7 +626,7 @@ The key characteristics of distribution shift include: **Unrepresentative training data:** The training data may only partially capture the variability and diversity of the real-world data encountered during deployment. Unrepresentative training data can lead to biased or skewed models that perform poorly on real-world data. Suppose the training data needs to capture the variability and diversity of the real-world data adequately. In that case, the model may learn patterns specific to the training set but needs to generalize better to new, unseen data. This can result in poor performance, biased predictions, and limited model applicability. For instance, if a facial recognition model is trained primarily on images of individuals from a specific demographic group, it may struggle to accurately recognize faces from other demographic groups when deployed in a real-world setting. Ensuring that the training data is representative and diverse is crucial for building models that can generalize well to real-world scenarios. -![Concept drift refers to a change in data patterns and relationships over time (Credit: [Evidently AI](https://www.evidentlyai.com/ml-in-production/concept-drift))](./images/png/drift_over_time.png){#fig-drift-over-time} +![Concept drift refers to a change in data patterns and relationships over time. Source: [Evidently AI](https://www.evidentlyai.com/ml-in-production/concept-drift)](./images/png/drift_over_time.png){#fig-drift-over-time} Distribution shift can manifest in various forms, such as: @@ -615,7 +642,7 @@ The presence of a distribution shift can significantly impact the performance an The mechanisms of distribution shift, such as changes in data sources, temporal evolution, domain-specific variations, selection bias, feedback loops, and adversarial manipulations, are important to understand because they help identify the underlying causes of distribution shift. By understanding these mechanisms, practitioners can develop targeted strategies to mitigate their impact and improve the model's robustness. Here are some common mechanisms: -![Temporal evolution (Credit: [Białek](https://www.nannyml.com/blog/types-of-data-shift))](./images/png/temporal_evoltion.png){#fig-temporal-evoltion} +![Temporal evolution. 
Source: [Białek](https://www.nannyml.com/blog/types-of-data-shift)](./images/png/temporal_evoltion.png){#fig-temporal-evoltion} **Changes in data sources:** Distribution shifts can occur when the data sources used for training and inference differ. For example, if a model is trained on data from one sensor but deployed on data from another sensor with different characteristics, it can lead to a distribution shift. @@ -697,13 +724,13 @@ Recall that data poisoning is an attack that targets the integrity of the traini ##### Anomaly Detection Techniques for Identifying Poisoned Data -![Malicious data injection (Credit: [Li](https://www.mdpi.com/2227-7390/12/2/247))](./images/png/adversarial_attack_injection.png){#fig-adversarial-attack-injection} +![Malicious data injection. Source: [Li](https://www.mdpi.com/2227-7390/12/2/247)](./images/png/adversarial_attack_injection.png){#fig-adversarial-attack-injection} Statistical outlier detection methods identify data points that deviate significantly from most data. These methods assume that poisoned data instances are likely to be statistical outliers. Techniques such as the [Z-score method](https://ubalt.pressbooks.pub/mathstatsguides/chapter/z-score-basics/), [Tukey's method](https://www.itl.nist.gov/div898/handbook/prc/section4/prc471.htm), or the [Mahalanobis] [distance](https://www.statisticshowto.com/mahalanobis-distance/) can be used to measure the deviation of each data point from the central tendency of the dataset. Data points that exceed a predefined threshold are flagged as potential outliers and considered suspicious for data poisoning. Clustering-based methods group similar data points together based on their features or attributes. The assumption is that poisoned data instances may form distinct clusters or lie far away from the normal data clusters. By applying clustering algorithms like [K-means](https://www.oreilly.com/library/view/data-algorithms/9781491906170/ch12.html), [DBSCAN](https://www.oreilly.com/library/view/machine-learning-algorithms/9781789347999/50efb27d-abbe-4855-ad81-a5357050161f.xhtml), or [hierarchical clustering](https://www.oreilly.com/library/view/cluster-analysis-5th/9780470978443/chapter04.html), anomalous clusters or data points that do not belong to any cluster can be identified. These anomalous instances are then treated as potentially poisoned data. -![Autoencoder (Credit: [Dertat](https://towardsdatascience.com/applied-deep-learning-part-3-autoencoders-1c083af4d798))](./images/png/autoencoder.png){#fig-autoencoder} +![Autoencoder. Source: [Dertat](https://towardsdatascience.com/applied-deep-learning-part-3-autoencoders-1c083af4d798)](./images/png/autoencoder.png){#fig-autoencoder} Autoencoders are neural networks trained to reconstruct the input data from a compressed representation, as shown in [@fig-autoencoder]. They can be used for anomaly detection by learning the normal patterns in the data and identifying instances that deviate from them. During training, the autoencoder is trained on clean, unpoisoned data. At inference time, the reconstruction error for each data point is computed. Data points with high reconstruction errors are considered abnormal and potentially poisoned, as they do not conform to the learned normal patterns. 
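As a rough illustration of the reconstruction-error idea, the sketch below trains a small autoencoder on features assumed to be clean and then flags suspect samples whose reconstruction error exceeds the 99th percentile of the clean-data errors. The architecture, the synthetic data, and the percentile threshold are assumptions chosen purely for demonstration.

```python
# Illustrative sketch: autoencoder reconstruction error as a poisoning/anomaly signal.
import torch
import torch.nn as nn

torch.manual_seed(0)
clean = torch.randn(1024, 32)                        # stand-in for trusted training features
suspect = torch.cat([torch.randn(96, 32),            # mostly normal points...
                     torch.randn(32, 32) * 4 + 3])   # ...plus shifted, potentially poisoned ones

autoencoder = nn.Sequential(nn.Linear(32, 8), nn.ReLU(),  # encoder: compress to 8 dimensions
                            nn.Linear(8, 32))             # decoder: reconstruct the input
optimizer = torch.optim.Adam(autoencoder.parameters(), lr=1e-3)

for _ in range(200):                                  # train on clean data only
    loss = ((autoencoder(clean) - clean) ** 2).mean()
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

with torch.no_grad():
    clean_err = ((autoencoder(clean) - clean) ** 2).mean(dim=1)
    suspect_err = ((autoencoder(suspect) - suspect) ** 2).mean(dim=1)
threshold = torch.quantile(clean_err, 0.99)           # assumed 99th-percentile cutoff
flagged = (suspect_err > threshold).sum().item()
print(f"{flagged} of {len(suspect)} suspect samples exceed the clean-data error threshold")
```

In practice, the threshold would be calibrated on a held-out clean set, and flagged samples would typically be routed for manual review rather than silently discarded.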
@@ -751,7 +778,7 @@ In addition, domain classifiers are trained to distinguish between different dom ##### Mitigation Techniques for Distribution Shifts -![Transfer learning (Credit: [Bhavsar](https://medium.com/modern-nlp/transfer-learning-in-nlp-f5035cc3f62f))](./images/png/transfer_learning.png){#fig-transfer-learning} +![Transfer learning. Source: [Bhavsar](https://medium.com/modern-nlp/transfer-learning-in-nlp-f5035cc3f62f)](./images/png/transfer_learning.png){#fig-transfer-learning} Transfer learning leverages knowledge gained from one domain to improve performance in another, as shown in [@fig-transfer-learning]. By using pre-trained models or transferring learned features from a source domain to a target domain, transfer learning can help mitigate the impact of distribution shifts. The pre-trained model can be fine-tuned on a small amount of labeled data from the target domain, allowing it to adapt to the new distribution. Transfer learning is particularly effective when the source and target domains share similar characteristics or when labeled data in the target domain is scarce. @@ -827,7 +854,7 @@ Detecting and mitigating software faults in machine learning frameworks is essen **Thorough Testing and Validation:** Comprehensive unit testing of individual components and modules can verify their correctness and identify potential faults early in development. Integration testing validates the interaction and compatibility between different components of the ML framework, ensuring seamless integration. Systematic testing of edge cases, boundary conditions, and exceptional scenarios helps uncover hidden faults and vulnerabilities. [Continuous testing and regression testing](https://u-tor.com/topic/regression-vs-integration) as shown in [@fig-regression-testing](#gaprh7zcofc9) detect faults introduced by code changes or updates to the ML framework. -![Automated regression testing (Credit: [UTOR](https://u-tor.com/topic/regression-vs-integration))](./images/png/regression_testing.png){#fig-regression-testing} +![Automated regression testing. Source: [UTOR](https://u-tor.com/topic/regression-vs-integration)](./images/png/regression_testing.png){#fig-regression-testing} **Static Code Analysis and Linting:** Utilizing static code analysis tools automatically identifies potential coding issues, such as syntax errors, undefined variables, or security vulnerabilities. Enforcing coding standards and best practices through linting tools maintains code quality and reduces the likelihood of common programming mistakes. Conducting regular code reviews allows manual inspection of the codebase, identification of potential faults, and ensures adherence to coding guidelines and design principles. @@ -841,7 +868,7 @@ Detecting and mitigating software faults in machine learning frameworks is essen **Automated Testing and Continuous Integration/Continuous Deployment (CI/CD):** Implement automated testing frameworks and scripts, execute comprehensive test suites, and catch faults early in development. Integrating automated testing into the CI/CD pipeline, as shown in [@fig-CI-CD-procedure](#f14k3aj3u8av), ensures that code changes are thoroughly tested before being merged or deployed to production. Utilizing continuous monitoring and automated alerting systems detects and notifies developers and operators about potential faults or anomalies in real-time. 
-![Continuous Integration/Continuous Deployment (CI/CD) procedure (Credit: [geeksforgeeks](https://www.geeksforgeeks.org/ci-cd-continuous-integration-and-continuous-delivery/))](./images/png/CI_CD_procedure.png){#fig-CI-CD-procedure} +![Continuous Integration/Continuous Deployment (CI/CD) procedure. Source: [geeksforgeeks](https://www.geeksforgeeks.org/ci-cd-continuous-integration-and-continuous-delivery/)](./images/png/CI_CD_procedure.png){#fig-CI-CD-procedure} Adopting a proactive and systematic approach to fault detection and mitigation can significantly improve ML systems' robustness, reliability, and maintainability. By investing in comprehensive testing, monitoring, and fault-tolerant design practices, organizations can minimize the impact of software faults and ensure their ML systems' smooth operation in production environments. @@ -902,7 +929,7 @@ Two of the most common hardware-based fault injection methods are FPGA-based fau ![](./images/png/image15.png) -![Radiation test setup for semiconductor components [@lee2022design] (Credit: [JD Instrument](https://jdinstruments.net/tester-capabilities-radiation-test/))](./images/png/image14.png){#fig-beam-testing} +![Radiation test setup for semiconductor components [@lee2022design] Source: [JD Instrument](https://jdinstruments.net/tester-capabilities-radiation-test/)](./images/png/image14.png){#fig-beam-testing} #### Limitations @@ -938,7 +965,7 @@ Software-based fault injection tools also have some limitations compared to hard **Fidelity:** Software-based tools may provide a different level of Fidelity than hardware-based methods in terms of representing real-world fault conditions. The accuracy of the results obtained from software-based fault injection experiments may depend on how closely the software model approximates the actual hardware behavior. -![Comparison of techniques at layers of abstraction (Credit: [MAVFI](https://ieeexplore.ieee.org/abstract/document/10315202))](./images/jpg/mavfi.jpg){#fig-mavfi} +![Comparison of techniques at layers of abstraction. Source: [MAVFI](https://ieeexplore.ieee.org/abstract/document/10315202)](./images/jpg/mavfi.jpg){#fig-mavfi} ##### Types of Fault Injection Tools @@ -950,7 +977,7 @@ Ares [@reagen2018ares], a fault injection tool initially developed for the Keras PyTorchFI [@mahmoud2020pytorchfi], a fault injection tool specifically designed for the PyTorch framework, was developed in 2020 in collaboration with Nvidia Research. It enables the injection of faults into the weights, activations, and gradients of PyTorch models, supporting a wide range of fault models. By leveraging the GPU acceleration capabilities of PyTorch, PyTorchFI provides a fast and efficient implementation for conducting fault injection experiments on large-scale ML systems, as shown in [@fig-phantom-objects](#txkz61sj1mj4). The tool's speed and ease of use have led to widespread adoption in the community, resulting in multiple developer-led projects, such as PyTorchALFI by Intel xColabs, which focuses on safety in automotive environments. Follow-up PyTorch-centric tools for fault injection include Dr. DNA by Meta [@ma2024dr] (which further facilitates the Pythonic programming model for ease of use), and the GoldenEye framework [@mahmoud2022dsn], which incorporates novel numerical datatypes (such as AdaptivFloat [@tambe2020algorithm] and [BlockFloat](https://en.wikipedia.org/wiki/Bfloat16_floating-point_format) in the context of hardware bit flips. 
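For intuition about what these frameworks automate, the sketch below injects a single bit flip into one weight of a small PyTorch model by hand and compares the outputs before and after the fault. It does not use the PyTorchFI, TensorFI, or NVBitFI interfaces; the model, the targeted layer and weight, and the bit position are arbitrary assumptions for illustration.

```python
# Illustrative sketch only: hand-rolled single-bit-flip injection into one model weight.
# Real tools such as PyTorchFI automate fault-site selection, fault models, and bookkeeping.
import numpy as np
import torch
import torch.nn as nn

torch.manual_seed(0)
model = nn.Sequential(nn.Linear(16, 32), nn.ReLU(), nn.Linear(32, 4))
x = torch.randn(1, 16)
baseline = model(x)

def flip_bit(linear, row, col, bit):
    """Flip one bit of a single float32 weight in place (bit 31 = sign, bits 30-23 = exponent)."""
    with torch.no_grad():
        raw = np.array([linear.weight[row, col].item()], dtype=np.float32).view(np.uint32)
        raw ^= np.uint32(1 << bit)
        linear.weight[row, col] = float(raw.view(np.float32)[0])

flip_bit(model[0], row=0, col=0, bit=30)   # assumed target: an exponent bit of weight [0, 0]
corrupted = model(x)
print("max output deviation after the injected fault:",
      (corrupted - baseline).abs().max().item())
```

Flipping a high-order exponent bit usually produces an obvious deviation at the output, while flips in low-order mantissa bits may pass silently; dedicated fault injection tools sweep across many such fault sites to estimate a model's overall vulnerability.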
-TensorFI [@chen2020tensorfi], or the TensorFlow Fault Injector, is a fault injection tool developed specifically for the TensorFlow framework. Analogous to Ares and PyTorchFI, TensorFI is considered the state-of-the-art tool for ML robustness studies in the TensorFlow ecosystem. It allows researchers to inject faults into the computational graph of TensorFlow models and study their impact on the model's performance, supporting a wide range of fault models. One of the key benefits of TensorFI is its ability to evaluate the resilience of various ML models, not just DNNs. Further advancements, such as BinFi [@chen2019sc], provide a mechanism to speed up error injection experiments by focusing on the \"important\" bits in the system, accelerating the process of ML robustness analysis and prioritizing the critical components of a model. +TensorFI [@chen2020tensorfi], or the TensorFlow Fault Injector, is a fault injection tool developed specifically for the TensorFlow framework. Analogous to Ares and PyTorchFI, TensorFI is considered the state-of-the-art tool for ML robustness studies in the TensorFlow ecosystem. It allows researchers to inject faults into the computational graph of TensorFlow models and study their impact on the model's performance, supporting a wide range of fault models. One of the key benefits of TensorFI is its ability to evaluate the resilience of various ML models, not just DNNs. Further advancements, such as BinFi [@chen2019sc], provide a mechanism to speed up error injection experiments by focusing on the "important" bits in the system, accelerating the process of ML robustness analysis and prioritizing the critical components of a model. NVBitFI [@tsai2021nvbitfi], a general-purpose fault injection tool developed by Nvidia for their GPU platforms, operates at a lower level compared to framework-specific tools like Ares, PyTorchFI, and TensorFlow. While these tools focus on various deep learning platforms to implement and perform robustness analysis, NVBitFI targets the underlying hardware assembly code for fault injection. This allows researchers to inject faults into any application running on Nvidia GPUs, making it a versatile tool for studying the resilience of ML systems and other GPU-accelerated applications. By enabling users to inject errors at the architectural level, NVBitFI provides a more general-purpose fault model that is not restricted to just ML models. As Nvidia's GPU systems are commonly used in many ML-based systems, NVBitFI is a valuable tool for comprehensive fault injection analysis across various applications. @@ -1067,3 +1094,4 @@ In addition to exercises, we offer a series of hands-on labs allowing students t * _Coming soon._ ::: + diff --git a/contents/sustainable_ai/sustainable_ai.qmd b/contents/sustainable_ai/sustainable_ai.qmd index 62027e84a..c0091f223 100644 --- a/contents/sustainable_ai/sustainable_ai.qmd +++ b/contents/sustainable_ai/sustainable_ai.qmd @@ -89,7 +89,7 @@ Understanding the energy needs for training and operating AI models is crucial i The training of complex AI systems like large deep learning models can demand startlingly high levels of computing power--with profound energy implications. Consider OpenAI's state-of-the-art language model GPT-3 as a prime example. This system pushes the frontiers of text generation through algorithms trained on massive datasets. 
Yet, the energy GPT-3 consumed for a single training cycle could rival an [entire small town's monthly usage](https://www.washington.edu/news/2023/07/27/how-much-energy-does-chatgpt-use/). In recent years, these generative AI models have gained increasing popularity, leading to more models being trained. In addition to the growing number of models, the number of parameters per model is also increasing. Research shows that increasing the model size (number of parameters), dataset size, and compute used for training improves performance smoothly with no signs of saturation [@kaplan2020scaling]. See how, in @fig-scaling-laws, the test loss decreases as each of these three factors increases.
-![Performance improves with compute, dataset set, and model size. Credit: @kaplan2020scaling.](images/png/model_scaling.png){#fig-scaling-laws}
+![Performance improves with compute, dataset size, and model size. Source: @kaplan2020scaling.](images/png/model_scaling.png){#fig-scaling-laws}
What drives such immense requirements? During training, models like GPT-3 learn their capabilities by continuously processing huge volumes of data to adjust internal parameters. The processing capacity enabling AI's rapid advances also contributes to surging energy usage, especially as datasets and models balloon. GPT-3 highlights a steady trajectory in the field where each leap in AI's sophistication traces back to ever more substantial computational power and resources. Its predecessor, GPT-2, required 10x less training compute and comprised only 1.5 billion parameters, a difference now dwarfed by magnitudes as GPT-3 comprises 175 billion parameters. Sustaining this trajectory toward increasingly capable AI raises energy and infrastructure provision challenges ahead.
@@ -99,7 +99,7 @@ Developing and training AI models requires immense data, computing power, and en
This concept is reflected in the demand for training and inference hardware in data centers and on the edge. Inference refers to using a trained model to make predictions or decisions on real-world data. According to a [recent McKinsey analysis](https://www.mckinsey.com/~/media/McKinsey/Industries/Semiconductors/Our%20Insights/Artificial%20intelligence%20hardware%20New%20opportunities%20for%20semiconductor%20companies/Artificial-intelligence-hardware.ashx), the need for advanced systems to train ever-larger models is rapidly growing. However, inference computations already make up a dominant and increasing portion of total AI workloads, as shown in @fig-mckinsey. Running real-time inference with trained models--whether for image classification, speech recognition, or predictive analytics--invariably demands computing hardware like servers and chips. However, even a model handling thousands of facial recognition requests or natural language queries daily is dwarfed by massive platforms like Meta, where inference on millions of photos and videos shared on social media means the infrastructure energy requirements continue to scale!
-![Market size for inference and training hardware. Credit: [McKinsey.](https://www.mckinsey.com/~/media/McKinsey/Industries/Semiconductors/Our%20Insights/Artificial%20intelligence%20hardware%20New%20opportunities%20for%20semiconductor%20companies/Artificial-intelligence-hardware.ashx)](images/png/mckinsey_analysis.png){#fig-mckinsey}
+![Market size for inference and training hardware.
Source: [McKinsey.](https://www.mckinsey.com/~/media/McKinsey/Industries/Semiconductors/Our%20Insights/Artificial%20intelligence%20hardware%20New%20opportunities%20for%20semiconductor%20companies/Artificial-intelligence-hardware.ashx)](images/png/mckinsey_analysis.png){#fig-mckinsey}
Algorithms powering AI-enabled smart assistants, automated warehouses, self-driving vehicles, tailored healthcare, and more have marginal individual energy footprints. However, the projected proliferation of these technologies could add hundreds of millions of endpoints running AI algorithms continually, causing the scale of their collective energy requirements to surge. Current efficiency gains alone cannot counterbalance this sheer growth.
@@ -125,7 +125,7 @@ The bandwidth, storage, and processing capacities required to enable this future
The energy demand of data centers can roughly be divided into 4 components—infrastructure, network, storage, and servers. In @fig-energydemand, we see that the data infrastructure (which includes cooling, lighting, and controls) and the servers use most of the total energy budget of data centers in the US [@shehabi2016united]. This section breaks down the energy demand for the servers and the infrastructure. For the latter, the focus is on cooling systems, as cooling is the dominant factor in energy consumption in the infrastructure.
-![Data centers energy consumption in the US. Credit: International Energy Agency (IEA).](images/png/energy_datacenter.png){#fig-energydemand}
+![Data centers energy consumption in the US. Source: International Energy Agency (IEA).](images/png/energy_datacenter.png){#fig-energydemand}
##### Servers {#servers}
@@ -165,13 +165,13 @@ Without action, this exponential demand growth risks ratcheting up the carbon fo
The concept of a 'carbon footprint' has emerged as a key metric. This term refers to the total amount of greenhouse gasses, particularly carbon dioxide, emitted directly or indirectly by an individual, organization, event, or product. These emissions significantly contribute to the greenhouse effect, accelerating global warming and climate change. The carbon footprint is measured in terms of carbon dioxide equivalents ($\textrm{CO}_2$e), allowing for a comprehensive account that includes various greenhouse gasses and their relative environmental impact. Examples of this as applied to large-scale ML tasks are shown in @fig-carbonfootprint.
-![Carbon footprint of large-scale ML tasks. Credit: @wu2022sustainable.](images/png/model_carbonfootprint.png){#fig-carbonfootprint}
+![Carbon footprint of large-scale ML tasks. Source: @wu2022sustainable.](images/png/model_carbonfootprint.png){#fig-carbonfootprint}
Considering the carbon footprint is especially important given AI's rapid advancement and integration into various sectors, which bring its environmental impact into sharp focus. AI systems, particularly those involving intensive computations like deep learning and large-scale data processing, are known for their substantial energy demands. This energy, often drawn from power grids, may still predominantly rely on fossil fuels, leading to significant greenhouse gas emissions. Take, for example, training large AI models such as GPT-3 or complex neural networks. These processes require immense computational power, typically provided by data centers. The energy consumption associated with operating these centers, particularly for high-intensity tasks, results in notable greenhouse gas emissions.
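Since estimates like these ultimately reduce to multiplying energy consumed by the carbon intensity of the electricity that supplied it, a back-of-the-envelope calculation helps make the $\textrm{CO}_2$e unit concrete. All of the numbers in the sketch below (accelerator count, power draw, training duration, PUE, and grid carbon intensity) are illustrative assumptions rather than measured figures for any particular model.

```python
# Back-of-the-envelope CO2e estimate for a hypothetical training run.
# Every input below is an assumed, illustrative value, not a measurement.

num_gpus = 512            # assumed number of accelerators
gpu_power_kw = 0.4        # assumed average draw per accelerator (400 W)
training_hours = 24 * 14  # assumed two-week training run
pue = 1.2                 # assumed data center power usage effectiveness
carbon_intensity = 0.4    # assumed grid intensity in kg CO2e per kWh

# Accelerator energy, scaled by PUE to include cooling and facility overheads.
energy_kwh = num_gpus * gpu_power_kw * training_hours * pue
emissions_kg = energy_kwh * carbon_intensity

print(f"Estimated energy: {energy_kwh:,.0f} kWh")
print(f"Estimated emissions: {emissions_kg / 1000:,.1f} tonnes CO2e")
```

Even this crude model makes the key lever visible: the same training run powered by a grid with one-tenth the carbon intensity would emit roughly one-tenth the $\textrm{CO}_2$e, which is why the energy source matters as much as the energy amount.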
Studies have highlighted that training a single AI model can generate carbon emissions comparable to the lifetime emissions of multiple cars, shedding light on the environmental cost of developing advanced AI technologies [@dayarathna2015data]. @fig-carboncars shows a comparison from lowest to highest carbon footprints, starting with a roundtrip flight between NY and SF, followed by the average human life per year, the average American life per year, a US car including fuel over its lifetime, and a Transformer model with neural architecture search, which has the highest footprint.
-![Carbon footprint of NLP model in lbs of $\textrm{CO}_2$ equivalent. Credit: @dayarathna2015data.](images/png/carbon_benchmarks.png){#fig-carboncars}
+![Carbon footprint of NLP model in lbs of $\textrm{CO}_2$ equivalent. Source: @dayarathna2015data.](images/png/carbon_benchmarks.png){#fig-carboncars}
Moreover, AI's carbon footprint extends beyond the operational phase. The entire lifecycle of AI systems, including the manufacturing of computing hardware, the energy used in data centers for cooling and maintenance, and the disposal of electronic waste, contributes to their overall carbon footprint. We have discussed some of these aspects earlier, and we will discuss the waste aspects later in this chapter.
@@ -195,7 +195,7 @@ The variability of renewable energy production has been an ongoing challenge in
Innovation in energy storage solutions is required to enable constant use of renewable energy sources. The base energy load is currently met with nuclear energy. This constant energy source does not directly emit carbon emissions but cannot adjust its output quickly enough to accommodate the variability of renewable energy sources. Tech companies such as Microsoft have shown interest in nuclear energy sources [to power their data centers](https://www.bloomberg.com/news/newsletters/2023-09-29/microsoft-msft-sees-artificial-intelligence-and-nuclear-energy-as-dynamic-duo). As the energy demand of data centers is more constant than that of regular households, nuclear energy could be used as a dominant source of energy.
-![Energy sources and generation capabilities. Credit: [Energy Charts.](https://www.energy-charts.info/?l=en&c=DE).](images/png/europe_energy_grid.png){#fig-energyprod}
+![Energy sources and generation capabilities. Source: [Energy Charts.](https://www.energy-charts.info/?l=en&c=DE).](images/png/europe_energy_grid.png){#fig-energyprod}
Additionally, the manufacturing and disposal of AI hardware add to the carbon footprint. Producing specialized computing devices, such as GPUs and CPUs, is energy- and resource-intensive. This phase often relies on energy sources that contribute to greenhouse gas emissions. The electronics industry's manufacturing process has been identified as one of the eight big supply chains responsible for more than 50 percent of global emissions [@challenge2021supply]. Furthermore, the end-of-life disposal of this hardware, which can lead to electronic waste, also has environmental implications. As mentioned, servers have a refresh cycle of roughly 3 to 5 years. Of this e-waste, currently [only 17.4 percent is properly collected and recycled](https://www.genevaenvironmentnetwork.org/resources/updates/the-growing-environmental-risks-of-e-waste/). The carbon emissions of this e-waste have increased by more than 50 percent between 2014 and 2020 [@singh2022disentangling].
@@ -429,7 +429,7 @@ Several software libraries and development environments are specifically tailore Energy monitoring tools are crucial for Green AI, as they allow developers to measure and analyze the energy consumption of their AI systems. By providing detailed insights into where and how energy is being used, these tools enable developers to make informed decisions about optimizing their models for better energy efficiency. This can involve adjustments in algorithm design, hardware selection, cloud computing software selection, or operational parameters. @fig-azuredashboard is a screenshot of an energy consumption dashboard provided by Microsoft's cloud services platform. -![Microsoft Azure energy consumption dashboard. Credit: [Will Buchanan.](https://techcommunity.microsoft.com/t5/green-tech-blog/charting-the-path-towards-sustainable-ai-with-azure-machine/ba-p/2866923)](images/png/azure_dashboard.png){#fig-azuredashboard} +![Microsoft Azure energy consumption dashboard. Source: [Will Buchanan.](https://techcommunity.microsoft.com/t5/green-tech-blog/charting-the-path-towards-sustainable-ai-with-azure-machine/ba-p/2866923)](images/png/azure_dashboard.png){#fig-azuredashboard} With the increasing integration of renewable energy sources in AI operations, frameworks facilitating this process are becoming more important. These frameworks help manage the energy supply from renewable sources like solar or wind power, ensuring that AI systems can operate efficiently with fluctuating energy inputs. @@ -500,7 +500,7 @@ While much attention has focused on making the immense data centers powering AI Tiny computers, microcontrollers, and custom ASICs powering edge intelligence face size, cost, and power limitations that rule out high-end GPUs used in data centers. Instead, they require optimized algorithms and extremely compact, energy-efficient circuitry to run smoothly. However, engineering for these microscopic form factors opens up risks around planned obsolescence, disposability, and waste. @fig-iot-devices shows that the number of IoT devices is projected to [reach 30 billion connected devices by 2030](https://www.statista.com/statistics/1183457/iot-connected-devices-worldwide/). -![Number of Internet of Things (IoT) connected devices worldwide from 2019 to 2023. Credit: [Statista.](https://www.statista.com/statistics/1183457/iot-connected-devices-worldwide/)](images/png/statista_chip_growth.png){#fig-iot-devices} +![Number of Internet of Things (IoT) connected devices worldwide from 2019 to 2023. Source: [Statista.](https://www.statista.com/statistics/1183457/iot-connected-devices-worldwide/)](images/png/statista_chip_growth.png){#fig-iot-devices} End-of-life handling of internet-connected gadgets embedded with sensors and AI remains an often overlooked issue during design. However, these products permeate consumer goods, vehicles, public infrastructure, industrial equipment, and more. 
@@ -708,3 +708,4 @@ In addition to exercises, we offer hands-on labs that allow students to gain pra * _Coming soon._ ::: + diff --git a/contents/tools.qmd b/contents/tools.qmd index 83b6c71cb..3d04e2a7a 100644 --- a/contents/tools.qmd +++ b/contents/tools.qmd @@ -7,7 +7,7 @@ This is a non-exhaustive list of tools and frameworks that are available for emb ### **Microcontrollers and Development Boards** | No | Hardware | Processor | Features | TinyML Compatibility | -|----|------------------------------|--------------------------------|---------------------------------------------------------|-------------------------------------------------| +|:----|:------------------------------|:--------------------------------|:---------------------------------------------------------|:-------------------------------------------------| | 1 | Arduino Nano 33 BLE Sense | ARM Cortex-M4 | Onboard sensors, Bluetooth connectivity | TensorFlow Lite Micro | | 2 | Raspberry Pi Pico | Dual-core Arm Cortex-M0+ | Low-cost, large community support | TensorFlow Lite Micro | | 3 | SparkFun Edge | Ambiq Apollo3 Blue | Ultra-low power consumption, onboard microphone | TensorFlow Lite Micro | @@ -23,7 +23,7 @@ This is a non-exhaustive list of tools and frameworks that are available for emb ### **Machine Learning Frameworks** | No | Machine Learning Framework | Description | Use Cases | -|----|---------------------------|--------------------------------------------------------------------------------|------------------------------------------| +|:----|:---------------------------|:--------------------------------------------------------------------------------|:------------------------------------------| | 1 | TensorFlow Lite | Lightweight library for running machine learning models on constrained devices | Image recognition, voice commands, anomaly detection | | 2 | Edge Impulse | A platform providing tools for creating machine learning models optimized for edge devices | Data collection, model training, deployment on tiny devices | | 3 | ONNX Runtime | A performance-optimized engine for running ONNX models, fine-tuned for edge devices | Cross-platform deployment of machine learning models | @@ -31,14 +31,14 @@ This is a non-exhaustive list of tools and frameworks that are available for emb ### **Libraries and APIs** | No | Library/API | Description | Use Cases | -|----|-------------|------------------------------------------------------------------------------------------------------|------------------------------------------| +|:----|:-------------|:------------------------------------------------------------------------------------------------------|:------------------------------------------| | 1 | CMSIS-NN | A collection of efficient neural network kernels optimized for Cortex-M processors | Embedded vision and AI applications | | 2 | ARM NN | An inference engine for CPUs, GPUs, and NPUs, enabling the translation of neural network frameworks | Accelerating machine learning model inference on ARM-based devices | ## IDEs and Development Environments | No | IDE/Development Environment | Description | Features | -|----|------------------------------|------------------------------------------------------------------------------------|----------------------------------------------------| +|:----|:------------------------------|:------------------------------------------------------------------------------------|:----------------------------------------------------| | 1 | PlatformIO | An open-source ecosystem for IoT 
development catering to various boards & platforms | Cross-platform build system, continuous testing, firmware updates | | 2 | Eclipse Embedded CDT | A plugin for Eclipse facilitating embedded systems development | Supports various compilers and debuggers, integrates with popular build tools | | 3 | Arduino IDE | Official development environment for Arduino supporting various boards & languages | User-friendly interface, large community support, extensive library collection | diff --git a/contents/training/training.qmd b/contents/training/training.qmd index c7bda8f0a..fa12ede6a 100644 --- a/contents/training/training.qmd +++ b/contents/training/training.qmd @@ -90,7 +90,7 @@ Why are the nonlinear operations necessary? If we only had linear layers, the en Convolutions are also linear operators and can be cast as a matrix multiplication. ::: -![Neural network diagram. Credit: astroML.](images/png/aitrainingnn.png){#fig-neural-net-diagram} +![Neural network diagram. Source: astroML.](images/png/aitrainingnn.png){#fig-neural-net-diagram} Where $A_{0}$ is a vector input to the neural network (i.e., an image that we want the neural network to classify or some other data that the neural network operates on), $A_{n}$ (where $n$ is the number of layers of the network) is the vector output of the neural network (i.e., a vector of size 10 in the case of classifying pictures of handwritten digits), $W_i$s are the weights of the neural network that are tweaked at training time to fit our data, and $F_{i}$ is that layer's nonlinear activation function (i.e., ReLU, softmax, etc.). As defined, the intermediate output of the neural network is a vector of real-valued numbers with dimensions: @@ -155,7 +155,7 @@ In practice, the gradient is computed over a minibatch of data points to improve Where $\lambda$ is the stepsize or learning rate of our tweaks, in training our neural network, we repeatedly perform the step above until convergence, or when the loss no longer decreases. @fig-gradient-descent illustrates this process: we want to reach the minimum point, which's done by following the gradient (as illustrated with the blue arrows in the figure). This prior approach is known as full gradient descent since we are computing the derivative concerning the entire training data and only then taking a single gradient step; a more efficient approach is to calculate the gradient concerning just a random batch of data points and then taking a step, a process known as batch gradient descent or stochastic gradient descent [@robbins1951stochastic], which is more efficient since now we are taking many more steps per pass of the entire training data. Next, we will cover the mathematics behind computing the gradient of the loss function concerning the $W_i$s, a process known as backpropagation. -![Gradient descent. Credit: Towards Data Science.](images/png/aitrainingsgd.png){#fig-gradient-descent} +![Gradient descent. Source: Towards Data Science.](images/png/aitrainingsgd.png){#fig-gradient-descent} ### Backpropagation @@ -280,7 +280,7 @@ In general, stochastic gradient descent using backpropagation can be performed o The structure depicted in @fig-computational-graph showcases a segment of a differentiable computational graph. In this graph, the input 'x' is processed through a series of operations: it is first multiplied by a weight matrix 'W' (MatMul), then added to a bias 'b' (Add), and finally passed to an activation function, Rectified Linear Unit (ReLU). This sequence of operations gives us the output C. 
The graph's differentiable nature means that each operation has a well-defined gradient. Automatic differentiation, as implemented in ML frameworks, leverages this property to efficiently compute the gradients of the loss with respect to each parameter in the network (e.g., 'W' and 'b'). -![Computational Graph. Credit: TensorFlow.](./images/png/graph.png){#fig-computational-graph height=40%} +![Computational Graph. Source: TensorFlow.](./images/png/graph.png){#fig-computational-graph height=40%} ## Training Data @@ -290,13 +290,18 @@ Maintaining clear splits between train, validation, and test sets with represent @tbl-training_splits compares the differences between training, validation, and test data splits: -| Data Split | Purpose | Typical Size | -|-|-|-| -| Training Set | Train the model parameters | 60-80% of total data | -| Validation Set | Evaluate model during training to tune hyperparameters and prevent overfitting | ∼20% of total data | -| Test Set | Provide unbiased evaluation of final trained model | ∼20% of total data | ++-----------------+------------------------------------------------------------+--------------------------+ +| Data Split | Purpose | Typical Size | ++:================+:===========================================================+:=========================+ +| Training Set | Train the model parameters | 60-80% of total data | ++-----------------+------------------------------------------------------------+--------------------------+ +| Validation Set | Evaluate model during training to tune hyperparameters and | ∼20% of total data | +| | prevent overfitting | | ++-----------------+------------------------------------------------------------+--------------------------+ +| Test Set | Provide unbiased evaluation of final trained model | ∼20% of total data | ++-----------------+------------------------------------------------------------+--------------------------+ -: Comparing training, validation, and test data splits. {#tbl-training_splits} +: Comparing training, validation, and test data splits. {#tbl-training_splits .striped .hover} ### Dataset Splits @@ -320,7 +325,7 @@ The relative proportions of the training, validation, and test sets can vary bas # Train/Dev/Test Sets -{{< video >}} +{{< video https://www.youtube.com/watch?v=1waHlpKiNyY >}} ::: @@ -340,11 +345,11 @@ Carefully factoring in the model complexity and problem difficulty when allocati Consider @fig-over-under-fitting where we try to classify/split datapoints into two categories (here, by color): On the left, overfitting is depicted by a model that has learned the nuances in the training data too well (either the dataset was too small or we ran the model for too long), causing it to follow the noise along with the signal, as indicated by the line's excessive curves. The right side shows underfitting, where the model's simplicity prevents it from capturing the dataset's underlying structure, resulting in a line that does not fit the data well. The center graph represents an ideal fit, where the model balances well between generalization and fitting, capturing the main trend of the data without being swayed by outliers. Although the model is not a perfect fit (it misses some points), we care more about its ability to recognize general patterns rather than idiosyncratic outliers. -![Data fitting: overfitting, right fit, and underfitting. Credit: MathWorks.](images/png/fits.png){#fig-over-under-fitting} +![Data fitting: overfitting, right fit, and underfitting. 
Source: MathWorks.](images/png/fits.png){#fig-over-under-fitting} @fig-fitting-time illustrates the process of fitting the data over time. When training, we search for the "sweet spot" between underfitting and overfitting. At first when the model hasn't had enough time to learn the patterns in the data, we find ourselves in the underfitting zone, indicated by high error rates on the validation set (remember that the model is trained on the training set and we test its generalizability on the validation set, or data it hasn't seen before). At some point, we achieve a global minimum for error rates, and ideally we want to stop the training there. If we continue training, the model will start "memorizing" or getting to know the data too well that the error rate starts going back up, since the model will fail to generalize to data it hasn't seen before. -![Fitting the data overtime. Credit: IBM.](images/png/aitrainingfit.png){#fig-fitting-time} +![Fitting the data overtime. Source: IBM.](images/png/aitrainingfit.png){#fig-fitting-time} @vid-bias provides an overview of bias and variance and the relationship between the two concepts and model accuracy. @@ -352,7 +357,7 @@ Consider @fig-over-under-fitting where we try to classify/split datapoints into # Bias/Variance -{{< video >}} +{{< video https://www.youtube.com/watch?v=SjQyLhQIXSM >}} ::: @@ -617,7 +622,7 @@ Get ready to unlock the secrets of hyperparameter tuning and take your PyTorch m # Hyperparameter -{{< video >}} +{{< video https://www.youtube.com/watch?v=AXDByU3D1hA&list=PLkDaE6sCZn6Hn0vK8co82zjQtt3T2Nkqc&index=24 >}} ::: @@ -687,7 +692,7 @@ The choice between L1 and L2 depends on the expected model complexity and whethe # Regularization -{{< video >}} +{{< video https://www.youtube.com/watch?v=6g0t3Phly2M&list=PLkDaE6sCZn6Hn0vK8co82zjQtt3T2Nkqc&index=4 >}} ::: @@ -697,7 +702,7 @@ The choice between L1 and L2 depends on the expected model complexity and whethe # Why Regularization Reduces Overfitting -{{< video >}} +{{< video https://www.youtube.com/watch?v=NyG-7nRpsW8&list=PLkDaE6sCZn6Hn0vK8co82zjQtt3T2Nkqc&index=5 >}} ::: @@ -735,7 +740,7 @@ The key hyperparameter is $p$, the fraction of nodes dropped, often set between # Dropout -{{< video >}} +{{< video https://www.youtube.com/watch?v=ARq74QuavAo&list=PLkDaE6sCZn6Hn0vK8co82zjQtt3T2Nkqc&index=7 >}} ::: @@ -763,7 +768,7 @@ Battling Overfitting: Unlock the Secrets of Regularization! Overfitting is like # Other Regularization Methods -{{< video >}} +{{< video https://www.youtube.com/watch?v=BOCLq2gpcGU&list=PLkDaE6sCZn6Hn0vK8co82zjQtt3T2Nkqc&index=8 >}} ::: @@ -819,7 +824,7 @@ Get your neural network off to a strong start with weight initialization! How yo # Weight Initialization -{{< video >}} +{{< video https://www.youtube.com/watch?v=s2coXdufOzE&list=PLkDaE6sCZn6Hn0vK8co82zjQtt3T2Nkqc&index=11 >}} ::: @@ -875,7 +880,7 @@ It leaves all positive inputs unchanged while clipping all negative values to 0. @fig-activation-functions demonstrates the 3 activation functions we discussed above -Tanh, ReLU, Sigmoid- in addition to the Linear case. -![Common activation functions. Credit: [AI Wiki.](https://machine-learning.paperspace.com/wiki/activation-function)](images/jpeg/activation-functions3.jpg){width=70% #fig-activation-functions} +![Common activation functions. 
Source: [AI Wiki.](https://machine-learning.paperspace.com/wiki/activation-function)](images/jpeg/activation-functions3.jpg){width=70% #fig-activation-functions} ### Softmax @@ -1060,19 +1065,29 @@ However, as the model parts run on physically separate devices, they must commun To summarize, `@tbl-parallelism` demonstrates some of the key characteristics for comparing data parallelism and model parallelism: -| Characteristic | Data Parallelism | Model Parallelism | -|----------------------|----------------------------------------------------|--------------------------------------| -| Definition | Distribute data across devices with model replicas | Distribute model across devices | -| Objective | Accelerate training through compute scaling | Enable larger model training | -| Scaling Method | Scale devices/workers | Scale model size | -| Main Constraint | Model size per device | Device coordination overhead | -| Hardware Requirements| Multiple GPU/TPUs | Often specialized interconnect | -| Primary Challenge | Parameter synchronization | Complex partitioning + communication | -| Types | N/A | Layer-wise, filter-wise, spatial | -| Code Complexity | Minimal changes | More significant model surgery | -| Popular Libraries | Horovod, PyTorch Distributed | Mesh TensorFlow | - -: Comparing data parallelism and model parallelism. {#tbl-parallelism} ++------------------------+--------------------------------------------------+----------------------------------------+ +| Characteristic | Data Parallelism | Model Parallelism | ++========================+==================================================+========================================+ +| Definition | Distribute data across devices with model replicas | Distribute model across devices | ++------------------------+--------------------------------------------------+----------------------------------------+ +| Objective | Accelerate training through compute scaling | Enable larger model training | ++------------------------+--------------------------------------------------+----------------------------------------+ +| Scaling Method | Scale devices/workers | Scale model size | ++------------------------+--------------------------------------------------+----------------------------------------+ +| Main Constraint | Model size per device | Device coordination overhead | ++------------------------+--------------------------------------------------+----------------------------------------+ +| Hardware Requirements | Multiple GPU/TPUs | Often specialized interconnect | ++------------------------+--------------------------------------------------+----------------------------------------+ +| Primary Challenge | Parameter synchronization | Complex partitioning + communication | ++------------------------+--------------------------------------------------+----------------------------------------+ +| Types | N/A | Layer-wise, filter-wise, spatial | ++------------------------+--------------------------------------------------+----------------------------------------+ +| Code Complexity | Minimal changes | More significant model surgery | ++------------------------+--------------------------------------------------+----------------------------------------+ +| Popular Libraries | Horovod, PyTorch Distributed | Mesh TensorFlow | ++------------------------+--------------------------------------------------+----------------------------------------+ + +: Comparing data parallelism and model parallelism. 
{#tbl-parallelism .striped .hover}
## Conclusion
@@ -1167,3 +1182,5 @@ In addition to exercises, we offer a series of hands-on labs allowing students t
* _Coming soon._
:::
+
+
diff --git a/contents/workflow/workflow.qmd b/contents/workflow/workflow.qmd
index 5ecd12de6..fd3892815 100644
--- a/contents/workflow/workflow.qmd
+++ b/contents/workflow/workflow.qmd
@@ -39,7 +39,7 @@ The ML workflow is a structured approach that guides professionals and researche
Developing a successful machine learning model requires a systematic workflow. This end-to-end process enables you to build, deploy, and maintain models effectively. As shown in @fig-ml-life-cycle, it typically involves the following key steps:
1. **Problem Definition:** Start by clearly articulating the specific problem you want to solve. This focuses your efforts during data collection and model building.
-2. **Data Collection to Preparation:** Gather relevant, high-quality training data that captures all aspects of the problem. Clean and preprocess the data to prepare it for modeling.
+2. **Data Collection and Preparation:** Gather relevant, high-quality training data that captures all aspects of the problem. Clean and preprocess the data to prepare it for modeling.
3. **Model Selection and Training:** Choose a machine learning algorithm suited to your problem type and data. Consider the pros and cons of different approaches. Feed the prepared data into the model to train it. Training time varies based on data size and model complexity.
4. **Model Evaluation:** Test the trained model on new unseen data to measure its predictive accuracy. Identify any limitations.
5. **Model Deployment:** Integrate the validated model into applications or systems to start operationalization.
@@ -80,29 +80,55 @@ The ML workflow is a universal guide applicable across various platforms, includ
## Roles & Responsibilities
-Creating an ML solution, especially for embedded AI, is a multidisciplinary effort involving various specialists.
+Creating an ML solution, especially for embedded AI, is a multidisciplinary effort involving various specialists. Unlike traditional software development, building an ML solution demands close collaboration across disciplines because of the experimental nature of model development and the resource-intensive requirements of training and deploying these models.
+
+There is a pronounced need for data-focused roles to ensure the success of machine learning pipelines. Data scientists and data engineers handle data collection, build data pipelines, and ensure data quality. Since the nature of machine learning models depends on the data they consume, the models are unique and vary with different applications, necessitating extensive experimentation. Machine learning researchers and engineers drive this experimental phase through continuous testing, validation, and iteration to achieve optimal performance.
+
+The deployment phase often requires specialized hardware and infrastructure, as machine learning models can be resource-intensive, demanding high computational power and efficient resource management. This necessitates collaboration with hardware engineers to ensure that the infrastructure can support the computational demands of model training and inference.
+
+As models make decisions that can impact individuals and society, ethical and legal aspects of machine learning are becoming increasingly important. Ethicists and legal advisors are needed to ensure compliance with ethical standards and legal regulations.
+ +@tbl-mlops_roles shows a rundown of the typical roles involved. While the lines between these roles can sometimes blur, the table below provides a general overview. + ++----------------------------------------+----------------------------------------------------------------------------------------------------+ +| Role | Responsibilities | ++:=======================================+:===================================================================================================+ +| Project Manager | Oversees the project, ensuring timelines and milestones are met. | ++----------------------------------------+----------------------------------------------------------------------------------------------------+ +| Domain Experts | Offer domain-specific insights to define project requirements. | ++----------------------------------------+----------------------------------------------------------------------------------------------------+ +| Data Scientists | Specialize in data analysis and model development. | ++----------------------------------------+----------------------------------------------------------------------------------------------------+ +| Machine Learning Engineers | Focus on model development and deployment. | ++----------------------------------------+----------------------------------------------------------------------------------------------------+ +| Data Engineers | Manage data pipelines. | ++----------------------------------------+----------------------------------------------------------------------------------------------------+ +| Embedded Systems Engineers | Integrate ML models into embedded systems. | ++----------------------------------------+----------------------------------------------------------------------------------------------------+ +| Software Developers | Develop software components for AI system integration. | ++----------------------------------------+----------------------------------------------------------------------------------------------------+ +| Hardware Engineers | Design and optimize hardware for the embedded AI system. | ++----------------------------------------+----------------------------------------------------------------------------------------------------+ +| UI/UX Designers | Focus on user-centric design. | ++----------------------------------------+----------------------------------------------------------------------------------------------------+ +| QA Engineers | Ensure the system meets quality standards. | ++----------------------------------------+----------------------------------------------------------------------------------------------------+ +| Ethicists and Legal Advisors | Consult on ethical and legal compliance. | ++----------------------------------------+----------------------------------------------------------------------------------------------------+ +| Operations and Maintenance Personnel | Monitor and maintain the deployed system. | ++----------------------------------------+----------------------------------------------------------------------------------------------------+ +| Security Specialists | Ensure system security. | ++----------------------------------------+----------------------------------------------------------------------------------------------------+ + +: Roles and responsibilities of people involved in MLOps. {#tbl-mlops_roles .striped .hover} -@tbl-mlops_roles shows a rundown of the typical roles involved: +Understanding these roles is crucial for completing an ML project. 
As we proceed through the upcoming chapters, we'll delve into each role's essence and expertise, fostering a comprehensive understanding of the complexities involved in embedded AI projects. This holistic view facilitates seamless collaboration and nurtures an environment ripe for innovation and breakthroughs. -| Role | Responsibilities | -|--------------------------------|----------------------------------------------------------------------------------------------------| -| Project Manager | Oversees the project, ensuring timelines and milestones are met. | -| Domain Experts | Offer domain-specific insights to define project requirements. | -| Data Scientists | Specialize in data analysis and model development. | -| Machine Learning Engineers | Focus on model development and deployment. | -| Data Engineers | Manage data pipelines. | -| Embedded Systems Engineers | Integrate ML models into embedded systems. | -| Software Developers | Develop software components for AI system integration. | -| Hardware Engineers | Design and optimize hardware for the embedded AI system. | -| UI/UX Designers | Focus on user-centric design. | -| QA Engineers | Ensure the system meets quality standards. | -| Ethicists and Legal Advisors | Consult on ethical and legal compliance. | -| Operations and Maintenance Personnel | Monitor and maintain the deployed system. | -| Security Specialists | Ensure system security. | +## Conclusion -: Roles and responsibilities of people involved in MLOps. {#tbl-mlops_roles} +This chapter has laid the foundation for understanding the machine learning workflow, a structured approach crucial for the development, deployment, and maintenance of ML models. By exploring the distinct stages of the ML lifecycle, we have gained insights into the unique challenges faced by traditional ML and embedded AI workflows, particularly in terms of resource optimization, real-time processing, data management, and hardware-software integration. These distinctions underscore the importance of tailoring workflows to meet the specific demands of the application environment. -Understanding these roles is crucial for completing an ML project. As we proceed through the upcoming chapters, we'll delve into each role's essence and expertise, fostering a comprehensive understanding of the complexities involved in embedded AI projects. This holistic view facilitates seamless collaboration and nurtures an environment ripe for innovation and breakthroughs. +The chapter emphasized the significance of multidisciplinary collaboration in ML projects. Understanding the diverse roles provides a comprehensive view of the teamwork necessary to navigate the experimental and resource-intensive nature of ML development. As we move forward to more detailed discussions in the subsequent chapters, this high-level overview equips us with a holistic perspective on the ML workflow and the various roles involved. ## Resources {#sec-ai-workflow-resource} diff --git a/contribute.md b/contribute.md index e378dcc35..f85b5f4c2 100644 --- a/contribute.md +++ b/contribute.md @@ -29,7 +29,7 @@ Fork the repository on GitHub and clone your fork to your local machine. 
We are git clone https://github.com/YOUR_USERNAME/cs249r_book.git ``` -### Naviage to the repository +### Navigate to the repository ```bash cd cs249r_book diff --git a/scripts/welcome/confetti.js b/scripts/confettipages/confetti.js similarity index 73% rename from scripts/welcome/confetti.js rename to scripts/confettipages/confetti.js index de3177bcc..37ee58ff9 100644 --- a/scripts/welcome/confetti.js +++ b/scripts/confettipages/confetti.js @@ -39,7 +39,7 @@ document.addEventListener("DOMContentLoaded", function() { // Trigger the confetti script const script = document.createElement('script'); script.src = "https://run.confettipage.com/here.js"; - script.setAttribute('data-confetticode', 'U2FsdGVkX18PTb8Vsl0OaWt5DlYvi1EpmZ1g03dN0/He6MHxX9/qmVN93W7pVyq4VCVNYVctaZocBF/2m59tpKmgNiqPF8sS/3l8DqM91vBrhexmAFNHStQnYLxc4bNbBhbY537N2MVg6oWhKkm4OrYXHf0sE3fYXuuO7lp8folqy0rnmXBJKEHYZSJdusaOs7AmxgFOJqkqSViJHqpvkzE5dc2dGkrqr4OAF7lz8OaAk9brMe6f3irUFRuiyaziBkJn+m1nz0LO6Wqq9QC/lxd9bJPKD9k8Ra1sM9uYnFpVcuyQgHb04IK7HBnDIW4FkZc30x9zmUBgtiY0KQU6myHvDXJld+5r0RwqCKUV9DB79Bulw6UjtbKEwhNGC16AtNEiEl1QQjMI7ml9stj9/IOcnSgBYxtO+6UPan9BVKswi0ZZeNo7lrzjDU9o2IoMMlagMfi7XvbVLcAXJ+lkAyOnRsgCxkJSmPYff9FuBLLht5HEQ33Nlj4DKBx9YSoH28l4BEkJkoQ8rWSJlEZTet7Of0WI7bZ+WKUsvUHnvBt3pO9reNSxRP1SEFIKjHSYL7jYjFwZm21WhMsZ0wNmPCaXsHw3r6XtTL4w8KFbkiTEt+U/naTUFsumcPdKSolk8gmGDSD2aQ73askV71vrcvIHKyxs8UWOYaxGznj1VO9i97T/784QAll6d/9+ymHZrGTH1UbSz3KRU9uGoBqtFw=='); + script.setAttribute('data-confetticode', 'U2FsdGVkX1/UZXxxbIcynlCbY0mzzQusg5WULngfD5adgMzEc15y5e1S74UPrrRdk+BQEOXVkrEOaVkGoXEK22dgc2CsHK+KagYyrEv4CE+sbJGfBa6ompBnctYY56wo30NujuwPT7TzKCtV4F4uueEsN8UTuH5STPnHbG9ASOjPyyImEjSYG89SEwGoUw14YnvLuE3QXWpAVRlhh7qUtjiUAidd2bSlTpQmd1OfyFm8pLrO6183KxmLexAxcNFJKh0QkFrxG/LEAA+4vIIZrL4gATrNkcUrbdeL2VDsQGAOPFXJVzA50fZUtiawk+FbUD9kIhvTEOx9XzwwIq5WRlnbltT0uz4WZHSqZ2cdvHFLzpV+HqkQm76LPDLCgyA4Pbo2EbDTlXpFIx8BuwTx7H2idFrDO9zLgPAlrj0g+h4NznQV+B55vEGQiryEudyihPRzraPAT5vKpYbr7k62jV5msRO2O1pG+2HmH7e9z5v3+74cBluP1qrn52OZXdVQlmYuXRG2/kpMwDClhHxIGHzi3AWn1zH+sAJ+ICtXuZu02L+hTKkdpr/OHu4jMoz3F5vpOusFXLihx6byza+BukY2MDbTjFNICgZSm6JTbTisQWB1oEcevwOewKjoWybeQiPmEtKkXmQlBp3liYCKwKKKsyt4JnnoMZrczo0fTB8='); document.body.appendChild(script); // Fade out the welcome message after a few seconds diff --git a/scripts/quarto_publish/publish.sh b/scripts/quarto_publish/publish.sh index 98146f15d..62e09b645 100755 --- a/scripts/quarto_publish/publish.sh +++ b/scripts/quarto_publish/publish.sh @@ -3,7 +3,7 @@ # Check if the current git branch is main current_branch=$(git branch --show-current) if [ "$current_branch" != "main" ]; then - echo "You are not on the main branch. Please switch to the main branch to proceed." + echo "You are not on the main branch. Please switch to the main branch to proceed. You should have merged dev into main by now." 
exit 1 fi diff --git a/style.scss b/style.scss index 499aaa3a3..e41803b21 100644 --- a/style.scss +++ b/style.scss @@ -58,6 +58,18 @@ code a:any-link { /*-- scss:defaults --*/ +.figure { + margin: auto; + text-align: left; +} + +/* Aligns the caption in margin to the left */ +.csl-entry { + display: block; + margin-left: 0; + text-align: left; +} + /* Mixin for callout styling */ @mixin base-callout { margin-top: 1em; diff --git a/tex/after-body-includes.tex b/tex/after-body-includes.tex new file mode 100644 index 000000000..355a74f57 --- /dev/null +++ b/tex/after-body-includes.tex @@ -0,0 +1 @@ +\clearpage diff --git a/tex/before-body-includes.tex b/tex/before-body-includes.tex new file mode 100644 index 000000000..e1f4af100 --- /dev/null +++ b/tex/before-body-includes.tex @@ -0,0 +1,16 @@ +% Disable the default title page +\renewcommand{\maketitle}{ + \newgeometry{top=0.5in,bottom=1in,inner=1in,outer=1in} % Geometry for title page + \begin{titlepage} + \begin{center} + \thispagestyle{empty} + \includegraphics[trim=0 -10 0 0, clip, width=\textwidth]{cover-image-white.png} % Adjust the size and path to your image + {{\Huge\bfseries Machine Learning Systems}\\[1em] \par} + \vspace*{\fill} + {\large Written, edited and curated by \\[.1cm] Prof. Vijay Janapa Reddi \\[.2cm] Harvard University \\[1em] \normalsize {\itshape With special thanks to the community for their contributions and support.} \\[1em] \vfill \scriptsize Last Modified: \today\par \vfill} + \end{center} + \end{titlepage} + \restoregeometry % Restore original geometry +} + +\let\endtitlepage\relax diff --git a/tex/copyright.tex b/tex/copyright.tex new file mode 100644 index 000000000..4845cb6a5 --- /dev/null +++ b/tex/copyright.tex @@ -0,0 +1,11 @@ +\null\vfill +\begin{flushleft} +\thispagestyle{empty} +\textit{Here's the name of the book for the copyright page} + +© Anyname, Inc. + +ISBN-1234567891234 + +\noindent All rights reserved. No part of this publication may be produced or transmitted in any form or by any means, electronic or mechanical, including photocopying recording or any information storage and retrieval system, without the prior written permission of the publisher. For permissions contact +\end{flushleft} diff --git a/tex/cover_page.tex b/tex/cover_page.tex new file mode 100644 index 000000000..df5c0e2d4 --- /dev/null +++ b/tex/cover_page.tex @@ -0,0 +1,9 @@ +\begin{titlepage} + \begin{center} + \thispagestyle{empty} + \includegraphics[trim=0 -10 0 0, clip, width=\textwidth]{cover-image-white.png} % Adjust the size and path to your image + {{\Huge\bfseries Machine Learning Systems}\\[1em] \par} + \vspace*{\fill} + {\large Written, edited and curated by \\[.1cm] Prof. 
Vijay Janapa Reddi \\[.2cm] Harvard University \\[1em] \normalsize {\itshape With special thanks to the community for their contributions and support.} \\[1em] \vfill \scriptsize Last Modified: \today\par \vfill} + \end{center} +\end{titlepage} diff --git a/tex/dedication.tex b/tex/dedication.tex new file mode 100644 index 000000000..b5970ab03 --- /dev/null +++ b/tex/dedication.tex @@ -0,0 +1,6 @@ +\begin{center} + \thispagestyle{empty} + \vspace*{\fill} + \Huge{\textit{To those who came before}} + \vspace*{\fill} +\end{center} \ No newline at end of file diff --git a/tex/header-includes.tex b/tex/header-includes.tex new file mode 100644 index 000000000..c2fcdcb9c --- /dev/null +++ b/tex/header-includes.tex @@ -0,0 +1,78 @@ +% Package imports +\definecolor{crimson}{RGB}{165, 28, 48} +\usepackage[english]{babel} +\usepackage[format=plain, labelfont={bf,it}, textfont=it, labelsep=space]{caption} +\usepackage{afterpage} +\usepackage{atbegshi} % Package to insert content at the beginning +\usepackage{babel} +\usepackage{etoolbox} +\usepackage{fancyhdr} +\usepackage{fontspec} +\usepackage{graphicx} +\usepackage{marginfix} % Fixes the issue of margin notes being cut off +\usepackage{marginnote} +\usepackage{mathptmx} +\usepackage{newpxtext} % Palatino-like font +\usepackage{ragged2e} +\usepackage{xcolor} + +\babelprovide[import]{czech} + +% Redefine \marginnote to always include \RaggedRight +\let\oldmarginnote\marginnote +\renewcommand{\marginnote}[2][\null]{% + \oldmarginnote[#1]{\RaggedRight #2}% +} + +\patchcmd{\chapter}{\thispagestyle{plain}}{\thispagestyle{fancy}}{}{} + +% Page style settings +\pagestyle{fancy} +\fancyhf{} +\fancyhead[LE]{\color{crimson}\nouppercase{\rightmark}} +\fancyhead[RO]{\color{crimson}\thepage} +\fancyhead[LO]{\color{crimson}\MakeUppercase{\leftmark}} +\fancyhead[RE]{\color{crimson}\thepage} +\renewcommand{\headrulewidth}{0.4pt} +\renewcommand{\footrulewidth}{0pt} +\fancypagestyle{plain}{ + \fancyhf{} + \fancyhead[LE,RO]{\color{crimson}\thepage} + \renewcommand{\headrulewidth}{0.4pt} + \renewcommand{\footrulewidth}{0pt} +} + +% KOMA-Script adjustments +\addtokomafont{disposition}{\rmfamily\color{crimson}} +\addtokomafont{chapter}{\color{crimson}} +\addtokomafont{section}{\color{crimson}} +\addtokomafont{subsection}{\color{crimson}} + +\newenvironment{abstract}{ + \chapter*{\abstractname} + \addcontentsline{toc}{chapter}{\abstractname} + \small +}{ + \clearpage +} + +\usepackage{hyperref} +\hypersetup{ + linkcolor=crimson, + citecolor=crimson, + urlcolor=crimson, + pdfpagelayout=TwoPageRight, % This sets the layout to two-page mode with the first page alone + pdfstartview=Fit % This sets the initial zoom to fit the page +} + +\usepackage{geometry} +\geometry{ + paperwidth=7.5in, + paperheight=9.25in, + top=1in, + bottom=1in, + inner=1in, + outer=2.25in, + marginparwidth=1.5in, + twoside +} \ No newline at end of file