diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml
index 8c78bf0..198ba9a 100644
--- a/.github/workflows/docs.yaml
+++ b/.github/workflows/docs.yaml
@@ -27,7 +27,7 @@ jobs:
           git config user.email 'github-actions[bot]@users.noreply.github.com'
 
       - name: Install requirements
-        run: pip install -r docs/requirements.txt
+        run: pip install '.[docs]'
 
       - name: Publish docs
 
diff --git a/docs/api.md b/docs/api.md
new file mode 100644
index 0000000..c304571
--- /dev/null
+++ b/docs/api.md
@@ -0,0 +1,3 @@
+# API Reference
+
+::: opennotebookllm.preprocessing.data_cleaners
diff --git a/docs/requirements.txt b/docs/requirements.txt
deleted file mode 100644
index c584e19..0000000
--- a/docs/requirements.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-mkdocs
-mkdocs-material
-mkdocstrings[python]
diff --git a/mkdocs.yml b/mkdocs.yml
index 3855709..cbf0a8d 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -2,8 +2,16 @@ site_name: Blueprint Docs
 
 nav:
   - Home: index.md
+  - API Reference: api.md
 
 theme:
   name: material
   palette:
     primary: deep orange
+
+plugins:
+- mkdocstrings:
+    handlers:
+      python:
+        options:
+          show_root_heading: true
diff --git a/src/opennotebookllm/preprocessing/data_cleaners.py b/src/opennotebookllm/preprocessing/data_cleaners.py
index bc4c640..2b35b1f 100644
--- a/src/opennotebookllm/preprocessing/data_cleaners.py
+++ b/src/opennotebookllm/preprocessing/data_cleaners.py
@@ -3,7 +3,25 @@
 
 
 def clean_with_regex(text: str) -> str:
-    text = re.sub(r"\s+", " ", text).strip()
+    """
+    Clean text using regular expressions.
+
+    This function removes:
+        - URLs
+        - emails
+        - special characters
+        - extra spaces
+
+    Examples:
+        >>> clean_with_regex("\\xa0Hello,   world! http://example.com")
+        'Hello, world!'
+
+    Args:
+        text (str): The text to clean.
+
+    Returns:
+        str: The cleaned text.
+    """
     text = re.sub(
         r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+",
         "",
@@ -11,10 +29,31 @@ def clean_with_regex(text: str) -> str:
     )
     text = re.sub(r"[\w\.-]+@[\w\.-]+\.[\w]+", "", text)
     text = re.sub(r'[^a-zA-Z0-9\s.,!?;:"\']', "", text)
+    text = re.sub(r"\s+", " ", text).strip()
     return text
 
 
 def clean_html(text: str) -> str:
+    """Clean HTML text.
+
+    This function removes:
+        - scripts
+        - styles
+        - `<link>` elements
+        - meta tags
+
+    In addition, it calls [clean_with_regex][opennotebookllm.preprocessing.data_cleaners.clean_with_regex].
+
+    Examples:
+        >>> clean_html("<html><body><p>Hello,  world!  </p></body></html>")
+        'Hello, world!'
+
+    Args:
+        text (str): The HTML text to clean.
+
+    Returns:
+        str: The cleaned text.
+    """
     soup = BeautifulSoup(text, "html.parser")
     for tag in soup(["script", "style", "link", "meta"]):
         tag.decompose()
@@ -22,9 +61,24 @@ def clean_html(text: str) -> str:
     return clean_with_regex(text)
 
 
-def clean_markdown_image(text: str) -> str:
-    return re.sub(r'!\[.*?\]\(.*?(".*?")?\)', "", text)
+def clean_markdown(text: str) -> str:
+    """Clean Markdown text.
 
+    This function removes:
+        - markdown images
 
-def clean_markdown(text: str) -> str:
-    return clean_with_regex(clean_markdown_image(text))
+    In addition, it calls [clean_with_regex][opennotebookllm.preprocessing.data_cleaners.clean_with_regex].
+
+    Examples:
+        >>> clean_markdown('# Title   with image ![alt text](image.jpg "Image Title")')
+        'Title with image'
+
+    Args:
+        text (str): The Markdown text to clean.
+
+    Returns:
+        str: The cleaned text.
+    """
+    text = re.sub(r'!\[.*?\]\(.*?(".*?")?\)', "", text)
+
+    return clean_with_regex(text)