diff --git a/06_numpy_intro.html b/06_numpy_intro.html index f437def..48700ba 100644 --- a/06_numpy_intro.html +++ b/06_numpy_intro.html @@ -208,7 +208,8 @@ @@ -635,7 +636,9 @@

Images are Numerical Data
Requirement already satisfied: scikit-image in /home/javi/anaconda3/lib/python3.11/site-packages (0.20.0)
 Requirement already satisfied: matplotlib in /home/javi/anaconda3/lib/python3.11/site-packages (3.7.1)
-Requirement already satisfied: numpy>=1.21.1 in /home/javi/anaconda3/lib/python3.11/site-packages (from scikit-image) (1.24.3)
+
+
+
Requirement already satisfied: numpy>=1.21.1 in /home/javi/anaconda3/lib/python3.11/site-packages (from scikit-image) (1.24.3)
 Requirement already satisfied: scipy>=1.8 in /home/javi/anaconda3/lib/python3.11/site-packages (from scikit-image) (1.13.1)
 Requirement already satisfied: networkx>=2.8 in /home/javi/anaconda3/lib/python3.11/site-packages (from scikit-image) (3.3)
 Requirement already satisfied: pillow>=9.0.1 in /home/javi/anaconda3/lib/python3.11/site-packages (from scikit-image) (10.4.0)
diff --git a/_images/0162c451ac672a26e86c52dc52cbd8b38ba1e099701a7ed4238cf5767774227c.png b/_images/0162c451ac672a26e86c52dc52cbd8b38ba1e099701a7ed4238cf5767774227c.png
new file mode 100644
index 0000000..5fe7643
Binary files /dev/null and b/_images/0162c451ac672a26e86c52dc52cbd8b38ba1e099701a7ed4238cf5767774227c.png differ
diff --git a/_images/084e4e6c786597cff0902b18e0861cdb36eac101d9b17ecc0591a8188fe3806d.png b/_images/084e4e6c786597cff0902b18e0861cdb36eac101d9b17ecc0591a8188fe3806d.png
new file mode 100644
index 0000000..103a7e9
Binary files /dev/null and b/_images/084e4e6c786597cff0902b18e0861cdb36eac101d9b17ecc0591a8188fe3806d.png differ
diff --git a/_images/29b2dacbb39c2db764d7806fe649dce41fa1c0a6b7b0766e1f0582dbb76de2a6.png b/_images/29b2dacbb39c2db764d7806fe649dce41fa1c0a6b7b0766e1f0582dbb76de2a6.png
new file mode 100644
index 0000000..dccb909
Binary files /dev/null and b/_images/29b2dacbb39c2db764d7806fe649dce41fa1c0a6b7b0766e1f0582dbb76de2a6.png differ
diff --git a/_images/4cb7adb9c489409e62f6c2166c58da5781b3e6a3e6e77af46197cdd51fe5dd78.png b/_images/4cb7adb9c489409e62f6c2166c58da5781b3e6a3e6e77af46197cdd51fe5dd78.png
new file mode 100644
index 0000000..40c41f2
Binary files /dev/null and b/_images/4cb7adb9c489409e62f6c2166c58da5781b3e6a3e6e77af46197cdd51fe5dd78.png differ
diff --git a/_images/5a22befb2ceba1706703f65ee950fdd6f7a7780b43a1282435e31e5853e01b06.png b/_images/5a22befb2ceba1706703f65ee950fdd6f7a7780b43a1282435e31e5853e01b06.png
new file mode 100644
index 0000000..ed2bcda
Binary files /dev/null and b/_images/5a22befb2ceba1706703f65ee950fdd6f7a7780b43a1282435e31e5853e01b06.png differ
diff --git a/_images/ee18945665141d52651967b932290a1bba5c7b09d936747832abce741a11b86d.png b/_images/ee18945665141d52651967b932290a1bba5c7b09d936747832abce741a11b86d.png
new file mode 100644
index 0000000..136d8c9
Binary files /dev/null and b/_images/ee18945665141d52651967b932290a1bba5c7b09d936747832abce741a11b86d.png differ
diff --git a/_sources/chapters/module-4/042-numpyII.ipynb b/_sources/chapters/module-4/042-numpyII.ipynb
index c21a064..f570f74 100644
--- a/_sources/chapters/module-4/042-numpyII.ipynb
+++ b/_sources/chapters/module-4/042-numpyII.ipynb
@@ -2085,7 +2085,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 74,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
diff --git a/_sources/chapters/module-4/044-PandasII-Exploration_and_Manipulation.ipynb b/_sources/chapters/module-4/044-PandasII-Exploration_and_Manipulation-Copy1.ipynb
similarity index 66%
rename from _sources/chapters/module-4/044-PandasII-Exploration_and_Manipulation.ipynb
rename to _sources/chapters/module-4/044-PandasII-Exploration_and_Manipulation-Copy1.ipynb
index 924c5ea..6fd0f62 100644
--- a/_sources/chapters/module-4/044-PandasII-Exploration_and_Manipulation.ipynb
+++ b/_sources/chapters/module-4/044-PandasII-Exploration_and_Manipulation-Copy1.ipynb
@@ -6,10 +6,22 @@
    "id": "13aa848b",
    "metadata": {},
    "source": [
-    "# PandasII: Exploration and Manipulation\n",
+    "# PandasII: Exploration\n",
     "\n",
     "What you will learn:\n",
-    "- Introduce pandas dataframes and the essential operations"
+    "\n",
+    "- Use Pandas for data inspection and exploration\n",
+    "- Selection, indexing and slicing in Pandas\n",
+    "- Sorting and ranking\n",
+    "- Data manipulation"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "19ffa6d2-9759-47d2-b48d-64239d168bd9",
+   "metadata": {},
+   "source": [
+    "![](https://ds1002-resources.s3.amazonaws.com/images/workflow.png)"
    ]
   },
   {
@@ -30,7 +42,7 @@
     "tags": []
    },
    "source": [
-    "Let's load a bigger data set to explore more functionality."
+    "For this lesson we will work with the following dataset:"
    ]
   },
   {
@@ -193,12 +205,12 @@
    "id": "897df551",
    "metadata": {},
    "source": [
-    "Check the data type of `iris`:"
+    "Check that we have a dataframe:"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 6,
    "id": "cc587038",
    "metadata": {},
    "outputs": [
@@ -208,7 +220,7 @@
        "pandas.core.frame.DataFrame"
       ]
      },
-     "execution_count": 3,
+     "execution_count": 6,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -243,7 +255,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 7,
    "id": "6e79ac0b",
    "metadata": {},
    "outputs": [
@@ -329,7 +341,7 @@
        "4           5.0          3.6           1.4          0.2  setosa"
       ]
      },
-     "execution_count": 4,
+     "execution_count": 7,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -340,7 +352,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 8,
    "id": "1a3341d5",
    "metadata": {},
    "outputs": [
@@ -471,7 +483,7 @@
        "9           4.9          3.1           1.5          0.1  setosa"
       ]
      },
-     "execution_count": 5,
+     "execution_count": 8,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -491,7 +503,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 9,
    "id": "096cbcf6",
    "metadata": {},
    "outputs": [
@@ -577,7 +589,7 @@
        "149           5.9          3.0           5.1          1.8  virginica"
       ]
      },
-     "execution_count": 6,
+     "execution_count": 9,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -588,7 +600,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 10,
    "id": "cd014af8",
    "metadata": {},
    "outputs": [
@@ -719,7 +731,7 @@
        "149           5.9          3.0           5.1          1.8  virginica"
       ]
      },
-     "execution_count": 7,
+     "execution_count": 10,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -739,7 +751,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 11,
    "id": "c12d0f01",
    "metadata": {},
    "outputs": [
@@ -754,7 +766,7 @@
        "dtype: object"
       ]
      },
-     "execution_count": 8,
+     "execution_count": 11,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -768,12 +780,12 @@
    "id": "36df3ba6",
    "metadata": {},
    "source": [
-    "- `shape`: As with NumPy, the shape of the dataframe (rows, columns)."
+    "- `shape`: As with NumPy, the shape of the dataframe (number of rows, number of columns)."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 12,
    "id": "15b4e581",
    "metadata": {},
    "outputs": [
@@ -783,7 +795,7 @@
        "(150, 5)"
       ]
      },
-     "execution_count": 9,
+     "execution_count": 12,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -802,7 +814,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 13,
    "id": "be7bf6fa",
    "metadata": {},
    "outputs": [
@@ -812,7 +824,7 @@
        "150"
       ]
      },
-     "execution_count": 10,
+     "execution_count": 13,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -831,7 +843,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 14,
    "id": "89e82424",
    "metadata": {},
    "outputs": [
@@ -843,7 +855,7 @@
        "      dtype='object')"
       ]
      },
-     "execution_count": 11,
+     "execution_count": 14,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -857,12 +869,12 @@
    "id": "861715f7",
    "metadata": {},
    "source": [
-    "- `info()`: "
+    "- `info()`: prints information about the dataframe including the index dtype and columns, non-null values and memory usage."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 15,
    "id": "8d306a64",
    "metadata": {},
    "outputs": [
@@ -909,7 +921,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 16,
    "id": "1ddb0055",
    "metadata": {},
    "outputs": [
@@ -1013,7 +1025,7 @@
        "max        7.900000     4.400000      6.900000     2.500000"
       ]
      },
-     "execution_count": 19,
+     "execution_count": 16,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1024,7 +1036,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 17,
    "id": "30896c3d",
    "metadata": {},
    "outputs": [
@@ -1116,7 +1128,7 @@
        "petal_width   150.0  1.199333  0.762238  0.1  0.3  1.30  1.8  2.5"
       ]
      },
-     "execution_count": 20,
+     "execution_count": 17,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1135,7 +1147,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": 18,
    "id": "294d2ade",
    "metadata": {},
    "outputs": [
@@ -1192,7 +1204,7 @@
        "freq        50"
       ]
      },
-     "execution_count": 22,
+     "execution_count": 18,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1203,7 +1215,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": 19,
    "id": "03ed92bf",
    "metadata": {},
    "outputs": [
@@ -1221,7 +1233,7 @@
        "Name: sepal_length, dtype: float64"
       ]
      },
-     "execution_count": 24,
+     "execution_count": 19,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1240,7 +1252,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 26,
+   "execution_count": 20,
    "id": "8979b360",
    "metadata": {},
    "outputs": [
@@ -1253,7 +1265,7 @@
        "Name: species, dtype: int64"
       ]
      },
-     "execution_count": 26,
+     "execution_count": 20,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1272,7 +1284,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 27,
+   "execution_count": 21,
    "id": "1345fc52",
    "metadata": {},
    "outputs": [
@@ -1285,7 +1297,7 @@
        "Name: species, dtype: float64"
       ]
      },
-     "execution_count": 27,
+     "execution_count": 21,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1538,153 +1550,100 @@
    "id": "450bf331",
    "metadata": {},
    "source": [
-    "## Selection and Indexing"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "9ec3e70b-915e-4616-a680-e9e1c34cd736",
-   "metadata": {},
-   "source": [
-    "### By Index"
+    "## Working with columns"
    ]
   },
   {
    "cell_type": "markdown",
-   "id": "cc454247",
+   "id": "c02d8056-c7cd-401b-9644-2bcdf74ce1dd",
    "metadata": {},
    "source": [
-    "We use `iloc[]` to extract rows using **indexes**. \n",
-    "\n"
+    "### Selection\n",
+    "\n",
+    "- **Bracket notation**: the column name must be given as a string"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 58,
-   "id": "98a9ae6e",
+   "execution_count": 23,
+   "id": "28365a2f-af89-40f4-8db9-d57456586599",
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "sepal_length       4.7\n",
-       "sepal_width        3.2\n",
-       "petal_length       1.3\n",
-       "petal_width        0.2\n",
-       "species         setosa\n",
-       "Name: 2, dtype: object"
+       "(0      5.1\n",
+       " 1      4.9\n",
+       " 2      4.7\n",
+       " 3      4.6\n",
+       " 4      5.0\n",
+       "       ... \n",
+       " 145    6.7\n",
+       " 146    6.3\n",
+       " 147    6.5\n",
+       " 148    6.2\n",
+       " 149    5.9\n",
+       " Name: sepal_length, Length: 150, dtype: float64,\n",
+       " pandas.core.series.Series)"
       ]
      },
-     "execution_count": 58,
+     "execution_count": 23,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "# This fetches row 3, and all columns:\n",
-    "\n",
-    "iris_df.iloc[2]"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "a82a9f45",
-   "metadata": {},
-   "source": [
-    "fetch rows with indices 1,2 (the right endpoint is exclusive), and all columns."
+    "iris_df['sepal_length'], type(iris_df['sepal_length'])"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 59,
-   "id": "c5c45d06",
+   "execution_count": 26,
+   "id": "1497be19-6561-4adf-bb35-ae71bf3f419f",
    "metadata": {},
    "outputs": [
     {
      "data": {
-      "text/html": [
-       "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sepal_lengthsepal_widthpetal_lengthpetal_widthspecies
obs_id
14.93.01.40.2setosa
24.73.21.30.2setosa
\n", - "
" - ], "text/plain": [ - " sepal_length sepal_width petal_length petal_width species\n", - "obs_id \n", - "1 4.9 3.0 1.4 0.2 setosa\n", - "2 4.7 3.2 1.3 0.2 setosa" + "( sepal_length\n", + " 0 5.1\n", + " 1 4.9\n", + " 2 4.7\n", + " 3 4.6\n", + " 4 5.0\n", + " .. ...\n", + " 145 6.7\n", + " 146 6.3\n", + " 147 6.5\n", + " 148 6.2\n", + " 149 5.9\n", + " \n", + " [150 rows x 1 columns],\n", + " pandas.core.frame.DataFrame)" ] }, - "execution_count": 59, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "iris_df.iloc[1:3]" + "# double bracket gives you the selected column as new dataframe\n", + "iris_df[['sepal_length']], type(iris_df[['sepal_length']])" ] }, { "cell_type": "markdown", - "id": "3bc78532", + "id": "a003b66b-5881-4423-9f2a-3f64943c10d7", "metadata": {}, "source": [ - "fetch rows with indices 1,2 and first three columns (positions 0, 1, 2)" + "This notation allows you to select more than one column" ] }, { "cell_type": "code", - "execution_count": 60, - "id": "408ba901", + "execution_count": 28, + "id": "b250fd93-8c83-4389-bff1-bf87e89ea368", "metadata": {}, "outputs": [ { @@ -1709,109 +1668,160 @@ " \n", " \n", " sepal_length\n", - " sepal_width\n", " petal_length\n", " \n", - " \n", - " obs_id\n", - " \n", - " \n", - " \n", - " \n", " \n", " \n", " \n", + " 0\n", + " 5.1\n", + " 1.4\n", + " \n", + " \n", " 1\n", " 4.9\n", - " 3.0\n", " 1.4\n", " \n", " \n", " 2\n", " 4.7\n", - " 3.2\n", " 1.3\n", " \n", + " \n", + " 3\n", + " 4.6\n", + " 1.5\n", + " \n", + " \n", + " 4\n", + " 5.0\n", + " 1.4\n", + " \n", + " \n", + " ...\n", + " ...\n", + " ...\n", + " \n", + " \n", + " 145\n", + " 6.7\n", + " 5.2\n", + " \n", + " \n", + " 146\n", + " 6.3\n", + " 5.0\n", + " \n", + " \n", + " 147\n", + " 6.5\n", + " 5.2\n", + " \n", + " \n", + " 148\n", + " 6.2\n", + " 5.4\n", + " \n", + " \n", + " 149\n", + " 5.9\n", + " 5.1\n", + " \n", " \n", "\n", + "

150 rows × 2 columns

\n", "
" ], "text/plain": [ - " sepal_length sepal_width petal_length\n", - "obs_id \n", - "1 4.9 3.0 1.4\n", - "2 4.7 3.2 1.3" + " sepal_length petal_length\n", + "0 5.1 1.4\n", + "1 4.9 1.4\n", + "2 4.7 1.3\n", + "3 4.6 1.5\n", + "4 5.0 1.4\n", + ".. ... ...\n", + "145 6.7 5.2\n", + "146 6.3 5.0\n", + "147 6.5 5.2\n", + "148 6.2 5.4\n", + "149 5.9 5.1\n", + "\n", + "[150 rows x 2 columns]" ] }, - "execution_count": 60, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "iris_df.iloc[1:3, 0:3]" + "iris_df[['sepal_length', 'petal_length']]" ] }, { "cell_type": "markdown", - "id": "46975617", + "id": "62e149f9-6641-4a0b-a98f-fc59529b0d07", "metadata": {}, "source": [ - "You can apply slices to column names too. You don't need `.iloc[]` here." + "- **Dot notation**: Here columns are object attributes" ] }, { "cell_type": "code", - "execution_count": 62, - "id": "5056b057", + "execution_count": 24, + "id": "0ab28157-28dd-4a10-8714-c83cab905d8c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "Index(['sepal_length', 'sepal_width', 'petal_length'], dtype='object')" + "(0 5.1\n", + " 1 4.9\n", + " 2 4.7\n", + " 3 4.6\n", + " 4 5.0\n", + " ... \n", + " 145 6.7\n", + " 146 6.3\n", + " 147 6.5\n", + " 148 6.2\n", + " 149 5.9\n", + " Name: sepal_length, Length: 150, dtype: float64,\n", + " pandas.core.series.Series)" ] }, - "execution_count": 62, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "iris_df.columns[0:3]" - ] - }, - { - "cell_type": "markdown", - "id": "ccfdea8c-d7ff-48c6-89a4-a75bd16f88c8", - "metadata": {}, - "source": [ - "### By label" + "iris_df.sepal_length, type(iris_df.sepal_length)" ] }, { "cell_type": "markdown", - "id": "f9be9788", + "id": "85754a99-11c2-467f-a35a-9c4183a86e33", "metadata": {}, "source": [ - "We can select by row and column labels using `.loc[]`. 
" + "Dot notation is very convenient, since as object attributes they can be tab-completed in various editing environments.\n", + "\n", + "But: \n", + "- It only works if the column names are not reserved words\n", + "- It can't be used when created a new column (see below)\n", + "- It allows you to select just one column" ] }, { "cell_type": "markdown", - "id": "d84fe9ab", + "id": "2e35302c-f0ed-487e-b876-f58fc83645d0", "metadata": {}, - "source": [ - "Here we ask for rows with labels (indexes) 1-3, and it gives exactly that \n", - "`.iloc[]` returned rows with indices 1,2.\n", - "\n", - "**Author note: This is by far the more useful of the two in my experience.**" - ] + "source": [] }, { "cell_type": "code", - "execution_count": 63, - "id": "cd1a1a13", + "execution_count": 38, + "id": "45a650cb", "metadata": {}, "outputs": [ { @@ -1840,24 +1850,30 @@ " petal_length\n", " petal_width\n", " species\n", - " \n", - " \n", - " obs_id\n", - " \n", - " \n", - " \n", - " \n", - " \n", + " sepal_volume\n", + " sepal_volume_2\n", " \n", " \n", " \n", " \n", + " 0\n", + " 5.1\n", + " 3.5\n", + " 1.4\n", + " 0.2\n", + " setosa\n", + " 17.85\n", + " 17.85\n", + " \n", + " \n", " 1\n", " 4.9\n", " 3.0\n", " 1.4\n", " 0.2\n", " setosa\n", + " 14.70\n", + " 14.70\n", " \n", " \n", " 2\n", @@ -1866,6 +1882,8 @@ " 1.3\n", " 0.2\n", " setosa\n", + " 15.04\n", + " 15.04\n", " \n", " \n", " 3\n", @@ -1874,40 +1892,74 @@ " 1.5\n", " 0.2\n", " setosa\n", + " 14.26\n", + " 14.26\n", + " \n", + " \n", + " 4\n", + " 5.0\n", + " 3.6\n", + " 1.4\n", + " 0.2\n", + " setosa\n", + " 18.00\n", + " 18.00\n", " \n", " \n", "\n", "
" ], "text/plain": [ - " sepal_length sepal_width petal_length petal_width species\n", - "obs_id \n", - "1 4.9 3.0 1.4 0.2 setosa\n", - "2 4.7 3.2 1.3 0.2 setosa\n", - "3 4.6 3.1 1.5 0.2 setosa" + " sepal_length sepal_width petal_length petal_width species sepal_volume \\\n", + "0 5.1 3.5 1.4 0.2 setosa 17.85 \n", + "1 4.9 3.0 1.4 0.2 setosa 14.70 \n", + "2 4.7 3.2 1.3 0.2 setosa 15.04 \n", + "3 4.6 3.1 1.5 0.2 setosa 14.26 \n", + "4 5.0 3.6 1.4 0.2 setosa 18.00 \n", + "\n", + " sepal_volume_2 \n", + "0 17.85 \n", + "1 14.70 \n", + "2 15.04 \n", + "3 14.26 \n", + "4 18.00 " ] }, - "execution_count": 63, + "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "iris_df.loc[1:3]" + "iris_df['sepal_volume'] = iris_df.sepal_length * iris_df.sepal_width\n", + "\n", + "iris_df.head()" ] }, { "cell_type": "markdown", - "id": "5ccd9d19", + "id": "14811bd8-b951-4194-a9b5-99d176016e53", "metadata": {}, "source": [ - "Subset on columns with column name (as a string) or list of strings" - ] + "Note that:\n", + "\n", + "- The left side has form: DataFrame name, bracket notation, new column name\n", + "- The assignment operator `=` is used\n", + "- The right side contains an expression; here, two df columns are multiplied together " + ] + }, + { + "cell_type": "markdown", + "id": "9fcf4b80-0dfc-479a-93f6-78287920837a", + "metadata": {}, + "source": [ + "Bracket notation also works on the fields, but it's more typing:" + ] }, { "cell_type": "code", - "execution_count": 65, - "id": "332696cc", + "execution_count": 39, + "id": "65346e2d", "metadata": {}, "outputs": [ { @@ -1932,63 +1984,176 @@ " \n", " \n", " sepal_length\n", + " sepal_width\n", + " petal_length\n", " petal_width\n", - " \n", - " \n", - " obs_id\n", - " \n", - " \n", + " species\n", + " sepal_volume\n", + " sepal_volume_2\n", " \n", " \n", " \n", " \n", + " 0\n", + " 5.1\n", + " 3.5\n", + " 1.4\n", + " 0.2\n", + " setosa\n", + " 17.85\n", + " 17.85\n", + " \n", + " \n", " 1\n", " 
4.9\n", + " 3.0\n", + " 1.4\n", " 0.2\n", + " setosa\n", + " 14.70\n", + " 14.70\n", " \n", " \n", " 2\n", " 4.7\n", + " 3.2\n", + " 1.3\n", " 0.2\n", + " setosa\n", + " 15.04\n", + " 15.04\n", " \n", " \n", " 3\n", " 4.6\n", + " 3.1\n", + " 1.5\n", + " 0.2\n", + " setosa\n", + " 14.26\n", + " 14.26\n", + " \n", + " \n", + " 4\n", + " 5.0\n", + " 3.6\n", + " 1.4\n", " 0.2\n", + " setosa\n", + " 18.00\n", + " 18.00\n", " \n", " \n", "\n", "" ], "text/plain": [ - " sepal_length petal_width\n", - "obs_id \n", - "1 4.9 0.2\n", - "2 4.7 0.2\n", - "3 4.6 0.2" + " sepal_length sepal_width petal_length petal_width species sepal_volume \\\n", + "0 5.1 3.5 1.4 0.2 setosa 17.85 \n", + "1 4.9 3.0 1.4 0.2 setosa 14.70 \n", + "2 4.7 3.2 1.3 0.2 setosa 15.04 \n", + "3 4.6 3.1 1.5 0.2 setosa 14.26 \n", + "4 5.0 3.6 1.4 0.2 setosa 18.00 \n", + "\n", + " sepal_volume_2 \n", + "0 17.85 \n", + "1 14.70 \n", + "2 15.04 \n", + "3 14.26 \n", + "4 18.00 " ] }, - "execution_count": 65, + "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "iris_df.loc[1:3, ['sepal_length','petal_width']]" + "iris_df['sepal_volume_2'] = iris_df['sepal_length'] * iris_df['sepal_width']\n", + "\n", + "iris_df.head()" ] }, { "cell_type": "markdown", - "id": "10439dcc", + "id": "3f73a91c-3096-4612-9730-5a9873db4e3f", "metadata": {}, "source": [ - "Select all rows, specific columns" + "The bracket notation must be used when assigning to a new column. 
This will break:" ] }, { "cell_type": "code", - "execution_count": 66, - "id": "e322dddf", + "execution_count": 43, + "id": "86dd3bca", + "metadata": {}, + "outputs": [ + { + "ename": "SyntaxError", + "evalue": "invalid syntax (1290401302.py, line 1)", + "output_type": "error", + "traceback": [ + "\u001b[0;36m Cell \u001b[0;32mIn[43], line 1\u001b[0;36m\u001b[0m\n\u001b[0;31m iris_df.'sepal_volume_3' = iris_df.sepal_length + iris_df.sepal_width\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n" + ] + } + ], + "source": [ + "iris_df.'sepal_volume_3' = iris_df.sepal_length + iris_df.sepal_width" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "afc15011-94ec-4cd5-8092-7735a5dfa178", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_26349/2179810851.py:1: UserWarning: Pandas doesn't allow columns to be created via a new attribute name - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute-access\n", + " iris_df.sepal_volume_3 = iris_df.sepal_length + iris_df.sepal_width\n" + ] + } + ], + "source": [ + "iris_df.sepal_volume_3 = iris_df.sepal_length + iris_df.sepal_width" + ] + }, + { + "cell_type": "markdown", + "id": "6add9fc1-c50b-43ab-8219-15dfc66384db", + "metadata": { + "tags": [] + }, + "source": [ + "### Removing" + ] + }, + { + "cell_type": "markdown", + "id": "2c6f53fa-9004-4918-92a2-8b2acefd1ead", + "metadata": { + "tags": [] + }, + "source": [ + "- Using the reserverd keyword `del` to drop a DataFrame or single columns from the dataframe:" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "5bef1897", + "metadata": {}, + "outputs": [], + "source": [ + "iris_df_drop = iris_df.copy()" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": "7a234a8c", "metadata": {}, "outputs": [ { @@ -2013,153 +2178,260 @@ " \n", " \n", " sepal_length\n", + " sepal_width\n", + " 
petal_length\n", " petal_width\n", + " species\n", + " sepal_volume\n", + " sepal_volume_2\n", " \n", + " \n", + " \n", " \n", - " obs_id\n", - " \n", + " 0\n", + " 5.1\n", + " 3.5\n", + " 1.4\n", + " 0.2\n", + " setosa\n", + " 17.85\n", + " 17.85\n", + " \n", + " \n", + " 1\n", + " 4.9\n", + " 3.0\n", + " 1.4\n", + " 0.2\n", + " setosa\n", + " 14.70\n", + " 14.70\n", + " \n", + " \n", + "\n", + "" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width species sepal_volume \\\n", + "0 5.1 3.5 1.4 0.2 setosa 17.85 \n", + "1 4.9 3.0 1.4 0.2 setosa 14.70 \n", + "\n", + " sepal_volume_2 \n", + "0 17.85 \n", + "1 14.70 " + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "iris_df_drop.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "id": "f996dd87", + "metadata": {}, + "outputs": [], + "source": [ + "# delete the column 'x'\n", + "del iris_df_drop['sepal_volume_2']" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "id": "2f8a2a9c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", + " \n", + " \n", " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", + " \n", + " \n", " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", + " \n", + " \n", " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", + " \n", + " \n", " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", + " \n", + " \n", " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", + " \n", + " \n", " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", + " \n", + " \n", " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", + " \n", + " \n", " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", + " \n", + " \n", " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", + " \n", + " \n", " \n", + " \n", + " \n", " \n", " \n", "
sepal_lengthsepal_widthpetal_lengthpetal_widthspeciessepal_volume
05.13.51.40.2setosa17.85
14.93.01.40.2setosa14.70
24.73.21.30.2setosa15.04
34.63.11.50.2setosa14.26
45.03.61.40.2setosa18.00
.....................
1456.73.05.22.3virginica20.10
1466.32.55.01.9virginica15.75
1476.53.05.22.0virginica19.50
1486.23.45.42.3virginica21.08
1495.93.05.11.8virginica17.70
\n", - "

150 rows × 2 columns

\n", + "

150 rows × 6 columns

\n", "
" ], "text/plain": [ - " sepal_length petal_width\n", - "obs_id \n", - "0 5.1 0.2\n", - "1 4.9 0.2\n", - "2 4.7 0.2\n", - "3 4.6 0.2\n", - "4 5.0 0.2\n", - "... ... ...\n", - "145 6.7 2.3\n", - "146 6.3 1.9\n", - "147 6.5 2.0\n", - "148 6.2 2.3\n", - "149 5.9 1.8\n", + " sepal_length sepal_width petal_length petal_width species \\\n", + "0 5.1 3.5 1.4 0.2 setosa \n", + "1 4.9 3.0 1.4 0.2 setosa \n", + "2 4.7 3.2 1.3 0.2 setosa \n", + "3 4.6 3.1 1.5 0.2 setosa \n", + "4 5.0 3.6 1.4 0.2 setosa \n", + ".. ... ... ... ... ... \n", + "145 6.7 3.0 5.2 2.3 virginica \n", + "146 6.3 2.5 5.0 1.9 virginica \n", + "147 6.5 3.0 5.2 2.0 virginica \n", + "148 6.2 3.4 5.4 2.3 virginica \n", + "149 5.9 3.0 5.1 1.8 virginica \n", "\n", - "[150 rows x 2 columns]" + " sepal_volume \n", + "0 17.85 \n", + "1 14.70 \n", + "2 15.04 \n", + "3 14.26 \n", + "4 18.00 \n", + ".. ... \n", + "145 20.10 \n", + "146 15.75 \n", + "147 19.50 \n", + "148 21.08 \n", + "149 17.70 \n", + "\n", + "[150 rows x 6 columns]" ] }, - "execution_count": 66, + "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "iris_df.loc[:, ['sepal_length','petal_width']]" + "iris_df_drop" ] }, { "cell_type": "markdown", - "id": "485059f0", + "id": "d0af9775-b5c5-4c86-a3af-f7226a20f67f", "metadata": {}, "source": [ - "### Boolean Filtering\n", - "\n", - "It's very common to subset a dataframe based on some condition on the data.\n", - "\n", - "🔑 Note that even though we are filtering rows, we are not using `.loc[]` or `.iloc[]` here.\n", - "\n", - "Pandas knows what to do if you pass a boolean structure." + "- Using the method `drop()` to drop one or more columns by specifying `axis` argument equal 1: " ] }, { "cell_type": "code", - "execution_count": 73, - "id": "ef3d5652", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "obs_id\n", - "0 False\n", - "1 False\n", - "2 False\n", - "3 False\n", - "4 False\n", - " ... 
\n", - "145 False\n", - "146 False\n", - "147 False\n", - "148 False\n", - "149 False\n", - "Name: sepal_length, Length: 150, dtype: bool" - ] - }, - "execution_count": 73, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "iris_df.sepal_length >= 7.5" - ] - }, - { - "cell_type": "code", - "execution_count": 76, - "id": "059d604e", + "execution_count": 50, + "id": "13358267", "metadata": {}, "outputs": [ { @@ -2187,94 +2459,131 @@ " sepal_width\n", " petal_length\n", " petal_width\n", - " species\n", - " \n", - " \n", - " obs_id\n", - " \n", - " \n", - " \n", - " \n", - " \n", " \n", " \n", " \n", " \n", - " 105\n", - " 7.6\n", + " 0\n", + " 5.1\n", + " 3.5\n", + " 1.4\n", + " 0.2\n", + " \n", + " \n", + " 1\n", + " 4.9\n", " 3.0\n", - " 6.6\n", - " 2.1\n", - " virginica\n", + " 1.4\n", + " 0.2\n", " \n", " \n", - " 117\n", - " 7.7\n", - " 3.8\n", - " 6.7\n", - " 2.2\n", - " virginica\n", + " 2\n", + " 4.7\n", + " 3.2\n", + " 1.3\n", + " 0.2\n", " \n", " \n", - " 118\n", - " 7.7\n", - " 2.6\n", - " 6.9\n", - " 2.3\n", - " virginica\n", + " 3\n", + " 4.6\n", + " 3.1\n", + " 1.5\n", + " 0.2\n", " \n", " \n", - " 122\n", - " 7.7\n", - " 2.8\n", + " 4\n", + " 5.0\n", + " 3.6\n", + " 1.4\n", + " 0.2\n", + " \n", + " \n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " \n", + " \n", + " 145\n", " 6.7\n", - " 2.0\n", - " virginica\n", + " 3.0\n", + " 5.2\n", + " 2.3\n", " \n", " \n", - " 131\n", - " 7.9\n", - " 3.8\n", - " 6.4\n", - " 2.0\n", - " virginica\n", + " 146\n", + " 6.3\n", + " 2.5\n", + " 5.0\n", + " 1.9\n", " \n", " \n", - " 135\n", - " 7.7\n", + " 147\n", + " 6.5\n", " 3.0\n", - " 6.1\n", + " 5.2\n", + " 2.0\n", + " \n", + " \n", + " 148\n", + " 6.2\n", + " 3.4\n", + " 5.4\n", " 2.3\n", - " virginica\n", + " \n", + " \n", + " 149\n", + " 5.9\n", + " 3.0\n", + " 5.1\n", + " 1.8\n", " \n", " \n", "\n", + "

150 rows × 4 columns

\n", "" ], "text/plain": [ - " sepal_length sepal_width petal_length petal_width species\n", - "obs_id \n", - "105 7.6 3.0 6.6 2.1 virginica\n", - "117 7.7 3.8 6.7 2.2 virginica\n", - "118 7.7 2.6 6.9 2.3 virginica\n", - "122 7.7 2.8 6.7 2.0 virginica\n", - "131 7.9 3.8 6.4 2.0 virginica\n", - "135 7.7 3.0 6.1 2.3 virginica" + " sepal_length sepal_width petal_length petal_width\n", + "0 5.1 3.5 1.4 0.2\n", + "1 4.9 3.0 1.4 0.2\n", + "2 4.7 3.2 1.3 0.2\n", + "3 4.6 3.1 1.5 0.2\n", + "4 5.0 3.6 1.4 0.2\n", + ".. ... ... ... ...\n", + "145 6.7 3.0 5.2 2.3\n", + "146 6.3 2.5 5.0 1.9\n", + "147 6.5 3.0 5.2 2.0\n", + "148 6.2 3.4 5.4 2.3\n", + "149 5.9 3.0 5.1 1.8\n", + "\n", + "[150 rows x 4 columns]" ] }, - "execution_count": 76, + "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "iris_df.loc[iris_df.sepal_length >= 7.5,:]" + "# Here we drop columns\n", + "iris_df_drop = iris_df_drop.drop(['sepal_volume', 'species'], axis=1)\n", + "iris_df_drop" + ] + }, + { + "cell_type": "markdown", + "id": "b7a618f1-5c11-49dc-855f-98df36f39267", + "metadata": {}, + "source": [ + "Note that with this `drop()` method you can also drop specific observations by setting `axis`=0" ] }, { "cell_type": "code", - "execution_count": 78, - "id": "b922f38e", + "execution_count": 51, + "id": "cf8015ca-b92e-40b5-9058-109c0fcff6d9", "metadata": {}, "outputs": [ { @@ -2302,25 +2611,22 @@ " sepal_width\n", " petal_length\n", " petal_width\n", - " species\n", - " \n", - " \n", - " obs_id\n", - " \n", - " \n", - " \n", - " \n", - " \n", " \n", " \n", " \n", " \n", + " 1\n", + " 4.9\n", + " 3.0\n", + " 1.4\n", + " 0.2\n", + " \n", + " \n", " 2\n", " 4.7\n", " 3.2\n", " 1.3\n", " 0.2\n", - " setosa\n", " \n", " \n", " 3\n", @@ -2328,242 +2634,278 @@ " 3.1\n", " 1.5\n", " 0.2\n", - " setosa\n", " \n", " \n", - " 6\n", - " 4.6\n", - " 3.4\n", + " 4\n", + " 5.0\n", + " 3.6\n", " 1.4\n", - " 0.3\n", - " setosa\n", + " 0.2\n", " \n", " \n", - " 22\n", - " 4.6\n", - " 
3.6\n", - " 1.0\n", - " 0.2\n", - " setosa\n", + " 5\n", + " 5.4\n", + " 3.9\n", + " 1.7\n", + " 0.4\n", " \n", " \n", - " 29\n", - " 4.7\n", - " 3.2\n", - " 1.6\n", - " 0.2\n", - " setosa\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", " \n", " \n", - " 41\n", - " 4.5\n", + " 145\n", + " 6.7\n", + " 3.0\n", + " 5.2\n", " 2.3\n", - " 1.3\n", - " 0.3\n", - " setosa\n", " \n", " \n", - " 47\n", - " 4.6\n", - " 3.2\n", - " 1.4\n", - " 0.2\n", - " setosa\n", + " 146\n", + " 6.3\n", + " 2.5\n", + " 5.0\n", + " 1.9\n", + " \n", + " \n", + " 147\n", + " 6.5\n", + " 3.0\n", + " 5.2\n", + " 2.0\n", + " \n", + " \n", + " 148\n", + " 6.2\n", + " 3.4\n", + " 5.4\n", + " 2.3\n", + " \n", + " \n", + " 149\n", + " 5.9\n", + " 3.0\n", + " 5.1\n", + " 1.8\n", " \n", " \n", "\n", + "

149 rows × 4 columns

\n", "" ], "text/plain": [ - " sepal_length sepal_width petal_length petal_width species\n", - "obs_id \n", - "2 4.7 3.2 1.3 0.2 setosa\n", - "3 4.6 3.1 1.5 0.2 setosa\n", - "6 4.6 3.4 1.4 0.3 setosa\n", - "22 4.6 3.6 1.0 0.2 setosa\n", - "29 4.7 3.2 1.6 0.2 setosa\n", - "41 4.5 2.3 1.3 0.3 setosa\n", - "47 4.6 3.2 1.4 0.2 setosa" + " sepal_length sepal_width petal_length petal_width\n", + "1 4.9 3.0 1.4 0.2\n", + "2 4.7 3.2 1.3 0.2\n", + "3 4.6 3.1 1.5 0.2\n", + "4 5.0 3.6 1.4 0.2\n", + "5 5.4 3.9 1.7 0.4\n", + ".. ... ... ... ...\n", + "145 6.7 3.0 5.2 2.3\n", + "146 6.3 2.5 5.0 1.9\n", + "147 6.5 3.0 5.2 2.0\n", + "148 6.2 3.4 5.4 2.3\n", + "149 5.9 3.0 5.1 1.8\n", + "\n", + "[149 rows x 4 columns]" ] }, - "execution_count": 78, + "execution_count": 51, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "iris_df.loc[(iris_df['sepal_length' ]>= 4.5) & (iris_df['sepal_length'] <= 4.7),:]" + "# Now a particular observation\n", + "iris_df_drop = iris_df_drop.drop([0], axis=0)\n", + "iris_df_drop" ] }, { "cell_type": "markdown", - "id": "3a57da01", + "id": "921f8ed0-44b2-450c-a379-4e22372b1279", "metadata": {}, "source": [ - "## Masking" + "## Working with the dataframe as a whole" ] }, { "cell_type": "markdown", - "id": "94fe5317", + "id": "9ec3e70b-915e-4616-a680-e9e1c34cd736", "metadata": {}, "source": [ - "Here's an example of **masking** using boolean conditions passed to the dataframe selector:" + "### `iloc[]`: Selection by index" ] }, { "cell_type": "markdown", - "id": "7583a8af", + "id": "cc454247", "metadata": {}, "source": [ - "Here are the **values** for the feature `sepal length`:" + "We can use `iloc[]` to extract rows and columns using **indexes**. " ] }, { "cell_type": "code", - "execution_count": 81, - "id": "db8c53c0", + "execution_count": 52, + "id": "98a9ae6e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "array([5.1, 4.9, 4.7, 4.6, 5. , 5.4, 4.6, 5. 
, 4.4, 4.9, 5.4, 4.8, 4.8,\n", - " 4.3, 5.8, 5.7, 5.4, 5.1, 5.7, 5.1, 5.4, 5.1, 4.6, 5.1, 4.8, 5. ,\n", - " 5. , 5.2, 5.2, 4.7, 4.8, 5.4, 5.2, 5.5, 4.9, 5. , 5.5, 4.9, 4.4,\n", - " 5.1, 5. , 4.5, 4.4, 5. , 5.1, 4.8, 5.1, 4.6, 5.3, 5. , 7. , 6.4,\n", - " 6.9, 5.5, 6.5, 5.7, 6.3, 4.9, 6.6, 5.2, 5. , 5.9, 6. , 6.1, 5.6,\n", - " 6.7, 5.6, 5.8, 6.2, 5.6, 5.9, 6.1, 6.3, 6.1, 6.4, 6.6, 6.8, 6.7,\n", - " 6. , 5.7, 5.5, 5.5, 5.8, 6. , 5.4, 6. , 6.7, 6.3, 5.6, 5.5, 5.5,\n", - " 6.1, 5.8, 5. , 5.6, 5.7, 5.7, 6.2, 5.1, 5.7, 6.3, 5.8, 7.1, 6.3,\n", - " 6.5, 7.6, 4.9, 7.3, 6.7, 7.2, 6.5, 6.4, 6.8, 5.7, 5.8, 6.4, 6.5,\n", - " 7.7, 7.7, 6. , 6.9, 5.6, 7.7, 6.3, 6.7, 7.2, 6.2, 6.1, 6.4, 7.2,\n", - " 7.4, 7.9, 6.4, 6.3, 6.1, 7.7, 6.3, 6.4, 6. , 6.9, 6.7, 6.9, 5.8,\n", - " 6.8, 6.7, 6.7, 6.3, 6.5, 6.2, 5.9])" + "sepal_length 4.7\n", + "sepal_width 3.2\n", + "petal_length 1.3\n", + "petal_width 0.2\n", + "species setosa\n", + "sepal_volume 15.04\n", + "sepal_volume_2 15.04\n", + "Name: 2, dtype: object" ] }, - "execution_count": 81, + "execution_count": 52, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "iris_df.sepal_length.values" - ] - }, - { - "cell_type": "markdown", - "id": "0f17cc32", - "metadata": {}, - "source": [ - "And here are **the boolean values** generated by applying a comparison operator to those values:" + "# This fetches row 3, and all columns:\n", + "iris_df.iloc[2]" ] }, { "cell_type": "code", - "execution_count": 82, - "id": "70b43e0e", - "metadata": {}, - "outputs": [], - "source": [ - "mask = iris_df.sepal_length >= 7.5" - ] - }, - { - "cell_type": "code", - "execution_count": 83, - "id": "3c50ab61", + "execution_count": 53, + "id": "014d872a-dfbd-403f-a5a6-a22db9482075", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "array([False, False, False, False, False, False, False, False, False,\n", - " False, False, False, False, False, False, False, False, False,\n", - " False, False, False, False, False, False, False, False, 
False,\n", - " False, False, False, False, False, False, False, False, False,\n", - " False, False, False, False, False, False, False, False, False,\n", - " False, False, False, False, False, False, False, False, False,\n", - " False, False, False, False, False, False, False, False, False,\n", - " False, False, False, False, False, False, False, False, False,\n", - " False, False, False, False, False, False, False, False, False,\n", - " False, False, False, False, False, False, False, False, False,\n", - " False, False, False, False, False, False, False, False, False,\n", - " False, False, False, False, False, False, True, False, False,\n", - " False, False, False, False, False, False, False, False, False,\n", - " True, True, False, False, False, True, False, False, False,\n", - " False, False, False, False, False, True, False, False, False,\n", - " True, False, False, False, False, False, False, False, False,\n", - " False, False, False, False, False, False])" + "sepal_length 4.7\n", + "sepal_width 3.2\n", + "petal_length 1.3\n", + "petal_width 0.2\n", + "species setosa\n", + "sepal_volume 15.04\n", + "sepal_volume_2 15.04\n", + "Name: 2, dtype: object" ] }, - "execution_count": 83, + "execution_count": 53, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "mask.values" + "# Similar to\n", + "iris_df.iloc[2, :]" ] }, { "cell_type": "markdown", - "id": "8f6bb3e6", + "id": "a82a9f45", "metadata": {}, "source": [ - "The two sets of values have the same shape.\n", - "\n", - "We can now overlay the logical values over the numeric ones and keep only what is `True`:" + "fetch rows with indices 1,2 (the right endpoint is exclusive), and all columns." ] }, { "cell_type": "code", - "execution_count": 84, - "id": "123042dd", + "execution_count": 55, + "id": "c5c45d06", "metadata": {}, "outputs": [ { "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_widthspeciessepal_volumesepal_volume_2
14.93.01.40.2setosa14.7014.70
24.73.21.30.2setosa15.0415.04
\n", + "
" + ], "text/plain": [ - "array([7.6, 7.7, 7.7, 7.7, 7.9, 7.7])" + " sepal_length sepal_width petal_length petal_width species sepal_volume \\\n", + "1 4.9 3.0 1.4 0.2 setosa 14.70 \n", + "2 4.7 3.2 1.3 0.2 setosa 15.04 \n", + "\n", + " sepal_volume_2 \n", + "1 14.70 \n", + "2 15.04 " ] }, - "execution_count": 84, + "execution_count": 55, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "iris_df.sepal_length[mask].values" - ] - }, - { - "cell_type": "markdown", - "id": "3eaf05e6-c9f9-4472-ad1a-25f1ff859049", - "metadata": {}, - "source": [ - "## Sorting and Ranking" + "iris_df.iloc[1:3]" ] }, { "cell_type": "markdown", - "id": "f4c0a086-13be-447a-9da4-13f1d4b72fdd", + "id": "3bc78532", "metadata": {}, "source": [ - "**`.sort_values()`**\n", - "\n", - "Sort by values\n", - "- `by` parameter takes string or list of strings\n", - "- `ascending` takes True or False\n", - "- `inplace` will save sorted values into the df\n", - "\n", - "[Details](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.sort_values.html)" + "fetch rows with indices 1,2 and first three columns (positions 0, 1, 2)" ] }, { "cell_type": "code", - "execution_count": 33, - "id": "db23981b-4838-46cc-af21-fbe1fb354e3d", + "execution_count": 56, + "id": "408ba901", "metadata": {}, "outputs": [ { @@ -2590,144 +2932,100 @@ " sepal_length\n", " sepal_width\n", " petal_length\n", - " petal_width\n", - " species\n", " \n", " \n", " \n", " \n", - " 13\n", - " 4.3\n", + " 1\n", + " 4.9\n", " 3.0\n", - " 1.1\n", - " 0.1\n", - " setosa\n", - " \n", - " \n", - " 8\n", - " 4.4\n", - " 2.9\n", " 1.4\n", - " 0.2\n", - " setosa\n", - " \n", - " \n", - " 38\n", - " 4.4\n", - " 3.0\n", - " 1.3\n", - " 0.2\n", - " setosa\n", " \n", " \n", - " 42\n", - " 4.4\n", + " 2\n", + " 4.7\n", " 3.2\n", " 1.3\n", - " 0.2\n", - " setosa\n", - " \n", - " \n", - " 41\n", - " 4.5\n", - " 2.3\n", - " 1.3\n", - " 0.3\n", - " setosa\n", - " \n", - " \n", - " ...\n", - " ...\n", - " ...\n", - " 
...\n", - " ...\n", - " ...\n", - " \n", - " \n", - " 122\n", - " 7.7\n", - " 2.8\n", - " 6.7\n", - " 2.0\n", - " virginica\n", - " \n", - " \n", - " 117\n", - " 7.7\n", - " 3.8\n", - " 6.7\n", - " 2.2\n", - " virginica\n", - " \n", - " \n", - " 118\n", - " 7.7\n", - " 2.6\n", - " 6.9\n", - " 2.3\n", - " virginica\n", - " \n", - " \n", - " 135\n", - " 7.7\n", - " 3.0\n", - " 6.1\n", - " 2.3\n", - " virginica\n", - " \n", - " \n", - " 131\n", - " 7.9\n", - " 3.8\n", - " 6.4\n", - " 2.0\n", - " virginica\n", " \n", " \n", "\n", - "

150 rows × 5 columns

\n", "" ], "text/plain": [ - " sepal_length sepal_width petal_length petal_width species\n", - "13 4.3 3.0 1.1 0.1 setosa\n", - "8 4.4 2.9 1.4 0.2 setosa\n", - "38 4.4 3.0 1.3 0.2 setosa\n", - "42 4.4 3.2 1.3 0.2 setosa\n", - "41 4.5 2.3 1.3 0.3 setosa\n", - ".. ... ... ... ... ...\n", - "122 7.7 2.8 6.7 2.0 virginica\n", - "117 7.7 3.8 6.7 2.2 virginica\n", - "118 7.7 2.6 6.9 2.3 virginica\n", - "135 7.7 3.0 6.1 2.3 virginica\n", - "131 7.9 3.8 6.4 2.0 virginica\n", - "\n", - "[150 rows x 5 columns]" + " sepal_length sepal_width petal_length\n", + "1 4.9 3.0 1.4\n", + "2 4.7 3.2 1.3" ] }, - "execution_count": 33, + "execution_count": 56, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "iris_df.sort_values(by=['sepal_length','petal_width'])" + "iris_df.iloc[1:3, 0:3]" ] }, { "cell_type": "markdown", - "id": "5590f9d3-bbf1-4c45-84b6-c2b5c288716f", + "id": "46975617", + "metadata": {}, + "source": [ + "You can apply slices to column names too. You don't need `.iloc[]` here." + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "5056b057", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['sepal_length', 'sepal_width', 'petal_length'], dtype='object')" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "iris_df.columns[0:3]" + ] + }, + { + "cell_type": "markdown", + "id": "ccfdea8c-d7ff-48c6-89a4-a75bd16f88c8", + "metadata": {}, + "source": [ + "### `loc[]`: Selection by label" + ] + }, + { + "cell_type": "markdown", + "id": "f9be9788", + "metadata": {}, + "source": [ + "We can select by row and column labels using `.loc[]`. " + ] + }, + { + "cell_type": "markdown", + "id": "d84fe9ab", "metadata": {}, "source": [ - "## `.sort_index()`\n", + "Here we ask for rows with labels (indexes) 1-3, and it gives exactly that \n", + "`.iloc[]` returned rows with indices 1,2.\n", "\n", - "Sort by index. 
Example sorts by descending index" + "**Author note: This is by far the more useful of the two in my experience.**" ] }, { "cell_type": "code", - "execution_count": 34, - "id": "8aa29fc1-0ef8-40dd-8829-81395b15f6a0", + "execution_count": 58, + "id": "cd1a1a13", "metadata": {}, "outputs": [ { @@ -2756,72 +3054,20 @@ " petal_length\n", " petal_width\n", " species\n", + " sepal_volume\n", + " sepal_volume_2\n", " \n", " \n", " \n", " \n", - " 149\n", - " 5.9\n", + " 1\n", + " 4.9\n", " 3.0\n", - " 5.1\n", - " 1.8\n", - " virginica\n", - " \n", - " \n", - " 148\n", - " 6.2\n", - " 3.4\n", - " 5.4\n", - " 2.3\n", - " virginica\n", - " \n", - " \n", - " 147\n", - " 6.5\n", - " 3.0\n", - " 5.2\n", - " 2.0\n", - " virginica\n", - " \n", - " \n", - " 146\n", - " 6.3\n", - " 2.5\n", - " 5.0\n", - " 1.9\n", - " virginica\n", - " \n", - " \n", - " 145\n", - " 6.7\n", - " 3.0\n", - " 5.2\n", - " 2.3\n", - " virginica\n", - " \n", - " \n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " \n", - " \n", - " 4\n", - " 5.0\n", - " 3.6\n", " 1.4\n", " 0.2\n", " setosa\n", - " \n", - " \n", - " 3\n", - " 4.6\n", - " 3.1\n", - " 1.5\n", - " 0.2\n", - " setosa\n", + " 14.70\n", + " 14.70\n", " \n", " \n", " 2\n", @@ -2830,91 +3076,56 @@ " 1.3\n", " 0.2\n", " setosa\n", + " 15.04\n", + " 15.04\n", " \n", " \n", - " 1\n", - " 4.9\n", - " 3.0\n", - " 1.4\n", - " 0.2\n", - " setosa\n", - " \n", - " \n", - " 0\n", - " 5.1\n", - " 3.5\n", - " 1.4\n", + " 3\n", + " 4.6\n", + " 3.1\n", + " 1.5\n", " 0.2\n", " setosa\n", + " 14.26\n", + " 14.26\n", " \n", " \n", "\n", - "

150 rows × 5 columns

\n", "" ], "text/plain": [ - " sepal_length sepal_width petal_length petal_width species\n", - "149 5.9 3.0 5.1 1.8 virginica\n", - "148 6.2 3.4 5.4 2.3 virginica\n", - "147 6.5 3.0 5.2 2.0 virginica\n", - "146 6.3 2.5 5.0 1.9 virginica\n", - "145 6.7 3.0 5.2 2.3 virginica\n", - ".. ... ... ... ... ...\n", - "4 5.0 3.6 1.4 0.2 setosa\n", - "3 4.6 3.1 1.5 0.2 setosa\n", - "2 4.7 3.2 1.3 0.2 setosa\n", - "1 4.9 3.0 1.4 0.2 setosa\n", - "0 5.1 3.5 1.4 0.2 setosa\n", + " sepal_length sepal_width petal_length petal_width species sepal_volume \\\n", + "1 4.9 3.0 1.4 0.2 setosa 14.70 \n", + "2 4.7 3.2 1.3 0.2 setosa 15.04 \n", + "3 4.6 3.1 1.5 0.2 setosa 14.26 \n", "\n", - "[150 rows x 5 columns]" + " sepal_volume_2 \n", + "1 14.70 \n", + "2 15.04 \n", + "3 14.26 " ] }, - "execution_count": 34, + "execution_count": 58, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "iris_df.sort_index(axis=0, ascending=False)\n" + "iris_df.loc[1:3]" ] }, { "cell_type": "markdown", - "id": "db062841", - "metadata": {}, - "source": [ - "## Dealing with Missing Data\n", - "\n", - "Pandas primarily uses the data type `np.nan` from NumPy to represent missing data." 
- ] - }, - { - "cell_type": "code", - "execution_count": 53, - "id": "2ae69551", - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np" - ] - }, - { - "cell_type": "code", - "execution_count": 54, - "id": "88d0b1c6", + "id": "5ccd9d19", "metadata": {}, - "outputs": [], "source": [ - "df_miss = pd.DataFrame({\n", - " 'x':[2, np.nan, 1], \n", - " 'y':[np.nan, np.nan, 6]}\n", - ")" + "Subset on columns with column name (as a string) or list of strings" ] }, { "cell_type": "code", - "execution_count": 55, - "id": "8404fdeb", + "execution_count": 59, + "id": "332696cc", "metadata": {}, "outputs": [ { @@ -2938,62 +3149,58 @@ " \n", " \n", " \n", - " x\n", - " y\n", + " sepal_length\n", + " petal_width\n", " \n", " \n", " \n", " \n", - " 0\n", - " 2.0\n", - " NaN\n", - " \n", - " \n", " 1\n", - " NaN\n", - " NaN\n", + " 4.9\n", + " 0.2\n", " \n", " \n", " 2\n", - " 1.0\n", - " 6.0\n", + " 4.7\n", + " 0.2\n", + " \n", + " \n", + " 3\n", + " 4.6\n", + " 0.2\n", " \n", " \n", "\n", "" ], "text/plain": [ - " x y\n", - "0 2.0 NaN\n", - "1 NaN NaN\n", - "2 1.0 6.0" + " sepal_length petal_width\n", + "1 4.9 0.2\n", + "2 4.7 0.2\n", + "3 4.6 0.2" ] }, - "execution_count": 55, + "execution_count": 59, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df_miss" + "iris_df.loc[1:3, ['sepal_length','petal_width']]" ] }, { "cell_type": "markdown", - "id": "565b8fa8", + "id": "10439dcc", "metadata": {}, "source": [ - "## `.dropna()` \n", - "\n", - "This will drop all rows with missing data in any column.\n", - "\n", - "[Details](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.dropna.html)" + "Select all rows, specific columns" ] }, { "cell_type": "code", - "execution_count": 56, - "id": "0f90aff6", + "execution_count": 60, + "id": "e322dddf", "metadata": {}, "outputs": [ { @@ -3017,133 +3224,273 @@ " \n", " \n", " \n", - " x\n", - " y\n", + " sepal_length\n", + " petal_width\n", " \n", " \n", " \n", " \n", + " 0\n", + " 
5.1\n", + " 0.2\n", + " \n", + " \n", + " 1\n", + " 4.9\n", + " 0.2\n", + " \n", + " \n", " 2\n", - " 1.0\n", - " 6.0\n", + " 4.7\n", + " 0.2\n", + " \n", + " \n", + " 3\n", + " 4.6\n", + " 0.2\n", + " \n", + " \n", + " 4\n", + " 5.0\n", + " 0.2\n", + " \n", + " \n", + " ...\n", + " ...\n", + " ...\n", + " \n", + " \n", + " 145\n", + " 6.7\n", + " 2.3\n", + " \n", + " \n", + " 146\n", + " 6.3\n", + " 1.9\n", + " \n", + " \n", + " 147\n", + " 6.5\n", + " 2.0\n", + " \n", + " \n", + " 148\n", + " 6.2\n", + " 2.3\n", + " \n", + " \n", + " 149\n", + " 5.9\n", + " 1.8\n", " \n", " \n", "\n", + "

150 rows × 2 columns

\n", "" ], "text/plain": [ - " x y\n", - "2 1.0 6.0" + " sepal_length petal_width\n", + "0 5.1 0.2\n", + "1 4.9 0.2\n", + "2 4.7 0.2\n", + "3 4.6 0.2\n", + "4 5.0 0.2\n", + ".. ... ...\n", + "145 6.7 2.3\n", + "146 6.3 1.9\n", + "147 6.5 2.0\n", + "148 6.2 2.3\n", + "149 5.9 1.8\n", + "\n", + "[150 rows x 2 columns]" ] }, - "execution_count": 56, + "execution_count": 60, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df_drop_all = df_miss.dropna()\n", - "df_drop_all" + "iris_df.loc[:, ['sepal_length','petal_width']]" ] }, { "cell_type": "markdown", - "id": "190e3c8d", + "id": "485059f0", "metadata": {}, "source": [ - "The `subset` parameter takes a list of column names to specify which columns should have missing values." + "### Boolean Filtering\n", + "\n", + "It's very common to subset a dataframe based on some condition on the data.\n", + "\n", + "Pandas knows what to do if you pass a boolean structure." ] }, { "cell_type": "code", - "execution_count": 57, - "id": "ba5ad471", + "execution_count": 63, + "id": "ef3d5652", "metadata": {}, "outputs": [ { "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", + "text/plain": [ + "0 False\n", + "1 False\n", + "2 False\n", + "3 False\n", + "4 False\n", + " ... \n", + "145 False\n", + "146 False\n", + "147 False\n", + "148 False\n", + "149 False\n", + "Name: sepal_length, Length: 150, dtype: bool" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "iris_df.sepal_length >= 7.5" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "id": "059d604e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "
\n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
xysepal_lengthsepal_widthpetal_lengthpetal_widthspeciessepal_volumesepal_volume_2
01057.63.06.62.1virginica22.8022.80
1177.73.86.72.2virginica29.2629.26
1187.72.66.92.3virginica20.0220.02
1227.72.86.72.0NaNvirginica21.5621.56
21.06.01317.93.86.42.0virginica30.0230.02
1357.73.06.12.3virginica23.1023.10
\n", "
" ], "text/plain": [ - " x y\n", - "0 2.0 NaN\n", - "2 1.0 6.0" + " sepal_length sepal_width petal_length petal_width species \\\n", + "105 7.6 3.0 6.6 2.1 virginica \n", + "117 7.7 3.8 6.7 2.2 virginica \n", + "118 7.7 2.6 6.9 2.3 virginica \n", + "122 7.7 2.8 6.7 2.0 virginica \n", + "131 7.9 3.8 6.4 2.0 virginica \n", + "135 7.7 3.0 6.1 2.3 virginica \n", + "\n", + " sepal_volume sepal_volume_2 \n", + "105 22.80 22.80 \n", + "117 29.26 29.26 \n", + "118 20.02 20.02 \n", + "122 21.56 21.56 \n", + "131 30.02 30.02 \n", + "135 23.10 23.10 " ] }, - "execution_count": 57, + "execution_count": 64, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df_drop_x = df_miss.dropna(subset=['x'])\n", - "df_drop_x" - ] - }, - { - "cell_type": "markdown", - "id": "c7efa14a", - "metadata": {}, - "source": [ - "## `.fillna()`\n", - "\n", - "This will replace missing values with whatever you set it to, e.g. $0$s.\n", - "\n", - "[Details](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.fillna.html)\n", - "\n", - "We can pass the results of an operation -- for example to peform simple imputation, we can replace missing values in each column with the median value of the respective column:" - ] - }, - { - "cell_type": "code", - "execution_count": 58, - "id": "c697c8f4", - "metadata": {}, - "outputs": [], - "source": [ - "df_filled = df_miss.fillna(df_miss.median())" + "iris_df.loc[iris_df.sepal_length >= 7.5,:]" ] }, { "cell_type": "code", - "execution_count": 59, - "id": "cc10a2b7", + "execution_count": 65, + "id": "b922f38e", "metadata": {}, "outputs": [ { @@ -3167,273 +3514,478 @@ " \n", " \n", " \n", - " x\n", - " y\n", + " sepal_length\n", + " sepal_width\n", + " petal_length\n", + " petal_width\n", + " species\n", + " sepal_volume\n", + " sepal_volume_2\n", " \n", " \n", " \n", " \n", - " 0\n", - " 2.0\n", - " 6.0\n", + " 2\n", + " 4.7\n", + " 3.2\n", + " 1.3\n", + " 0.2\n", + " setosa\n", + " 15.04\n", + " 15.04\n", " \n", " \n", - " 
1\n", + " 3\n", + " 4.6\n", + " 3.1\n", " 1.5\n", - " 6.0\n", + " 0.2\n", + " setosa\n", + " 14.26\n", + " 14.26\n", " \n", " \n", - " 2\n", + " 6\n", + " 4.6\n", + " 3.4\n", + " 1.4\n", + " 0.3\n", + " setosa\n", + " 15.64\n", + " 15.64\n", + " \n", + " \n", + " 22\n", + " 4.6\n", + " 3.6\n", " 1.0\n", - " 6.0\n", + " 0.2\n", + " setosa\n", + " 16.56\n", + " 16.56\n", + " \n", + " \n", + " 29\n", + " 4.7\n", + " 3.2\n", + " 1.6\n", + " 0.2\n", + " setosa\n", + " 15.04\n", + " 15.04\n", + " \n", + " \n", + " 41\n", + " 4.5\n", + " 2.3\n", + " 1.3\n", + " 0.3\n", + " setosa\n", + " 10.35\n", + " 10.35\n", + " \n", + " \n", + " 47\n", + " 4.6\n", + " 3.2\n", + " 1.4\n", + " 0.2\n", + " setosa\n", + " 14.72\n", + " 14.72\n", " \n", " \n", "\n", "" ], "text/plain": [ - " x y\n", - "0 2.0 6.0\n", - "1 1.5 6.0\n", - "2 1.0 6.0" + " sepal_length sepal_width petal_length petal_width species \\\n", + "2 4.7 3.2 1.3 0.2 setosa \n", + "3 4.6 3.1 1.5 0.2 setosa \n", + "6 4.6 3.4 1.4 0.3 setosa \n", + "22 4.6 3.6 1.0 0.2 setosa \n", + "29 4.7 3.2 1.6 0.2 setosa \n", + "41 4.5 2.3 1.3 0.3 setosa \n", + "47 4.6 3.2 1.4 0.2 setosa \n", + "\n", + " sepal_volume sepal_volume_2 \n", + "2 15.04 15.04 \n", + "3 14.26 14.26 \n", + "6 15.64 15.64 \n", + "22 16.56 16.56 \n", + "29 15.04 15.04 \n", + "41 10.35 10.35 \n", + "47 14.72 14.72 " ] }, - "execution_count": 59, + "execution_count": 65, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df_filled" + "iris_df.loc[(iris_df['sepal_length' ]>= 4.5) & (iris_df['sepal_length'] <= 4.7),:]" ] }, { "cell_type": "markdown", - "id": "4e04c613", + "id": "3a57da01", "metadata": {}, "source": [ - "## Column selection, addition, deletion\n", - "\n", - "### Selection\n", - "\n", - "Use bracket notation or dot notation. 
\n", - "- bracket notation: variable name must be a string" + "## Masking" ] }, { - "cell_type": "code", - "execution_count": 142, - "id": "157649ef", + "cell_type": "markdown", + "id": "94fe5317", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(0 1\n", - " 1 1\n", - " obs3 0\n", - " 3 0\n", - " Name: y, dtype: int64,\n", - " pandas.core.series.Series)" - ] - }, - "execution_count": 142, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ - "df['y'], type(df['y'])" + "Here's an example of **masking** using boolean conditions passed to the dataframe selector:" ] }, { "cell_type": "markdown", - "id": "39b9c648", + "id": "7583a8af", "metadata": {}, "source": [ - "- As an object attribute" + "Here are the **values** for the feature `sepal length`:" ] }, { "cell_type": "code", - "execution_count": 143, - "id": "9790c7a9", + "execution_count": 66, + "id": "db8c53c0", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(0 1\n", - " 1 1\n", - " obs3 0\n", - " 3 0\n", - " Name: y, dtype: int64,\n", - " pandas.core.series.Series)" + "array([5.1, 4.9, 4.7, 4.6, 5. , 5.4, 4.6, 5. , 4.4, 4.9, 5.4, 4.8, 4.8,\n", + " 4.3, 5.8, 5.7, 5.4, 5.1, 5.7, 5.1, 5.4, 5.1, 4.6, 5.1, 4.8, 5. ,\n", + " 5. , 5.2, 5.2, 4.7, 4.8, 5.4, 5.2, 5.5, 4.9, 5. , 5.5, 4.9, 4.4,\n", + " 5.1, 5. , 4.5, 4.4, 5. , 5.1, 4.8, 5.1, 4.6, 5.3, 5. , 7. , 6.4,\n", + " 6.9, 5.5, 6.5, 5.7, 6.3, 4.9, 6.6, 5.2, 5. , 5.9, 6. , 6.1, 5.6,\n", + " 6.7, 5.6, 5.8, 6.2, 5.6, 5.9, 6.1, 6.3, 6.1, 6.4, 6.6, 6.8, 6.7,\n", + " 6. , 5.7, 5.5, 5.5, 5.8, 6. , 5.4, 6. , 6.7, 6.3, 5.6, 5.5, 5.5,\n", + " 6.1, 5.8, 5. , 5.6, 5.7, 5.7, 6.2, 5.1, 5.7, 6.3, 5.8, 7.1, 6.3,\n", + " 6.5, 7.6, 4.9, 7.3, 6.7, 7.2, 6.5, 6.4, 6.8, 5.7, 5.8, 6.4, 6.5,\n", + " 7.7, 7.7, 6. , 6.9, 5.6, 7.7, 6.3, 6.7, 7.2, 6.2, 6.1, 6.4, 7.2,\n", + " 7.4, 7.9, 6.4, 6.3, 6.1, 7.7, 6.3, 6.4, 6. 
, 6.9, 6.7, 6.9, 5.8,\n", + " 6.8, 6.7, 6.7, 6.3, 6.5, 6.2, 5.9])" ] }, - "execution_count": 143, + "execution_count": 66, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df.y, type(df.y)" + "iris_df.sepal_length.values" ] }, { "cell_type": "markdown", - "id": "e8767d21", + "id": "0f17cc32", "metadata": {}, "source": [ - "Dot notation is very convenient, since as object attributes they can be tab-completed in various editing environments.\n", - "\n", - "But:\n", - "- It only works if the column names are not reserved words\n", - "- It can't be used when created a new column (see below)" + "And here are **the boolean values** generated by applying a comparison operator to those values:" ] }, { - "cell_type": "markdown", - "id": "40a660ef", + "cell_type": "code", + "execution_count": 67, + "id": "70b43e0e", "metadata": {}, + "outputs": [], "source": [ - "As we can see, the selected columns are series, so its properties and features apply to both of them:" + "mask = iris_df.sepal_length >= 7.5" ] }, { "cell_type": "code", - "execution_count": 144, - "id": "bed23420-33c8-4114-bb63-390c878c1dee", + "execution_count": 68, + "id": "3c50ab61", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(1, 1)" + "array([False, False, False, False, False, False, False, False, False,\n", + " False, False, False, False, False, False, False, False, False,\n", + " False, False, False, False, False, False, False, False, False,\n", + " False, False, False, False, False, False, False, False, False,\n", + " False, False, False, False, False, False, False, False, False,\n", + " False, False, False, False, False, False, False, False, False,\n", + " False, False, False, False, False, False, False, False, False,\n", + " False, False, False, False, False, False, False, False, False,\n", + " False, False, False, False, False, False, False, False, False,\n", + " False, False, False, False, False, False, False, False, False,\n", + " False, False, False, False, False, 
False, False, False, False,\n", + " False, False, False, False, False, False, True, False, False,\n", + " False, False, False, False, False, False, False, False, False,\n", + " True, True, False, False, False, True, False, False, False,\n", + " False, False, False, False, False, True, False, False, False,\n", + " True, False, False, False, False, False, False, False, False,\n", + " False, False, False, False, False, False])" ] }, - "execution_count": 144, + "execution_count": 68, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# indexing\n", - "df.y.values[0], df['y'][0]" + "mask.values" ] }, { - "cell_type": "code", - "execution_count": 145, - "id": "20529c40", + "cell_type": "markdown", + "id": "8f6bb3e6", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(array([1, 1, 0, 0]), array([1, 1, 0, 0]))" - ] - }, - "execution_count": 145, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ - "# Accessing values attribute\n", - "df.y.values, df['y'].values" + "The two sets of values have the same shape.\n", + "\n", + "We can now overlay the logical values over the numeric ones and keep only what is `True`:" ] }, { "cell_type": "code", - "execution_count": 146, - "id": "49e07b5f", + "execution_count": 69, + "id": "123042dd", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(0.5, 0.5)" + "array([7.6, 7.7, 7.7, 7.7, 7.9, 7.7])" ] }, - "execution_count": 146, + "execution_count": 69, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# Taking the mean\n", - "df.y.mean(), df['y'].mean()" + "iris_df.sepal_length[mask].values" ] }, { "cell_type": "markdown", - "id": "b2dedf62", + "id": "3eaf05e6-c9f9-4472-ad1a-25f1ff859049", "metadata": {}, "source": [ - "### Column Selection\n", - "\n", - "You select columns from a dataframe by passing a value or list (or any expression that evaluates to a list)." 
+ "## Sorting" ] }, { - "cell_type": "code", - "execution_count": 147, - "id": "8c9aa654", + "cell_type": "markdown", + "id": "f4c0a086-13be-447a-9da4-13f1d4b72fdd", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(0 0\n", - " 1 2\n", - " obs3 1\n", - " 3 5\n", - " Name: x, dtype: int64,\n", - " pandas.core.series.Series)" - ] - }, - "execution_count": 147, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ - "# single bracket gives you a series\n", - "df['x'], type(df['x'])" + "- `sort_values()`: Sorts dataframe by values. You can customize this sorting with the following parameters:\n", + " \n", + " - `by` parameter takes string or list of strings\n", + " - `ascending` takes True or False\n", + " - `inplace` will save sorted values into the df\n", + "\n", + "[More details](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.sort_values.html)" ] }, { "cell_type": "code", - "execution_count": 148, - "id": "6cc6cf62", + "execution_count": 70, + "id": "db23981b-4838-46cc-af21-fbe1fb354e3d", "metadata": {}, "outputs": [ { "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_widthspeciessepal_volumesepal_volume_2
134.33.01.10.1setosa12.9012.90
84.42.91.40.2setosa12.7612.76
384.43.01.30.2setosa13.2013.20
424.43.21.30.2setosa14.0814.08
414.52.31.30.3setosa10.3510.35
........................
1227.72.86.72.0virginica21.5621.56
1177.73.86.72.2virginica29.2629.26
1187.72.66.92.3virginica20.0220.02
1357.73.06.12.3virginica23.1023.10
1317.93.86.42.0virginica30.0230.02
\n", + "

150 rows × 7 columns

\n", + "
" + ], "text/plain": [ - "( x\n", - " 0 0\n", - " 1 2\n", - " obs3 1\n", - " 3 5,\n", - " pandas.core.frame.DataFrame)" + " sepal_length sepal_width petal_length petal_width species \\\n", + "13 4.3 3.0 1.1 0.1 setosa \n", + "8 4.4 2.9 1.4 0.2 setosa \n", + "38 4.4 3.0 1.3 0.2 setosa \n", + "42 4.4 3.2 1.3 0.2 setosa \n", + "41 4.5 2.3 1.3 0.3 setosa \n", + ".. ... ... ... ... ... \n", + "122 7.7 2.8 6.7 2.0 virginica \n", + "117 7.7 3.8 6.7 2.2 virginica \n", + "118 7.7 2.6 6.9 2.3 virginica \n", + "135 7.7 3.0 6.1 2.3 virginica \n", + "131 7.9 3.8 6.4 2.0 virginica \n", + "\n", + " sepal_volume sepal_volume_2 \n", + "13 12.90 12.90 \n", + "8 12.76 12.76 \n", + "38 13.20 13.20 \n", + "42 14.08 14.08 \n", + "41 10.35 10.35 \n", + ".. ... ... \n", + "122 21.56 21.56 \n", + "117 29.26 29.26 \n", + "118 20.02 20.02 \n", + "135 23.10 23.10 \n", + "131 30.02 30.02 \n", + "\n", + "[150 rows x 7 columns]" ] }, - "execution_count": 148, + "execution_count": 70, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# double bracket gives you the selected column as new dataframe\n", - "df[['x']], type(df[['x']])" + "iris_df.sort_values(by=['sepal_length','petal_width'])" ] }, { "cell_type": "code", - "execution_count": 149, - "id": "8aa7d7d8", + "execution_count": 71, + "id": "75d63a9e-b466-458f-add2-5fa2f38abe54", "metadata": {}, "outputs": [ { @@ -3457,78 +4009,188 @@ " \n", " \n", " \n", - " y\n", - " x\n", + " sepal_length\n", + " sepal_width\n", + " petal_length\n", + " petal_width\n", + " species\n", + " sepal_volume\n", + " sepal_volume_2\n", " \n", " \n", " \n", " \n", - " 0\n", - " 1\n", - " 0\n", + " 131\n", + " 7.9\n", + " 3.8\n", + " 6.4\n", + " 2.0\n", + " virginica\n", + " 30.02\n", + " 30.02\n", " \n", " \n", - " 1\n", - " 1\n", - " 2\n", + " 118\n", + " 7.7\n", + " 2.6\n", + " 6.9\n", + " 2.3\n", + " virginica\n", + " 20.02\n", + " 20.02\n", " \n", " \n", - " obs3\n", - " 0\n", - " 1\n", + " 135\n", + " 7.7\n", + " 3.0\n", + " 6.1\n", + " 
2.3\n", + " virginica\n", + " 23.10\n", + " 23.10\n", " \n", " \n", - " 3\n", - " 0\n", - " 5\n", + " 117\n", + " 7.7\n", + " 3.8\n", + " 6.7\n", + " 2.2\n", + " virginica\n", + " 29.26\n", + " 29.26\n", + " \n", + " \n", + " 122\n", + " 7.7\n", + " 2.8\n", + " 6.7\n", + " 2.0\n", + " virginica\n", + " 21.56\n", + " 21.56\n", + " \n", + " \n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " \n", + " \n", + " 41\n", + " 4.5\n", + " 2.3\n", + " 1.3\n", + " 0.3\n", + " setosa\n", + " 10.35\n", + " 10.35\n", + " \n", + " \n", + " 8\n", + " 4.4\n", + " 2.9\n", + " 1.4\n", + " 0.2\n", + " setosa\n", + " 12.76\n", + " 12.76\n", + " \n", + " \n", + " 38\n", + " 4.4\n", + " 3.0\n", + " 1.3\n", + " 0.2\n", + " setosa\n", + " 13.20\n", + " 13.20\n", + " \n", + " \n", + " 42\n", + " 4.4\n", + " 3.2\n", + " 1.3\n", + " 0.2\n", + " setosa\n", + " 14.08\n", + " 14.08\n", + " \n", + " \n", + " 13\n", + " 4.3\n", + " 3.0\n", + " 1.1\n", + " 0.1\n", + " setosa\n", + " 12.90\n", + " 12.90\n", " \n", " \n", "\n", + "

150 rows × 7 columns

\n", "" ], "text/plain": [ - " y x\n", - "0 1 0\n", - "1 1 2\n", - "obs3 0 1\n", - "3 0 5" + " sepal_length sepal_width petal_length petal_width species \\\n", + "131 7.9 3.8 6.4 2.0 virginica \n", + "118 7.7 2.6 6.9 2.3 virginica \n", + "135 7.7 3.0 6.1 2.3 virginica \n", + "117 7.7 3.8 6.7 2.2 virginica \n", + "122 7.7 2.8 6.7 2.0 virginica \n", + ".. ... ... ... ... ... \n", + "41 4.5 2.3 1.3 0.3 setosa \n", + "8 4.4 2.9 1.4 0.2 setosa \n", + "38 4.4 3.0 1.3 0.2 setosa \n", + "42 4.4 3.2 1.3 0.2 setosa \n", + "13 4.3 3.0 1.1 0.1 setosa \n", + "\n", + " sepal_volume sepal_volume_2 \n", + "131 30.02 30.02 \n", + "118 20.02 20.02 \n", + "135 23.10 23.10 \n", + "117 29.26 29.26 \n", + "122 21.56 21.56 \n", + ".. ... ... \n", + "41 10.35 10.35 \n", + "8 12.76 12.76 \n", + "38 13.20 13.20 \n", + "42 14.08 14.08 \n", + "13 12.90 12.90 \n", + "\n", + "[150 rows x 7 columns]" ] }, - "execution_count": 149, + "execution_count": 71, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df[['y', 'x']]" + "iris_df.sort_values(by=['sepal_length','petal_width'], ascending=False)" ] }, { "cell_type": "markdown", - "id": "216b06d1", + "id": "5590f9d3-bbf1-4c45-84b6-c2b5c288716f", "metadata": {}, "source": [ - "### Addition\n", - "\n", - "It is typical to create a new column from existing columns. \n", + "- `sort_index()`: Sorts dataframe by index. You can customize this sorting with the following parameters:\n", + " \n", + " - `axis` along which to sort. 
The value 0 identifies the rows, and 1 identifies the columns.\n", + " - `ascending` takes True or False\n", + " - `inplace` will save sorted values into the df\n", "\n", - "In this example, a new column (or field) is created by summing `x` and `y`:" - ] - }, - { - "cell_type": "code", - "execution_count": 150, - "id": "45a650cb", - "metadata": {}, - "outputs": [], - "source": [ - "df['x_plus_y'] = df.x + df.y" + "[More details](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.sort_index.html)" ] }, { "cell_type": "code", - "execution_count": 151, - "id": "a9742abe", + "execution_count": 73, + "id": "7c5afdea-05e0-4789-a052-0f3e3170c1df", "metadata": {}, "outputs": [ { @@ -3552,86 +4214,174 @@ " \n", " \n", " \n", - " x\n", - " y\n", - " is_label\n", - " x_plus_y\n", + " sepal_length\n", + " sepal_width\n", + " petal_length\n", + " petal_width\n", + " species\n", + " sepal_volume\n", + " sepal_volume_2\n", " \n", " \n", " \n", " \n", " 0\n", - " 0\n", - " 1\n", - " True\n", - " 1\n", + " 5.1\n", + " 3.5\n", + " 1.4\n", + " 0.2\n", + " setosa\n", + " 17.85\n", + " 17.85\n", " \n", " \n", " 1\n", - " 2\n", - " 1\n", - " False\n", - " 3\n", + " 4.9\n", + " 3.0\n", + " 1.4\n", + " 0.2\n", + " setosa\n", + " 14.70\n", + " 14.70\n", " \n", " \n", - " obs3\n", - " 1\n", - " 0\n", - " False\n", - " 1\n", + " 2\n", + " 4.7\n", + " 3.2\n", + " 1.3\n", + " 0.2\n", + " setosa\n", + " 15.04\n", + " 15.04\n", " \n", " \n", " 3\n", - " 5\n", - " 0\n", - " False\n", - " 5\n", + " 4.6\n", + " 3.1\n", + " 1.5\n", + " 0.2\n", + " setosa\n", + " 14.26\n", + " 14.26\n", + " \n", + " \n", + " 4\n", + " 5.0\n", + " 3.6\n", + " 1.4\n", + " 0.2\n", + " setosa\n", + " 18.00\n", + " 18.00\n", + " \n", + " \n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " \n", + " \n", + " 145\n", + " 6.7\n", + " 3.0\n", + " 5.2\n", + " 2.3\n", + " virginica\n", + " 20.10\n", + " 20.10\n", + " \n", + " \n", + " 146\n", + " 
6.3\n", + " 2.5\n", + " 5.0\n", + " 1.9\n", + " virginica\n", + " 15.75\n", + " 15.75\n", + " \n", + " \n", + " 147\n", + " 6.5\n", + " 3.0\n", + " 5.2\n", + " 2.0\n", + " virginica\n", + " 19.50\n", + " 19.50\n", + " \n", + " \n", + " 148\n", + " 6.2\n", + " 3.4\n", + " 5.4\n", + " 2.3\n", + " virginica\n", + " 21.08\n", + " 21.08\n", + " \n", + " \n", + " 149\n", + " 5.9\n", + " 3.0\n", + " 5.1\n", + " 1.8\n", + " virginica\n", + " 17.70\n", + " 17.70\n", " \n", " \n", "\n", + "

150 rows × 7 columns

\n", "" ], "text/plain": [ - " x y is_label x_plus_y\n", - "0 0 1 True 1\n", - "1 2 1 False 3\n", - "obs3 1 0 False 1\n", - "3 5 0 False 5" + " sepal_length sepal_width petal_length petal_width species \\\n", + "0 5.1 3.5 1.4 0.2 setosa \n", + "1 4.9 3.0 1.4 0.2 setosa \n", + "2 4.7 3.2 1.3 0.2 setosa \n", + "3 4.6 3.1 1.5 0.2 setosa \n", + "4 5.0 3.6 1.4 0.2 setosa \n", + ".. ... ... ... ... ... \n", + "145 6.7 3.0 5.2 2.3 virginica \n", + "146 6.3 2.5 5.0 1.9 virginica \n", + "147 6.5 3.0 5.2 2.0 virginica \n", + "148 6.2 3.4 5.4 2.3 virginica \n", + "149 5.9 3.0 5.1 1.8 virginica \n", + "\n", + " sepal_volume sepal_volume_2 \n", + "0 17.85 17.85 \n", + "1 14.70 14.70 \n", + "2 15.04 15.04 \n", + "3 14.26 14.26 \n", + "4 18.00 18.00 \n", + ".. ... ... \n", + "145 20.10 20.10 \n", + "146 15.75 15.75 \n", + "147 19.50 19.50 \n", + "148 21.08 21.08 \n", + "149 17.70 17.70 \n", + "\n", + "[150 rows x 7 columns]" ] }, - "execution_count": 151, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df" - ] - }, - { - "cell_type": "markdown", - "id": "b70f49b7", - "metadata": {}, - "source": [ - "Note that:\n", - "\n", - "- The left side has form: DataFrame name, bracket notation, new column name\n", - "- The assignment operator `=` is used\n", - "- The right side contains an expression; here, two df columns are summed " - ] - }, - { - "cell_type": "markdown", - "id": "cb93606a", - "metadata": {}, + "execution_count": 73, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "Bracket notation also works on the fields, but it's more typing:" + "iris_df.sort_index(axis=0)" ] }, { "cell_type": "code", - "execution_count": 152, - "id": "65346e2d", + "execution_count": 74, + "id": "8aa29fc1-0ef8-40dd-8829-81395b15f6a0", "metadata": {}, "outputs": [ { @@ -3655,124 +4405,207 @@ " \n", " \n", " \n", - " x\n", - " y\n", - " is_label\n", - " x_plus_y\n", + " sepal_length\n", + " sepal_width\n", + " petal_length\n", + " petal_width\n", + 
" species\n", + " sepal_volume\n", + " sepal_volume_2\n", " \n", " \n", " \n", " \n", - " 0\n", - " 0\n", - " 1\n", - " True\n", - " 1\n", + " 149\n", + " 5.9\n", + " 3.0\n", + " 5.1\n", + " 1.8\n", + " virginica\n", + " 17.70\n", + " 17.70\n", " \n", " \n", - " 1\n", - " 2\n", - " 1\n", - " False\n", - " 3\n", + " 148\n", + " 6.2\n", + " 3.4\n", + " 5.4\n", + " 2.3\n", + " virginica\n", + " 21.08\n", + " 21.08\n", + " \n", + " \n", + " 147\n", + " 6.5\n", + " 3.0\n", + " 5.2\n", + " 2.0\n", + " virginica\n", + " 19.50\n", + " 19.50\n", + " \n", + " \n", + " 146\n", + " 6.3\n", + " 2.5\n", + " 5.0\n", + " 1.9\n", + " virginica\n", + " 15.75\n", + " 15.75\n", + " \n", + " \n", + " 145\n", + " 6.7\n", + " 3.0\n", + " 5.2\n", + " 2.3\n", + " virginica\n", + " 20.10\n", + " 20.10\n", + " \n", + " \n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", " \n", " \n", - " obs3\n", - " 1\n", - " 0\n", - " False\n", - " 1\n", + " 4\n", + " 5.0\n", + " 3.6\n", + " 1.4\n", + " 0.2\n", + " setosa\n", + " 18.00\n", + " 18.00\n", " \n", " \n", " 3\n", - " 5\n", - " 0\n", - " False\n", - " 5\n", + " 4.6\n", + " 3.1\n", + " 1.5\n", + " 0.2\n", + " setosa\n", + " 14.26\n", + " 14.26\n", + " \n", + " \n", + " 2\n", + " 4.7\n", + " 3.2\n", + " 1.3\n", + " 0.2\n", + " setosa\n", + " 15.04\n", + " 15.04\n", + " \n", + " \n", + " 1\n", + " 4.9\n", + " 3.0\n", + " 1.4\n", + " 0.2\n", + " setosa\n", + " 14.70\n", + " 14.70\n", + " \n", + " \n", + " 0\n", + " 5.1\n", + " 3.5\n", + " 1.4\n", + " 0.2\n", + " setosa\n", + " 17.85\n", + " 17.85\n", " \n", " \n", "\n", + "

150 rows × 7 columns

\n", "" ], "text/plain": [ - " x y is_label x_plus_y\n", - "0 0 1 True 1\n", - "1 2 1 False 3\n", - "obs3 1 0 False 1\n", - "3 5 0 False 5" + " sepal_length sepal_width petal_length petal_width species \\\n", + "149 5.9 3.0 5.1 1.8 virginica \n", + "148 6.2 3.4 5.4 2.3 virginica \n", + "147 6.5 3.0 5.2 2.0 virginica \n", + "146 6.3 2.5 5.0 1.9 virginica \n", + "145 6.7 3.0 5.2 2.3 virginica \n", + ".. ... ... ... ... ... \n", + "4 5.0 3.6 1.4 0.2 setosa \n", + "3 4.6 3.1 1.5 0.2 setosa \n", + "2 4.7 3.2 1.3 0.2 setosa \n", + "1 4.9 3.0 1.4 0.2 setosa \n", + "0 5.1 3.5 1.4 0.2 setosa \n", + "\n", + " sepal_volume sepal_volume_2 \n", + "149 17.70 17.70 \n", + "148 21.08 21.08 \n", + "147 19.50 19.50 \n", + "146 15.75 15.75 \n", + "145 20.10 20.10 \n", + ".. ... ... \n", + "4 18.00 18.00 \n", + "3 14.26 14.26 \n", + "2 15.04 15.04 \n", + "1 14.70 14.70 \n", + "0 17.85 17.85 \n", + "\n", + "[150 rows x 7 columns]" ] }, - "execution_count": 152, + "execution_count": 74, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df['x_plus_y'] = df['x'] + df['y']\n", - "df" + "iris_df.sort_index(axis=0, ascending=False)" ] }, { "cell_type": "markdown", - "id": "112dbcd6", + "id": "db062841", "metadata": {}, "source": [ - "The bracket notation must be used when assigning to a new column. This will break:" + "## Dealing with Missing Data\n", + "\n", + "Pandas primarily uses the data type `np.nan` from NumPy to represent missing data." 
] }, { "cell_type": "code", - "execution_count": 153, - "id": "86dd3bca", + "execution_count": 77, + "id": "2ae69551", "metadata": {}, - "outputs": [ - { - "ename": "SyntaxError", - "evalue": "invalid syntax (1004225935.py, line 1)", - "output_type": "error", - "traceback": [ - "\u001b[0;36m Cell \u001b[0;32mIn[153], line 1\u001b[0;36m\u001b[0m\n\u001b[0;31m df.'x_plus_y' = df.x + df.y\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n" - ] - } - ], - "source": [ - "df.'x_plus_y' = df.x + df.y" - ] - }, - { - "cell_type": "markdown", - "id": "b2bedd5b", - "metadata": { - "tags": [] - }, - "source": [ - "### Removing Columns" - ] - }, - { - "cell_type": "markdown", - "id": "30b4bfb1", - "metadata": { - "tags": [] - }, + "outputs": [], "source": [ - "- Using the reserverd keyword `del` to drop a DataFrame or single columns from the dataframe:" + "import numpy as np" ] }, { "cell_type": "code", - "execution_count": 154, - "id": "5bef1897", + "execution_count": 78, + "id": "88d0b1c6", "metadata": {}, "outputs": [], "source": [ - "df_drop = df.copy()" + "df_miss = pd.DataFrame({\n", + " 'x':[2, np.nan, 1], \n", + " 'y':[np.nan, np.nan, 6]}\n", + ")" ] }, { "cell_type": "code", - "execution_count": 155, - "id": "7a234a8c", + "execution_count": 79, + "id": "8404fdeb", "metadata": {}, "outputs": [ { @@ -3798,59 +4631,60 @@ " \n", " x\n", " y\n", - " is_label\n", - " x_plus_y\n", " \n", " \n", " \n", " \n", " 0\n", - " 0\n", - " 1\n", - " True\n", - " 1\n", + " 2.0\n", + " NaN\n", " \n", " \n", " 1\n", - " 2\n", - " 1\n", - " False\n", - " 3\n", + " NaN\n", + " NaN\n", + " \n", + " \n", + " 2\n", + " 1.0\n", + " 6.0\n", " \n", " \n", "\n", "" ], "text/plain": [ - " x y is_label x_plus_y\n", - "0 0 1 True 1\n", - "1 2 1 False 3" + " x y\n", + "0 2.0 NaN\n", + "1 NaN NaN\n", + "2 1.0 6.0" ] }, - "execution_count": 155, + "execution_count": 79, "metadata": {}, "output_type": "execute_result" } ], "source": [ - 
"df_drop.head(2)" + "df_miss" ] }, { - "cell_type": "code", - "execution_count": 156, - "id": "f996dd87", + "cell_type": "markdown", + "id": "565b8fa8", "metadata": {}, - "outputs": [], "source": [ - "# delete the column 'x'\n", - "del df_drop['x']" + "### dropping missing data\n", + "\n", + "We use the `dropna()` method to drop all rows with missing data in any column.\n", + "\n", + "[More details](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.dropna.html)" ] }, { "cell_type": "code", - "execution_count": 157, - "id": "2f8a2a9c", + "execution_count": 82, + "id": "0f90aff6", "metadata": {}, "outputs": [ { @@ -3874,71 +4708,47 @@ " \n", " \n", " \n", + " x\n", " y\n", - " is_label\n", - " x_plus_y\n", " \n", " \n", " \n", " \n", - " 0\n", - " 1\n", - " True\n", - " 1\n", - " \n", - " \n", - " 1\n", - " 1\n", - " False\n", - " 3\n", - " \n", - " \n", - " obs3\n", - " 0\n", - " False\n", - " 1\n", - " \n", - " \n", - " 3\n", - " 0\n", - " False\n", - " 5\n", + " 2\n", + " 1.0\n", + " 6.0\n", " \n", " \n", "\n", "" ], "text/plain": [ - " y is_label x_plus_y\n", - "0 1 True 1\n", - "1 1 False 3\n", - "obs3 0 False 1\n", - "3 0 False 5" + " x y\n", + "2 1.0 6.0" ] }, - "execution_count": 157, + "execution_count": 82, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df_drop" + "df_drop_all = df_miss.dropna()\n", + "df_drop_all" ] }, { "cell_type": "markdown", - "id": "b34fe8af", + "id": "190e3c8d", "metadata": {}, "source": [ - "- Using the method `drop()` to drop one or more columns. It takes takes `axis` parameter:\n", - " - axis=0 refers to rows \n", - " - axis=1 refers to columns " + "The `subset` parameter takes a list of column names to specify which columns should have missing values." 
] }, { "cell_type": "code", - "execution_count": 158, - "id": "13358267", + "execution_count": 81, + "id": "ba5ad471", "metadata": {}, "outputs": [ { @@ -3962,53 +4772,69 @@ " \n", " \n", " \n", + " x\n", " y\n", " \n", " \n", " \n", " \n", " 0\n", - " 1\n", - " \n", - " \n", - " 1\n", - " 1\n", - " \n", - " \n", - " obs3\n", - " 0\n", + " 2.0\n", + " NaN\n", " \n", " \n", - " 3\n", - " 0\n", + " 2\n", + " 1.0\n", + " 6.0\n", " \n", " \n", "\n", "" ], "text/plain": [ - " y\n", - "0 1\n", - "1 1\n", - "obs3 0\n", - "3 0" + " x y\n", + "0 2.0 NaN\n", + "2 1.0 6.0" ] }, - "execution_count": 158, + "execution_count": 81, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# Here we drop columns\n", - "df_drop = df_drop.drop(['x_plus_y', 'is_label'], axis=1)\n", - "df_drop" + "df_drop_x = df_miss.dropna(subset=['x'])\n", + "df_drop_x" + ] + }, + { + "cell_type": "markdown", + "id": "c7efa14a", + "metadata": {}, + "source": [ + "### Replace missing values \n", + "\n", + "We can use `fillna()` to replace missing data with whatever value you like, e.g. 
$0$s.\n", + "\n", + "[Details](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.fillna.html)\n", + "\n", + "We can pass the results of an operation -- for example to perform simple imputation, we can replace missing values in each column with the median value of the respective column:" ] }, { "cell_type": "code", - "execution_count": 159, - "id": "cf8015ca-b92e-40b5-9058-109c0fcff6d9", + "execution_count": 83, + "id": "c697c8f4", + "metadata": {}, + "outputs": [], + "source": [ + "df_filled = df_miss.fillna(df_miss.median())" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "id": "cc10a2b7", "metadata": {}, "outputs": [ { @@ -4032,42 +4858,44 @@ " \n", " \n", " \n", + " x\n", " y\n", " \n", " \n", " \n", " \n", - " 1\n", - " 1\n", + " 0\n", + " 2.0\n", + " 6.0\n", " \n", " \n", - " obs3\n", - " 0\n", + " 1\n", + " 1.5\n", + " 6.0\n", " \n", " \n", - " 3\n", - " 0\n", + " 2\n", + " 1.0\n", + " 6.0\n", " \n", " \n", "\n", "" ], "text/plain": [ - " y\n", - "1 1\n", - "obs3 0\n", - "3 0" + " x y\n", + "0 2.0 6.0\n", + "1 1.5 6.0\n", + "2 1.0 6.0" ] }, - "execution_count": 159, + "execution_count": 84, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# Now a particular observation\n", - "df_drop = df_drop.drop([0], axis=0)\n", - "df_drop" + "df_filled" ] } ], diff --git a/_sources/chapters/module-4/044-PandasII-exploration.ipynb b/_sources/chapters/module-4/044-PandasII-exploration.ipynb new file mode 100644 index 0000000..c7c55e1 --- /dev/null +++ b/_sources/chapters/module-4/044-PandasII-exploration.ipynb @@ -0,0 +1,3448 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "13aa848b", + "metadata": {}, + "source": [ + "# Pandas: Data Exploration\n", + "\n", + "Pandas is extremely useful for all the steps from data acquisition to analysis. 
Between these stages, an essential process involves exploring and processing the data to ensure it’s ready for meaningful insights.\n", + "\n", + "![](https://ds1002-resources.s3.amazonaws.com/images/workflow.png)\n", + "\n", + "In this lesson, you will learn how to use Pandas to get a first idea about your data. Specifically, we will cover:\n", + "\n", + "- Use Pandas for data inspection and exploration.\n", + "- Basic selection, indexing and slicing.\n", + "- Basic data cleaning." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "4ac80975", + "metadata": {}, + "outputs": [], + "source": [ + "# import dependencies\n", + "import pandas as pd" + ] + }, + { + "cell_type": "markdown", + "id": "f69d5682", + "metadata": { + "tags": [] + }, + "source": [ + "We will work with the following dataset:" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "495b4018", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_widthspecies
05.13.51.40.2setosa
14.93.01.40.2setosa
24.73.21.30.2setosa
34.63.11.50.2setosa
45.03.61.40.2setosa
..................
1456.73.05.22.3virginica
1466.32.55.01.9virginica
1476.53.05.22.0virginica
1486.23.45.42.3virginica
1495.93.05.11.8virginica
\n", + "

150 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width species\n", + "0 5.1 3.5 1.4 0.2 setosa\n", + "1 4.9 3.0 1.4 0.2 setosa\n", + "2 4.7 3.2 1.3 0.2 setosa\n", + "3 4.6 3.1 1.5 0.2 setosa\n", + "4 5.0 3.6 1.4 0.2 setosa\n", + ".. ... ... ... ... ...\n", + "145 6.7 3.0 5.2 2.3 virginica\n", + "146 6.3 2.5 5.0 1.9 virginica\n", + "147 6.5 3.0 5.2 2.0 virginica\n", + "148 6.2 3.4 5.4 2.3 virginica\n", + "149 5.9 3.0 5.1 1.8 virginica\n", + "\n", + "[150 rows x 5 columns]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "iris_df = pd.read_csv(\"https://raw.githubusercontent.com/mwaskom/seaborn-data/refs/heads/master/iris.csv\")\n", + "iris_df" + ] + }, + { + "cell_type": "markdown", + "id": "897df551", + "metadata": {}, + "source": [ + "Check that we have a dataframe:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "cc587038", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "pandas.core.frame.DataFrame" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(iris_df)" + ] + }, + { + "cell_type": "markdown", + "id": "ca1632a9-8f77-4d63-a4ca-a1f98bc0ef31", + "metadata": {}, + "source": [ + "**Try it yourself with Practice Exercise 1!**" + ] + }, + { + "cell_type": "markdown", + "id": "a28f0730-121e-4eef-91ae-b1851d68d6db", + "metadata": {}, + "source": [ + "## Data Inspection" + ] + }, + { + "cell_type": "markdown", + "id": "18a5abf0-efbb-4fec-a7eb-52ea24102d10", + "metadata": {}, + "source": [ + "### Exploring dataframe's structure" + ] + }, + { + "cell_type": "markdown", + "id": "24062066", + "metadata": {}, + "source": [ + "- `head()`: returns the first records in dataframe." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "6e79ac0b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_widthspecies
05.13.51.40.2setosa
14.93.01.40.2setosa
24.73.21.30.2setosa
34.63.11.50.2setosa
45.03.61.40.2setosa
\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width species\n", + "0 5.1 3.5 1.4 0.2 setosa\n", + "1 4.9 3.0 1.4 0.2 setosa\n", + "2 4.7 3.2 1.3 0.2 setosa\n", + "3 4.6 3.1 1.5 0.2 setosa\n", + "4 5.0 3.6 1.4 0.2 setosa" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "iris_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "1a3341d5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_widthspecies
05.13.51.40.2setosa
14.93.01.40.2setosa
24.73.21.30.2setosa
34.63.11.50.2setosa
45.03.61.40.2setosa
55.43.91.70.4setosa
64.63.41.40.3setosa
75.03.41.50.2setosa
84.42.91.40.2setosa
94.93.11.50.1setosa
\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width species\n", + "0 5.1 3.5 1.4 0.2 setosa\n", + "1 4.9 3.0 1.4 0.2 setosa\n", + "2 4.7 3.2 1.3 0.2 setosa\n", + "3 4.6 3.1 1.5 0.2 setosa\n", + "4 5.0 3.6 1.4 0.2 setosa\n", + "5 5.4 3.9 1.7 0.4 setosa\n", + "6 4.6 3.4 1.4 0.3 setosa\n", + "7 5.0 3.4 1.5 0.2 setosa\n", + "8 4.4 2.9 1.4 0.2 setosa\n", + "9 4.9 3.1 1.5 0.1 setosa" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# You can specify how many rows to show\n", + "iris_df.head(10)" + ] + }, + { + "cell_type": "markdown", + "id": "359395f7", + "metadata": {}, + "source": [ + "- `tail()`: returns the last records in dataframe." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "096cbcf6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_widthspecies
1456.73.05.22.3virginica
1466.32.55.01.9virginica
1476.53.05.22.0virginica
1486.23.45.42.3virginica
1495.93.05.11.8virginica
\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width species\n", + "145 6.7 3.0 5.2 2.3 virginica\n", + "146 6.3 2.5 5.0 1.9 virginica\n", + "147 6.5 3.0 5.2 2.0 virginica\n", + "148 6.2 3.4 5.4 2.3 virginica\n", + "149 5.9 3.0 5.1 1.8 virginica" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "iris_df.tail()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "cd014af8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_widthspecies
1406.73.15.62.4virginica
1416.93.15.12.3virginica
1425.82.75.11.9virginica
1436.83.25.92.3virginica
1446.73.35.72.5virginica
1456.73.05.22.3virginica
1466.32.55.01.9virginica
1476.53.05.22.0virginica
1486.23.45.42.3virginica
1495.93.05.11.8virginica
\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width species\n", + "140 6.7 3.1 5.6 2.4 virginica\n", + "141 6.9 3.1 5.1 2.3 virginica\n", + "142 5.8 2.7 5.1 1.9 virginica\n", + "143 6.8 3.2 5.9 2.3 virginica\n", + "144 6.7 3.3 5.7 2.5 virginica\n", + "145 6.7 3.0 5.2 2.3 virginica\n", + "146 6.3 2.5 5.0 1.9 virginica\n", + "147 6.5 3.0 5.2 2.0 virginica\n", + "148 6.2 3.4 5.4 2.3 virginica\n", + "149 5.9 3.0 5.1 1.8 virginica" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Again, we can can specify how many rows to show\n", + "iris_df.tail(10)" + ] + }, + { + "cell_type": "markdown", + "id": "45e495f5", + "metadata": {}, + "source": [ + "- `dtypes`: returns the data types of each column." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "c12d0f01", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "sepal_length float64\n", + "sepal_width float64\n", + "petal_length float64\n", + "petal_width float64\n", + "species object\n", + "dtype: object" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "iris_df.dtypes" + ] + }, + { + "cell_type": "markdown", + "id": "36df3ba6", + "metadata": {}, + "source": [ + "- `shape`: As with NumPy, the shape of the dataframe (number of rows, number of columns)." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "15b4e581", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(150, 5)" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "iris_df.shape" + ] + }, + { + "cell_type": "markdown", + "id": "8cb0b767", + "metadata": {}, + "source": [ + "You can also use the built-in funciton `len()` to obtain the row (record) count." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "be7bf6fa", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "150" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(iris_df)" + ] + }, + { + "cell_type": "markdown", + "id": "65ecc35c", + "metadata": {}, + "source": [ + "- `columns`: contains the column names." + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "89e82424", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['sepal_length', 'sepal_width', 'petal_length', 'petal_width',\n", + " 'species'],\n", + " dtype='object')" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "iris_df.columns" + ] + }, + { + "cell_type": "markdown", + "id": "861715f7", + "metadata": {}, + "source": [ + "- `info()`: prints information about the dataframe including the index dtype and columns, non-null values and memory usage." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "8d306a64", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 150 entries, 0 to 149\n", + "Data columns (total 5 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 sepal_length 150 non-null float64\n", + " 1 sepal_width 150 non-null float64\n", + " 2 petal_length 150 non-null float64\n", + " 3 petal_width 150 non-null float64\n", + " 4 species 150 non-null object \n", + "dtypes: float64(4), object(1)\n", + "memory usage: 6.0+ KB\n" + ] + } + ], + "source": [ + "iris_df.info()" + ] + }, + { + "cell_type": "markdown", + "id": "27970e74-960e-4b9d-a3d0-2a785d7d62b6", + "metadata": {}, + "source": [ + "**Try it yourself with Practice Exercise 2!**" + ] + }, + { + "cell_type": "markdown", + "id": "c9e0c71a-9f48-4377-b830-241bea8a5a4b", + "metadata": {}, + "source": [ + "### Summarizing data" + ] + }, + { + "cell_type": "markdown", + "id": "cf99d600-6676-4513-91ff-bfcef43765a3", + "metadata": { + "tags": [] + }, + "source": [ + "- `describe()`: summarizes the central tendency (i.e. mean), dispersion (i.e. standard deviation) and shape of a dataset's distribution." + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "1ddb0055", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_width
count150.000000150.000000150.000000150.000000
mean5.8433333.0573333.7580001.199333
std0.8280660.4358661.7652980.762238
min4.3000002.0000001.0000000.100000
25%5.1000002.8000001.6000000.300000
50%5.8000003.0000004.3500001.300000
75%6.4000003.3000005.1000001.800000
max7.9000004.4000006.9000002.500000
\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width\n", + "count 150.000000 150.000000 150.000000 150.000000\n", + "mean 5.843333 3.057333 3.758000 1.199333\n", + "std 0.828066 0.435866 1.765298 0.762238\n", + "min 4.300000 2.000000 1.000000 0.100000\n", + "25% 5.100000 2.800000 1.600000 0.300000\n", + "50% 5.800000 3.000000 4.350000 1.300000\n", + "75% 6.400000 3.300000 5.100000 1.800000\n", + "max 7.900000 4.400000 6.900000 2.500000" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "iris_df.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "30896c3d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
countmeanstdmin25%50%75%max
sepal_length150.05.8433330.8280664.35.15.806.47.9
sepal_width150.03.0573330.4358662.02.83.003.34.4
petal_length150.03.7580001.7652981.01.64.355.16.9
petal_width150.01.1993330.7622380.10.31.301.82.5
\n", + "
" + ], + "text/plain": [ + " count mean std min 25% 50% 75% max\n", + "sepal_length 150.0 5.843333 0.828066 4.3 5.1 5.80 6.4 7.9\n", + "sepal_width 150.0 3.057333 0.435866 2.0 2.8 3.00 3.3 4.4\n", + "petal_length 150.0 3.758000 1.765298 1.0 1.6 4.35 5.1 6.9\n", + "petal_width 150.0 1.199333 0.762238 0.1 0.3 1.30 1.8 2.5" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# if you prefer the columns in the rows\n", + "iris_df.describe().T" + ] + }, + { + "cell_type": "markdown", + "id": "bdb7c40c-0551-4789-b3ca-633cffb9c987", + "metadata": {}, + "source": [ + "By default, if the dataframe contains mixed type data (numeric and categorical), it will summarize only the numeric data.\n", + "\n", + "If you want to summarize the categorical data, this needs to happen separately." + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "294d2ade", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
species
count150
unique3
topsetosa
freq50
\n", + "
" + ], + "text/plain": [ + " species\n", + "count 150\n", + "unique 3\n", + "top setosa\n", + "freq 50" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "iris_df[[\"species\"]].describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "03ed92bf", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 150.000000\n", + "mean 5.843333\n", + "std 0.828066\n", + "min 4.300000\n", + "25% 5.100000\n", + "50% 5.800000\n", + "75% 6.400000\n", + "max 7.900000\n", + "Name: sepal_length, dtype: float64" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "iris_df.sepal_length.describe()" + ] + }, + { + "cell_type": "markdown", + "id": "afc78a19-4120-4119-bccb-270690aa00cf", + "metadata": {}, + "source": [ + "- `value_counts()`: returns the frequency for each distinct value. Arguments give the ability to sort by count or index, normalize, and more. Look at its [documentation](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.value_counts.html) for further details." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "8979b360", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "setosa 50\n", + "versicolor 50\n", + "virginica 50\n", + "Name: species, dtype: int64" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "iris_df.species.value_counts()" + ] + }, + { + "cell_type": "markdown", + "id": "1e252829-800f-4d04-b04d-498d1abd8616", + "metadata": {}, + "source": [ + "Show percentages instead of counts" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "1345fc52", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "setosa 0.333333\n", + "versicolor 0.333333\n", + "virginica 0.333333\n", + "Name: species, dtype: float64" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "iris_df.species.value_counts(normalize=True)" + ] + }, + { + "cell_type": "markdown", + "id": "dbb5a45a-f665-4226-9b03-a9c6063b0dfe", + "metadata": {}, + "source": [ + "- `corr()`: returns the correlation between numeric columns." + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "8fee461e", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_15501/1934569051.py:1: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning.\n", + " iris_df.corr()\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_width
sepal_length1.000000-0.1175700.8717540.817941
sepal_width-0.1175701.000000-0.428440-0.366126
petal_length0.871754-0.4284401.0000000.962865
petal_width0.817941-0.3661260.9628651.000000
\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width\n", + "sepal_length 1.000000 -0.117570 0.871754 0.817941\n", + "sepal_width -0.117570 1.000000 -0.428440 -0.366126\n", + "petal_length 0.871754 -0.428440 1.000000 0.962865\n", + "petal_width 0.817941 -0.366126 0.962865 1.000000" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "iris_df.corr()" + ] + }, + { + "cell_type": "markdown", + "id": "af86a280-53c5-4231-91b3-100b1995a7be", + "metadata": {}, + "source": [ + "Correlation can be computed on two fields by subsetting on them:" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "02d1c9ee", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthpetal_length
sepal_length1.0000000.871754
petal_length0.8717541.000000
\n", + "
" + ], + "text/plain": [ + " sepal_length petal_length\n", + "sepal_length 1.000000 0.871754\n", + "petal_length 0.871754 1.000000" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "iris_df[['sepal_length','petal_length']].corr()" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "f433fe95", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthpetal_lengthsepal_width
sepal_length1.0000000.871754-0.11757
petal_length0.8717541.000000-0.42844
sepal_width-0.117570-0.4284401.00000
\n", + "
" + ], + "text/plain": [ + " sepal_length petal_length sepal_width\n", + "sepal_length 1.000000 0.871754 -0.11757\n", + "petal_length 0.871754 1.000000 -0.42844\n", + "sepal_width -0.117570 -0.428440 1.00000" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "iris_df[['sepal_length','petal_length','sepal_width']].corr()" + ] + }, + { + "cell_type": "markdown", + "id": "df8bc659-c912-4711-9020-03e06de1d11e", + "metadata": {}, + "source": [ + "**Try it yourself with Practice Exercise 3!**" + ] + }, + { + "cell_type": "markdown", + "id": "93908c90-9d4c-462c-bf45-1b3a4da2b2a7", + "metadata": {}, + "source": [ + "### Visualizing data\n", + "\n", + "Pandas dataframes have a `plot` method, which enables quick data visualization. \n", + "\n", + "This method is built on Matplotlib, the primary library for creating visualizations in Python. Another important library for this purpose is Seaborn, which works seamlessly with DataFrames and produces professional, visually appealing results." + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "8f29d5f7-2454-4ce0-adad-e4bb020e8ee2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# hist to display continuous data\n", + "iris_df.plot(y=\"sepal_length\", kind=\"hist\")" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "80af1a62-0ffb-438c-b20f-b1a0d31be6c7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAh8AAAHTCAYAAACHn3qDAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAkEklEQVR4nO3de1TUdf7H8deYOqIMqKWDCCgKad5LN9MukgWta944Z2tXKy/ZVuhuZF5iSR0rwThlbEuZpZXWUWv3mJVbCqtmrq6JGqbmrUQlc2K9ASqByvz+6Dg/CbNQ5vOVmefjnDnH+X6/wLvThM8+3+98x+bxeDwCAAAwpI7VAwAAgMBCfAAAAKOIDwAAYBTxAQAAjCI+AACAUcQHAAAwivgAAABGER8AAMCoulYP8FMVFRX67rvv5HA4ZLPZrB4HAAD8Ch6PRyUlJQoPD1edOhdf27ji4uO7775TZGSk1WMAAIBLUFBQoIiIiIsec8XFh8PhkPTj8CEhIRZPAwAAfo3i4mJFRkZ6/x6/mCsuPs6dagkJCSE+AACoZX7NJRNccAoAAIwiPgAAgFHEBwAAMIr4AAAARhEfAADAKOIDAAAYRXwAAACjiA8AAGAU8QEAAIwiPgAAgFHVig+XyyWbzVbpERYW5t3v8XjkcrkUHh6uoKAgxcXFafv27TU+NAAAqL2qvfLRsWNHHTp0yPvYunWrd19GRoZmzpyprKws5ebmKiwsTPHx8SopKanRoQEAQO1V7fioW7euwsLCvI9mzZpJ+nHVIzMzU6mpqUpMTFSnTp00b948nTp1SgsWLKjxwQEAQO1U7fjYs2ePwsPDFR0drT/84Q/au3evJCk/P19ut1sJCQneY+12u/r06aN169bV3MQAAKBWq1udg3v27Kn58+fr2muv1ffff69nn31WvXv31vbt2+V2uyVJTqez0tc4nU7t37//Z79nWVmZysrKvM+Li4urMxIAAKhlqhUf/fr18/65c+fO6tWrl9q2bat58+bppptukiTZbLZKX+PxeKpsO196erqmTZtWnTGuGK2f/JfVI/iFfTP6Wz2C3+A1WXN4XdYMXpM1x59ek5f1VttGjRqpc+fO2rNnj/ddL+dWQM4pLCysshpyvpSUFBUVFXkfBQUFlzMSAAC4wl1WfJSVlWnHjh1q0aKFoqOjFRYWppycHO/+8vJyrV69Wr179/7Z72G32xUSElLpAQAA/Fe1TruMHz9eAwYMUFRUlAoLC/Xss8+quLhYw4cPl81mU3JystLS0hQbG6vY2FilpaWpYcOGGjp0qK/mBwAAtUy14uPbb7/VH//4Rx0+fFjNmjXTTTfdpPXr16tVq1aSpIkTJ6q0tFRJSUk6duyYevbsqezsbDkcDp8MDwAAap9qxceiRYsuut9ms8nlcsnlcl3OTAAAwI/x2S4AAMAo4gMAABhFfAAAAKOIDwAAYBTxAQAAjCI+AACAUcQHAAAwivgAAABGER8AAMAo4gMAABhFfAAAAKOIDwAAYBTxAQA
AjCI+AACAUcQHAAAwivgAAABGER8AAMAo4gMAABhFfAAAAKOIDwAAYBTxAQAAjCI+AACAUcQHAAAwivgAAABGER8AAMAo4gMAABhFfAAAAKOIDwAAYBTxAQAAjCI+AACAUcQHAAAwivgAAABGER8AAMAo4gMAABhFfAAAAKOIDwAAYBTxAQAAjCI+AACAUcQHAAAwivgAAABGER8AAMAo4gMAABhFfAAAAKOIDwAAYBTxAQAAjCI+AACAUcQHAAAwivgAAABGER8AAMAo4gMAABhFfAAAAKOIDwAAYBTxAQAAjCI+AACAUcQHAAAwivgAAABGER8AAMAo4gMAABhFfAAAAKOIDwAAYNRlxUd6erpsNpuSk5O92zwej1wul8LDwxUUFKS4uDht3779cucEAAB+4pLjIzc3V6+99pq6dOlSaXtGRoZmzpyprKws5ebmKiwsTPHx8SopKbnsYQEAQO13SfFx4sQJDRs2TK+//rqaNGni3e7xeJSZmanU1FQlJiaqU6dOmjdvnk6dOqUFCxbU2NAAAKD2uqT4GDNmjPr3768777yz0vb8/Hy53W4lJCR4t9ntdvXp00fr1q274PcqKytTcXFxpQcAAPBfdav7BYsWLdLmzZuVm5tbZZ/b7ZYkOZ3OStudTqf2799/we+Xnp6uadOmVXcMAABQS1Vr5aOgoECPPfaY3nnnHTVo0OBnj7PZbJWeezyeKtvOSUlJUVFRkfdRUFBQnZEAAEAtU62Vj02bNqmwsFDdu3f3bjt79qw+++wzZWVladeuXZJ+XAFp0aKF95jCwsIqqyHn2O122e32S5kdAADUQtVa+bjjjju0detW5eXleR89evTQsGHDlJeXpzZt2igsLEw5OTnerykvL9fq1avVu3fvGh8eAADUPtVa+XA4HOrUqVOlbY0aNdLVV1/t3Z6cnKy0tDTFxsYqNjZWaWlpatiwoYYOHVpzUwMAgFqr2hec/pKJEyeqtLRUSUlJOnbsmHr27Kns7Gw5HI6a/lEAAKAWuuz4+PTTTys9t9lscrlccrlcl/utAQCAH+KzXQAAgFHEBwAAMIr4AAAARhEfAADAKOIDAAAYRXwAAACjiA8AAGAU8QEAAIwiPgAAgFHEBwAAMIr4AAAARhEfAADAKOIDAAAYRXwAAACjiA8AAGAU8QEAAIwiPgAAgFHEBwAAMIr4AAAARhEfAADAKOIDAAAYRXwAAACjiA8AAGAU8QEAAIwiPgAAgFHEBwAAMIr4AAAARhEfAADAKOIDAAAYRXwAAACjiA8AAGAU8QEAAIwiPgAAgFHEBwAAMIr4AAAARhEfAADAKOIDAAAYRXwAAACjiA8AAGAU8QEAAIwiPgAAgFHEBwAAMIr4AAAARhEfAADAKOIDAAAYRXwAAACjiA8AAGAU8QEAAIwiPgAAgFHEBwAAMIr4AAAARhEfAADAKOIDAAAYRXwAAACjiA8AAGAU8QEAAIwiPgAAgFHEBwAAMIr4AAAARlUrPmbNmqUuXbooJCREISEh6tWrlz755BPvfo/HI5fLpfDwcAUFBSkuLk7bt2+v8aEBAEDtVa34iIiI0IwZM7Rx40Zt3LhRffv21aBBg7yBkZGRoZkzZyorK0u5ubkKCwtTfHy8SkpKfDI8AACofaoVHwMGDNDvfvc7XXvttbr22ms1ffp0BQcHa/369fJ4PMrMzFRqaqoSExPVqVMnzZs3T6dOndKCBQt8NT8AAKhlLvmaj7Nnz2rRokU6efKkevXqpfz8fLndbiUkJHiPsdvt6tOnj9atW/ez36esrEzFxcWVHgAAwH9VOz62bt2q4OBg2e12PfLII3r//ffVoUMHud1uSZLT6ax0vNPp9O67kPT0dIWGhnofkZGR1R0JAADUItWOj3bt2ikvL0/r16/Xo48+quHDh+urr77y7rfZbJWO93g8VbadLyUlRUVFRd5HQUFBdUcCAAC1SN3qfkH9+vUVExMjSerRo4dyc3P1t7/9TZM
mTZIkud1utWjRwnt8YWFhldWQ89ntdtnt9uqOAQAAaqnLvs+Hx+NRWVmZoqOjFRYWppycHO++8vJyrV69Wr17977cHwMAAPxEtVY+/vrXv6pfv36KjIxUSUmJFi1apE8//VTLli2TzWZTcnKy0tLSFBsbq9jYWKWlpalhw4YaOnSor+YHAAC1TLXi4/vvv9f999+vQ4cOKTQ0VF26dNGyZcsUHx8vSZo4caJKS0uVlJSkY8eOqWfPnsrOzpbD4fDJ8AAAoPapVnzMnTv3ovttNptcLpdcLtflzAQAAPwYn+0CAACMIj4AAIBRxAcAADCK+AAAAEYRHwAAwCjiAwAAGEV8AAAAo4gPAABgFPEBAACMIj4AAIBRxAcAADCK+AAAAEYRHwAAwCjiAwAAGEV8AAAAo4gPAABgFPEBAACMIj4AAIBRxAcAADCK+AAAAEYRHwAAwCjiAwAAGEV8AAAAo4gPAABgFPEBAACMIj4AAIBRxAcAADCK+AAAAEYRHwAAwCjiAwAAGEV8AAAAo4gPAABgFPEBAACMIj4AAIBRxAcAADCK+AAAAEYRHwAAwCjiAwAAGEV8AAAAo4gPAABgFPEBAACMIj4AAIBRxAcAADCK+AAAAEYRHwAAwCjiAwAAGEV8AAAAo4gPAABgFPEBAACMIj4AAIBRxAcAADCK+AAAAEYRHwAAwCjiAwAAGEV8AAAAo4gPAABgFPEBAACMIj4AAIBRxAcAADCqWvGRnp6u3/zmN3I4HGrevLkGDx6sXbt2VTrG4/HI5XIpPDxcQUFBiouL0/bt22t0aAAAUHtVKz5Wr16tMWPGaP369crJydGZM2eUkJCgkydPeo/JyMjQzJkzlZWVpdzcXIWFhSk+Pl4lJSU1PjwAAKh96lbn4GXLllV6/uabb6p58+batGmTbrvtNnk8HmVmZio1NVWJiYmSpHnz5snpdGrBggV6+OGHa25yAABQK13WNR9FRUWSpKZNm0qS8vPz5Xa7lZCQ4D3GbrerT58+Wrdu3eX8KAAA4CeqtfJxPo/Ho3HjxumWW25Rp06dJElut1uS5HQ6Kx3rdDq1f//+C36fsrIylZWVeZ8XFxdf6kgAAKAWuOSVj7Fjx+rLL7/UwoULq+yz2WyVnns8nirbzklPT1doaKj3ERkZeakjAQCAWuCS4uPPf/6zPvzwQ61atUoRERHe7WFhYZL+fwXknMLCwiqrIeekpKSoqKjI+ygoKLiUkQAAQC1RrfjweDwaO3asFi9erJUrVyo6OrrS/ujoaIWFhSknJ8e7rby8XKtXr1bv3r0v+D3tdrtCQkIqPQAAgP+q1jUfY8aM0YIFC/TBBx/I4XB4VzhCQ0MVFBQkm82m5ORkpaWlKTY2VrGxsUpLS1PDhg01dOhQn/wDAACA2qVa8TFr1ixJUlxcXKXtb775pkaMGCFJmjhxokpLS5WUlKRjx46pZ8+eys7OlsPhqJGBAQBA7Vat+PB4PL94jM1mk8vlksvlutSZAACAH+OzXQAAgFHEBwAAMIr4AAAARhEfAADAKOIDAAAYRXwAAACjiA8AAGAU8QEAAIwiPgAAgFHEBwAAMIr4AAAARhEfAADAKOIDAAAYRXwAAACjiA8AAGAU8QEAAIwiPgAAgFHEBwAAMIr4AAAARhEfAADAKOIDAAAYRXwAAACjiA8AAGAU8QEAAIwiPgAAgFHEBwAAMIr4AAAARhEfAADAKOIDAAAYRXwAAACjiA8AAGAU8QEAAIwiPgAAgFHEBwAAMIr4AAAARhEfAADAKOIDAAAYRXwAAACjiA8AAGAU8QEAAIwiPgAAgFHEBwAAMIr4AAAARhEfAADAKOIDAAAYRXwAAACjiA8AAGAU8QEAAIwiPgAAgFHEBwAAMIr4AAAARhEfAADAKOIDAAAYRXwAAACjiA8AAGAU8QEAAIwiPgAAgFHEBwAAMIr4AAAARhEfAADAqGrHx2effaYBAwYoPDxcNptNS5YsqbTf4/HI5XI
pPDxcQUFBiouL0/bt22tqXgAAUMtVOz5Onjyprl27Kisr64L7MzIyNHPmTGVlZSk3N1dhYWGKj49XSUnJZQ8LAABqv7rV/YJ+/fqpX79+F9zn8XiUmZmp1NRUJSYmSpLmzZsnp9OpBQsW6OGHH768aQEAQK1Xo9d85Ofny+12KyEhwbvNbrerT58+Wrdu3QW/pqysTMXFxZUeAADAf9VofLjdbkmS0+mstN3pdHr3/VR6erpCQ0O9j8jIyJocCQAAXGF88m4Xm81W6bnH46my7ZyUlBQVFRV5HwUFBb4YCQAAXCGqfc3HxYSFhUn6cQWkRYsW3u2FhYVVVkPOsdvtstvtNTkGAAC4gtXoykd0dLTCwsKUk5Pj3VZeXq7Vq1erd+/eNfmjAABALVXtlY8TJ07o66+/9j7Pz89XXl6emjZtqqioKCUnJystLU2xsbGKjY1VWlqaGjZsqKFDh9bo4AAAoHaqdnxs3LhRt99+u/f5uHHjJEnDhw/XW2+9pYkTJ6q0tFRJSUk6duyYevbsqezsbDkcjpqbGgAA1FrVjo+4uDh5PJ6f3W+z2eRyueRyuS5nLgAA4Kf4bBcAAGAU8QEAAIwiPgAAgFHEBwAAMIr4AAAARhEfAADAKOIDAAAYRXwAAACjiA8AAGAU8QEAAIwiPgAAgFHEBwAAMIr4AAAARhEfAADAKOIDAAAYRXwAAACjiA8AAGAU8QEAAIwiPgAAgFHEBwAAMIr4AAAARhEfAADAKOIDAAAYRXwAAACjiA8AAGAU8QEAAIwiPgAAgFHEBwAAMIr4AAAARhEfAADAKOIDAAAYRXwAAACjiA8AAGAU8QEAAIwiPgAAgFHEBwAAMIr4AAAARhEfAADAKOIDAAAYRXwAAACjiA8AAGAU8QEAAIwiPgAAgFHEBwAAMIr4AAAARhEfAADAKOIDAAAYRXwAAACjiA8AAGAU8QEAAIwiPgAAgFHEBwAAMIr4AAAARhEfAADAKOIDAAAYRXwAAACjiA8AAGAU8QEAAIwiPgAAgFE+i49XXnlF0dHRatCggbp37641a9b46kcBAIBaxCfx8e677yo5OVmpqan64osvdOutt6pfv346cOCAL34cAACoRXwSHzNnztSDDz6o0aNH67rrrlNmZqYiIyM1a9YsX/w4AABQi9R4fJSXl2vTpk1KSEiotD0hIUHr1q2r6R8HAABqmbo1/Q0PHz6ss2fPyul0VtrudDrldrurHF9WVqaysjLv86KiIklScXFxTY9W4yrKTlk9gl+oDf+uawtekzWH12XN4DVZc6701+S5+Twezy8eW+PxcY7NZqv03OPxVNkmSenp6Zo2bVqV7ZGRkb4aDVeY0EyrJwCq4nWJK01teU2WlJQoNDT0osfUeHxcc801uuqqq6qschQWFlZZDZGklJQUjRs3zvu8oqJCR48e1dVXX33BWMGvV1xcrMjISBUUFCgkJMTqcQBek7gi8bqsGR6PRyUlJQoPD//FY2s8PurXr6/u3bsrJydHQ4YM8W7PycnRoEGDqhxvt9tlt9srbWvcuHFNjxXQQkJC+A8KVxRek7gS8bq8fL+04nGOT067jBs3Tvfff7969OihXr166bXXXtOBAwf0yCOP+OLHAQCAWsQn8XHvvffqyJEjevrpp3Xo0CF16tRJH3/8sVq1auWLHwcAAGoRn11wmpSUpKSkJF99e/wKdrtdU6dOrXJaC7AKr0lciXhdmmfz/Jr3xAAAANQQPlgOAAAYRXwAAACjiA8AAGAU8QEAAIwiPgD4zOnTp3X77bdr9+7dVo8C4Aris7fawnqlpaU6ffp0pW3cvQ8m1atXT9u2beOjEgBUwsqHnzl16pTGjh2r5s2bKzg4WE2aNKn0AEx74IEHNHfuXKvHALzOnj2r559/XjfeeKPCwsLUtGnTSg/4HisffmbChAlatWqVXnnlFT3wwAN6+eWXdfDgQc2ePVszZsywejwEoPLycs2ZM0c5OTnq0aOHGjV
qVGn/zJkzLZoMgWratGmaM2eOxo0bp8mTJys1NVX79u3TkiVLNGXKFKvHCwjcZMzPREVFaf78+YqLi1NISIg2b96smJgYvf3221q4cKE+/vhjq0dEgLn99tt/dp/NZtPKlSsNTgNIbdu21UsvvaT+/fvL4XAoLy/Pu239+vVasGCB1SP6PVY+/MzRo0cVHR0t6cfrO44ePSpJuuWWW/Too49aORoC1KpVq6weAajE7Xarc+fOkqTg4GAVFRVJku6++25NnjzZytECBtd8+Jk2bdpo3759kqQOHTrovffekyR99NFHaty4sXWDAZK+/fZbHTx40OoxEOAiIiJ06NAhSVJMTIyys7MlSbm5uXy+iyHEh58ZOXKktmzZIklKSUnRK6+8Irvdrscff1wTJkyweDoEooqKCj399NMKDQ1Vq1atFBUVpcaNG+uZZ55RRUWF1eMhAA0ZMkQrVqyQJD322GOaPHmyYmNj9cADD2jUqFEWTxcYuObDzx04cEAbN25U27Zt1bVrV6vHQQBKSUnR3LlzNW3aNN18883yeDxau3atXC6XHnroIU2fPt3qERHgPv/8c61du1YxMTEaOHCg1eMEBOIjABw/fpxTLrBMeHi4Xn311Sq/1D/44AMlJSVxGgYIQJx28TPPPfec3n33Xe/ze+65R1dffbVatmzpPR0DmHT06FG1b9++yvb27dt7L4gGTEpPT9cbb7xRZfsbb7yh5557zoKJAg/x4Wdmz56tyMhISVJOTo5ycnL0ySefqF+/flzzAUt07dpVWVlZVbZnZWVxKhCWmD179gWDuGPHjnr11VctmCjw8FZbP3Po0CFvfCxdulT33HOPEhIS1Lp1a/Xs2dPi6RCIMjIy1L9/f/373/9Wr169ZLPZtG7dOhUUFHDfGVjC7XarRYsWVbY3a9bM+y4Y+BYrH36mSZMmKigokCQtW7ZMd955pyTJ4/Ho7NmzVo6GANWnTx/t3r1bQ4YM0fHjx3X06FElJiZq165duvXWW60eDwEoMjJSa9eurbJ97dq1Cg8Pt2CiwMPKh59JTEzU0KFDFRsbqyNHjqhfv36SpLy8PMXExFg8HQJVeHg472rBFWP06NFKTk7W6dOn1bdvX0nSihUrNHHiRD3xxBMWTxcYiA8/8+KLL6p169YqKChQRkaGgoODJf14OiYpKcni6RAovvzyy199bJcuXXw4CVDVxIkTdfToUSUlJam8vFyS1KBBA02aNEkpKSkWTxcYeKstgBpXp04d2Ww2/dKvF5vNxulAWObEiRPasWOHgoKCFBsby91NDSI+/NA333yjzMxM7dixQzabTdddd52Sk5PVpk0bq0dDgNi/f/+vPrZVq1Y+nATAlYj48DPLly/XwIED1a1bN+/dJNetW6ctW7boo48+Unx8vNUjAoBxiYmJeuuttxQSEqLExMSLHrt48WJDUwUurvnwM08++aQef/xxzZgxo8r2SZMmER+wxIVW4x577DG1bdvW6tEQIEJDQ2Wz2bx/hrVY+fAzDRo00NatWxUbG1tp++7du9WlSxf98MMPFk2GQMVqHICfYuXDzzRr1kx5eXlV4iMvL0/Nmze3aCoEMlbjAPwU8eFnHnroIf3pT3/S3r171bt3b9lsNv3nP//Rc889x/vXYYkdO3bovffeq7J91KhRyszMND8QAt7333+v8ePHa8WKFSosLKzyrizegeV7xIefmTx5shwOh1544QXv+9XDw8Plcrn0l7/8xeLpEIhYjcOVZsSIETpw4IAmT56sFi1aeK8FgTlc8+HHSkpKJEkOh8PiSRDInn76ab344ot68sknL7ga99RTT1k9IgKMw+HQmjVr1K1bN6tHCVisfPiZvn37avHixWrcuHGl6CguLtbgwYO1cuVKC6dDIGI1DleayMjIX7wBHnyLlQ8/U6dOHbnd7irL2YWFhWrZsqVOnz5t0WQAq3G4MmRnZ+uFF17Q7Nmz1bp1a6vHCUisfPiJ8z9L46uvvpL
b7fY+P3v2rJYtW6aWLVtaMRoCXH5+vs6cOaPY2NhK0bFnzx7Vq1ePX/4w7t5779WpU6fUtm1bNWzYUPXq1au0/+jRoxZNFjiIDz/RrVs32Ww22Ww276c0ni8oKEh///vfLZgMgW7EiBEaNWpUlQtOP//8c82ZM0effvqpNYMhYPEuK+tx2sVP7N+/Xx6PR23atNGGDRvUrFkz77769eurefPmuuqqqyycEIEqJCREmzdvVkxMTKXtX3/9tXr06KHjx49bMxgAy7Dy4SfOfThXRUWFxZMAldlsNu+1HucrKirifgowpri4WCEhId4/X8y54+A7daweADXv7bff1s0336zw8HDvp4u++OKL+uCDDyyeDIHo1ltvVXp6eqXQOHv2rNLT03XLLbdYOBkCSZMmTVRYWChJaty4sZo0aVLlcW47fI+VDz8za9YsTZkyRcnJyZo+fbr3F36TJk2UmZmpQYMGWTwhAk1GRoZuu+02tWvXTrfeeqskac2aNSouLuat3zBm5cqVatq0qSRp1apVFk8DrvnwMx06dFBaWpoGDx4sh8OhLVu2qE2bNtq2bZvi4uJ0+PBhq0dEAPruu++UlZWlLVu2KCgoSF26dNHYsWO9fxkACCysfPiZ/Px8XX/99VW22+12nTx50oKJgB9vKpaWlmb1GICkyrcmOJ/NZlODBg0UFRUlu91ueKrAQnz4mejoaOXl5XkvQD3nk08+UYcOHSyaCoHmyy+/VKdOnVSnTp2f/UV/TpcuXQxNBfzo3K0Jfk69evV07733avbs2WrQoIHByQIH8eFnJkyYoDFjxuiHH36Qx+PRhg0btHDhQqWnp2vOnDlWj4cA0a1bN++dds/9or/QGV6bzcY7XmDc+++/r0mTJmnChAm68cYb5fF4lJubqxdeeEFTp07VmTNn9OSTT+qpp57S888/b/W4folrPvzQ66+/rmeffVYFBQWSpIiICE2dOlUPPvigxZMhUOzfv19RUVGy2Wzed1z9nJ+u0gG+duONN+qZZ57RXXfdVWn78uXLNXnyZG3YsEFLlizRE088oW+++caiKf0b8eFnSktL5fF41LBhQx0+fFh79+7V2rVr1aFDhyr/oQFAIAoKCtIXX3yh9u3bV9q+c+dOXX/99SotLdW+ffvUoUMHnTp1yqIp/Rv3+fAzgwYN0vz58yVJdevW1cCBAzVz5kwNHjxYs2bNsng6BKJ58+bpX//6l/f5xIkT1bhxY/Xu3fsXV0UAX2jfvr1mzJih8vJy77bTp09rxowZ3iA5ePCgnE6nVSP6PeLDz2zevNl7L4V//vOfcjqd2r9/v+bPn6+XXnrJ4ukQiNLS0hQUFCRJ+u9//6usrCxlZGTommuu0eOPP27xdAhEL7/8spYuXaqIiAjdeeedio+PV0REhJYuXer9n7S9e/cqKSnJ4kn9F6dd/EzDhg21c+dORUVF6Z577lHHjh01depUFRQUqF27diwhwrjzX5OTJk3SoUOHNH/+fG3fvl1xcXH63//+Z/WICEAnTpzQO++8o927d8vj8ah9+/YaOnRopU9ehu/wbhc/ExMToyVLlmjIkCFavny59/8sCwsL+bwCWCI4OFhHjhxRVFSUsrOzva/JBg0aqLS01OLpEGhOnz6tdu3aaenSpXrkkUesHidgcdrFz0yZMkXjx49X69at1bNnT/Xq1UuSlJ2dfcGbjwG+Fh8fr9GjR2v06NHavXu3+vfvL0navn27Wrdube1wCDj16tVTWVnZRe/zAd/jtIsfcrvdOnTokLp27ao6dX7syw0bNigkJKTK1d2Arx0/flyTJ0/WgQMH9Oijj+q3v/2tJGnq1KmqX7++UlNTLZ4QgWbGjBnauXOn5syZo7p1OQFgBeIDgM+cOXNG06dP16hRoxQZGWn1OIAkaciQIVqxYoWCg4PVuXNnNWrUqNL+xYsXWzRZ4CA+APhUcHCwtm3bxikWXDFGjhx50f1vvvmmoUkCF/EBwKcGDx6swYM
Ha8SIEVaPAuAKwckuAD7Vr18/paSkaNu2berevXuVJe6BAwdaNBkAq7DyAcCnzl30fCF8sBxMueGGG7RixQo1adJE119//UXf7bJ582aDkwUmVj4A+FRFRYXVIwAaNGiQ7Ha7pB9PBcJarHwAMOaHH35QgwYNrB4DAW7kyJEaNmyY7rjjDu73YRFuMgbAp86ePatnnnlGLVu2VHBwsPbu3StJmjx5subOnWvxdAhER44c0d13362IiAiNHz9eeXl5Vo8UcIgPAD41ffp0vfXWW8rIyFD9+vW92zt37qw5c+ZYOBkC1Ycffii3262pU6dq48aN6t69uzp06KC0tDTt27fP6vECAqddAPhUTEyMZs+erTvuuEMOh0NbtmxRmzZttHPnTvXq1UvHjh2zekQEuG+//VYLFy7UG2+8oT179ujMmTNWj+T3WPkA4FMHDx5UTExMle0VFRU6ffq0BRMB/+/06dPauHGjPv/8c+3bt09Op9PqkQIC8QHApzp27Kg1a9ZU2f6Pf/yDDzuEZVatWqWHHnpITqdTw4cPl8Ph0EcffaSCggKrRwsIvNUWgE9NnTpV999/vw4ePKiKigotXrxYu3bt0vz587V06VKrx0MAioiI0JEjR3TXXXdp9uzZGjBgAO/CMoxrPgD43PLly5WWlqZNmzapoqJCN9xwg6ZMmaKEhASrR0MAeu211/T73/9eTZo0sXqUgEV8APCpkSNH6r777lPfvn25pwIASVzzAcDHjhw5ov79+3NPBQBerHwA8Lnjx4/rvffe04IFC7RmzRq1a9dO9913n4YOHarWrVtbPR4Aw4gPAEZxTwUAnHYBYAz3VAAgER8ADOCeCgDOx2kXAD51/j0Vhg0bxj0VABAfAHyLeyoA+CniAwAAGMU1HwAAwCjiAwAAGEV8AAAAo4gPAABgFPEBAACMIj4AAIBRxAcAADCK+AAAAEb9H9LPteJMRKRdAAAAAElFTkSuQmCC", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# barplots to show categorical data\n", + "iris_df.species.value_counts().plot(kind=\"bar\")" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "72063e59-daab-4efb-9f13-13913ac0195b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAbMAAAGFCAYAAAB3zh03AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA7qklEQVR4nO3dd1yVZeMG8OucwzpsAWVPBRUXDnCghjlw5Mi0LH3Tt2FvZr4NtWxomg1NTc0cWWoqZqlljsqVlTsVJxiKA5kqsjeHc35/8MovEkHhwP08z7m+nw8f6XDGxUm5uO/nfu5HZTAYDCAiIpIxtegAREREdcUyIyIi2WOZERGR7LHMiIhI9lhmREQkeywzIiKSPZYZERHJHsuMiIhkj2VGRESyxzIjIiLZY5kREZHsscyIiEj2WGZERCR7LDMiIpI9lhkREckey4yIiGSPZUZERLLHMiMiItljmRERkeyxzIiISPZYZkREJHssMyIikj2WGRERyR7LjIiIZI9lRkREsscyIyIi2WOZERGR7LHMiIhI9lhmREQkeywzIiKSPZYZERHJHsuMiIhkj2VGRESyxzIjIiLZY5kREZHsscyIiEj2WGZERCR7LDMiIpI9lhkREckey4yIiGSPZUZERLLHMiMiItljmRERkeyxzIiISPZYZkREJHtmogMQEWAwGJBbrEN2QSmyC0uRU1iKnKLyz+985BbpUFpmgF5vQJmh/E8DAJUK0KhU0KhVUKtVsNCoYa81h8M/Puy1ZhWfW1vwnz4pC/9GEzUAvd6AtJwiXM8owPWMAiT+7+N6RgESMwuRkV+CMr2hwfJYaNRoYm8J70bW8HGyho+zNbydyj/3bqSFs61lg2UhMgaVwWBouH9BRCYg4XY+ziVn41xyNv5KzcX1jAIkZxaipEwvOtp9s7U0g1cjLfxdbBDsbo/WXg5o4+kAF5bcXa5duwZ/f3+cOnUKISEhouOYLI7MiGrJYDAg4XYBziVn4/z/yut8cjZyinSio9VZXrEOf6Xl4q+0XPx8Pq3idncHK7T2LC+2Np4OaO3pgMZ2LDgSjwtAiO6TwWBATEo2Vvx+Gf/66hjaztyNiHm/4eVvTmHFH1dw+PJtRRRZdVKzi7An9gYW7LmIf685jtAP9qLzh3sxcUM0Nv55HUmZBaIj1trmzZvRpk0baLVaODs7o0+fPsjPzwcArF69Gi1btoSVlRVatGiBpUuXVjzO398fANC+fXuoVCpEREQAAPR6PWbNmgUvLy9YWloiJCQEv/zyS8XjSkpKMHHiRLi7u8PKygp+fn746KOPKr6+YMECtGnTBjY2NvD29saECROQl5fXAO+EPHFkRlSNlKxCHLyUjgPx6Tgcn47b+SWiI0nOjZxi7Dibih1nUwEA/i42CG/mjO7NGqNbM2fYW5kLTliz1NRUPPnkk5g7dy4effRR5Obm4sCBAzAYDFi5ciVmzJiBJUuWoH379jh16hSef/552NjYYOzYsfjzzz8RFhaGvXv3olW
rVrCwsAAALFq0CPPnz8eKFSvQvn17rFq1CkOGDEFMTAwCAwOxePFibNu2Dd999x18fHyQmJiIxMTEikxqtRqLFy+Gn58frl69igkTJmDq1KmVipT+H4+ZEf1Nsa4MBy6m449Lt3DwUjqupOeLjiRrGrUKbTwd0CPQBRHNm6CDjyNUKpXoWHeJjo5Gx44dce3aNfj6+lb6mo+PD+bMmYMnn3yy4rbZs2fjp59+wuHDh+95zMzT0xMvvfQS3nrrrYrbwsLCEBoais8//xyTJk1CTEwM9u7de1/vyaZNm/Diiy8iPT297t+wArHMyOSV6PQ4cOkWdpxNxd7YG8gtVvZUoUgeDlYY2MYdj7TzQIi3o+g4FcrKyhAZGYk///wTkZGR6NevH0aMGAGdTocmTZpAq9VCrf7/ozI6nQ4ODg64ceNGlWWWk5MDBwcH/Pbbb3jooYcqHvfqq6/izJkz+PXXXxEdHY2+ffvC2dkZ/fv3xyOPPIJ+/fpV3Hf//v348MMPERsbi5ycHOh0OhQVFSEvLw82NjYN9t7IBacZySTpyvQ4GJ+OHWdTsTsmTfHHuqQiJbsIXx68ii8PXoW3kxYD27hjcFsPtPZ0EJpLo9Fgz549OHz4MHbv3o3PPvsMb7/9NrZv3w4AWLlyJTp37nzXY2ryzxGXwWCouK1Dhw64evUqfv75Z+zduxePP/44+vTpg82bNyMhIQEDBw7Ef/7zH7z//vtwcnLCwYMH8eyzz6K0tNRI37WysMzIZBgMBhy5fBvbz6bgl/NpyCzgDwWREjMKseL3K1jx+xX4u9hgUBt3DA3xQKCrnZA8KpUK4eHhCA8Px/Tp0+Hr64tDhw7B09MTV65cwejRo6t83J1jZGVlZRW32dvbw8PDAwcPHkTPnj0rbj98+DDCwsIq3e+JJ57AE088gREjRqB///7IyMjAiRMnoNPpMH/+/IoR4XfffVcf37ZisMxI8bIKSvDdiURsOHYd127Ld7Wdkl1Nz8eS/fFYsj8eYX5OGNPVFwNau8Fc0zALro8dO4Z9+/ahX79+aNKkCY4dO4Zbt26hZcuWeO+99zBp0iTY29tjwIABKC4uxokTJ5CZmYnXXnutYhryl19+gZeXF6ysrODg4IApU6ZgxowZaNq0KUJCQrB69WqcPn0aUVFRAIBPP/0U7u7uCAkJgVqtxqZNm+Dm5gZHR0c0bdoUOp0On332GQYPHoxDhw5h+fLlDfJeyBXLjBTrdGIW1h1JwI6zKSjWyeeEZVP357UM/HktAy62lhgV6o2nOvvAw1Fbr69pb2+PP/74AwsXLkROTg58fX0xf/58DBgwAABgbW2NTz75BFOnToWNjQ3atGmDV155BQBgZmaGxYsXY9asWZg+fTp69OiB3377DZMmTUJOTg5ef/113Lx5E8HBwdi2bRsCAwMBALa2tpgzZw4uXboEjUaD0NBQ/PTTT1Cr1QgJCcGCBQswZ84cTJs2DT179sRHH32Ep59+ul7fBznjAhBSlKLSMmw7nYL1xxJwNilbdBwyAo1ahV7Nm+Dprr7oEegiydWQJB7LjBThZk75woJvjyciu5DHwpTKz9kaY7v54ckwH1iZ17wAg0wHy4xkLTW7EMt+u4xvjydyKtGENLazxPgeARjTxRdaC5YascxIppKzCrF0fzw2nUiS1Qa+ZFzONhZ4rkcAnu7qCxtLLgEwZSwzkpXEjAJ8vj8eW6KTUFrGv7pUrpG1OZ7t7o+x3fxgJ4Pts8j4WGYkC9dvF2Dxr5ew9VQydA143S+SFwetOf4d7odnu/uz1EwMy4wkLbuwFJ/tu4S1RxI4nUj3zcXWAq/0CcKTYT7QqLn60RSwzEiSdGV6rD+agEX7LnGnDqq1IFdbvD0oGA8FNRYdheoZy4yk5/KviD++G31O9xCdhBSiV/PGmDG4FfxcuEGvUrHMSDqyrgO73gIubIcBKkxrNA8bU91FpyKFsDBT47nu/pj4cDNYW3Dlo9KwzEg8XQl
waCFwYAGgK6y4ucClHVolT4XBwGMeZDzuDlZ4Z1AwBrXlL0pKwjIjsVLPAD+8CNyMqfLLGz2m4c0rbRo4FJmCAa3dMHtYazjbWoqOQkbAMiMxykqB3+cCBxcA+ntfS6zMpgm65n2Cm8VcZk3G52RjgVlDW+GRth6io1Adscyo4aWeAbZOAG6cv6+7R3uPxfBLkfUcikzZoDbumDW0FUdpMsYyo4ZTVgr8MQ84MB/Q3/9ye4PGEqPMF+FYln09hiNT52xjgfeHtcbANjyWJkcsM2oYaefKj43dOFerh9/w6IPOV54xciiiuw1q6473h7aGk42F6Cj0ABrmMq5k2o59Aax8uNZFBgCuKXsx3uu6EUMRVW3n2VRELvwDx67cFh2FHgBHZlR/ivOA7ZOA81uM83ROzdE6bTpK9VyqT/VPo1ZhSmRzvNAzgBcElQGWGdWPm38B3/0LSL9o1Kfd7vkaXr7cyajPSVSdvsGumDeyHRy0XFErZSwzMr6zm4Dt/wVK843+1HqtE3oWzkdSEVedUcPxcbLGsjEd0MrDQXQUugceMyPj0ZUAO18Hvn+uXooMANSFGfjCe3e9PDfRvVzPKMDwpYfx7XEet5UqjszIOLISgU1jgeST9f5SBrUZntMuxL7bTvX+WkT/NLKjF94f1hpW5hrRUehvWGZUd8kngQ1PAPm3GuwlM9y6o8O1CQ32ekR/F+LtiK/GduJJ1hLCaUaqm7ifgTWPNGiRAYBT2kG87hPfoK9JdMfpxCwMX3YYV9PrZzqdHhzLjGrv+JfAxtFAaYGQl/9P8WrYaHj1aRIj4XYBHlt2GCcTMkVHIbDMqDYMBmDP9PLFHoYyYTHMs69iccBRYa9PlJFfgtFfHsUv51NFRzF5PGZGD0ZXAmx9ETi/WXQSAIDB0g6RuoW4mK8VHYVMmFoFvDMoGM909xcdxWRxZEb3rzALWPeoZIoMAFTFuVjuvkN0DDJxegMwa0cs3t8RC44PxGCZ0f3Juwms6g8kHBSd5C7+yT9imOtN0TGI8NXBq5i08TTK9Cy0hsYyo5rl3SxfsXjrgugkVVIZ9Hjfcr3oGEQAgO1nUjBp4ykWWgNjmVH17hRZepzoJNWyu3kCM/ykWbZkenaeTWWhNTCWGd2bTIrsjn/lr4KDuU50DCIALLSGxjKjqsmsyADALDcZy/0OiI5BVIGF1nBYZnQ3GRbZHV1S1yPEPk90DKIKLLSGwTKjymRcZACg0hViSZOtomMQVcJCq38sM/p/RdnA2mGyLbI7vJJ+wr88kkXHIKpk59lUTN18VnQMxWKZUbmyUuDbMcDNGNFJjOIt9dfQqLhvI0nLlugkLNhj3KuvUzmWGZX7cSJw9Q/RKYxGm34eH/vzt2CSnsX7LuG744miYygOy4yAfe8DZzeKTmF0j2WthptliegYRHd564dz+ONiw142SelYZqbu5BrgwDzRKeqFuuAWVvjsEx2D6C46vQEToqIRk5ItOopisMxM2aU95ZdxUbC2Kd8ivBF/YJD05BXr8Mya40jJKhQdRRFYZqYq5TSwaRygV/aOGaqyEixstEl0DKIq3cgpxr9XH0dOUanoKLLHMjNFuWnAhieAEtM4ubhxyq940fua6BhEVYq7kYsJ66N5DlodscxMTZkO2PRvIC9NdJIG9YpuDSzVXKpP0nQwPh0L9sj7/E7RWGamZt97wPXDolM0OMvMi1gYcEJ0DKJ7WvrbZfz61w3RMWSLZWZKLmwHDn8mOoUwkelr4KctEh2DqEoGA/Dqt2eQmFEgOoosscxMxe3LwNaXRKcQSl2UheVeu0THILqn7MJSTIiKRrGuTHQU2WGZmYLSQuC7sUAxl6g3T9qMfi4ZomMQ3dO55GzM3B4rOobssMxMwc7JwI1zolNIgspQhrm2UaJjEFVrw7Hr+OFUkugYssIyU7rodcDp9aJTSIpj2hFM9b0kOgZRtd76/jzi0nJFx5ANlpmS3b4M/DxVdApJer5wFWzMeFy
CpKuwtAyTvjmFEh1PKbkfLDOl0uuBrROAUq6Mqop5TgI+9z8iOgZRteJu5GLRPl4y5n6wzJTq2DIg8ajoFJL20I2v0dKWZU/Stvz3KziblCU6huSxzJQoPb78si5ULVVJPpa5bRcdg6haZXoDJm86w+X6NWCZKY1eD/w4AdBxJ+774Zu0DY+5ctcFkraLN/KwaC8XLVWHZaY0R5cCicdEp5ANFQyYabFOdAyiGq344wrOJGaJjiFZLDMlSb8E/DpbdArZsb0VjVn+MaJjEFWL043VY5kpxZ3Vi5xerJXRuavgbMFrSpG0XbqZh4WcbqwSy0wpjn8JJP0pOoVsafJSscz3D9ExiGr0xR9XcCE1R3QMyWGZKUF+OrD/A9EpZC80NQodHLjjAklbmd6AGds4Lf5PLDMl2DcTKMoSnUL2VLoiLHH5QXQMohr9eTUDP55OFh1DUlhmcpd8EjjFvReNxSP5F4z14A8Jkr6PfvoL+cU60TEkg2UmZwYD8PMbgIF7txnTm6o10Kj4npK0peUU4fP98aJjSAbLTM7ObwGSjotOoTja2zH4JOCM6BhENfrq4FUkZXJLNoBlJl+lRcDemaJTKNawjNVwtyoRHYOoWsU6PT7++S/RMSSBZSZXRz8Hsq+LTqFY6sJ0fOG9R3QMohrtOJuKkwmZomMIxzKTo/zbwIFPRadQvNbJ3+EhZ/6QIOn7YGes6AjCsczk6PBioITnQ9U3lb4U8+2/Ex2DqEbR17Pw+8VbomMIxTKTm4KM8t0+qEG4pP6Ol32uio5BVKPF+0x7myuWmdwc+RwoyROdwqS8XLIaWg03dyVpO5mQiUPx6aJjCMMyk5PCLODPL0SnMDkWWfFY6M9TIEj6Fpnw6IxlJidHlwHF3GBUhH63vkaAdZHoGETV+vNqBo5euS06hhAsM7koygGOLROdwmSpirOx3PNn0TGIamSqx85YZnJxbAVQlC06hUkLTNqCgY1N95gEycPhy7dx4lqG6BgNjmUmB8V55SdJk1Aqgx4fWW8QHYOoRqZ47IxlJgcn1wCFPHlXChxuHMVbfnGiYxBV68CldJxPNq2ZHJaZ1BkMwImvRKegv3mmYBXszHjpDZK2dUcSREdoUCwzqbu8D8i4IjoF/Y1ZTiKW+h8WHYOoWtvOpCC7sFR0jAbDMpO64xyVSVH3tHVoZZcvOgbRPRWWlmHLySTRMRoMy0zKshKBi7tEp6AqqErzscx1m+gYRNVaf8x0phpZZlJ2cjVg4DZKUuWdtANPuKeJjkF0T1du5ZvMFlcsM6nSlQDR60SnoGqoYMB0s7VQqQyioxDdk6ksBGGZSdWFbUD+TdEpqAY2t07jA7/zomMQ3dPeCzeQlq38rdhYZlLFy7zIxhM5q9HYwnRWjZG86PQGbPhT+VelZ5lJ0e3LwPUjolPQfdLkp2G572+iYxDd0+YTiTAYlD0dzjKTovPfi05AD6hDygaEOfKKBiRNKdlFiL6u7F2EWGZSFMMykxtVWTEWO28RHYPonrafSRUdoV6xzKTm5l/AzVjRKagW3JL34FnPRNExiKr007lU6PXKnWpkmUkNR2WyNsWwBuZq5f7AIPm6mVuMPxV8aRiWmdTweJmsWWVcwDz/aNExiKq042yK6Aj1hmUmJWnngNumdx0ipRmcsQZeVsWiYxDd5ZfzaShT6FQjy0xKOCpTBHXhbazw3i06BtFd0vNKcOTybdEx6gXLTEpifhCdgIwkOHkTHnZW9lJokielTjWyzKQi7TyQeVV0CjISlV6HefYbRccgusvu2BuKPIGaZSYVl38VnYCMzCn1AF714YVVSVoy8ksQk6K8E/xZZlJxZb/oBFQPJhSvglbDy/iQtBy4pLzLwrDMpEBXDCRwL0YlMs++gs/8j4mOQVTJwfhboiMYHctMCq4fBXSFolNQPel962sE2vD/L0nHiWuZKCpV1owBy0wKrvwmOgHVI1VxLpZ5/CQ6BlGFYp0exxW2GwjLTAp4vEz
xmib9gMFNlDe1Q/J1UGHHzVhmohVkAKlnRKegeqYy6PGB1XrRMYgqHIxnmZExXf0DMOhFp6AGYH/zON71vyA6BhEAIDY1B7fzlLPtGstMtKu/i05ADWhs3io4mOtExyCCwQAcuaKcra1YZqIlnxSdgBqQWW4ylvkdFB2DCABwJjFLdASjYZmJpCsBbnLaydR0TVuPtvZ5omMQ4VxytugIRsMyE+lmDFBWIjoFNTBVaQGWNvlRdAwixCTnKGafRpaZSCmnRScgQbySdmK0uzJ3Lyf5yC3W4drtAtExjIJlJlLqadEJSKC3NWuhUinjt2KSL6VMNbLMROLIzKRZp5/Fx/7nRMcgE3eeZUZ1oisBbsaKTkGCjchehSaWpaJjkAk7l8Qyo7rg4g8CoMm/iRU+vJYdiXM+JVsRi0BYZqJwipH+JyTlG4Q3UsZvxyQ/uUU6JChgEQjLTJRbf4lOQBKhKivBp402i45BJizuRq7oCHXGMhMl85roBCQhTVL24QWv66JjkIlKzODIrF59/fXX2LlzZ8V/T506FY6OjujWrRsSEhIEJjMClhn9w2tlq2Gulv+xC5Ifllk9+/DDD6HVagEAR44cwZIlSzB37ly4uLjg1VdfFZyujjJlXsZkdJaZcfg04IToGGSCriugzMxEB6hOYmIimjVrBgDYunUrRowYgfHjxyM8PBwRERFiw9VFbhqgKxSdgiRoYPoa+Gjb4HqhlegoZEKUUGaSHpnZ2tri9u3ySxTs3r0bffr0AQBYWVmhsFDGZcApRroHdVEmvvDaJToGmZikzELZL8+XdJn17dsXzz33HJ577jlcvHgRgwYNAgDExMTAz89PbLi64BQjVaN50mb0dckQHYNMSLFOj5u58r5Qp6TL7PPPP0fXrl1x69YtbNmyBc7OzgCAkydP4sknnxScrg44MqNqqAxlmGv7jegYZGLkPtWoMsh9bClHP7wInNkgOgVJ3JImszDvejPRMchEzB/ZDo919BIdo9YkPTIDgAMHDmDMmDHo1q0bkpOTAQDr1q3DwYMyvlpvFs8nopq9ULwKNhq96BhkIhIz5T0yk3SZbdmyBZGRkdBqtYiOjkZxcfmcbm5uLj788EPB6eqgIF10ApIB8+xrWBJwWHQMMhGZ+fLeK1bSZTZ79mwsX74cK1euhLm5ecXt3bp1Q3R0tMBkdVSYJToByUTEjbVoYSvv35hJHrIL5X31BkmXWVxcHHr27HnX7fb29sjKymr4QMZSlCU6AcmEqiQPy9x2iI5BJoBlVo/c3d0RHx9/1+0HDx5EQECAgERGUFoE6IpEpyAZ8Uv6EcNdb4qOQQrHMqtHL7zwAv773//i2LFjUKlUSElJQVRUFCZPnowJEyaIjlc7HJXRA1LBgFmW60THIIWTe5lJejurqVOnIjs7G7169UJRURF69uwJS0tLTJ48GRMnThQdr3aKeN0qenC2N09ipn8sZlwNFh2FFCqnSCc6Qp3I4jyzgoICxMbGQq/XIzg4GLa2tqIj1d71Y8CqfqJTkAyV2XqgU/bHyCyV9O+gJFMWZmpcnD1AdIxak/Q04x3W1tbo1KkTwsLC5F1kAKcZqdY0eSlY5veH6BikUCU6PYpKy0THqDXJ/Yo3fPhwrFmzBvb29oiMjMTu3bvRq1cvODo63nXf77///oGe+9q1a/D398epU6cQEhJinMAPisvyqQ46p0ahg0MYorNl/ksdSVJ2YSmszDWiY9SK5MrMwcEBKpUKAODm5obHH38cVlZWUKtlMYisWYn8L09O4qh0hfjM9XuEZz8tOgopUG6RDq72olPUjtAyKy0trXQyNACsXr264vOvv/66oSPVqKSkBBYWFrV/Ar18h/EkDZ7Jv+Bpjz5Ym+IhOgopjF76Syju6b6HOytWrICnpyf0+sp7xQ0ZMgRjx44FAGzfvh0dO3aElZUVAgICMHPmTOh0/79CRqVSYfny5Rg6dChsbGwwe/ZsZGZmYvTo0WjcuDG0Wi0
CAwMrCu3AgQNQqVQ4ffp0xXPExMQgIiICdnZ2sLOzQ48ePXD58mUAgF6vx6xZs+Dl5QVLS0uEhITgl19+qfb7+v333xEWFgZLS0u4u7vjzTffrJQ5IiICEydOxGuvvQYXFxf07dv3ft+yqrHMyAimqdZAo+K+jWRcZXoTKLORI0ciPT0d+/fvr7gtMzMTu3btwujRo7Fr1y6MGTMGkyZNQmxsLFasWIE1a9bggw8+qPQ8M2bMwNChQ3Hu3Dk888wzePfddxEbG4uff/4ZFy5cwLJly+Di4gIAmDx5cqXHJicno2fPnsjPz0dQUBBOnjyJZ555pqJ8Fi1ahPnz52PevHk4e/YsIiMjMWTIEFy6dKnK7yk5ORkDBw5EaGgozpw5g2XLluGrr77C7NmzK93v66+/hpmZGQ4dOoQVK1bc71tWNQPLjOpOe/s85vifFR2DFEbOZfZAS/OHDh0KFxcXfPXVVwCAL774AjNmzEBSUhJ69eqFAQMGYNq0aRX3X79+PaZOnYqUlJTyF1Op8Morr+DTTz+tuM+QIUPg4uKCVatW3fV6tra2yM/Pr1iw8dZbb2Hjxo3YuXMnunbteteWVp6ennjppZfw1ltvVdwWFhaG0NBQfP7553ctAHn77bexZcsWXLhwoeI43dKlS/HGG28gOzsbarUaERERyM7OxqlTp+73bareoUXAnunGeS4yaXprF3TLn4e04jpMexP9zbaJ4Wjr5Sg6Rq080DGz0aNHY/z48Vi6dCksLS0RFRWFUaNGQaPR4OTJkzh+/HilkVhZWRmKiopQUFAAa2trAECnTp0qPeeLL76Ixx57DNHR0ejXrx+GDRuGbt26AUBFwdxx+vRp9OjRAwUFBSgrqzzCycnJQUpKCsLDwyvdHh4ejjNnzlT5/Vy4cAFdu3at9Drh4eHIy8tDUlISfHx8qsxcJzKekyZpURXcRkjHrUjW5YmOQgqhsQwE4Cg6Rq08UJkNHjwYer0eO3fuRGhoKA4cOIAFCxYAKD9eNXPmTAwfPvyux1lZWVV8bmNjU+lrAwYMQEJCAnbu3Im9e/eid+/eeOmllzBv3jyEhoZi//79FcWl1WphMBjw0UcfoXv37lVm/GcBGgyGu26r7mt3Bqp/v/2fmetEpZBVmSTc1uCHcShTxlePIMlRqeW7C8gDlZlWq8Xw4cMRFRWF+Ph4BAUFoWPHjgCADh06IC4uDs2aPfiVcRs3boxx48Zh3Lhx6NGjB6ZMmYJ58+Zh2rRp2L9/P4YNG4Y+ffrgypUrOH/+PBwdHSsduwPKd9L38PDAwYMHK+20f/jwYYSFhVX5usHBwdiyZUulUjt8+DDs7Ozg6en5wN/HfVHL8xwOkpZ8Szss1t8WHYMURqOS78+nB16aP3r0aAwePBgxMTEYM2ZMxe3Tp0/HI488Am9vb4wcORJqtRpnz57FuXPn7lpQ8XfTp09Hx44d0apVKxQXF2PHjh1o2bIlACAwMBAA0K9fP6SlpcHLywsXL15Ely5dUFRUhEuXLuHo0aMICwtD8+bNMWXKFMyYMQNNmzZFSEgIVq9ejdOnTyMqKqrK154wYQIWLlyIl19+GRMnTkRcXBxmzJiB1157rf7Oa5PxXxaSji9a9kR69jnRMUhh7jWLJQcPXGYPP/wwnJycEBcXh6eeeqri9sjISOzYsQOzZs3C3LlzYW5ujhYtWuC5556r9vksLCwwbdo0XLt2DVqtFj169MDGjRsr3efll1+u2LHj7NmzmDJlCh566CFoNBqEhIRUHCebNGkScnJy8Prrr+PmzZsIDg7Gtm3bKkrxnzw9PfHTTz9hypQpaNeuHZycnPDss8/inXfeedC35f5xmpHqKNHZD+tz40THIAWS88hM8hsNHzhwACtWrMCVK1ewadMmeHp6Yt26dfD397/ncTNJi14LbHtZdAqSsUnt+2N/VqzoGKRAPw//GV52XqJj1IqkhwlbtmxBZGQktFotoqOjUVxcDADIzc3Fhx9
+KDhdLVk5ik5AMnbEP4xFRvXG3lKme1lB4mU2e/ZsLF++HCtXrqy07VW3bt0QHS3TVVxaR9EJSKbKVBrMtTWv+Y5EtaBWqWFnbic6Rq1Juszi4uIqrUy8w97e/q4TpmWDIzOqpe9a9UZ8XqLoGKRQtua2sl4AIukyc3d3R3x8/F23Hzx4EAEBAQISGQFHZlQL2daN8HlpiugYpGD2FvKdYgQkXmYvvPAC/vvf/+LYsWNQqVRISUlBVFQUJk+ejAkTJoiOVzscmVEtfN68G7JLckTHIAWT8/EyQILXM/u7qVOnIjs7G7169UJRURF69uwJS0tLTJ48GRMnThQdr3Ys7crPNeOGw3Sf4l2bY1P2BdExSOHsLOR7vAyQwdJ8ACgoKEBsbCz0ej2Cg4Nhayvzq+zO8QMKM0WnIJkY374vjmTxvDKqX319+2JBxALRMWpN0iOzO6ytreHq6gqVSiX/IgPKpxpZZnQf9jcLZ5FRg+Axs3qk0+nw7rvvwsHBAX5+fvD19YWDgwPeeecdlJaWio5Xe9pGohOQDJRqLDDPUr4bv5K88JhZPZo4cSJ++OEHzJ07F127dgUAHDlyBO+99x7S09OxfPlywQlryd4DSJHpeXLUYNa16o3ruTGiY5CJcLN2Ex2hTiRdZt988w02btyIAQMGVNzWtm1b+Pj4YNSoUfIts0Z+ohOQxKXbNsEXhVdFxyATItdtrO6Q9DSjlZUV/Pz87rrdz88PFhYyvrouy4xqsCgoFPm6AtExyISwzOrRSy+9hPfff79iT0YAKC4uxgcffCDfpfkA0MhfdAKSsBiP1vgx87zoGGRCVFDBy1beZSbpacZTp05h37598PLyQrt27QAAZ86cQUlJCXr37l3pqtbff/+9qJgPjiMzqsacJk1gyOYJ0tRwGls3hoVGxrNdkHiZOTo64rHHHqt0m7e3t6A0RuToXX5dM4NedBKSmJ9aROBU9t1buBHVJ7mPygCJl9nSpUuh1+thY2MDALh27Rq2bt2Kli1bIjIyUnC6OjCzBOzcgZxk0UlIQgotrPGpOld0DDJBcj9eBkj8mNnQoUOxbt06AEBWVha6dOmC+fPnY9iwYVi2bJngdHXEqUb6h1XBvZBWeEt0DDJBShiZSbrMoqOj0aNHDwDA5s2b4erqioSEBKxduxaLFy8WnK6OWGb0N6mNvLEm76LoGGSiODKrZwUFBbCzK9/8cvfu3Rg+fDjUajW6dOmChIQEwenqyLmp6AQkIQv8W6OorLjmOxLVA197X9ER6kzSZdasWTNs3boViYmJ2LVrF/r16wcAuHnzJuzt5b31CtzbiU5AEhHt0wG/ZHKnDxJDo9IgqFGQ6Bh1Jukymz59OiZPngw/Pz907ty5Ykur3bt3o3379oLT1ZG7zPOTUehVanzsqIDNs0m2AhwDYGVmJTpGnUl6NeOIESPQvXt3pKamVpxnBgC9e/fGo48+KjCZEdg4Aw7eQHai6CQk0A8tH8aFXB4rI3GCnYJFRzAKSZcZALi5ucHNrfIGmGFhYYLSGJl7O5aZCcuzssdiPVcvkljBzsooM0lPMyqeR4joBCTQipY9kFHM69qRWCwzqjseNzNZCS4BiMr5S3QMMnEalQbNnZqLjmEULDORODIzWZ94B6JUL+MLzJIi+Dv4Q2umFR3DKFhmItm4APbyP1mRHsyhgC74PeuC6BhEipliBFhm4nF0ZlJ0ajPMteE/O5IGlhkZj1930QmoAX0b3BtX8pJExyACAHRy7SQ6gtGwzEQLiBCdgBpIlrUTlpayyEganK2cFbHzxx0sM9GatCy/HAwp3pLmXZFTwku8kDR0du8MlUolOobRsMykwP8h0Qmonl10bYHN2bGiYxBV6OrRVXQEo2KZSUHTXqITUD2b6+6FMkOZ6BhEFbq4dxEdwahYZlLA42aKti+wB45lc/9Fkg5/B3+42bjVfEcZYZlJgZ0b0Lil6BRUD0o0lphnweuUkbR0dVfWFCPAMpM
Ojs4UaW3rh5FUkCY6BlElSptiBFhm0sHjZopzy94NKwuuio5BVImZygyhbqGiYxgdy0wq/HoA5jaiU5ARLWzWEQW6AtExiCrp6NoRthbKuyAsy0wqLKyBoEjRKchIznu2wfbM86JjEN0l0l+ZP2dYZlLSSuZXzyYAgAEqfOTiAgMMoqMQVWKmMkMfnz6iY9QLlpmUBPYDLOxEp6A62tEiAmdzLouOQXSXMPcwNLJqJDpGvWCZSYm5FdB8gOgUVAcFFjZYqMoWHYOoSv39+ouOUG9YZlLTerjoBFQHXwVH4GZRuugYRHcxU5vhYZ+HRceoNywzqWnaG7ByEJ2CaiGlkQ++zuNOHyRN3Ty6wcFSuT9bWGZSY2YBtHhEdAqqhXn+rVBcxt0+SJqUPMUIsMykiasaZee4byfsyYwRHYOoShZqC/TyVvbGDCwzKQroBdi6ik5B90mvUmOOg1Z0DKJ7ivCOUOSJ0n/HMpMijRnQ4WnRKeg+bQnujbjcBNExiO7pieZPiI5Q71hmUtXx34BKIzoF1SDXygFLym6IjkF0T/4O/ghzDxMdo96xzKTKwZPnnMnAspbdkVGcJToG0T2ZwqgMYJlJW+izohNQNa42bopvci6IjkF0T1ozLYY0HSI6RoNgmUlZQC/AuZnoFHQPc72aQqfXiY5BdE8D/QfCzkS2yGOZSZlKBXR6RnQKqsKBpl1xMOsv0TGIqvVkiydFR2gwLDOpCxkNmFuLTkF/U6o2xydciU8S165xOzR3ai46RoNhmUmd1pH7NUrMN61642p+sugYRNUylYUfd7DM5KDLBAAq0SkIQIaNC5YXXxcdg6haTaybINJPmRfhvBeWmRy4tgJaDBKdggB81rwzckvzRMcgqtYzrZ+BhcZCdIwGxTKTi4emik5g8uLcgvF9FvdfJGlz0bpgRNAI0TEaHMtMLtzbAUHK3vVa6ua4uUNv0IuOQVStca3GwVJjKTpGg2OZyclDb4hOYLJ2B/XE8exLomMQVcvJygmPN39cdAwhWGZy4tkBaNZXdAqTU2xmhQXmhaJjENVoXKtx0JqZ5nkjLDO54eiswa1p9TCSC7iZMElbI8tGJrcc/+9YZnLjHVq+zRU1iJsO7viq4LLoGEQ1errV07A24Q0WWGZyFPGm6AQm49Om7VGo4xQjSZuDpYNJbV1VFZaZHPl04crGBnDGux12ZnIpPknf+DbjYWNuIzqGUCwzueo3G1CbiU6hWAaoMMepEQwwiI5CVC1fe1882dK0R2UAy0y+XAK5o3492tbyYZzLuSI6BlGNXu3wKszV5qJjCMcyk7OIaYCVg+gUilNgaYtFuC06BlGNOrl2Qm/f3qJjSALLTM6sncoLjYxqZcuHcKsoQ3QMomqpVWq8EcZTde5gmcld6PNA45aiUyhGkpMP1ubGiY5BVKORQSPRwqmF6BiSwTKTO40ZMHCu6BSKMc83GCX6EtExiKrlaOmIl9u/LDqGpLDMlMC/J9DqUdEpZO+Yfyj2ZcWKjkFUo5fbvwwHSx4v/zuWmVJEfgjwL3etlak0mGNrejuNk/y0cWljkpd4qQnLTCnsPYDID0SnkK3NrXrjUh6vIE3SZqmxxOzw2VCr+KP7n/iOKEmHf3FX/VrI1jri89JU0TGIavRSyEsIcAwQHUOSWGZKM2Qxpxsf0LIW4cgsyRYdg6habRu3xdhWY0XHkCyWmdLYewD9PxSdQjauNAnEt9kXRMcgqhanF2vGd0aJ2o8BAvuJTiELczz9oDPoRMcgqtbEkInwd/AXHUPSWGZKNXgxt7qqwe/NwnE4iydIk7S1a9wOT7d6WnQMyWOZKZW9O9D/Y9EpJKtUbY5PrMpExyCqlqXGEu+Hv8/pxfvAd0jJQp4CWjwiOoUkRbXujYT8FNExiKr1asdXOb14n1hmSjdsKdCI/xj+7rZtY6woShAdg6ha/Xz7YXTL0aJjyAbLTOmsHIDH1wJmVqKTSMbioDDkleaLjkF0T372fpg
VPkt0DFlhmZkC97bAwE9Ep5CEC+7B2JoVIzoG0T1pzbRYELEANuY2oqPICsvMVHR4GggZIzqFcB+7ukFv0IuOQXRP73Z5F4GNAkXHkB2WmSkZNA9wbSM6hTC/NH8I0dnxomMQ3dPIoJEY3HSw6BiyxDIzJeZa4PGvTXK7qyJzLRZo8kTHILqnYOdgvBn2pugYssUyMzXOTYFhn4tO0eBWt+qF1MJbomMQVcnewh4LIhbAQmMhOopsscxMUcvBQMQ00SkaTJqjJ1bncXqRpMlMbYYFEQvgaespOoqsscxMVcSb5Xs4moAFAe1QWFYkOgZRlWZ1m4XO7p1Fx5A9lpkpe2QR0PRh0Snq1Snv9vg587zoGERVmhgykQs+jIRlZso0ZuUnVLspc4WjASp83MhOdAyiKj0W+BheaPeC6BiKwTIzdZZ2wFObAHsv0UmMbmtwb8TmXhMdg+gu3T27450u74iOoSgsMyrfYX/0JkUt2c+3tMMiPVcvkvS0dGqJ+Q/Nh5naTHQURWGZUTnXYGDUekAhS4NXBPfE7eJM0TGIKvGw8cDSPkthbW4tOorisMzo//n3BB5dAag0opPUyXUXf6zP+Ut0DKJKnK2csazvMrhoXURHUSSWGVXWejjw2EpZF9on3kEo1ZeKjkFUwdnKGV9FfoUAhwDRURSLZUZ3a/2YbAvtsH9n/JZ1QXQMogp3iqypY1PRURSNZUZVk2Ghlak0+MSWB9VJOlhkDYdlRvcms0L7tlVvxOclio5BBIBF1tBYZlQ9mRRatnUjLC1NER2DCACLTASWGdVMBoW2pHk3ZJfkiI5BxCITRGUwGAyiQ5BM/LUT2PwsoCsUnaSSS67NMdKmBGWGMtFRyMR52npiWZ9l8HfwFx3F5HBkRvevxSBg3A7AWlrnycz18GGRkXCtnVsjamAUi0wQlhk9GK9OwHN7ACdpTKH8GtgdR7PiRMcgExfhFYFV/VfBWessOorJYpnRg3MKAJ7dA3iFCY1RqrHAPIsSoRmInmj+BBb2WgitmVZ0FJPGMqPasXEGxm4DWjwiLMLaVr2RWJAm7PXJtKmgwisdXsE7Xd6BRi3dxVGmggtAqG70emDXNODY8gZ92XQ7Vzzi1gj5uoIGfV0iADBXm2N2+GwMDBgoOgr9D0dmVDdqNTBgDtB/DtCAl7RYFNiJRUZCOFk5YUXfFSwyieHIjIzn+lFg0zggN7VeXybGsw2etMiBAfyrSw2rfZP2+KTnJ3C1cRUdhf6BIzMyHp8uwAsHyi8lU48+dnFhkVGD+1fwv7AqchWLTKI4MiPj0+uB/R8AB+YDRi6dnS0i8GbxFaM+J1F1bM1tMSt8Fvr69hUdharBMqP6c3E38MN4oNA4V3wutLDG4GYtcKMw3SjPR1SToEZBWBCxAL72vqKjUA04zUj1J6gf8MIfgEd7ozzdquBeLDJqMEOaDkHUwCgWmUxwZEb1T1cM7J0JHFsGGPS1eorURt4Y4myForJiI4cjqszW3BZTQ6fi0cBHRUehB8Ayo4aTcAT4cQKQ8eDHvCZ3GIBdmTH1EIro/3Xz6IaZ3WbCzcZNdBR6QCwzalglBcC+mcCxFbjfxSEnfTpinOZW/eYik2ZjboPJnSZjRNAI0VGollhmJMa1Q8CPLwGZV6u9m16lxqg24biQm9BAwcjUdHHvglndZsHd1l10FKoDlhmJU1IA7J0B/LkS9xqlbWnVB+8VXGzYXGQSbMxt8FrH1/B488dFRyEjYJmReFcPANtevmuUlmdlj0F+/sgoNs7SfqI7urh3wcxuM+Fh6yE6ChkJy4ykQVcMHFoMHFwAlJbvuTiv/SB8nXVOcDBSEjcbN0zuNBmRfpGio5CRscxIWrISgd1v49qt83jU3gCdXic6ESmAhdoCY1uNxfNtn+d1xxSKZUaSFJvyJ6afmIu4TF5Fmuqmj08fvNbxNXjbe4uOQvWIZUaSpTfosTV+K5acWoJbhVyaTw+mlXMrTAmdgo6uHUV
HoQbAMiPJKygtwOqY1VgXuw75pfmi45DEedp6YkLIBAwOGAyVSiU6DjUQlhnJRnZxNtbFrsOGCxuQW5orOg5JjLedN55v8zwGNx0Mswa8UCxJA8uMZCe3JBfrL6zH+tj1yCnJER2HBPOz98PzbZ/HIP9B0Kg1ouOQICwzkq28kjxs+GsD1sWuQ1Zxlug41MACHAIwvu14DPAfALWKFwAxdSwzkr2C0gJ889c3WBu7FhlFGaLjUD1r5tgML7R7Af18+7HEqALLjBSjpKwEu67twsa4jTh766zoOGREZioz9PLphVHNRyHMPazBX/+9997D1q1bcfr06To9z2+//YZevXohMzMTjo6O9/WYcePGISsrC1u3bq3Taysdy4wU6cLtC/g27lv8dPUnFOoKRcehWmqibYIRQSPwWNBjaGLdRFiOvLw8FBcXw9nZuU7PU1JSgoyMDLi6ut73Ssvs7GwYDIb7Lj9TxTIjRcstycWP8T/i27hvcS3nmug4dJ86u3XGEy2eQC/vXrJYmVhSUgILCwvRMUwaJ5xJ0ews7DAmeAy2P7odK/utRH+//tzOSKJctC4Y03IMfhz2I76M/BJ9ffs2WJGtWLECnp6e0OsrXwl9yJAhGDt2LN577z2EhIRU3D5u3DgMGzYMH330ETw8PBAUFAQAOHz4MEJCQmBlZYVOnTph69atUKlUFdOTv/32G1QqFbKysgAAa9asgaOjI3bt2oWWLVvC1tYW/fv3R2pq6l2vdYder8ecOXPQrFkzWFpawsfHBx988EHF19944w0EBQXB2toaAQEBePfdd1FaWmrcN0yCpP8rD5GRdHHvgi7uXVBQWoA/kv7Armu7cCD5AIrLikVHM1lOVk7o49MH/f37o6NrR2ELOkaOHIlJkyZh//796N27NwAgMzMTu3btwvbt23H48OG7HrNv3z7Y29tjz549MBgMyM3NxeDBgzFw4EBs2LABCQkJeOWVV2p87YKCAsybNw/r1q2DWq3GmDFjMHnyZERFRVV5/2nTpmHlypX49NNP0b17d6SmpuKvv/6q+LqdnR3WrFkDDw8PnDt3Ds8//zzs7OwwderU2r05MsEyI5NjbW6N/v790d+/P/JL87E/cT92Xd2FQymHUKpX/m+wojlYOqC3T29E+kWis1tnSZwb5uTkhP79+2PDhg0VZbZp0yY4OTmhd+/eVZaZjY0Nvvzyy4rpxeXLl0OlUmHlypWwsrJCcHAwkpOT8fzzz1f72qWlpVi+fDmaNm0KAJg4cSJmzZpV5X1zc3OxaNEiLFmyBGPHjgUANG3aFN27d6+4zzvvvFPxuZ+fH15//XV8++23LDMiJbMxt8EjAY/gkYBHkFuSi1+v/4rdCbtxPO04F44YkbOVM8I9w9Hfrz+6eHSBudpcdKS7jB49GuPHj8fSpUthaWmJqKgojBo1ChpN1WXbpk2bSsfJ4uLi0LZtW1hZWVXcFhZW88pLa2vriiIDAHd3d9y8ebPK+164cAHFxcUVhVuVzZs3Y+HChYiPj0deXh50Oh3s7e1rzCF3LDOi/7GzsMPQZkMxtNlQlJaV4vSt0ziScgTHUo8h5nYMygxloiPKhtZMiw6uHdDVvSu6uHdBUKMgye+TOHjwYOj1euzcuROhoaE4cOAAFixYcM/729jYVPpvg8Fw1/d4P+vrzM0rF7tKpbrn47Ta6o/3Hj16FKNGjcLMmTMRGRkJBwcHbNy4EfPnz68xh9yxzIiqYK4xR6hbKELdQgEAOSU5OJ56HEdSj+Bo6lEk5CQITigtGpUGwc7B6OLeBV09uiKkcQjMNdIbfVVHq9Vi+PDhiIqKQnx8PIKCgtCx4/3vuN+iRQtERUWhuLgYlpaWAIATJ04YNWNgYCC0Wi327duH55577q6vHzp0CL6+vnj77bcrbktIMI2/qywzovtgb2GP3r690du3fHonJS8FZ2+dRezt2PKPjFjklpjO5seu1q4Idg6u+AhpEgJ7C/lPZY0ePRq
DBw9GTEwMxowZ80CPfeqpp/D2229j/PjxePPNN3H9+nXMmzcPAIw2KrWyssIbb7yBqVOnwsLCAuHh4bh16xZiYmLw7LPPolmzZrh+/To2btyI0NBQ7Ny5Ez/88INRXlvqWGZEteBh6wEPWw/09+9fcVtiTiJiMmIqCu7C7QuK2AjZzcYNwU7BlcrLWVu3k4el6uGHH4aTkxPi4uLw1FNPPdBj7e3tsX37drz44osICQlBmzZtMH36dDz11FOVjqPV1bvvvgszMzNMnz4dKSkpcHd3x3/+8x8AwNChQ/Hqq69i4sSJKC4uxqBBg/Duu+/ivffeM9rrSxVPmiaqR2n5aUjMTURSbhKS8pIq/SmVfSQ1Kg1crV3hZecFLzsveNp6wsu2/HMfOx84WjmKjihbUVFR+Pe//43s7Owaj3dR3bDMiAQpKC1Acl4ykvOScbvwNnJKcso/isv/zC3JrXRbbmkudHpdtc+pggrmanPYW9rD3uJ/H//73M7CrtJtTaybwMvWC+627pJcXShHa9euRUBAADw9PXHmzBlMnDgRERERWL9+vehoisdpRiJBrM2tEdgoEIGNAh/ocQaDAWWGMugNeugNemhUGqhVaqhVasmvGFS6tLQ0TJ8+HWlpaXB3d8fIkSMr7c5B9YcjMyIikj3uzUhERLLHMiMiItljmRERkeyxzIiISPZYZkREJHssMyIikj2WGRERyR7LjIiIZI9lRkREsscyIyIi2WOZERGR7LHMiIhI9lhmREQkeywzIiKSPZYZERHJHsuMiIhkj2VGRESyxzIjIiLZY5kREZHsscyIiEj2WGZERCR7LDMiIpI9lhkREckey4yIiGSPZUZERLLHMiMiItljmRERkeyxzIiISPZYZkREJHssMyIikj2WGRERyR7LjIiIZI9lRkREsscyIyIi2WOZERGR7LHMiIhI9lhmREQkeywzIiKSPZYZERHJHsuMiIhkj2VGRESyxzIjIiLZY5kREZHsscyIiEj2WGZERCR7/wc8xE3OaUAUjwAAAABJRU5ErkJggg==", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# or a pie chart\n", + "iris_df.species.value_counts().plot(kind=\"pie\")" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "c1d226c8-bc09-4f3e-82fd-7ba398a6298e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# scatterplots to display the relation between continuous data\n", + "iris_df.plot(x=\"sepal_length\", y=\"petal_length\", kind=\"scatter\")" + ] + }, + { + "cell_type": "markdown", + "id": "250e0d24-02d9-4525-b694-51516375c083", + "metadata": {}, + "source": [ + "The relationship between a categorical and a continuous variable is often visualized using a boxplot." + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "e66af9f1-353c-4b2d-8b5e-7f598336f08e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "iris_df.plot(x=\"species\", y=\"petal_length\", kind=\"box\")" + ] + }, + { + "cell_type": "markdown", + "id": "a1eaa8bf-4132-4e0a-899f-228e0b0f1735", + "metadata": {}, + "source": [ + "But this does not work as expected... We have to use `boxplot`:" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "b611f816-8c48-4a33-8c70-aa923d6a04d5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "iris_df.boxplot(\"petal_length\", by=\"species\")" + ] + }, + { + "cell_type": "markdown", + "id": "25ea113a-3706-4487-871a-e8923093042a", + "metadata": {}, + "source": [ + "**Try it yourself with Practice Exercise 4!**" + ] + }, + { + "cell_type": "markdown", + "id": "450bf331", + "metadata": {}, + "source": [ + "## Basic Indexing and Selection" + ] + }, + { + "cell_type": "markdown", + "id": "c02d8056-c7cd-401b-9644-2bcdf74ce1dd", + "metadata": {}, + "source": [ + "### Quick access to columns by name\n", + "\n", + "- **Bracket notation**: As in dictionaries, use `[]` (single or double; see below), and inside the name of the column(s), which must be a string." + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "28365a2f-af89-40f4-8db9-d57456586599", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(0 5.1\n", + " 1 4.9\n", + " 2 4.7\n", + " 3 4.6\n", + " 4 5.0\n", + " ... \n", + " 145 6.7\n", + " 146 6.3\n", + " 147 6.5\n", + " 148 6.2\n", + " 149 5.9\n", + " Name: sepal_length, Length: 150, dtype: float64,\n", + " pandas.core.series.Series)" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Single brackets give a series\n", + "iris_df['sepal_length'], type(iris_df['sepal_length'])" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "1497be19-6561-4adf-bb35-ae71bf3f419f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "( sepal_length\n", + " 0 5.1\n", + " 1 4.9\n", + " 2 4.7\n", + " 3 4.6\n", + " 4 5.0\n", + " .. 
...\n", + " 145 6.7\n", + " 146 6.3\n", + " 147 6.5\n", + " 148 6.2\n", + " 149 5.9\n", + " \n", + " [150 rows x 1 columns],\n", + " pandas.core.frame.DataFrame)" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# double bracket gives you the selected column as new dataframe\n", + "iris_df[['sepal_length']], type(iris_df[['sepal_length']])" + ] + }, + { + "cell_type": "markdown", + "id": "a003b66b-5881-4423-9f2a-3f64943c10d7", + "metadata": {}, + "source": [ + "This notation allows you to select multiple columns. To do this, use double brackets `[[.., ..]]`, ensuring that the returned object is a dataframe:" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "b250fd93-8c83-4389-bff1-bf87e89ea368", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthpetal_length
05.11.4
14.91.4
24.71.3
34.61.5
45.01.4
.........
1456.75.2
1466.35.0
1476.55.2
1486.25.4
1495.95.1
\n", + "

150 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " sepal_length petal_length\n", + "0 5.1 1.4\n", + "1 4.9 1.4\n", + "2 4.7 1.3\n", + "3 4.6 1.5\n", + "4 5.0 1.4\n", + ".. ... ...\n", + "145 6.7 5.2\n", + "146 6.3 5.0\n", + "147 6.5 5.2\n", + "148 6.2 5.4\n", + "149 5.9 5.1\n", + "\n", + "[150 rows x 2 columns]" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "iris_df[['sepal_length', 'petal_length']]" + ] + }, + { + "cell_type": "markdown", + "id": "62e149f9-6641-4a0b-a98f-fc59529b0d07", + "metadata": {}, + "source": [ + "- **Dot notation**: Here columns are object attributes." + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "0ab28157-28dd-4a10-8714-c83cab905d8c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(0 5.1\n", + " 1 4.9\n", + " 2 4.7\n", + " 3 4.6\n", + " 4 5.0\n", + " ... \n", + " 145 6.7\n", + " 146 6.3\n", + " 147 6.5\n", + " 148 6.2\n", + " 149 5.9\n", + " Name: sepal_length, Length: 150, dtype: float64,\n", + " pandas.core.series.Series)" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "iris_df.sepal_length, type(iris_df.sepal_length)" + ] + }, + { + "cell_type": "markdown", + "id": "85754a99-11c2-467f-a35a-9c4183a86e33", + "metadata": {}, + "source": [ + "Dot notation is very convenient, since as object attributes they can be tab-completed in various editing environments.\n", + "\n", + "But: \n", + "- It only works if the column names are not reserved keywords.\n", + "- It can not be used when creating a new column (see below).\n", + "- It allows you to select just one column." + ] + }, + { + "cell_type": "markdown", + "id": "921f8ed0-44b2-450c-a379-4e22372b1279", + "metadata": {}, + "source": [ + "### Selecting Data by Position: `iloc[]`" + ] + }, + { + "cell_type": "markdown", + "id": "cc454247", + "metadata": {}, + "source": [ + "We can use `iloc[]` to extract rows and columns using **indexes**. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": "98a9ae6e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "sepal_length 4.7\n", + "sepal_width 3.2\n", + "petal_length 1.3\n", + "petal_width 0.2\n", + "species setosa\n", + "Name: 2, dtype: object" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# This fetches row 3, and all columns:\n", + "iris_df.iloc[2]" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "id": "014d872a-dfbd-403f-a5a6-a22db9482075", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "sepal_length 4.7\n", + "sepal_width 3.2\n", + "petal_length 1.3\n", + "petal_width 0.2\n", + "species setosa\n", + "Name: 2, dtype: object" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Similar to\n", + "iris_df.iloc[2, :]" + ] + }, + { + "cell_type": "markdown", + "id": "a82a9f45", + "metadata": {}, + "source": [ + "fetch rows with indices 1,2 (the right endpoint is exclusive), and all columns." + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "id": "c5c45d06", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_widthspecies
14.93.01.40.2setosa
24.73.21.30.2setosa
\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width species\n", + "1 4.9 3.0 1.4 0.2 setosa\n", + "2 4.7 3.2 1.3 0.2 setosa" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "iris_df.iloc[1:3]" + ] + }, + { + "cell_type": "markdown", + "id": "3bc78532", + "metadata": {}, + "source": [ + "fetch rows with indices 1,2 and first three columns (positions 0, 1, 2)" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "id": "408ba901", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_length
14.93.01.4
24.73.21.3
\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length\n", + "1 4.9 3.0 1.4\n", + "2 4.7 3.2 1.3" + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "iris_df.iloc[1:3, 0:3]" + ] + }, + { + "cell_type": "markdown", + "id": "46975617", + "metadata": {}, + "source": [ + "You can apply slices to column names too. You don't need `.iloc[]` here." + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "id": "5056b057", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['sepal_length', 'sepal_width', 'petal_length'], dtype='object')" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "iris_df.columns[0:3]" + ] + }, + { + "cell_type": "markdown", + "id": "ccfdea8c-d7ff-48c6-89a4-a75bd16f88c8", + "metadata": {}, + "source": [ + "### Selecting Data by Label: `loc[]`" + ] + }, + { + "cell_type": "markdown", + "id": "f9be9788", + "metadata": {}, + "source": [ + "We can select by row and column labels using the `loc[]` method. " + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "id": "cd1a1a13", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_widthspecies
14.93.01.40.2setosa
24.73.21.30.2setosa
34.63.11.50.2setosa
\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width species\n", + "1 4.9 3.0 1.4 0.2 setosa\n", + "2 4.7 3.2 1.3 0.2 setosa\n", + "3 4.6 3.1 1.5 0.2 setosa" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Here we ask for rows with labels (indexes) 1-3\n", + "iris_df.loc[1:3]" + ] + }, + { + "cell_type": "markdown", + "id": "dbf27061-e0e2-4e2e-97ba-80acc121c491", + "metadata": {}, + "source": [ + "Look at the difference with respect to `iloc`: " + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "id": "e951d7b7-88ae-4d8b-adc2-8092a4af37c2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_widthspecies
14.93.01.40.2setosa
24.73.21.30.2setosa
\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width species\n", + "1 4.9 3.0 1.4 0.2 setosa\n", + "2 4.7 3.2 1.3 0.2 setosa" + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "iris_df.iloc[1:3]" + ] + }, + { + "cell_type": "markdown", + "id": "7fe8a7e7-ac6c-4f55-82a9-e933b9df7bcf", + "metadata": {}, + "source": [ + "To make the difference between the two approaches more apparent, let's create a copy of the DataFrame with redefined index labels:" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "id": "79d4b175-c76c-4be5-aebe-89e926dc531b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_widthspecies
obs05.13.51.40.2setosa
obs14.93.01.40.2setosa
obs24.73.21.30.2setosa
obs34.63.11.50.2setosa
obs45.03.61.40.2setosa
\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width species\n", + "obs0 5.1 3.5 1.4 0.2 setosa\n", + "obs1 4.9 3.0 1.4 0.2 setosa\n", + "obs2 4.7 3.2 1.3 0.2 setosa\n", + "obs3 4.6 3.1 1.5 0.2 setosa\n", + "obs4 5.0 3.6 1.4 0.2 setosa" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "iris_copy = iris_df.copy() # Create a copy to not affect the original dataframe\n", + "iris_copy.index = [f\"obs{ii}\" for ii in iris_copy.index] \n", + "\n", + "iris_copy.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "id": "f54afba0-3b6e-4fac-8b2f-971eb75dafcf", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_widthspecies
obs14.93.01.40.2setosa
obs24.73.21.30.2setosa
\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width species\n", + "obs1 4.9 3.0 1.4 0.2 setosa\n", + "obs2 4.7 3.2 1.3 0.2 setosa" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# This is still Ok\n", + "iris_copy.iloc[1:3,:]" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "id": "12bb3a2b-a11b-40fb-bb6d-4b45abe39861", + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "cannot do slice indexing on Index with these indexers [1] of type int", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[56], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# But this will give an error\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m iris_copy\u001b[38;5;241m.\u001b[39mloc[\u001b[38;5;241m1\u001b[39m:\u001b[38;5;241m3\u001b[39m,:]\n", + "File \u001b[0;32m~/anaconda3/lib/python3.11/site-packages/pandas/core/indexing.py:1067\u001b[0m, in \u001b[0;36m_LocationIndexer.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 1065\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_is_scalar_access(key):\n\u001b[1;32m 1066\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mobj\u001b[38;5;241m.\u001b[39m_get_value(\u001b[38;5;241m*\u001b[39mkey, takeable\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_takeable)\n\u001b[0;32m-> 1067\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_getitem_tuple(key)\n\u001b[1;32m 1068\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1069\u001b[0m \u001b[38;5;66;03m# we by definition only have the 0th axis\u001b[39;00m\n\u001b[1;32m 
1070\u001b[0m axis \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maxis \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;241m0\u001b[39m\n", + "File \u001b[0;32m~/anaconda3/lib/python3.11/site-packages/pandas/core/indexing.py:1256\u001b[0m, in \u001b[0;36m_LocIndexer._getitem_tuple\u001b[0;34m(self, tup)\u001b[0m\n\u001b[1;32m 1253\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_multi_take_opportunity(tup):\n\u001b[1;32m 1254\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_multi_take(tup)\n\u001b[0;32m-> 1256\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_getitem_tuple_same_dim(tup)\n", + "File \u001b[0;32m~/anaconda3/lib/python3.11/site-packages/pandas/core/indexing.py:924\u001b[0m, in \u001b[0;36m_LocationIndexer._getitem_tuple_same_dim\u001b[0;34m(self, tup)\u001b[0m\n\u001b[1;32m 921\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m com\u001b[38;5;241m.\u001b[39mis_null_slice(key):\n\u001b[1;32m 922\u001b[0m \u001b[38;5;28;01mcontinue\u001b[39;00m\n\u001b[0;32m--> 924\u001b[0m retval \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mgetattr\u001b[39m(retval, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mname)\u001b[38;5;241m.\u001b[39m_getitem_axis(key, axis\u001b[38;5;241m=\u001b[39mi)\n\u001b[1;32m 925\u001b[0m \u001b[38;5;66;03m# We should never have retval.ndim < self.ndim, as that should\u001b[39;00m\n\u001b[1;32m 926\u001b[0m \u001b[38;5;66;03m# be handled by the _getitem_lowerdim call above.\u001b[39;00m\n\u001b[1;32m 927\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m retval\u001b[38;5;241m.\u001b[39mndim \u001b[38;5;241m==\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mndim\n", + "File \u001b[0;32m~/anaconda3/lib/python3.11/site-packages/pandas/core/indexing.py:1290\u001b[0m, in \u001b[0;36m_LocIndexer._getitem_axis\u001b[0;34m(self, key, 
axis)\u001b[0m\n\u001b[1;32m 1288\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(key, \u001b[38;5;28mslice\u001b[39m):\n\u001b[1;32m 1289\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_validate_key(key, axis)\n\u001b[0;32m-> 1290\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_slice_axis(key, axis\u001b[38;5;241m=\u001b[39maxis)\n\u001b[1;32m 1291\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m com\u001b[38;5;241m.\u001b[39mis_bool_indexer(key):\n\u001b[1;32m 1292\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_getbool_axis(key, axis\u001b[38;5;241m=\u001b[39maxis)\n", + "File \u001b[0;32m~/anaconda3/lib/python3.11/site-packages/pandas/core/indexing.py:1324\u001b[0m, in \u001b[0;36m_LocIndexer._get_slice_axis\u001b[0;34m(self, slice_obj, axis)\u001b[0m\n\u001b[1;32m 1321\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m obj\u001b[38;5;241m.\u001b[39mcopy(deep\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[1;32m 1323\u001b[0m labels \u001b[38;5;241m=\u001b[39m obj\u001b[38;5;241m.\u001b[39m_get_axis(axis)\n\u001b[0;32m-> 1324\u001b[0m indexer \u001b[38;5;241m=\u001b[39m labels\u001b[38;5;241m.\u001b[39mslice_indexer(slice_obj\u001b[38;5;241m.\u001b[39mstart, slice_obj\u001b[38;5;241m.\u001b[39mstop, slice_obj\u001b[38;5;241m.\u001b[39mstep)\n\u001b[1;32m 1326\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(indexer, \u001b[38;5;28mslice\u001b[39m):\n\u001b[1;32m 1327\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mobj\u001b[38;5;241m.\u001b[39m_slice(indexer, axis\u001b[38;5;241m=\u001b[39maxis)\n", + "File \u001b[0;32m~/anaconda3/lib/python3.11/site-packages/pandas/core/indexes/base.py:6559\u001b[0m, in \u001b[0;36mIndex.slice_indexer\u001b[0;34m(self, start, end, step, kind)\u001b[0m\n\u001b[1;32m 
6516\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 6517\u001b[0m \u001b[38;5;124;03mCompute the slice indexer for input labels and step.\u001b[39;00m\n\u001b[1;32m 6518\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 6555\u001b[0m \u001b[38;5;124;03mslice(1, 3, None)\u001b[39;00m\n\u001b[1;32m 6556\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 6557\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_deprecated_arg(kind, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mkind\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mslice_indexer\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m-> 6559\u001b[0m start_slice, end_slice \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mslice_locs(start, end, step\u001b[38;5;241m=\u001b[39mstep)\n\u001b[1;32m 6561\u001b[0m \u001b[38;5;66;03m# return a slice\u001b[39;00m\n\u001b[1;32m 6562\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_scalar(start_slice):\n", + "File \u001b[0;32m~/anaconda3/lib/python3.11/site-packages/pandas/core/indexes/base.py:6767\u001b[0m, in \u001b[0;36mIndex.slice_locs\u001b[0;34m(self, start, end, step, kind)\u001b[0m\n\u001b[1;32m 6765\u001b[0m start_slice \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 6766\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m start \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 6767\u001b[0m start_slice \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mget_slice_bound(start, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mleft\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 6768\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m start_slice \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 6769\u001b[0m start_slice 
\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m\n", + "File \u001b[0;32m~/anaconda3/lib/python3.11/site-packages/pandas/core/indexes/base.py:6676\u001b[0m, in \u001b[0;36mIndex.get_slice_bound\u001b[0;34m(self, label, side, kind)\u001b[0m\n\u001b[1;32m 6672\u001b[0m original_label \u001b[38;5;241m=\u001b[39m label\n\u001b[1;32m 6674\u001b[0m \u001b[38;5;66;03m# For datetime indices label may be a string that has to be converted\u001b[39;00m\n\u001b[1;32m 6675\u001b[0m \u001b[38;5;66;03m# to datetime boundary according to its resolution.\u001b[39;00m\n\u001b[0;32m-> 6676\u001b[0m label \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_maybe_cast_slice_bound(label, side)\n\u001b[1;32m 6678\u001b[0m \u001b[38;5;66;03m# we need to look up the label\u001b[39;00m\n\u001b[1;32m 6679\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n", + "File \u001b[0;32m~/anaconda3/lib/python3.11/site-packages/pandas/core/indexes/base.py:6623\u001b[0m, in \u001b[0;36mIndex._maybe_cast_slice_bound\u001b[0;34m(self, label, side, kind)\u001b[0m\n\u001b[1;32m 6618\u001b[0m \u001b[38;5;66;03m# We are a plain index here (sub-class override this method if they\u001b[39;00m\n\u001b[1;32m 6619\u001b[0m \u001b[38;5;66;03m# wish to have special treatment for floats/ints, e.g. 
Float64Index and\u001b[39;00m\n\u001b[1;32m 6620\u001b[0m \u001b[38;5;66;03m# datetimelike Indexes\u001b[39;00m\n\u001b[1;32m 6621\u001b[0m \u001b[38;5;66;03m# reject them, if index does not contain label\u001b[39;00m\n\u001b[1;32m 6622\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (is_float(label) \u001b[38;5;129;01mor\u001b[39;00m is_integer(label)) \u001b[38;5;129;01mand\u001b[39;00m label \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m:\n\u001b[0;32m-> 6623\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_invalid_indexer(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mslice\u001b[39m\u001b[38;5;124m\"\u001b[39m, label)\n\u001b[1;32m 6625\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m label\n", + "\u001b[0;31mTypeError\u001b[0m: cannot do slice indexing on Index with these indexers [1] of type int" + ] + } + ], + "source": [ + "# But this will give an error\n", + "iris_copy.loc[1:3,:]" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "8c3b45de-117d-4081-8a5a-cdc529df1292", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_widthspecies
obs14.93.01.40.2setosa
obs24.73.21.30.2setosa
obs34.63.11.50.2setosa
\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width species\n", + "obs1 4.9 3.0 1.4 0.2 setosa\n", + "obs2 4.7 3.2 1.3 0.2 setosa\n", + "obs3 4.6 3.1 1.5 0.2 setosa" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# With loc[] labels need to be passed\n", + "iris_copy.loc[[\"obs1\", \"obs2\", \"obs3\"]]" + ] + }, + { + "cell_type": "markdown", + "id": "5ccd9d19", + "metadata": {}, + "source": [ + "Subset on columns with column name (as a string) or list of strings" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "id": "332696cc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "( sepal_length petal_width\n", + " 1 4.9 0.2\n", + " 2 4.7 0.2\n", + " 3 4.6 0.2,\n", + " sepal_length petal_width\n", + " obs1 4.9 0.2\n", + " obs2 4.7 0.2\n", + " obs3 4.6 0.2)" + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "iris_df.loc[1:3, ['sepal_length','petal_width']], iris_copy.loc[[\"obs1\", \"obs2\", \"obs3\"], ['sepal_length','petal_width']]" + ] + }, + { + "cell_type": "markdown", + "id": "10439dcc", + "metadata": {}, + "source": [ + "Select all rows, specific columns" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "id": "e322dddf", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthpetal_width
05.10.2
14.90.2
24.70.2
34.60.2
45.00.2
.........
1456.72.3
1466.31.9
1476.52.0
1486.22.3
1495.91.8
\n", + "

150 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " sepal_length petal_width\n", + "0 5.1 0.2\n", + "1 4.9 0.2\n", + "2 4.7 0.2\n", + "3 4.6 0.2\n", + "4 5.0 0.2\n", + ".. ... ...\n", + "145 6.7 2.3\n", + "146 6.3 1.9\n", + "147 6.5 2.0\n", + "148 6.2 2.3\n", + "149 5.9 1.8\n", + "\n", + "[150 rows x 2 columns]" + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "iris_df.loc[:, ['sepal_length','petal_width']]" + ] + }, + { + "cell_type": "markdown", + "id": "d839cad1-b8b0-4313-be09-ac1b0f216f26", + "metadata": {}, + "source": [ + "**Try it yourself with Practice Exercise 5!**" + ] + }, + { + "cell_type": "markdown", + "id": "db062841", + "metadata": {}, + "source": [ + "## Basic data cleaning\n", + "\n", + "Pandas primarily uses the value `np.nan` from NumPy to represent missing data." + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "id": "2ae69551", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "id": "88d0b1c6", + "metadata": {}, + "outputs": [], + "source": [ + "df_miss = pd.DataFrame({\n", + " 'x':[2, np.nan, 1], \n", + " 'y':[np.nan, np.nan, 6]}\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "id": "8404fdeb", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
xy
02.0NaN
1NaNNaN
21.06.0
\n", + "
" + ], + "text/plain": [ + " x y\n", + "0 2.0 NaN\n", + "1 NaN NaN\n", + "2 1.0 6.0" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_miss" + ] + }, + { + "cell_type": "markdown", + "id": "565b8fa8", + "metadata": {}, + "source": [ + "### Drop missing data\n", + "\n", + "We use the `dropna()` method to drop all rows with missing data in any column. Look at its [documentation](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.dropna.html) for further details." + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "0f90aff6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
xy
21.06.0
\n", + "
" + ], + "text/plain": [ + " x y\n", + "2 1.0 6.0" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_drop_all = df_miss.dropna()\n", + "df_drop_all" + ] + }, + { + "cell_type": "markdown", + "id": "190e3c8d", + "metadata": {}, + "source": [ + "The `subset` parameter takes a list of column names specifying which columns to check for missing values; only rows with missing data in those columns are dropped." + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": "ba5ad471", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
xy
02.0NaN
21.06.0
\n", + "
" + ], + "text/plain": [ + " x y\n", + "0 2.0 NaN\n", + "2 1.0 6.0" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_drop_x = df_miss.dropna(subset=['x'])\n", + "df_drop_x" + ] + }, + { + "cell_type": "markdown", + "id": "c7efa14a", + "metadata": {}, + "source": [ + "### Impute missing values \n", + "\n", + "We can use `fillna()` to replace missing data with whatever value you like, e.g. $0$s. Look at its [documentation](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.fillna.html) for further details.\n", + "\n", + "We can pass the results of an operation -- for example, to perform simple imputation, we can replace missing values in each column with the median value of the respective column:" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "id": "c697c8f4", + "metadata": {}, + "outputs": [], + "source": [ + "df_filled = df_miss.fillna(df_miss.median())" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "id": "cc10a2b7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
xy
02.06.0
11.56.0
21.06.0
\n", + "
" + ], + "text/plain": [ + " x y\n", + "0 2.0 6.0\n", + "1 1.5 6.0\n", + "2 1.0 6.0" + ] + }, + "execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_filled" + ] + }, + { + "cell_type": "markdown", + "id": "3f02ee99-89a3-4f23-89d3-e6d24a26ad0f", + "metadata": {}, + "source": [ + "**Try it yourself with Practice Exercise 6!**" + ] + }, + { + "cell_type": "markdown", + "id": "55f1c2c8-7d88-442d-9c2b-eccc5494ffae", + "metadata": {}, + "source": [ + "## Practice exercises" + ] + }, + { + "cell_type": "markdown", + "id": "3e566893-e301-41ba-a1c1-d192460a455d", + "metadata": {}, + "source": [ + "```{exercise}\n", + ":label: pandas3\n", + "\n", + "1- Go to https://mockaroo.com and generate a 1000-record data set. You can drop the `ip_address` column, and add one called `age` that is populated by plain whole numbers. Allow 5% of values in this column to be blank. Be sure to format it as a CSV and to include the header row. Upload the dataset to your Rivanna space. Import the dataset into this notebook. 
Store it in a variable `mock_df`.\n", + "\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "af0b33a6-2d93-41e4-8fdc-8b34cbca72b1", + "metadata": {}, + "outputs": [], + "source": [ + "# Your answers from here" + ] + }, + { + "cell_type": "markdown", + "id": "74ce79fd-9824-4d79-928d-dfec9f984495", + "metadata": {}, + "source": [ + "```{exercise}\n", + ":label: pandas4\n", + "\n", + "2- Explore `mock_df` using the methods introduced in this notebook.\n", + "\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "2644e478-4227-46fe-b922-fe0a37371e46", + "metadata": {}, + "outputs": [], + "source": [ + "# Your answers from here" + ] + }, + { + "cell_type": "markdown", + "id": "d2809cb9-93c5-48ad-be7c-e5148b4a91e4", + "metadata": {}, + "source": [ + "```{exercise}\n", + ":label: pandas5\n", + "\n", + "3- Summarize the data in `mock_df` using the methods introduced in this notebook.\n", + "\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "c4560328-3001-44d5-8425-ba0043b917ce", + "metadata": {}, + "outputs": [], + "source": [ + "# Your answers from here" + ] + }, + { + "cell_type": "markdown", + "id": "6341e1b6-0a5a-4876-9239-cf6bebd4710e", + "metadata": {}, + "source": [ + "```{exercise}\n", + ":label: pandas6\n", + "\n", + "4- Try to visualize some of the data in `mock_df` using the methods introduced in this notebook.\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "0a2b3b94-4ae5-4b91-8de3-e09a8eeeb024", + "metadata": {}, + "outputs": [], + "source": [ + "# Your answers from here" + ] + }, + { + "cell_type": "markdown", + "id": "efda3322-8be1-4121-be94-70cd6a25e929", + "metadata": {}, + "source": [ + "```{exercise}\n", + ":label: pandas7\n", + "\n", + "5- Play around with selecting columns and rows of `mock_df` using `loc` and `iloc`\n", + "\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": 
"25b7879e-c706-4641-8a5a-b201ac339fd8", + "metadata": {}, + "outputs": [], + "source": [ + "# Your answers from here" + ] + }, + { + "cell_type": "markdown", + "id": "8f72d9ff-260a-4d64-b0c3-97ae06310d61", + "metadata": {}, + "source": [ + "```{exercise}\n", + ":label: pandas8\n", + "\n", + "6- Drop observations in blank values in `mock_df`. Then, instead of dropping these values, impute them in any way you like (e.g. by replacing them with the mean)\n", + "\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "a39c9fb1-2459-428d-be79-8bc98077cba0", + "metadata": {}, + "outputs": [], + "source": [ + "# Your answers from here" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/_sources/chapters/module-4/045-PandasIII-manipulation.ipynb b/_sources/chapters/module-4/045-PandasIII-manipulation.ipynb new file mode 100644 index 0000000..726e55b --- /dev/null +++ b/_sources/chapters/module-4/045-PandasIII-manipulation.ipynb @@ -0,0 +1,3183 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "d7b77377-b849-4596-8d06-42244e5c0cb6", + "metadata": {}, + "source": [ + "# PandasIII: Data Manipulation" + ] + }, + { + "cell_type": "markdown", + "id": "28e01bb0-f7bf-46af-bbdf-12b685949937", + "metadata": {}, + "source": [ + "![](https://ds1002-resources.s3.amazonaws.com/images/workflow.png)\n", + "\n", + "In the previous lesson, we studied how Pandas can be used to learn about your data through inspection and exploration. \n", + "\n", + "In this lesson, you’ll learn how Pandas can help you process the data to prepare them for analysis. 
\n", + "\n", + "Specifically, we will cover:\n", + " \n", + "- Advanced filtering and subsetting.\n", + "- The creation and removal of columns.\n", + "- Transforming data through functions.\n", + "- Sorting data." + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "8a937e69-24f5-4459-91a7-f3b2ee54d5a7", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "markdown", + "id": "35ba75ac-4dbd-4971-a62f-3a32f39b6155", + "metadata": {}, + "source": [ + "## Advanced Filtering and Subsetting" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "226728b9-9e9b-4435-82c7-8d95133df325", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_widthspecies
05.13.51.40.2setosa
14.93.01.40.2setosa
24.73.21.30.2setosa
34.63.11.50.2setosa
45.03.61.40.2setosa
..................
1456.73.05.22.3virginica
1466.32.55.01.9virginica
1476.53.05.22.0virginica
1486.23.45.42.3virginica
1495.93.05.11.8virginica
\n", + "

150 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width species\n", + "0 5.1 3.5 1.4 0.2 setosa\n", + "1 4.9 3.0 1.4 0.2 setosa\n", + "2 4.7 3.2 1.3 0.2 setosa\n", + "3 4.6 3.1 1.5 0.2 setosa\n", + "4 5.0 3.6 1.4 0.2 setosa\n", + ".. ... ... ... ... ...\n", + "145 6.7 3.0 5.2 2.3 virginica\n", + "146 6.3 2.5 5.0 1.9 virginica\n", + "147 6.5 3.0 5.2 2.0 virginica\n", + "148 6.2 3.4 5.4 2.3 virginica\n", + "149 5.9 3.0 5.1 1.8 virginica\n", + "\n", + "[150 rows x 5 columns]" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "iris_df = pd.read_csv(\"https://raw.githubusercontent.com/mwaskom/seaborn-data/refs/heads/master/iris.csv\")\n", + "iris_df" + ] + }, + { + "cell_type": "markdown", + "id": "c06891f8-3084-444c-99df-25188902cbd3", + "metadata": {}, + "source": [ + "### Boolean Filtering\n", + "\n", + "It is very common to subset a dataframe based on some condition on the data.\n", + "\n", + "Like with NumPy, Pandas also knows what to do if you pass a boolean structure." + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "ba718cc4-f0f1-4593-adc5-4c7650c5192a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 False\n", + "1 False\n", + "2 False\n", + "3 False\n", + "4 False\n", + " ... \n", + "145 False\n", + "146 False\n", + "147 False\n", + "148 False\n", + "149 False\n", + "Name: sepal_length, Length: 150, dtype: bool" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "iris_df.sepal_length >= 7.5" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "b9b7cbb2-4180-417e-8fd5-56bc1710648f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_widthspecies
1057.63.06.62.1virginica
1177.73.86.72.2virginica
1187.72.66.92.3virginica
1227.72.86.72.0virginica
1317.93.86.42.0virginica
1357.73.06.12.3virginica
\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width species\n", + "105 7.6 3.0 6.6 2.1 virginica\n", + "117 7.7 3.8 6.7 2.2 virginica\n", + "118 7.7 2.6 6.9 2.3 virginica\n", + "122 7.7 2.8 6.7 2.0 virginica\n", + "131 7.9 3.8 6.4 2.0 virginica\n", + "135 7.7 3.0 6.1 2.3 virginica" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Rows satisfying the above condition\n", + "iris_df.loc[iris_df.sepal_length >= 7.5,:]" + ] + }, + { + "cell_type": "markdown", + "id": "e8ef8b0b-3d96-4bd1-af74-edecce3c9c79", + "metadata": {}, + "source": [ + "We can combine more than one condition, similarly to NumPy:" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "b503c1d1-779e-4e43-a52d-37cbee98582b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_widthspecies
24.73.21.30.2setosa
34.63.11.50.2setosa
64.63.41.40.3setosa
224.63.61.00.2setosa
294.73.21.60.2setosa
414.52.31.30.3setosa
474.63.21.40.2setosa
\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width species\n", + "2 4.7 3.2 1.3 0.2 setosa\n", + "3 4.6 3.1 1.5 0.2 setosa\n", + "6 4.6 3.4 1.4 0.3 setosa\n", + "22 4.6 3.6 1.0 0.2 setosa\n", + "29 4.7 3.2 1.6 0.2 setosa\n", + "41 4.5 2.3 1.3 0.3 setosa\n", + "47 4.6 3.2 1.4 0.2 setosa" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "iris_df.loc[(iris_df['sepal_length' ]>= 4.5) & (iris_df['sepal_length'] <= 4.7),:]" + ] + }, + { + "cell_type": "markdown", + "id": "3f8cbff4-ae7f-45df-a4a1-62b947d5d395", + "metadata": {}, + "source": [ + "### Masking" + ] + }, + { + "cell_type": "markdown", + "id": "015a26b4-2f5d-4cf7-8c09-9969e419cf00", + "metadata": {}, + "source": [ + "Here's an example of **masking** using boolean conditions passed to the dataframe selector:" + ] + }, + { + "cell_type": "markdown", + "id": "2cd960f3-6723-4eb4-a826-013b327a7ed1", + "metadata": {}, + "source": [ + "Here are the **values** for the feature `sepal length`:" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "81740854-d681-4800-9a84-1e75f68547fd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([5.1, 4.9, 4.7, 4.6, 5. , 5.4, 4.6, 5. , 4.4, 4.9, 5.4, 4.8, 4.8,\n", + " 4.3, 5.8, 5.7, 5.4, 5.1, 5.7, 5.1, 5.4, 5.1, 4.6, 5.1, 4.8, 5. ,\n", + " 5. , 5.2, 5.2, 4.7, 4.8, 5.4, 5.2, 5.5, 4.9, 5. , 5.5, 4.9, 4.4,\n", + " 5.1, 5. , 4.5, 4.4, 5. , 5.1, 4.8, 5.1, 4.6, 5.3, 5. , 7. , 6.4,\n", + " 6.9, 5.5, 6.5, 5.7, 6.3, 4.9, 6.6, 5.2, 5. , 5.9, 6. , 6.1, 5.6,\n", + " 6.7, 5.6, 5.8, 6.2, 5.6, 5.9, 6.1, 6.3, 6.1, 6.4, 6.6, 6.8, 6.7,\n", + " 6. , 5.7, 5.5, 5.5, 5.8, 6. , 5.4, 6. , 6.7, 6.3, 5.6, 5.5, 5.5,\n", + " 6.1, 5.8, 5. , 5.6, 5.7, 5.7, 6.2, 5.1, 5.7, 6.3, 5.8, 7.1, 6.3,\n", + " 6.5, 7.6, 4.9, 7.3, 6.7, 7.2, 6.5, 6.4, 6.8, 5.7, 5.8, 6.4, 6.5,\n", + " 7.7, 7.7, 6. 
, 6.9, 5.6, 7.7, 6.3, 6.7, 7.2, 6.2, 6.1, 6.4, 7.2,\n", + " 7.4, 7.9, 6.4, 6.3, 6.1, 7.7, 6.3, 6.4, 6. , 6.9, 6.7, 6.9, 5.8,\n", + " 6.8, 6.7, 6.7, 6.3, 6.5, 6.2, 5.9])" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "iris_df.sepal_length.values" + ] + }, + { + "cell_type": "markdown", + "id": "dc987421-4057-4b23-9053-622c0687fc78", + "metadata": {}, + "source": [ + "And here are **the boolean values** generated by applying a comparison operator to those values:" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "fd4c7e52-33e4-4ffc-b20d-bf922ccb0af5", + "metadata": {}, + "outputs": [], + "source": [ + "mask = iris_df.sepal_length >= 7.5" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "e4ba934b-49e3-4d7d-b7cf-b7e3765d4d60", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([False, False, False, False, False, False, False, False, False,\n", + " False, False, False, False, False, False, False, False, False,\n", + " False, False, False, False, False, False, False, False, False,\n", + " False, False, False, False, False, False, False, False, False,\n", + " False, False, False, False, False, False, False, False, False,\n", + " False, False, False, False, False, False, False, False, False,\n", + " False, False, False, False, False, False, False, False, False,\n", + " False, False, False, False, False, False, False, False, False,\n", + " False, False, False, False, False, False, False, False, False,\n", + " False, False, False, False, False, False, False, False, False,\n", + " False, False, False, False, False, False, False, False, False,\n", + " False, False, False, False, False, False, True, False, False,\n", + " False, False, False, False, False, False, False, False, False,\n", + " True, True, False, False, False, True, False, False, False,\n", + " False, False, False, False, False, True, False, False, False,\n", + " True, False, False, 
False, False, False, False, False, False,\n", + " False, False, False, False, False, False])" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mask.values" + ] + }, + { + "cell_type": "markdown", + "id": "3b6fc0f1-9012-40b7-a587-c263ae51c653", + "metadata": {}, + "source": [ + "The two sets of values have the same shape.\n", + "\n", + "We can now overlay the logical values over the numeric ones and keep only what is `True`:" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "fd3874cb-b3b4-4fcf-b25c-dbe1e4bbc97c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([7.6, 7.7, 7.7, 7.7, 7.9, 7.7])" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "iris_df.sepal_length[mask].values" + ] + }, + { + "cell_type": "markdown", + "id": "c1f81f06-15d2-41ea-b9a2-cd149e148e33", + "metadata": {}, + "source": [ + "## Creating columns\n", + "\n", + "It is typical to create new columns from existing columns. \n", + "\n", + "### Using brackets `[]`\n", + " \n", + "Like in dictionaries, you can create a new entry (column) in the dataframe using `[]`.\n", + "\n", + "In this example, a new column (or field) is created by multiplying `sepal_length` by `sepal_width`:" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "cd076728-1d9b-4acc-9afb-157cfc33c9ce", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_widthspeciessepal_volume
05.13.51.40.2setosa17.85
14.93.01.40.2setosa14.70
24.73.21.30.2setosa15.04
34.63.11.50.2setosa14.26
45.03.61.40.2setosa18.00
\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width species sepal_volume\n", + "0 5.1 3.5 1.4 0.2 setosa 17.85\n", + "1 4.9 3.0 1.4 0.2 setosa 14.70\n", + "2 4.7 3.2 1.3 0.2 setosa 15.04\n", + "3 4.6 3.1 1.5 0.2 setosa 14.26\n", + "4 5.0 3.6 1.4 0.2 setosa 18.00" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "iris_df['sepal_volume'] = iris_df.sepal_length * iris_df.sepal_width\n", + "\n", + "iris_df.head()" + ] + }, + { + "cell_type": "markdown", + "id": "8919ce05-9279-46f5-88e3-3962fc7f2919", + "metadata": {}, + "source": [ + "Note that:\n", + "\n", + "- The left side has form: DataFrame name, bracket notation, new column name\n", + "- The assignment operator `=` is used\n", + "- The right side contains an expression; here, two df columns are multiplied together " + ] + }, + { + "cell_type": "markdown", + "id": "9f68ca9b-0976-4f6d-8988-46b1241afad4", + "metadata": {}, + "source": [ + "Bracket notation also works on the fields, but it's more typing:" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "ebbb13be-676e-495c-98c0-e49085ffd2de", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_widthspeciessepal_volumesepal_volume_2
05.13.51.40.2setosa17.8517.85
14.93.01.40.2setosa14.7014.70
24.73.21.30.2setosa15.0415.04
34.63.11.50.2setosa14.2614.26
45.03.61.40.2setosa18.0018.00
\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width species sepal_volume \\\n", + "0 5.1 3.5 1.4 0.2 setosa 17.85 \n", + "1 4.9 3.0 1.4 0.2 setosa 14.70 \n", + "2 4.7 3.2 1.3 0.2 setosa 15.04 \n", + "3 4.6 3.1 1.5 0.2 setosa 14.26 \n", + "4 5.0 3.6 1.4 0.2 setosa 18.00 \n", + "\n", + " sepal_volume_2 \n", + "0 17.85 \n", + "1 14.70 \n", + "2 15.04 \n", + "3 14.26 \n", + "4 18.00 " + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "iris_df['sepal_volume_2'] = iris_df['sepal_length'] * iris_df['sepal_width']\n", + "\n", + "iris_df.head()" + ] + }, + { + "cell_type": "markdown", + "id": "678d5586-16db-4b40-81a7-a8194fe352c7", + "metadata": {}, + "source": [ + "The bracket notation must be used when assigning to a new column. This will break:" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "291f76ae-2dc1-4b5d-ade9-3fb03a98d444", + "metadata": {}, + "outputs": [ + { + "ename": "SyntaxError", + "evalue": "invalid syntax (1290401302.py, line 1)", + "output_type": "error", + "traceback": [ + "\u001b[0;36m Cell \u001b[0;32mIn[29], line 1\u001b[0;36m\u001b[0m\n\u001b[0;31m iris_df.'sepal_volume_3' = iris_df.sepal_length + iris_df.sepal_width\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n" + ] + } + ], + "source": [ + "iris_df.'sepal_volume_3' = iris_df.sepal_length + iris_df.sepal_width" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "e7bcd6db-b82d-4268-92d8-a2e014106516", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_21005/2179810851.py:1: UserWarning: Pandas doesn't allow columns to be created via a new attribute name - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute-access\n", + " iris_df.sepal_volume_3 = iris_df.sepal_length + iris_df.sepal_width\n" + ] + } + ], + "source": [ + 
"iris_df.sepal_volume_3 = iris_df.sepal_length + iris_df.sepal_width" + ] + }, + { + "cell_type": "markdown", + "id": "ac91bed5-a0f2-4d0c-870b-d456ff45f3b3", + "metadata": {}, + "source": [ + "### Using `assign`\n", + "\n", + "This method also allows you to create new columns in a given dataset.\n", + "\n", + "**it is very useful when you want to create a new dataframe while maintaining the original as it was.**" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "36cf73c9-c666-41ea-840e-c1daa64e7717", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_widthspeciessepal_volumesepal_volume_2
05.13.51.40.2setosa17.8517.85
14.93.01.40.2setosa14.7014.70
24.73.21.30.2setosa15.0415.04
34.63.11.50.2setosa14.2614.26
45.03.61.40.2setosa18.0018.00
........................
1456.73.05.22.3virginica20.1020.10
1466.32.55.01.9virginica15.7515.75
1476.53.05.22.0virginica19.5019.50
1486.23.45.42.3virginica21.0821.08
1495.93.05.11.8virginica17.7017.70
\n", + "

150 rows × 7 columns

\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width species \\\n", + "0 5.1 3.5 1.4 0.2 setosa \n", + "1 4.9 3.0 1.4 0.2 setosa \n", + "2 4.7 3.2 1.3 0.2 setosa \n", + "3 4.6 3.1 1.5 0.2 setosa \n", + "4 5.0 3.6 1.4 0.2 setosa \n", + ".. ... ... ... ... ... \n", + "145 6.7 3.0 5.2 2.3 virginica \n", + "146 6.3 2.5 5.0 1.9 virginica \n", + "147 6.5 3.0 5.2 2.0 virginica \n", + "148 6.2 3.4 5.4 2.3 virginica \n", + "149 5.9 3.0 5.1 1.8 virginica \n", + "\n", + " sepal_volume sepal_volume_2 \n", + "0 17.85 17.85 \n", + "1 14.70 14.70 \n", + "2 15.04 15.04 \n", + "3 14.26 14.26 \n", + "4 18.00 18.00 \n", + ".. ... ... \n", + "145 20.10 20.10 \n", + "146 15.75 15.75 \n", + "147 19.50 19.50 \n", + "148 21.08 21.08 \n", + "149 17.70 17.70 \n", + "\n", + "[150 rows x 7 columns]" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "iris_df" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "b31b715c-0c54-4625-95f4-2947f7a05365", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_widthspeciessepal_volumesepal_volume_2sepal_volume_4
05.13.51.40.2setosa17.8517.8517.85
14.93.01.40.2setosa14.7014.7014.70
24.73.21.30.2setosa15.0415.0415.04
34.63.11.50.2setosa14.2614.2614.26
45.03.61.40.2setosa18.0018.0018.00
...........................
1456.73.05.22.3virginica20.1020.1020.10
1466.32.55.01.9virginica15.7515.7515.75
1476.53.05.22.0virginica19.5019.5019.50
1486.23.45.42.3virginica21.0821.0821.08
1495.93.05.11.8virginica17.7017.7017.70
\n", + "

150 rows × 8 columns

\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width species \\\n", + "0 5.1 3.5 1.4 0.2 setosa \n", + "1 4.9 3.0 1.4 0.2 setosa \n", + "2 4.7 3.2 1.3 0.2 setosa \n", + "3 4.6 3.1 1.5 0.2 setosa \n", + "4 5.0 3.6 1.4 0.2 setosa \n", + ".. ... ... ... ... ... \n", + "145 6.7 3.0 5.2 2.3 virginica \n", + "146 6.3 2.5 5.0 1.9 virginica \n", + "147 6.5 3.0 5.2 2.0 virginica \n", + "148 6.2 3.4 5.4 2.3 virginica \n", + "149 5.9 3.0 5.1 1.8 virginica \n", + "\n", + " sepal_volume sepal_volume_2 sepal_volume_4 \n", + "0 17.85 17.85 17.85 \n", + "1 14.70 14.70 14.70 \n", + "2 15.04 15.04 15.04 \n", + "3 14.26 14.26 14.26 \n", + "4 18.00 18.00 18.00 \n", + ".. ... ... ... \n", + "145 20.10 20.10 20.10 \n", + "146 15.75 15.75 15.75 \n", + "147 19.50 19.50 19.50 \n", + "148 21.08 21.08 21.08 \n", + "149 17.70 17.70 17.70 \n", + "\n", + "[150 rows x 8 columns]" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "iris_df.assign(sepal_volume_4 = iris_df[\"sepal_length\"] * iris_df[\"sepal_width\"])" + ] + }, + { + "cell_type": "markdown", + "id": "0bbc881e-eefe-4763-a03a-098d996d5d6c", + "metadata": {}, + "source": [ + "## Removing columns" + ] + }, + { + "cell_type": "markdown", + "id": "219a5fa9-44ef-4da0-8455-f8f9267b6b7c", + "metadata": {}, + "source": [ + "- Using the reserverd keyword `del` to drop a DataFrame or single columns from the dataframe:" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "46206b51-9573-41f8-9117-4cd34b168ab8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The history saving thread hit an unexpected error (OperationalError('attempt to write a readonly database')).History will not be written to the database.\n" + ] + } + ], + "source": [ + "iris_df_drop = iris_df.copy()" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "334fac50-05cb-4a21-bb08-9ba663c5a1bb", + "metadata": 
{}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_widthspeciessepal_volumesepal_volume_2
05.13.51.40.2setosa17.8517.85
14.93.01.40.2setosa14.7014.70
\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width species sepal_volume \\\n", + "0 5.1 3.5 1.4 0.2 setosa 17.85 \n", + "1 4.9 3.0 1.4 0.2 setosa 14.70 \n", + "\n", + " sepal_volume_2 \n", + "0 17.85 \n", + "1 14.70 " + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "iris_df_drop.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "bd7b140e-ea6e-4f64-afc8-c9d77d4ea1cc", + "metadata": {}, + "outputs": [], + "source": [ + "# delete the column 'x'\n", + "del iris_df_drop['sepal_volume_2']" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "0485740b-cb4d-447e-8461-44adeb83f30b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_widthspeciessepal_volume
05.13.51.40.2setosa17.85
14.93.01.40.2setosa14.70
24.73.21.30.2setosa15.04
34.63.11.50.2setosa14.26
45.03.61.40.2setosa18.00
.....................
1456.73.05.22.3virginica20.10
1466.32.55.01.9virginica15.75
1476.53.05.22.0virginica19.50
1486.23.45.42.3virginica21.08
1495.93.05.11.8virginica17.70
\n", + "

150 rows × 6 columns

\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width species \\\n", + "0 5.1 3.5 1.4 0.2 setosa \n", + "1 4.9 3.0 1.4 0.2 setosa \n", + "2 4.7 3.2 1.3 0.2 setosa \n", + "3 4.6 3.1 1.5 0.2 setosa \n", + "4 5.0 3.6 1.4 0.2 setosa \n", + ".. ... ... ... ... ... \n", + "145 6.7 3.0 5.2 2.3 virginica \n", + "146 6.3 2.5 5.0 1.9 virginica \n", + "147 6.5 3.0 5.2 2.0 virginica \n", + "148 6.2 3.4 5.4 2.3 virginica \n", + "149 5.9 3.0 5.1 1.8 virginica \n", + "\n", + " sepal_volume \n", + "0 17.85 \n", + "1 14.70 \n", + "2 15.04 \n", + "3 14.26 \n", + "4 18.00 \n", + ".. ... \n", + "145 20.10 \n", + "146 15.75 \n", + "147 19.50 \n", + "148 21.08 \n", + "149 17.70 \n", + "\n", + "[150 rows x 6 columns]" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "iris_df_drop" + ] + }, + { + "cell_type": "markdown", + "id": "8a368b8b-34dc-4849-877e-232ecca7b085", + "metadata": {}, + "source": [ + "- Using the method `drop()` to drop one or more columns by specifying `axis` argument equal 1: " + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "7b22db5a-c3e3-47f8-8862-82749c9c221f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_width
05.13.51.40.2
14.93.01.40.2
24.73.21.30.2
34.63.11.50.2
45.03.61.40.2
...............
1456.73.05.22.3
1466.32.55.01.9
1476.53.05.22.0
1486.23.45.42.3
1495.93.05.11.8
\n", + "

150 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width\n", + "0 5.1 3.5 1.4 0.2\n", + "1 4.9 3.0 1.4 0.2\n", + "2 4.7 3.2 1.3 0.2\n", + "3 4.6 3.1 1.5 0.2\n", + "4 5.0 3.6 1.4 0.2\n", + ".. ... ... ... ...\n", + "145 6.7 3.0 5.2 2.3\n", + "146 6.3 2.5 5.0 1.9\n", + "147 6.5 3.0 5.2 2.0\n", + "148 6.2 3.4 5.4 2.3\n", + "149 5.9 3.0 5.1 1.8\n", + "\n", + "[150 rows x 4 columns]" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Here we drop columns\n", + "iris_df_drop = iris_df_drop.drop(['sepal_volume', 'species'], axis=1)\n", + "iris_df_drop" + ] + }, + { + "cell_type": "markdown", + "id": "795cb096-cea9-4311-a96e-7b713c6e360b", + "metadata": {}, + "source": [ + "Note that with this `drop()` method you can also drop specific observations by setting `axis`=0" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "5cb12d5f-e033-44b8-90ba-c4ea1b09d70d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_width
14.93.01.40.2
24.73.21.30.2
34.63.11.50.2
45.03.61.40.2
55.43.91.70.4
...............
1456.73.05.22.3
1466.32.55.01.9
1476.53.05.22.0
1486.23.45.42.3
1495.93.05.11.8
\n", + "

149 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width\n", + "1 4.9 3.0 1.4 0.2\n", + "2 4.7 3.2 1.3 0.2\n", + "3 4.6 3.1 1.5 0.2\n", + "4 5.0 3.6 1.4 0.2\n", + "5 5.4 3.9 1.7 0.4\n", + ".. ... ... ... ...\n", + "145 6.7 3.0 5.2 2.3\n", + "146 6.3 2.5 5.0 1.9\n", + "147 6.5 3.0 5.2 2.0\n", + "148 6.2 3.4 5.4 2.3\n", + "149 5.9 3.0 5.1 1.8\n", + "\n", + "[149 rows x 4 columns]" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Now a particular observation\n", + "iris_df_drop = iris_df_drop.drop([0], axis=0)\n", + "iris_df_drop" + ] + }, + { + "cell_type": "markdown", + "id": "00e2a057-31c9-436e-af6d-362c5cb93906", + "metadata": {}, + "source": [ + "## Transforming your data" + ] + }, + { + "cell_type": "markdown", + "id": "b8bc6fd9-68a6-4d3e-87e1-84c18c608329", + "metadata": {}, + "source": [ + "Sometimes, as part of preprocessing, you may need to apply a function to transform your dataframe. \n", + "\n", + "The most straightforward way to do this is with the `apply` function. Let's explore some common uses: " + ] + }, + { + "cell_type": "markdown", + "id": "77659c9e-ddcc-4cdd-9da9-f8e4fd97b2a7", + "metadata": {}, + "source": [ + "- Creation of new columns, particularly when these need to arise from a complex operation" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "c00b8740-7209-4fb3-aa5a-b38785142523", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 111.541\n", + "1 98.739\n", + "2 87.033\n", + "3 81.576\n", + "4 105.000\n", + " ... 
\n", + "145 259.173\n", + "146 214.057\n", + "147 235.875\n", + "148 203.688\n", + "149 174.669\n", + "Name: sepal_length, Length: 150, dtype: float64" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def my_func(x):\n", + " return x**3 - x**2 - x + 10\n", + "\n", + "iris_df[\"sepal_length\"].apply(my_func)" + ] + }, + { + "cell_type": "markdown", + "id": "f4fb13f3-7cdf-4a10-bcb8-91ee6220e465", + "metadata": {}, + "source": [ + "You can also use a `lambda` function, which is basically a small anonymous function that can take any number of arguments, but can only have one expression.\n", + "\n", + "```lambda arguments : expression```" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "b7831d33-0b0e-4dee-b37a-e51272bcec2b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 111.541\n", + "1 98.739\n", + "2 87.033\n", + "3 81.576\n", + "4 105.000\n", + " ... \n", + "145 259.173\n", + "146 214.057\n", + "147 235.875\n", + "148 203.688\n", + "149 174.669\n", + "Name: sepal_length, Length: 150, dtype: float64" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "iris_df[\"sepal_length\"].apply(lambda x: x**3 - x**2 - x + 10)" + ] + }, + { + "cell_type": "markdown", + "id": "04526587-80c1-4eee-ace8-1e849d93ffaf", + "metadata": {}, + "source": [ + "`apply` takes the argument `axis`, which specifies the axis along which the function is applied. By default, `axis` is set to 0, meaning the function is applied to each column." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "ed163a2c-43de-47c8-9ed3-df8bdcd8a3ed", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "sepal_length 5.843333\n", + "petal_length 3.758000\n", + "dtype: float64" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import numpy as np\n", + "iris_df[[\"sepal_length\", \"petal_length\"]].apply(np.mean)" + ] + }, + { + "cell_type": "markdown", + "id": "fee7361e-25b8-4ed2-9abb-6e52aceb8d06", + "metadata": {}, + "source": [ + "But we can change this to apply a function to each row by setting `axis` to 1:" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "c0481562-2fa4-48d6-ad03-db362b64aec3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 3.25\n", + "1 3.15\n", + "2 3.00\n", + "3 3.05\n", + "4 3.20\n", + " ... \n", + "145 5.95\n", + "146 5.65\n", + "147 5.85\n", + "148 5.80\n", + "149 5.50\n", + "Length: 150, dtype: float64" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import numpy\n", + "iris_df[[\"sepal_length\", \"petal_length\"]].apply(numpy.mean, axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "22a789f3-a8fa-40f2-af48-a0d35c4886f0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_widthspeciessepal_volumesepal_volume_2avg_lengthavg_width
05.13.51.40.2setosa17.8517.853.251.85
14.93.01.40.2setosa14.7014.703.151.60
24.73.21.30.2setosa15.0415.043.001.70
34.63.11.50.2setosa14.2614.263.051.65
45.03.61.40.2setosa18.0018.003.201.90
\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width species sepal_volume \\\n", + "0 5.1 3.5 1.4 0.2 setosa 17.85 \n", + "1 4.9 3.0 1.4 0.2 setosa 14.70 \n", + "2 4.7 3.2 1.3 0.2 setosa 15.04 \n", + "3 4.6 3.1 1.5 0.2 setosa 14.26 \n", + "4 5.0 3.6 1.4 0.2 setosa 18.00 \n", + "\n", + " sepal_volume_2 avg_length avg_width \n", + "0 17.85 3.25 1.85 \n", + "1 14.70 3.15 1.60 \n", + "2 15.04 3.00 1.70 \n", + "3 14.26 3.05 1.65 \n", + "4 18.00 3.20 1.90 " + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "iris_df[\"avg_length\"] = iris_df[[\"sepal_length\", \"petal_length\"]].apply(numpy.mean, axis=1)\n", + "iris_df[\"avg_width\"] = iris_df[[\"sepal_width\", \"petal_width\"]].apply(numpy.mean, axis=1)\n", + "\n", + "iris_df.head()" + ] + }, + { + "cell_type": "markdown", + "id": "a3d7b6d9-67aa-4b19-a787-f5bbb5c7feea", + "metadata": {}, + "source": [ + "## Sorting Data" + ] + }, + { + "cell_type": "markdown", + "id": "e16e0a4a-7846-4249-93fb-405d0876b314", + "metadata": {}, + "source": [ + "### By values: `sort_values()`\n", + "\n", + "You can customize this sorting with the following parameters:\n", + " \n", + "- `by` parameter takes string or list of strings\n", + "- `ascending` takes True or False\n", + "- `inplace` will save sorted values into the df\n", + "\n", + "Look at its [documentation](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.sort_values.html) for further details." + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "c8b9b5f2-cf6a-46af-9e64-f4c6f98a1eb0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_widthspeciessepal_volumesepal_volume_2avg_lengthavg_width
134.33.01.10.1setosa12.9012.902.701.55
84.42.91.40.2setosa12.7612.762.901.55
384.43.01.30.2setosa13.2013.202.851.60
424.43.21.30.2setosa14.0814.082.851.70
414.52.31.30.3setosa10.3510.352.901.30
..............................
1227.72.86.72.0virginica21.5621.567.202.40
1177.73.86.72.2virginica29.2629.267.203.00
1187.72.66.92.3virginica20.0220.027.302.45
1357.73.06.12.3virginica23.1023.106.902.65
1317.93.86.42.0virginica30.0230.027.152.90
\n", + "

150 rows × 9 columns

\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width species \\\n", + "13 4.3 3.0 1.1 0.1 setosa \n", + "8 4.4 2.9 1.4 0.2 setosa \n", + "38 4.4 3.0 1.3 0.2 setosa \n", + "42 4.4 3.2 1.3 0.2 setosa \n", + "41 4.5 2.3 1.3 0.3 setosa \n", + ".. ... ... ... ... ... \n", + "122 7.7 2.8 6.7 2.0 virginica \n", + "117 7.7 3.8 6.7 2.2 virginica \n", + "118 7.7 2.6 6.9 2.3 virginica \n", + "135 7.7 3.0 6.1 2.3 virginica \n", + "131 7.9 3.8 6.4 2.0 virginica \n", + "\n", + " sepal_volume sepal_volume_2 avg_length avg_width \n", + "13 12.90 12.90 2.70 1.55 \n", + "8 12.76 12.76 2.90 1.55 \n", + "38 13.20 13.20 2.85 1.60 \n", + "42 14.08 14.08 2.85 1.70 \n", + "41 10.35 10.35 2.90 1.30 \n", + ".. ... ... ... ... \n", + "122 21.56 21.56 7.20 2.40 \n", + "117 29.26 29.26 7.20 3.00 \n", + "118 20.02 20.02 7.30 2.45 \n", + "135 23.10 23.10 6.90 2.65 \n", + "131 30.02 30.02 7.15 2.90 \n", + "\n", + "[150 rows x 9 columns]" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "iris_df.sort_values(by=['sepal_length','petal_width'])" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "6fe15c85-e0a7-4061-896c-79a78ec48b58", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_widthspeciessepal_volumesepal_volume_2avg_lengthavg_width
1317.93.86.42.0virginica30.0230.027.152.90
1187.72.66.92.3virginica20.0220.027.302.45
1357.73.06.12.3virginica23.1023.106.902.65
1177.73.86.72.2virginica29.2629.267.203.00
1227.72.86.72.0virginica21.5621.567.202.40
..............................
414.52.31.30.3setosa10.3510.352.901.30
84.42.91.40.2setosa12.7612.762.901.55
384.43.01.30.2setosa13.2013.202.851.60
424.43.21.30.2setosa14.0814.082.851.70
134.33.01.10.1setosa12.9012.902.701.55
\n", + "

150 rows × 9 columns

\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width species \\\n", + "131 7.9 3.8 6.4 2.0 virginica \n", + "118 7.7 2.6 6.9 2.3 virginica \n", + "135 7.7 3.0 6.1 2.3 virginica \n", + "117 7.7 3.8 6.7 2.2 virginica \n", + "122 7.7 2.8 6.7 2.0 virginica \n", + ".. ... ... ... ... ... \n", + "41 4.5 2.3 1.3 0.3 setosa \n", + "8 4.4 2.9 1.4 0.2 setosa \n", + "38 4.4 3.0 1.3 0.2 setosa \n", + "42 4.4 3.2 1.3 0.2 setosa \n", + "13 4.3 3.0 1.1 0.1 setosa \n", + "\n", + " sepal_volume sepal_volume_2 avg_length avg_width \n", + "131 30.02 30.02 7.15 2.90 \n", + "118 20.02 20.02 7.30 2.45 \n", + "135 23.10 23.10 6.90 2.65 \n", + "117 29.26 29.26 7.20 3.00 \n", + "122 21.56 21.56 7.20 2.40 \n", + ".. ... ... ... ... \n", + "41 10.35 10.35 2.90 1.30 \n", + "8 12.76 12.76 2.90 1.55 \n", + "38 13.20 13.20 2.85 1.60 \n", + "42 14.08 14.08 2.85 1.70 \n", + "13 12.90 12.90 2.70 1.55 \n", + "\n", + "[150 rows x 9 columns]" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "iris_df.sort_values(by=['sepal_length','petal_width'], ascending=False)" + ] + }, + { + "cell_type": "markdown", + "id": "33bb2cdb-c5e3-4717-9faa-26cae8d718ad", + "metadata": {}, + "source": [ + "### By index: `sort_index()`\n", + "\n", + "You can customize this sorting with the following parameters:\n", + " \n", + " - `axis` along which to sort. The value 0 identifies the rows, and 1 identifies the columns.\n", + " - `ascending` takes True or False\n", + " - `inplace` will save sorted values into the df\n", + "\n", + "Look at its [documentation](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.sort_index.html) for further details." + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": "1fc06271-5917-4faf-818c-66b0436460d7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_widthspeciessepal_volumesepal_volume_2avg_lengthavg_width
05.13.51.40.2setosa17.8517.853.251.85
14.93.01.40.2setosa14.7014.703.151.60
24.73.21.30.2setosa15.0415.043.001.70
34.63.11.50.2setosa14.2614.263.051.65
45.03.61.40.2setosa18.0018.003.201.90
..............................
1456.73.05.22.3virginica20.1020.105.952.65
1466.32.55.01.9virginica15.7515.755.652.20
1476.53.05.22.0virginica19.5019.505.852.50
1486.23.45.42.3virginica21.0821.085.802.85
1495.93.05.11.8virginica17.7017.705.502.40
\n", + "

150 rows × 9 columns

\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width species \\\n", + "0 5.1 3.5 1.4 0.2 setosa \n", + "1 4.9 3.0 1.4 0.2 setosa \n", + "2 4.7 3.2 1.3 0.2 setosa \n", + "3 4.6 3.1 1.5 0.2 setosa \n", + "4 5.0 3.6 1.4 0.2 setosa \n", + ".. ... ... ... ... ... \n", + "145 6.7 3.0 5.2 2.3 virginica \n", + "146 6.3 2.5 5.0 1.9 virginica \n", + "147 6.5 3.0 5.2 2.0 virginica \n", + "148 6.2 3.4 5.4 2.3 virginica \n", + "149 5.9 3.0 5.1 1.8 virginica \n", + "\n", + " sepal_volume sepal_volume_2 avg_length avg_width \n", + "0 17.85 17.85 3.25 1.85 \n", + "1 14.70 14.70 3.15 1.60 \n", + "2 15.04 15.04 3.00 1.70 \n", + "3 14.26 14.26 3.05 1.65 \n", + "4 18.00 18.00 3.20 1.90 \n", + ".. ... ... ... ... \n", + "145 20.10 20.10 5.95 2.65 \n", + "146 15.75 15.75 5.65 2.20 \n", + "147 19.50 19.50 5.85 2.50 \n", + "148 21.08 21.08 5.80 2.85 \n", + "149 17.70 17.70 5.50 2.40 \n", + "\n", + "[150 rows x 9 columns]" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "iris_df.sort_index(axis=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "id": "e7415d71-b01f-48a3-8e24-1ad32ba4f664", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_widthspeciessepal_volumesepal_volume_2avg_lengthavg_width
1495.93.05.11.8virginica17.7017.705.502.40
1486.23.45.42.3virginica21.0821.085.802.85
1476.53.05.22.0virginica19.5019.505.852.50
1466.32.55.01.9virginica15.7515.755.652.20
1456.73.05.22.3virginica20.1020.105.952.65
..............................
45.03.61.40.2setosa18.0018.003.201.90
34.63.11.50.2setosa14.2614.263.051.65
24.73.21.30.2setosa15.0415.043.001.70
14.93.01.40.2setosa14.7014.703.151.60
05.13.51.40.2setosa17.8517.853.251.85
\n", + "

150 rows × 9 columns

\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width species \\\n", + "149 5.9 3.0 5.1 1.8 virginica \n", + "148 6.2 3.4 5.4 2.3 virginica \n", + "147 6.5 3.0 5.2 2.0 virginica \n", + "146 6.3 2.5 5.0 1.9 virginica \n", + "145 6.7 3.0 5.2 2.3 virginica \n", + ".. ... ... ... ... ... \n", + "4 5.0 3.6 1.4 0.2 setosa \n", + "3 4.6 3.1 1.5 0.2 setosa \n", + "2 4.7 3.2 1.3 0.2 setosa \n", + "1 4.9 3.0 1.4 0.2 setosa \n", + "0 5.1 3.5 1.4 0.2 setosa \n", + "\n", + " sepal_volume sepal_volume_2 avg_length avg_width \n", + "149 17.70 17.70 5.50 2.40 \n", + "148 21.08 21.08 5.80 2.85 \n", + "147 19.50 19.50 5.85 2.50 \n", + "146 15.75 15.75 5.65 2.20 \n", + "145 20.10 20.10 5.95 2.65 \n", + ".. ... ... ... ... \n", + "4 18.00 18.00 3.20 1.90 \n", + "3 14.26 14.26 3.05 1.65 \n", + "2 15.04 15.04 3.00 1.70 \n", + "1 14.70 14.70 3.15 1.60 \n", + "0 17.85 17.85 3.25 1.85 \n", + "\n", + "[150 rows x 9 columns]" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "iris_df.sort_index(axis=0, ascending=False)" + ] + }, + { + "cell_type": "markdown", + "id": "22c70953-65d0-469a-8991-4cd622fa4e40", + "metadata": {}, + "source": [ + "## Practice exercises" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "13003483-345a-4c58-9efb-c9babc5e3491", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/_sources/chapters/module-4/046-PandasIII-Merging_Concatenating_Aggregating.ipynb 
b/_sources/chapters/module-4/046-PandasIII-Merging_Concatenating_Aggregating.ipynb new file mode 100644 index 0000000..6aac98d --- /dev/null +++ b/_sources/chapters/module-4/046-PandasIII-Merging_Concatenating_Aggregating.ipynb @@ -0,0 +1,811 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 8, + "id": "ca3e52c1-205a-4b79-a122-ca6de7694f08", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "8d5c18d4-14ae-4298-bfe5-f36d6ebbfa7d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_widthspecies
05.13.51.40.2setosa
14.93.01.40.2setosa
24.73.21.30.2setosa
34.63.11.50.2setosa
45.03.61.40.2setosa
..................
1456.73.05.22.3virginica
1466.32.55.01.9virginica
1476.53.05.22.0virginica
1486.23.45.42.3virginica
1495.93.05.11.8virginica
\n", + "

150 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width species\n", + "0 5.1 3.5 1.4 0.2 setosa\n", + "1 4.9 3.0 1.4 0.2 setosa\n", + "2 4.7 3.2 1.3 0.2 setosa\n", + "3 4.6 3.1 1.5 0.2 setosa\n", + "4 5.0 3.6 1.4 0.2 setosa\n", + ".. ... ... ... ... ...\n", + "145 6.7 3.0 5.2 2.3 virginica\n", + "146 6.3 2.5 5.0 1.9 virginica\n", + "147 6.5 3.0 5.2 2.0 virginica\n", + "148 6.2 3.4 5.4 2.3 virginica\n", + "149 5.9 3.0 5.1 1.8 virginica\n", + "\n", + "[150 rows x 5 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "iris_df = pd.read_csv(\"https://raw.githubusercontent.com/mwaskom/seaborn-data/refs/heads/master/iris.csv\")\n", + "iris_df" + ] + }, + { + "cell_type": "markdown", + "id": "383c6fe5-50d7-4b20-b761-cbe3db8c47fe", + "metadata": {}, + "source": [ + "## Concatenating and Merging\n", + "\n", + "### `pd.concat()` \n", + "\n", + "Concatenate pandas objects along an axis\n", + "\n", + "[Details](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.concat.html)" + ] + }, + { + "cell_type": "markdown", + "id": "23e8b17e-adab-4594-a8ad-2b72ad72eae0", + "metadata": {}, + "source": [ + "Create two dfs and vertically stack them" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "869e9f26-9576-4128-a6ab-f4bdb13cd8ed", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " 0 1 2 3\n", + "0 0.442947 -0.617434 0.401841 -0.990547\n", + "1 0.404872 -0.729196 0.834374 -1.633626\n", + "2 -0.727989 -0.455244 -0.107535 0.788234\n", + "---------------------------------------------\n", + " 0 1 2 3\n", + "0 1.887038 0.631577 -0.373963 -0.239185\n", + "1 0.810859 0.454026 -0.796657 0.866273\n", + "2 2.243792 -0.983704 -0.527390 0.155886\n", + "---------------------------------------------\n", + " 0 1 2 3\n", + "0 0.442947 -0.617434 0.401841 -0.990547\n", + "1 0.404872 -0.729196 0.834374 -1.633626\n", + "2 -0.727989 
-0.455244 -0.107535 0.788234\n", + "0 1.887038 0.631577 -0.373963 -0.239185\n", + "1 0.810859 0.454026 -0.796657 0.866273\n", + "2 2.243792 -0.983704 -0.527390 0.155886\n" + ] + } + ], + "source": [ + "df1 = pd.DataFrame(np.random.randn(3, 4))\n", + "df2 = pd.DataFrame(np.random.randn(3, 4))\n", + "\n", + "print(df1)\n", + "print('-'*45)\n", + "print(df2)\n", + "\n", + "df3 = pd.concat([df1, df2], axis=0)\n", + "\n", + "print('-'*45)\n", + "print(df3)" + ] + }, + { + "cell_type": "markdown", + "id": "dff68262-90ba-4e21-9107-1695388d51f9", + "metadata": {}, + "source": [ + "**Concat columns** \n", + "This assumes that the indexes represent IDs of specific things or events" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "c6a7e550-9972-47ea-b271-32a490dcb5ff", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
foobar
01230123
00.442947-0.6174340.401841-0.9905471.8870380.631577-0.373963-0.239185
10.404872-0.7291960.834374-1.6336260.8108590.454026-0.7966570.866273
2-0.727989-0.455244-0.1075350.7882342.243792-0.983704-0.5273900.155886
\n", + "
" + ], + "text/plain": [ + " foo bar \\\n", + " 0 1 2 3 0 1 2 \n", + "0 0.442947 -0.617434 0.401841 -0.990547 1.887038 0.631577 -0.373963 \n", + "1 0.404872 -0.729196 0.834374 -1.633626 0.810859 0.454026 -0.796657 \n", + "2 -0.727989 -0.455244 -0.107535 0.788234 2.243792 -0.983704 -0.527390 \n", + "\n", + " \n", + " 3 \n", + "0 -0.239185 \n", + "1 0.866273 \n", + "2 0.155886 " + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df4 = pd.concat([df1,df2], axis = 1, keys = ['foo', 'bar'])\n", + "\n", + "df4" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "a5f24ba4-0f83-437d-94a6-53167ddff3be", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0123
00.442947-0.6174340.401841-0.990547
10.404872-0.7291960.834374-1.633626
2-0.727989-0.455244-0.1075350.788234
\n", + "
" + ], + "text/plain": [ + " 0 1 2 3\n", + "0 0.442947 -0.617434 0.401841 -0.990547\n", + "1 0.404872 -0.729196 0.834374 -1.633626\n", + "2 -0.727989 -0.455244 -0.107535 0.788234" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df4.foo" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "d954fa94-80b4-41f1-835e-cec68a473599", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0123
01.8870380.631577-0.373963-0.239185
10.8108590.454026-0.7966570.866273
22.243792-0.983704-0.5273900.155886
\n", + "
" + ], + "text/plain": [ + " 0 1 2 3\n", + "0 1.887038 0.631577 -0.373963 -0.239185\n", + "1 0.810859 0.454026 -0.796657 0.866273\n", + "2 2.243792 -0.983704 -0.527390 0.155886" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df4.bar" + ] + }, + { + "cell_type": "markdown", + "id": "f942b10c-0cde-4adb-a1e2-195144c6e169", + "metadata": {}, + "source": [ + "### `merge()`\n", + "\n", + "SQL-style joining of tables (DataFrames)\n", + "\n", + "Important parameters include:\n", + "\n", + "- `how` : type of merge {'left', 'right', 'outer', 'inner', 'cross'}, default ‘inner’\n", + "- `on` : names to join on\n", + " \n", + "[Details](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.merge.html)" + ] + }, + { + "cell_type": "markdown", + "id": "df302e38-6caf-40dd-a2b9-ec2efa00917a", + "metadata": {}, + "source": [ + "**Very useful!**" + ] + }, + { + "cell_type": "markdown", + "id": "dfbdb7ee-aafd-4ff8-bc09-6da066178f15", + "metadata": {}, + "source": [ + "Create two tables, `left` and `right`. Then right join them on `key`. \n", + "Right join means include all records from table on right. \n", + "The `key` is used for matching up the records." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "77899cbc-dc68-411e-8ff2-69d2db87c9ba", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "---left\n", + " key lval\n", + "0 jamie 15\n", + "1 bill 22\n", + "\n", + "---right\n", + " key rval\n", + "0 jamie 4\n", + "1 bill 5\n", + "2 asher 8\n", + "\n", + "---joined\n", + " key lval rval\n", + "0 jamie 15.0 4\n", + "1 bill 22.0 5\n", + "2 asher NaN 8\n" + ] + } + ], + "source": [ + "left = pd.DataFrame({\"key\": [\"jamie\", \"bill\"], \"lval\": [15, 22]})\n", + "right = pd.DataFrame({\"key\": [\"jamie\", \"bill\", \"asher\"], \"rval\": [4, 5, 8]})\n", + "\n", + "joined = pd.merge(left, right, on=\"key\", how=\"right\")\n", + "\n", + "print('---left')\n", + "print(left)\n", + "print('\\n---right')\n", + "print(right)\n", + "print('\\n---joined')\n", + "print(joined)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "956199c0-ce5a-44e2-a0a2-89b33899d33d", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "0eb6b71a-25f1-44f1-a4af-ce6377732756", + "metadata": {}, + "source": [ + "## Summary\n", + "\n", + "* Use **join** if you have shared indexes\n", + "* Use **merge** if you do not have shared indexes\n", + "* Use **concat** to combine based on shared indexes or columns" + ] + }, + { + "cell_type": "markdown", + "id": "08dd64e7-5ef2-43cf-9198-ff63dc38400c", + "metadata": {}, + "source": [ + "## Aggregation\n", + "\n", + "Involves one or more of:\n", + "\n", + "- splitting the data into groups\n", + "- applying a function to each group\n", + "- combining results" + ] + }, + { + "cell_type": "markdown", + "id": "cf1a6bc2-705f-44f9-8497-8a5fc53b948e", + "metadata": {}, + "source": [ + "### `.groupby()`\n", + "\n", + "Compute mean of each column, grouped (separately) by species" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "9fff6ac6-bd68-46af-90d2-cb994becb5f8", + 
"metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_width
species
setosa5.0063.4281.4620.246
versicolor5.9362.7704.2601.326
virginica6.5882.9745.5522.026
\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width\n", + "species \n", + "setosa 5.006 3.428 1.462 0.246\n", + "versicolor 5.936 2.770 4.260 1.326\n", + "virginica 6.588 2.974 5.552 2.026" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "iris_df.groupby(\"species\").mean()" + ] + }, + { + "cell_type": "markdown", + "id": "c1f0f6a4-955a-45c0-bd8d-96948b8f04d4", + "metadata": {}, + "source": [ + "### `pd.pivot_table()`\n", + "\n", + "Apply a function `aggfunc` to selected values grouped by columns\n", + "\n", + "[Details](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.pivot_table.html)" + ] + }, + { + "cell_type": "markdown", + "id": "48857f21-842c-4655-887f-2cb6bf441b19", + "metadata": {}, + "source": [ + "Compute mean sepal length for each species:" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "779c5fbd-fce1-4a41-8f34-1c0642feb70a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
speciessetosaversicolorvirginica
sepal_length5.0065.9366.588
\n", + "
" + ], + "text/plain": [ + "species setosa versicolor virginica\n", + "sepal_length 5.006 5.936 6.588" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.pivot_table(iris_df, values=\"sepal_length\", columns=[\"species\"], aggfunc = np.mean)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "59236d2c-750e-42b4-a123-52a762411615", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/chapters/01-getting_started.html b/chapters/01-getting_started.html index fff861a..1112f35 100644 --- a/chapters/01-getting_started.html +++ b/chapters/01-getting_started.html @@ -208,7 +208,8 @@
diff --git a/chapters/02-python-basics.html b/chapters/02-python-basics.html index b69928b..16d0b12 100644 --- a/chapters/02-python-basics.html +++ b/chapters/02-python-basics.html @@ -208,7 +208,8 @@ diff --git a/chapters/04-python-basics.html b/chapters/04-python-basics.html index 63f9f0d..73e6e86 100644 --- a/chapters/04-python-basics.html +++ b/chapters/04-python-basics.html @@ -208,7 +208,8 @@ diff --git a/chapters/module-1/012-intro_python (copia).html b/chapters/module-1/012-intro_python (copia).html index ab394c7..58a1f31 100644 --- a/chapters/module-1/012-intro_python (copia).html +++ b/chapters/module-1/012-intro_python (copia).html @@ -208,7 +208,8 @@ diff --git a/chapters/module-1/012-intro_python.html b/chapters/module-1/012-intro_python.html index 78aadff..a090768 100644 --- a/chapters/module-1/012-intro_python.html +++ b/chapters/module-1/012-intro_python.html @@ -208,7 +208,8 @@ diff --git a/chapters/module-1/013-intro_R.html b/chapters/module-1/013-intro_R.html index 508d142..6cac5c8 100644 --- a/chapters/module-1/013-intro_R.html +++ b/chapters/module-1/013-intro_R.html @@ -208,7 +208,8 @@ diff --git a/chapters/module-1/Practice.html b/chapters/module-1/Practice.html index 6c88945..775c938 100644 --- a/chapters/module-1/Practice.html +++ b/chapters/module-1/Practice.html @@ -208,7 +208,8 @@ diff --git a/chapters/module-1/about_course.html b/chapters/module-1/about_course.html index 43397f4..cd6f559 100644 --- a/chapters/module-1/about_course.html +++ b/chapters/module-1/about_course.html @@ -208,7 +208,8 @@ diff --git a/chapters/module-1/jupyter_notebooks.html b/chapters/module-1/jupyter_notebooks.html index e185f79..d431345 100644 --- a/chapters/module-1/jupyter_notebooks.html +++ b/chapters/module-1/jupyter_notebooks.html @@ -208,7 +208,8 @@ diff --git a/chapters/module-1/programming.html b/chapters/module-1/programming.html index 529703f..3b639d4 100644 --- a/chapters/module-1/programming.html +++ b/chapters/module-1/programming.html @@ -208,7 
+208,8 @@ diff --git a/chapters/module-1/tech_stack.html b/chapters/module-1/tech_stack.html index 760a6e8..c5c9ac4 100644 --- a/chapters/module-1/tech_stack.html +++ b/chapters/module-1/tech_stack.html @@ -208,7 +208,8 @@ diff --git a/chapters/module-1/your_first_program.html b/chapters/module-1/your_first_program.html index 36776e7..f6545a5 100644 --- a/chapters/module-1/your_first_program.html +++ b/chapters/module-1/your_first_program.html @@ -208,7 +208,8 @@ diff --git a/chapters/module-2/02-cover.html b/chapters/module-2/02-cover.html index 3bf13d1..203b94c 100644 --- a/chapters/module-2/02-cover.html +++ b/chapters/module-2/02-cover.html @@ -208,7 +208,8 @@ diff --git a/chapters/module-2/021-variables.html b/chapters/module-2/021-variables.html index f7667f2..26e9f51 100644 --- a/chapters/module-2/021-variables.html +++ b/chapters/module-2/021-variables.html @@ -208,7 +208,8 @@ diff --git a/chapters/module-2/022-operators.html b/chapters/module-2/022-operators.html index 5000f57..d5171e2 100644 --- a/chapters/module-2/022-operators.html +++ b/chapters/module-2/022-operators.html @@ -208,7 +208,8 @@ diff --git a/chapters/module-2/023-strings.html b/chapters/module-2/023-strings.html index 0e67b77..f9f49b8 100644 --- a/chapters/module-2/023-strings.html +++ b/chapters/module-2/023-strings.html @@ -208,7 +208,8 @@ diff --git a/chapters/module-2/024-structures.html b/chapters/module-2/024-structures.html index abf0c3d..b504c8d 100644 --- a/chapters/module-2/024-structures.html +++ b/chapters/module-2/024-structures.html @@ -208,7 +208,8 @@ diff --git a/chapters/module-2/0241-structures_exercises.html b/chapters/module-2/0241-structures_exercises.html index db9e446..25bf667 100644 --- a/chapters/module-2/0241-structures_exercises.html +++ b/chapters/module-2/0241-structures_exercises.html @@ -208,7 +208,8 @@ diff --git a/chapters/module-2/025-conditional.html b/chapters/module-2/025-conditional.html index 935a5a4..e775456 100644 --- 
a/chapters/module-2/025-conditional.html +++ b/chapters/module-2/025-conditional.html @@ -208,7 +208,8 @@ diff --git a/chapters/module-2/0251-conditional_exercises.html b/chapters/module-2/0251-conditional_exercises.html index 755bd92..cecd267 100644 --- a/chapters/module-2/0251-conditional_exercises.html +++ b/chapters/module-2/0251-conditional_exercises.html @@ -208,7 +208,8 @@ diff --git a/chapters/module-2/026-iterables_and_iterators.html b/chapters/module-2/026-iterables_and_iterators.html index 8bf6ca6..d2ff0f2 100644 --- a/chapters/module-2/026-iterables_and_iterators.html +++ b/chapters/module-2/026-iterables_and_iterators.html @@ -208,7 +208,8 @@ diff --git a/chapters/module-2/0261-functions_exercises.html b/chapters/module-2/0261-functions_exercises.html index 5a85529..7e8de7a 100644 --- a/chapters/module-2/0261-functions_exercises.html +++ b/chapters/module-2/0261-functions_exercises.html @@ -208,7 +208,8 @@ diff --git a/chapters/module-2/027-functions.html b/chapters/module-2/027-functions.html index 30fe71c..e4a69c7 100644 --- a/chapters/module-2/027-functions.html +++ b/chapters/module-2/027-functions.html @@ -208,7 +208,8 @@ diff --git a/chapters/module-3/029-packages.html b/chapters/module-3/029-packages.html index 565163f..0b337c6 100644 --- a/chapters/module-3/029-packages.html +++ b/chapters/module-3/029-packages.html @@ -208,7 +208,8 @@ @@ -634,9 +635,7 @@

Images are Numerical Data
Requirement already satisfied: scikit-image in /home/javi/anaconda3/lib/python3.11/site-packages (0.20.0)
 Requirement already satisfied: matplotlib in /home/javi/anaconda3/lib/python3.11/site-packages (3.7.1)
-
-
-
diff --git a/chapters/module-3/031-errors_and_exceptions_w_sols.html b/chapters/module-3/031-errors_and_exceptions_w_sols.html index 9482d03..7cdf663 100644 --- a/chapters/module-3/031-errors_and_exceptions_w_sols.html +++ b/chapters/module-3/031-errors_and_exceptions_w_sols.html @@ -208,7 +208,8 @@ diff --git a/chapters/module-3/032-classes.html b/chapters/module-3/032-classes.html index 098e8a4..a3f8838 100644 --- a/chapters/module-3/032-classes.html +++ b/chapters/module-3/032-classes.html @@ -208,7 +208,8 @@ diff --git a/chapters/module-3/033-reading_writing_files.html b/chapters/module-3/033-reading_writing_files.html index 0be8ded..256fd39 100644 --- a/chapters/module-3/033-reading_writing_files.html +++ b/chapters/module-3/033-reading_writing_files.html @@ -208,7 +208,8 @@ diff --git a/chapters/module-3/lab-recursion.html b/chapters/module-3/lab-recursion.html index 8eadd90..27e2e68 100644 --- a/chapters/module-3/lab-recursion.html +++ b/chapters/module-3/lab-recursion.html @@ -208,7 +208,8 @@ diff --git a/chapters/module-4/041-numpyI.html b/chapters/module-4/041-numpyI.html index f339712..b620b27 100644 --- a/chapters/module-4/041-numpyI.html +++ b/chapters/module-4/041-numpyI.html @@ -208,7 +208,8 @@ @@ -482,7 +483,7 @@

The ndarray object -
-1.5761378385558922 <class 'float'>
+
-0.24326299558377007 <class 'float'>
 
@@ -496,8 +497,8 @@

The ndarray object -
[[-0.51627652 -0.22615495 -1.60086561]
- [ 0.42263309 -0.57286585 -1.38456337]] <class 'numpy.ndarray'>
+
[[-0.54691547  0.73067779  0.02729858]
+ [-0.28328225  1.11208023  0.74172046]] <class 'numpy.ndarray'>
 
@@ -510,8 +511,8 @@

The ndarray object -
array([[ -5.16276516,  -2.26154948, -16.00865612],
-       [  4.22633085,  -5.72865848, -13.8456337 ]])
+
array([[-5.46915473,  7.30677793,  0.27298579],
+       [-2.83282247, 11.1208023 ,  7.41720455]])
 
@@ -526,10 +527,10 @@

The ndarray object -
[[-1.03255303 -0.4523099  -3.20173122]
- [ 0.84526617 -1.1457317  -2.76912674]]
-[[-1.03255303 -0.4523099  -3.20173122]
- [ 0.84526617 -1.1457317  -2.76912674]]
+
[[-1.09383095  1.46135559  0.05459716]
+ [-0.56656449  2.22416046  1.48344091]]
+[[-1.09383095  1.46135559  0.05459716]
+ [-0.56656449  2.22416046  1.48344091]]
 
@@ -557,9 +558,9 @@

The ndarray object -
[[-0.51627652 -0.22615495]
- [-1.60086561  0.42263309]
- [-0.57286585 -1.38456337]]
+
[[-0.54691547  0.73067779]
+ [ 0.02729858 -0.28328225]
+ [ 1.11208023  0.74172046]]
 (3, 2)
 
@@ -846,11 +847,11 @@

Creating ndarrays - @@ -818,13 +819,13 @@

Boolean slicing
['Bob' 'Joe' 'Will' 'Bob' 'Will' 'Joe' 'Joe']
-[[-1.96348811  0.38917398 -0.55350267 -0.34562291]
- [-0.51174724 -0.45391605 -0.02121473 -0.18177089]
- [-0.21537611 -0.50449883 -1.23998849  0.26507934]
- [-0.05670664 -0.82630603 -1.11246125  1.34743931]
- [-1.29738587  0.37546207  0.34822322 -0.48115035]
- [ 0.99217197  0.39639181  0.3301427   0.02685344]
- [-3.25715276 -0.75724747  2.13559351 -0.61480613]]
+[[ 0.92955365  0.66158729 -0.39141742 -1.20167438]
+ [-0.7099215  -1.47968728  2.44189276  0.29293041]
+ [ 1.64752804 -0.42051231 -0.33339607  0.11959951]
+ [ 0.91276971 -0.75328513  1.43879725  0.21027777]
+ [ 2.36320733 -0.03742102  0.07586576  0.38709055]
+ [ 1.85886751 -0.86425065  1.12457198  0.39906275]
+ [-0.94769757  0.35331011  2.08108815  0.2433657 ]]
 

@@ -863,8 +864,8 @@

Boolean slicing -
array([[-1.96348811,  0.38917398, -0.55350267, -0.34562291],
-       [-0.05670664, -0.82630603, -1.11246125,  1.34743931]])
+
array([[ 0.92955365,  0.66158729, -0.39141742, -1.20167438],
+       [ 0.91276971, -0.75328513,  1.43879725,  0.21027777]])
 
@@ -877,8 +878,8 @@

Boolean slicing -
array([[-0.55350267, -0.34562291],
-       [-1.11246125,  1.34743931]])
+
array([[-0.39141742, -1.20167438],
+       [ 1.43879725,  0.21027777]])
 
@@ -893,11 +894,11 @@

Boolean slicing -
array([[-0.51174724, -0.45391605, -0.02121473, -0.18177089],
-       [-0.21537611, -0.50449883, -1.23998849,  0.26507934],
-       [-1.29738587,  0.37546207,  0.34822322, -0.48115035],
-       [ 0.99217197,  0.39639181,  0.3301427 ,  0.02685344],
-       [-3.25715276, -0.75724747,  2.13559351, -0.61480613]])
+
array([[-0.7099215 , -1.47968728,  2.44189276,  0.29293041],
+       [ 1.64752804, -0.42051231, -0.33339607,  0.11959951],
+       [ 2.36320733, -0.03742102,  0.07586576,  0.38709055],
+       [ 1.85886751, -0.86425065,  1.12457198,  0.39906275],
+       [-0.94769757,  0.35331011,  2.08108815,  0.2433657 ]])
 
@@ -910,11 +911,11 @@

Boolean slicing - @@ -948,12 +949,12 @@

Boolean slicing
array([[ 7.        ,  7.        ,  7.        ,  7.        ],
-       [-0.51174724, -0.45391605, -0.02121473, -0.18177089],
+       [-0.7099215 , -1.47968728,  2.44189276,  0.29293041],
        [ 7.        ,  7.        ,  7.        ,  7.        ],
        [ 7.        ,  7.        ,  7.        ,  7.        ],
        [ 7.        ,  7.        ,  7.        ,  7.        ],
-       [ 0.99217197,  0.39639181,  0.3301427 ,  0.02685344],
-       [-3.25715276, -0.75724747,  2.13559351, -0.61480613]])
+       [ 1.85886751, -0.86425065,  1.12457198,  0.39906275],
+       [-0.94769757,  0.35331011,  2.08108815,  0.2433657 ]])
 

@@ -1305,8 +1306,8 @@

More useful calculations -
0x7f6ddd948f90
-0x7f6ddd91ff30
+
0x7fc546428810
+0x7fc546428d50
 
@@ -1585,7 +1586,7 @@

More useful calculations -
/tmp/ipykernel_105905/1198364157.py:1: RuntimeWarning: invalid value encountered in sqrt
+
/tmp/ipykernel_14437/1198364157.py:1: RuntimeWarning: invalid value encountered in sqrt
   np.sqrt(np.array([4, -3, 16, 9, -5]))
 
@@ -1739,7 +1740,7 @@

Practice exercises

next

-

Introduction to pandas

+

Introduction to Pandas

diff --git a/chapters/module-4/043-PandasI-Introduction.html b/chapters/module-4/043-PandasI-Introduction.html index 58fb7b5..219a546 100644 --- a/chapters/module-4/043-PandasI-Introduction.html +++ b/chapters/module-4/043-PandasI-Introduction.html @@ -32,9 +32,9 @@ - + - + @@ -61,6 +61,7 @@ + @@ -208,6 +209,7 @@
  • NumPy (Part I)
  • NumPy (Part II)
  • Introduction to Pandas
  • +
  • Pandas: Data Exploration
  • @@ -1461,7 +1463,7 @@

    An introduction to some attributes and methods - + +
    +

    next

    +

    Pandas: Data Exploration

    +
    + +

    diff --git a/chapters/module-4/044-PandasII-Exploration_and_Manipulation.html b/chapters/module-4/044-PandasII-Exploration_and_Manipulation-Copy1.html similarity index 75% rename from chapters/module-4/044-PandasII-Exploration_and_Manipulation.html rename to chapters/module-4/044-PandasII-Exploration_and_Manipulation-Copy1.html index ff6d8d3..e419cde 100644 --- a/chapters/module-4/044-PandasII-Exploration_and_Manipulation.html +++ b/chapters/module-4/044-PandasII-Exploration_and_Manipulation-Copy1.html @@ -8,7 +8,7 @@ - PandasII: Exploration and Manipulation — DS-1002 Programming for Data Science + PandasII: Exploration — DS-1002 Programming for Data Science @@ -60,7 +60,7 @@ - + @@ -210,7 +210,8 @@

    @@ -265,7 +266,7 @@ -
  • -

    PandasII: Exploration and Manipulation

    +

    PandasII: Exploration

    @@ -364,23 +365,22 @@

    Contents

  • Summarizing data
  • -
  • Selection and Indexing
  • -

    Select all rows, specific columns

    - -
    -

    Boolean Filtering#

    -

    It’s very common to subset a dataframe based on some condition on the data.

    -

    🔑 Note that even though we are filtering rows, we are not using .loc[] or .iloc[] here.

    -

    Pandas knows what to do if you pass a boolean structure.

    -
    -
    -
    iris_df.sepal_length >= 7.5
    -
    -
    -
    -
    -
    0      False
    -1      False
    -2      False
    -3      False
    -4      False
    -       ...  
    -145    False
    -146    False
    -147    False
    -148    False
    -149    False
    -Name: sepal_length, Length: 150, dtype: bool
    -
    -
    -
    -
    +
      +
    • Using the method drop() to drop one or more columns by specifying axis argument equal 1:

    • +
    -
    iris_df.loc[iris_df.sepal_length >= 7.5,:]
    +
    # Here we drop columns
    +iris_df_drop = iris_df_drop.drop(['sepal_volume', 'species'], axis=1)
    +iris_df_drop
     
    @@ -1903,65 +2148,97 @@

    Boolean Filteringdrop() method you can also drop specific observations by setting axis=0

    -
    iris_df.loc[(iris_df['sepal_length' ]>= 4.5) & (iris_df['sepal_length'] <= 4.7),:]
    +
    # Now a particular observation
    +iris_df_drop = iris_df_drop.drop([0], axis=0)
    +iris_df_drop
     
    @@ -1988,17 +2265,22 @@

    Boolean Filtering -

    Masking#

    -

    Here’s an example of masking using boolean conditions passed to the dataframe selector:

    -

    Here are the values for the feature sepal length:

    +
    +

    Working with the dataframe as a whole#

    +
    +

    iloc[]: Selection by index#

    +

    We can use iloc[] to extract rows and columns using indexes.

    -
    iris_df.sepal_length.values
    +
    # This fetches row 3, and all columns:
    +iris_df.iloc[2]
     
    -
    array([5.1, 4.9, 4.7, 4.6, 5. , 5.4, 4.6, 5. , 4.4, 4.9, 5.4, 4.8, 4.8,
    -       4.3, 5.8, 5.7, 5.4, 5.1, 5.7, 5.1, 5.4, 5.1, 4.6, 5.1, 4.8, 5. ,
    -       5. , 5.2, 5.2, 4.7, 4.8, 5.4, 5.2, 5.5, 4.9, 5. , 5.5, 4.9, 4.4,
    -       5.1, 5. , 4.5, 4.4, 5. , 5.1, 4.8, 5.1, 4.6, 5.3, 5. , 7. , 6.4,
    -       6.9, 5.5, 6.5, 5.7, 6.3, 4.9, 6.6, 5.2, 5. , 5.9, 6. , 6.1, 5.6,
    -       6.7, 5.6, 5.8, 6.2, 5.6, 5.9, 6.1, 6.3, 6.1, 6.4, 6.6, 6.8, 6.7,
    -       6. , 5.7, 5.5, 5.5, 5.8, 6. , 5.4, 6. , 6.7, 6.3, 5.6, 5.5, 5.5,
    -       6.1, 5.8, 5. , 5.6, 5.7, 5.7, 6.2, 5.1, 5.7, 6.3, 5.8, 7.1, 6.3,
    -       6.5, 7.6, 4.9, 7.3, 6.7, 7.2, 6.5, 6.4, 6.8, 5.7, 5.8, 6.4, 6.5,
    -       7.7, 7.7, 6. , 6.9, 5.6, 7.7, 6.3, 6.7, 7.2, 6.2, 6.1, 6.4, 7.2,
    -       7.4, 7.9, 6.4, 6.3, 6.1, 7.7, 6.3, 6.4, 6. , 6.9, 6.7, 6.9, 5.8,
    -       6.8, 6.7, 6.7, 6.3, 6.5, 6.2, 5.9])
    +
    sepal_length         4.7
    +sepal_width          3.2
    +petal_length         1.3
    +petal_width          0.2
    +species           setosa
    +sepal_volume       15.04
    +sepal_volume_2     15.04
    +Name: 2, dtype: object
     
    -

    And here are the boolean values generated by applying a comparison operator to those values:

    -
    mask = iris_df.sepal_length >= 7.5
    -
    -
    -
    -
    -
    -
    -
    mask.values
    -
    -
    -
    -
    -
    array([False, False, False, False, False, False, False, False, False,
    -       False, False, False, False, False, False, False, False, False,
    -       False, False, False, False, False, False, False, False, False,
    -       False, False, False, False, False, False, False, False, False,
    -       False, False, False, False, False, False, False, False, False,
    -       False, False, False, False, False, False, False, False, False,
    -       False, False, False, False, False, False, False, False, False,
    -       False, False, False, False, False, False, False, False, False,
    -       False, False, False, False, False, False, False, False, False,
    -       False, False, False, False, False, False, False, False, False,
    -       False, False, False, False, False, False, False, False, False,
    -       False, False, False, False, False, False,  True, False, False,
    -       False, False, False, False, False, False, False, False, False,
    -        True,  True, False, False, False,  True, False, False, False,
    -       False, False, False, False, False,  True, False, False, False,
    -        True, False, False, False, False, False, False, False, False,
    -       False, False, False, False, False, False])
    -
    -
    -
    -
    -

    The two sets of values have the same shape.

    -

    We can now overlay the logical values over the numeric ones and keep only what is True:

    -
    -
    -
    iris_df.sepal_length[mask].values
    +
    # Similar to
    +iris_df.iloc[2, :]
     
    -
    array([7.6, 7.7, 7.7, 7.7, 7.9, 7.7])
    +
    sepal_length         4.7
    +sepal_width          3.2
    +petal_length         1.3
    +petal_width          0.2
    +species           setosa
    +sepal_volume       15.04
    +sepal_volume_2     15.04
    +Name: 2, dtype: object
     
    -
    -
    -

    Sorting and Ranking#

    -

    .sort_values()

    -

    Sort by values

    -
      -
    • by parameter takes string or list of strings

    • -
    • ascending takes True or False

    • -
    • inplace will save sorted values into the df

    • -
    -

    Details

    +

    fetch rows with indices 1,2 (the right endpoint is exclusive), and all columns.

    -
    iris_df.sort_values(by=['sepal_length','petal_width'])
    +
    iris_df.iloc[1:3]
     
    @@ -2172,109 +2428,39 @@

    Sorting and Ranking -

    .sort_index()#

    -

    Sort by index. Example sorts by descending index

    +

    fetch rows with indices 1,2 and first three columns (positions 0, 1, 2)

    -
    iris_df.sort_index(axis=0, ascending=False)
    +
    iris_df.iloc[1:3, 0:3]
     
    @@ -2300,128 +2486,48 @@

    .sort_index()sepal_length sepal_width petal_length - petal_width - species - 149 - 5.9 - 3.0 - 5.1 - 1.8 - virginica - - - 148 - 6.2 - 3.4 - 5.4 - 2.3 - virginica - - - 147 - 6.5 - 3.0 - 5.2 - 2.0 - virginica - - - 146 - 6.3 - 2.5 - 5.0 - 1.9 - virginica - - - 145 - 6.7 + 1 + 4.9 3.0 - 5.2 - 2.3 - virginica - - - ... - ... - ... - ... - ... - ... - - - 4 - 5.0 - 3.6 1.4 - 0.2 - setosa - - - 3 - 4.6 - 3.1 - 1.5 - 0.2 - setosa 2 4.7 3.2 1.3 - 0.2 - setosa - - - 1 - 4.9 - 3.0 - 1.4 - 0.2 - setosa - - - 0 - 5.1 - 3.5 - 1.4 - 0.2 - setosa -

    150 rows × 5 columns

    -
    -
    -

    Dealing with Missing Data#

    -

    Pandas primarily uses the value np.nan from NumPy to represent missing data.

    +

    You can apply slices to column names too. You don’t need .iloc[] here.

    -
    import numpy as np
    +
    iris_df.columns[0:3]
     
    -
    -
    -
    -
    df_miss = pd.DataFrame({
    -    'x':[2, np.nan, 1], 
    -    'y':[np.nan, np.nan, 6]}
    -)
    +
    +
    Index(['sepal_length', 'sepal_width', 'petal_length'], dtype='object')
     
    +
    +
    +

    loc[]: Selection by label#

    +

    We can select by row and column labels using .loc[].

    +

    Here we ask for rows with labels (indexes) 1-3, and it gives exactly that,
    +whereas .iloc[1:3] returned only rows with indices 1,2.

    +

    Author note: This is by far the more useful of the two in my experience.

    -

    -
    -

    .dropna()#

    -

    This will drop all rows with missing data in any column.

    -

    Details

    +

    Subset on columns with column name (as a string) or list of strings

    -
    df_drop_all = df_miss.dropna()
    -df_drop_all
    +
    iris_df.loc[1:3, ['sepal_length','petal_width']]
     
    @@ -2499,25 +2620,34 @@

    .dropna() - x - y + sepal_length + petal_width + + 1 + 4.9 + 0.2 + 2 - 1.0 - 6.0 + 4.7 + 0.2 + + + 3 + 4.6 + 0.2

    -

    The subset parameter takes a list of column names to specify which columns are checked for missing values.

    +

    Select all rows, specific columns

    -
    df_drop_x = df_miss.dropna(subset=['x'])
    -df_drop_x
    +
    iris_df.loc[:, ['sepal_length','petal_width']]
     
    @@ -2540,41 +2670,102 @@

    .dropna() - x - y + sepal_length + petal_width 0 - 2.0 - NaN + 5.1 + 0.2 + + + 1 + 4.9 + 0.2 2 - 1.0 - 6.0 + 4.7 + 0.2 + + + 3 + 4.6 + 0.2 + + + 4 + 5.0 + 0.2 + + + ... + ... + ... + + + 145 + 6.7 + 2.3 + + + 146 + 6.3 + 1.9 + + + 147 + 6.5 + 2.0 + + + 148 + 6.2 + 2.3 + + + 149 + 5.9 + 1.8 +

    150 rows × 2 columns

    -
    -

    .fillna()#

    -

    This will replace missing values with whatever you set it to, e.g. \(0\)s.

    -

    Details

    -

    We can pass the results of an operation – for example, to perform simple imputation, we can replace missing values in each column with the median value of the respective column:

    +
    +

    Boolean Filtering#

    +

    It’s very common to subset a dataframe based on some condition on the data.

    +

    Pandas knows what to do if you pass a boolean structure.

    -
    df_filled = df_miss.fillna(df_miss.median())
    +
    iris_df.sepal_length >= 7.5
    +
    +
    +
    +
    +
    0      False
    +1      False
    +2      False
    +3      False
    +4      False
    +       ...  
    +145    False
    +146    False
    +147    False
    +148    False
    +149    False
    +Name: sepal_length, Length: 150, dtype: bool
     
    -
    df_filled
    +
    iris_df.loc[iris_df.sepal_length >= 7.5,:]
     
    @@ -2597,165 +2788,284 @@

    .fillna() - x - y + sepal_length + sepal_width + petal_length + petal_width + species + sepal_volume + sepal_volume_2 - 0 + 105 + 7.6 + 3.0 + 6.6 + 2.1 + virginica + 22.80 + 22.80 + + + 117 + 7.7 + 3.8 + 6.7 + 2.2 + virginica + 29.26 + 29.26 + + + 118 + 7.7 + 2.6 + 6.9 + 2.3 + virginica + 20.02 + 20.02 + + + 122 + 7.7 + 2.8 + 6.7 2.0 - 6.0 + virginica + 21.56 + 21.56 - 1 - 1.5 - 6.0 + 131 + 7.9 + 3.8 + 6.4 + 2.0 + virginica + 30.02 + 30.02 - 2 - 1.0 - 6.0 + 135 + 7.7 + 3.0 + 6.1 + 2.3 + virginica + 23.10 + 23.10

    -
    -
    -

    Column selection, addition, deletion#

    -
    -

    Selection#

    -

    Use bracket notation or dot notation.

    -
      -
    • bracket notation: variable name must be a string

    • -
    -
    df['y'], type(df['y'])
    +
    iris_df.loc[(iris_df['sepal_length' ]>= 4.5) & (iris_df['sepal_length'] <= 4.7),:]
     
    -
    ---------------------------------------------------------------------------
    -NameError                                 Traceback (most recent call last)
    -Cell In[45], line 1
    -----> 1 df['y'], type(df['y'])
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    sepal_lengthsepal_widthpetal_lengthpetal_widthspeciessepal_volumesepal_volume_2
    24.73.21.30.2setosa15.0415.04
    34.63.11.50.2setosa14.2614.26
    64.63.41.40.3setosa15.6415.64
    224.63.61.00.2setosa16.5616.56
    294.73.21.60.2setosa15.0415.04
    414.52.31.30.3setosa10.3510.35
    474.63.21.40.2setosa14.7214.72
    +
    +
    +
    +
    +
    +

    Masking#

    +

    Here’s an example of masking using boolean conditions passed to the dataframe selector:

    +

    Here are the values for the feature sepal length:

    +
    +
    +
    iris_df.sepal_length.values
     
    +
    +
    array([5.1, 4.9, 4.7, 4.6, 5. , 5.4, 4.6, 5. , 4.4, 4.9, 5.4, 4.8, 4.8,
    +       4.3, 5.8, 5.7, 5.4, 5.1, 5.7, 5.1, 5.4, 5.1, 4.6, 5.1, 4.8, 5. ,
    +       5. , 5.2, 5.2, 4.7, 4.8, 5.4, 5.2, 5.5, 4.9, 5. , 5.5, 4.9, 4.4,
    +       5.1, 5. , 4.5, 4.4, 5. , 5.1, 4.8, 5.1, 4.6, 5.3, 5. , 7. , 6.4,
    +       6.9, 5.5, 6.5, 5.7, 6.3, 4.9, 6.6, 5.2, 5. , 5.9, 6. , 6.1, 5.6,
    +       6.7, 5.6, 5.8, 6.2, 5.6, 5.9, 6.1, 6.3, 6.1, 6.4, 6.6, 6.8, 6.7,
    +       6. , 5.7, 5.5, 5.5, 5.8, 6. , 5.4, 6. , 6.7, 6.3, 5.6, 5.5, 5.5,
    +       6.1, 5.8, 5. , 5.6, 5.7, 5.7, 6.2, 5.1, 5.7, 6.3, 5.8, 7.1, 6.3,
    +       6.5, 7.6, 4.9, 7.3, 6.7, 7.2, 6.5, 6.4, 6.8, 5.7, 5.8, 6.4, 6.5,
    +       7.7, 7.7, 6. , 6.9, 5.6, 7.7, 6.3, 6.7, 7.2, 6.2, 6.1, 6.4, 7.2,
    +       7.4, 7.9, 6.4, 6.3, 6.1, 7.7, 6.3, 6.4, 6. , 6.9, 6.7, 6.9, 5.8,
    +       6.8, 6.7, 6.7, 6.3, 6.5, 6.2, 5.9])
    +
    +
    +
    +

    And here are the boolean values generated by applying a comparison operator to those values:

    -
    # double bracket gives you the selected column as new dataframe
    -df[['x']], type(df[['x']])
    +
    mask = iris_df.sepal_length >= 7.5
    +
    +
    +
    +
    +
    +
    +
    mask.values
     
    -
    (      x
    - 0     0
    - 1     2
    - obs3  1
    - 3     5,
    - pandas.core.frame.DataFrame)
    +
    array([False, False, False, False, False, False, False, False, False,
    +       False, False, False, False, False, False, False, False, False,
    +       False, False, False, False, False, False, False, False, False,
    +       False, False, False, False, False, False, False, False, False,
    +       False, False, False, False, False, False, False, False, False,
    +       False, False, False, False, False, False, False, False, False,
    +       False, False, False, False, False, False, False, False, False,
    +       False, False, False, False, False, False, False, False, False,
    +       False, False, False, False, False, False, False, False, False,
    +       False, False, False, False, False, False, False, False, False,
    +       False, False, False, False, False, False, False, False, False,
    +       False, False, False, False, False, False,  True, False, False,
    +       False, False, False, False, False, False, False, False, False,
    +        True,  True, False, False, False,  True, False, False, False,
    +       False, False, False, False, False,  True, False, False, False,
    +        True, False, False, False, False, False, False, False, False,
    +       False, False, False, False, False, False])
     
    +

    The two sets of values have the same shape.

    +

    We can now overlay the logical values over the numeric ones and keep only what is True:

    -
    df[['y', 'x']]
    +
    iris_df.sepal_length[mask].values
    +
    +
    +
    +
    +
    array([7.6, 7.7, 7.7, 7.7, 7.9, 7.7])
    +
    +
    +
    +
    +
    +
    +

    Sorting#

    +
      +
    • sort_values(): Sorts dataframe by values. You can customize this sorting with the following parameters:

      +
        +
      • by parameter takes string or list of strings

      • +
      • ascending takes True or False

      • +
      • inplace will save sorted values into the df

      • +
      +
    • +
    +

    More details

    +

    + +
    +
    +
    iris_df.sort_values(by=['sepal_length','petal_width'], ascending=False)
    +
    +
    +
    +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    sepal_lengthsepal_widthpetal_lengthpetal_widthspeciessepal_volumesepal_volume_2
    1317.93.86.42.0virginica30.0230.02
    1187.72.66.92.3virginica20.0220.02
    1357.73.06.12.3virginica23.1023.10
    1177.73.86.72.2virginica29.2629.26
    1227.72.86.72.0virginica21.5621.56
    ........................
    414.52.31.30.3setosa10.3510.35
    84.42.91.40.2setosa12.7612.76
    384.43.01.30.2setosa13.2013.20
    424.43.21.30.2setosa14.0814.08
    134.33.01.10.1setosa12.9012.90
    +

    150 rows × 7 columns

    +
    +
    +
      +
    • sort_index(): Sorts dataframe by index. You can customize this sorting with the following parameters:

      +
        +
      • axis along which to sort. The value 0 identifies the rows, and 1 identifies the columns.

      • +
      • ascending takes True or False

      • +
      • inplace will save sorted values into the df

      • +
      +
    • +
    +

    More details

    +
    +
    +
    iris_df.sort_index(axis=0)
    +
    +
    +
    +
    +
    + + + + + + + + + + + + - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - + + + + + + + + - - - + + + + + + + + - - - + + + + + + + +
    sepal_lengthsepal_widthpetal_lengthpetal_widthspeciessepal_volumesepal_volume_2
    0105.13.51.40.2setosa17.8517.85
    14.93.01.40.2setosa14.7014.70
    24.73.21.30.2setosa15.0415.04
    34.63.11.50.2setosa14.2614.26
    45.03.61.40.2setosa18.0018.00
    ........................
    1456.73.05.22.3virginica20.1020.10
    1466.32.55.01.9virginica15.7515.75
    1121476.53.05.22.0virginica19.5019.50
    obs3011486.23.45.42.3virginica21.0821.08
    3051495.93.05.11.8virginica17.7017.70
    +

    150 rows × 7 columns

    - -
    -

    Addition#

    -

    It is typical to create a new column from existing columns.

    -

    In this example, a new column (or field) is created by summing x and y:

    -
    -
    -
    df['x_plus_y'] = df.x + df.y
    -
    -
    -
    -
    - -

    Note that:

    -
      -
    • The left side has form: DataFrame name, bracket notation, new column name

    • -
    • The assignment operator = is used

    • -
    • The right side contains an expression; here, two df columns are summed

    • -
    -

    Bracket notation also works on the fields, but it’s more typing:

    -
    -
    -
    df['x_plus_y'] = df['x'] + df['y']
    -df
    -
    -
    -
    -
    -
    - - - - - - - - - + + + + + + + + - - - - - - - + + + + + + + + - - - - - + + + + + + + + - - - - - + + + + + + + + - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    xyis_labelx_plus_y1466.32.55.01.9virginica15.7515.75
    001True11456.73.05.22.3virginica20.1020.10
    121False3........................
    obs310False145.03.61.40.2setosa18.0018.00
    350False54.63.11.50.2setosa14.2614.26
    24.73.21.30.2setosa15.0415.04
    14.93.01.40.2setosa14.7014.70
    05.13.51.40.2setosa17.8517.85
    +

    150 rows × 7 columns

    -

    The bracket notation must be used when assigning to a new column. This will break:

    +
    +
    +

    Dealing with Missing Data#

    +

    Pandas primarily uses the value np.nan from NumPy to represent missing data.

    -
    df.'x_plus_y' = df.x + df.y
    -
    -
    -
    -
    -
      Cell In[153], line 1
    -    df.'x_plus_y' = df.x + df.y
    -       ^
    -SyntaxError: invalid syntax
    +
    import numpy as np
     
    -
    -
    -

    Removing Columns#

    -
      -
    • Using the reserved keyword del to drop a DataFrame or single columns from the dataframe:

    • -
    +
    +

    Replace missing values#

    +

    We can use fillna() to replace missing data with whatever value you like, e.g. \(0\)s.

    +

    Details

    +

    We can pass the results of an operation – for example, to perform simple imputation, we can replace missing values in each column with the median value of the respective column:

    -
    # Now a particular observation
    -df_drop = df_drop.drop([0], axis=0)
    -df_drop
    +
    df_filled = df_miss.fillna(df_miss.median())
    +
    +
    +
    +
    +
    +
    +
    df_filled
     
    @@ -3187,21 +3871,25 @@

    Removing Columns + x y - 1 - 1 + 0 + 2.0 + 6.0 - obs3 - 0 + 1 + 1.5 + 6.0 - 3 - 0 + 2 + 1.0 + 6.0 @@ -3262,23 +3950,22 @@

    Removing ColumnsSummarizing data -
  • Selection and Indexing
  • diff --git a/chapters/module-4/Untitled.html b/chapters/module-4/Untitled.html index 66d0a21..c6e16b2 100644 --- a/chapters/module-4/Untitled.html +++ b/chapters/module-4/Untitled.html @@ -208,7 +208,8 @@
    diff --git a/genindex.html b/genindex.html index 1de5a9f..dc75e9f 100644 --- a/genindex.html +++ b/genindex.html @@ -208,6 +208,7 @@
  • NumPy (Part I)
  • NumPy (Part II)
  • Introduction to Pandas
  • +
  • Pandas: Data Exploration
  • diff --git a/index.html b/index.html index ff1cd20..d9885f1 100644 --- a/index.html +++ b/index.html @@ -58,6 +58,8 @@ + + @@ -210,6 +212,7 @@
  • NumPy (Part I)
  • NumPy (Part II)
  • Introduction to Pandas
  • +
  • Pandas: Data Exploration
  • diff --git a/objects.inv b/objects.inv index 8747087..6939f6f 100644 Binary files a/objects.inv and b/objects.inv differ diff --git a/reports/chapters/module-4/044-PandasII-Exploration_and_Manipulation.err.log b/reports/chapters/module-4/044-PandasII-Exploration_and_Manipulation-Copy1.err.log similarity index 70% rename from reports/chapters/module-4/044-PandasII-Exploration_and_Manipulation.err.log rename to reports/chapters/module-4/044-PandasII-Exploration_and_Manipulation-Copy1.err.log index 7edfc45..b0f92a5 100644 --- a/reports/chapters/module-4/044-PandasII-Exploration_and_Manipulation.err.log +++ b/reports/chapters/module-4/044-PandasII-Exploration_and_Manipulation-Copy1.err.log @@ -13,14 +13,13 @@ Traceback (most recent call last): raise CellExecutionError.from_cell_and_msg(cell, exec_reply_content) nbclient.exceptions.CellExecutionError: An error occurred while executing the following cell: ------------------ -df['y'], type(df['y']) +iris_df.'sepal_volume_3' = iris_df.sepal_length + iris_df.sepal_width ------------------ ---------------------------------------------------------------------------- -NameError Traceback (most recent call last) -Cell In[45], line 1 -----> 1 df['y'], type(df['y']) + Cell In[28], line 1 + iris_df.'sepal_volume_3' = iris_df.sepal_length + iris_df.sepal_width + ^ +SyntaxError: invalid syntax -NameError: name 'df' is not defined diff --git a/reports/chapters/module-4/044-PandasII-exploration.err.log b/reports/chapters/module-4/044-PandasII-exploration.err.log new file mode 100644 index 0000000..15c88b3 --- /dev/null +++ b/reports/chapters/module-4/044-PandasII-exploration.err.log @@ -0,0 +1,99 @@ +Traceback (most recent call last): + File "/home/javi/anaconda3/lib/python3.11/site-packages/myst_nb/core/execute/inline.py", line 120, in code_cell_outputs + self._client.execute_cell( + File "/home/javi/anaconda3/lib/python3.11/site-packages/jupyter_core/utils/__init__.py", line 165, in wrapped + return 
loop.run_until_complete(inner) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/javi/anaconda3/lib/python3.11/asyncio/base_events.py", line 654, in run_until_complete + return future.result() + ^^^^^^^^^^^^^^^ + File "/home/javi/anaconda3/lib/python3.11/site-packages/nbclient/client.py", line 1058, in async_execute_cell + await self._check_raise_for_error(cell, cell_index, exec_reply) + File "/home/javi/anaconda3/lib/python3.11/site-packages/nbclient/client.py", line 914, in _check_raise_for_error + raise CellExecutionError.from_cell_and_msg(cell, exec_reply_content) +nbclient.exceptions.CellExecutionError: An error occurred while executing the following cell: +------------------ +# But this will give an error +iris_copy.loc[1:3,:] +------------------ + + +--------------------------------------------------------------------------- +TypeError Traceback (most recent call last) +Cell In[41], line 2 + 1 # But this will give an error +----> 2 iris_copy.loc[1:3,:] + +File ~/anaconda3/lib/python3.11/site-packages/pandas/core/indexing.py:1067, in _LocationIndexer.__getitem__(self, key) + 1065 if self._is_scalar_access(key): + 1066 return self.obj._get_value(*key, takeable=self._takeable) +-> 1067 return self._getitem_tuple(key) + 1068 else: + 1069 # we by definition only have the 0th axis + 1070 axis = self.axis or 0 + +File ~/anaconda3/lib/python3.11/site-packages/pandas/core/indexing.py:1256, in _LocIndexer._getitem_tuple(self, tup) + 1253 if self._multi_take_opportunity(tup): + 1254 return self._multi_take(tup) +-> 1256 return self._getitem_tuple_same_dim(tup) + +File ~/anaconda3/lib/python3.11/site-packages/pandas/core/indexing.py:924, in _LocationIndexer._getitem_tuple_same_dim(self, tup) + 921 if com.is_null_slice(key): + 922 continue +--> 924 retval = getattr(retval, self.name)._getitem_axis(key, axis=i) + 925 # We should never have retval.ndim < self.ndim, as that should + 926 # be handled by the _getitem_lowerdim call above. 
+ 927 assert retval.ndim == self.ndim + +File ~/anaconda3/lib/python3.11/site-packages/pandas/core/indexing.py:1290, in _LocIndexer._getitem_axis(self, key, axis) + 1288 if isinstance(key, slice): + 1289 self._validate_key(key, axis) +-> 1290 return self._get_slice_axis(key, axis=axis) + 1291 elif com.is_bool_indexer(key): + 1292 return self._getbool_axis(key, axis=axis) + +File ~/anaconda3/lib/python3.11/site-packages/pandas/core/indexing.py:1324, in _LocIndexer._get_slice_axis(self, slice_obj, axis) + 1321 return obj.copy(deep=False) + 1323 labels = obj._get_axis(axis) +-> 1324 indexer = labels.slice_indexer(slice_obj.start, slice_obj.stop, slice_obj.step) + 1326 if isinstance(indexer, slice): + 1327 return self.obj._slice(indexer, axis=axis) + +File ~/anaconda3/lib/python3.11/site-packages/pandas/core/indexes/base.py:6559, in Index.slice_indexer(self, start, end, step, kind) + 6516 """ + 6517 Compute the slice indexer for input labels and step. + 6518 + (...) + 6555 slice(1, 3, None) + 6556 """ + 6557 self._deprecated_arg(kind, "kind", "slice_indexer") +-> 6559 start_slice, end_slice = self.slice_locs(start, end, step=step) + 6561 # return a slice + 6562 if not is_scalar(start_slice): + +File ~/anaconda3/lib/python3.11/site-packages/pandas/core/indexes/base.py:6767, in Index.slice_locs(self, start, end, step, kind) + 6765 start_slice = None + 6766 if start is not None: +-> 6767 start_slice = self.get_slice_bound(start, "left") + 6768 if start_slice is None: + 6769 start_slice = 0 + +File ~/anaconda3/lib/python3.11/site-packages/pandas/core/indexes/base.py:6676, in Index.get_slice_bound(self, label, side, kind) + 6672 original_label = label + 6674 # For datetime indices label may be a string that has to be converted + 6675 # to datetime boundary according to its resolution. 
+-> 6676 label = self._maybe_cast_slice_bound(label, side) + 6678 # we need to look up the label + 6679 try: + +File ~/anaconda3/lib/python3.11/site-packages/pandas/core/indexes/base.py:6623, in Index._maybe_cast_slice_bound(self, label, side, kind) + 6618 # We are a plain index here (sub-class override this method if they + 6619 # wish to have special treatment for floats/ints, e.g. Float64Index and + 6620 # datetimelike Indexes + 6621 # reject them, if index does not contain label + 6622 if (is_float(label) or is_integer(label)) and label not in self: +-> 6623 raise self._invalid_indexer("slice", label) + 6625 return label + +TypeError: cannot do slice indexing on Index with these indexers [1] of type int + diff --git a/reports/chapters/module-4/045-PandasIII-manipulation.err.log b/reports/chapters/module-4/045-PandasIII-manipulation.err.log new file mode 100644 index 0000000..5212c06 --- /dev/null +++ b/reports/chapters/module-4/045-PandasIII-manipulation.err.log @@ -0,0 +1,25 @@ +Traceback (most recent call last): + File "/home/javi/anaconda3/lib/python3.11/site-packages/myst_nb/core/execute/inline.py", line 120, in code_cell_outputs + self._client.execute_cell( + File "/home/javi/anaconda3/lib/python3.11/site-packages/jupyter_core/utils/__init__.py", line 165, in wrapped + return loop.run_until_complete(inner) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/javi/anaconda3/lib/python3.11/asyncio/base_events.py", line 654, in run_until_complete + return future.result() + ^^^^^^^^^^^^^^^ + File "/home/javi/anaconda3/lib/python3.11/site-packages/nbclient/client.py", line 1058, in async_execute_cell + await self._check_raise_for_error(cell, cell_index, exec_reply) + File "/home/javi/anaconda3/lib/python3.11/site-packages/nbclient/client.py", line 914, in _check_raise_for_error + raise CellExecutionError.from_cell_and_msg(cell, exec_reply_content) +nbclient.exceptions.CellExecutionError: An error occurred while executing the following cell: +------------------ 
+iris_df.'sepal_volume_3' = iris_df.sepal_length + iris_df.sepal_width +------------------ + + + Cell In[12], line 1 + iris_df.'sepal_volume_3' = iris_df.sepal_length + iris_df.sepal_width + ^ +SyntaxError: invalid syntax + + diff --git a/search.html b/search.html index 2a35b9b..2491979 100644 --- a/search.html +++ b/search.html @@ -210,6 +210,7 @@
  • NumPy (Part I)
  • NumPy (Part II)
  • Introduction to Pandas
  • +
  • Pandas: Data Exploration
  • diff --git a/searchindex.js b/searchindex.js index ba9bd6c..3484bcb 100644 --- a/searchindex.js +++ b/searchindex.js @@ -1 +1 @@ -Search.setIndex({"alltitles": {"": [[9, "Practice1"], [12, "Practice2"], [14, "variables1"], [14, "variables2"], [14, "variables3"], [15, "operators1"], [15, "operators2"], [16, "strings1"], [16, "strings2"], [17, "structures1"], [17, "structures2"], [17, "structures3"], [17, "structures4"], [17, "structures5"], [17, "structures6"], [19, "conditional1"], [19, "conditional2"], [19, "conditional3"], [21, "iterables1"], [21, "iterables2"], [23, "functions1"], [23, "functions2"], [26, "exceptions1"], [26, "exceptions2"], [28, "classes1"], [28, "classes2"], [29, "files1"], [29, "files2"], [31, "numpy1"], [31, "numpy2"], [31, "numpy3"], [31, "numpy4"], [32, "numpy6"], [32, "numpy7"], [32, "numpy8"], [33, "pandas1"], [33, "pandas2"]], ".dropna()": [[34, "dropna"]], ".fillna()": [[34, "fillna"]], ".sort_index()": [[34, "sort-index"]], "Adding a new entry": [[17, "adding-a-new-entry"]], "Adding finally and else blocks": [[26, "adding-finally-and-else-blocks"], [27, "adding-finally-and-else-blocks"]], "Addition": [[34, "addition"]], "An introduction to some attributes and methods": [[33, "an-introduction-to-some-attributes-and-methods"]], "Arguments": [[2, "arguments"]], "Arguments and parameters": [[23, "arguments-and-parameters"]], "Arithmetic Operators": [[15, "arithmetic-operators"]], "Attributes and methods": [[28, "attributes-and-methods"]], "Axis Labels": [[33, "axis-labels"]], "Basic Array Manipulations + Calculations": [[35, "basic-array-manipulations-calculations"]], "Basic NumPy Array Functionality": [[0, "basic-numpy-array-functionality"], [24, "basic-numpy-array-functionality"]], "Basic calculations": [[32, "basic-calculations"]], "Boolean Filtering": [[34, "boolean-filtering"]], "Boolean slicing": [[32, "boolean-slicing"]], "Brief Introduction to Modules and Packages": [[31, "brief-introduction-to-modules-and-packages"]], "Brief 
introduction to programming languages": [[10, "brief-introduction-to-programming-languages"]], "Built-in functions": [[23, "built-in-functions"]], "By Index": [[34, "by-index"]], "By label": [[34, "by-label"]], "CONCEPTS": [[35, "concepts"]], "Canvas": [[11, "canvas"]], "Cell menu": [[9, "cell-menu"]], "Classes and Objects": [[28, "classes-and-objects"]], "Column Selection": [[34, "column-selection"]], "Column selection, addition, deletion": [[34, "column-selection-addition-deletion"]], "Command mode": [[9, "command-mode"]], "Comparison Operators": [[3, "comparison-operators"], [15, "comparison-operators"]], "Compiled vs Interpreted languages": [[10, "compiled-vs-interpreted-languages"]], "Components": [[9, "components"]], "Comprehension": [[21, "comprehension"]], "Conditions": [[19, "conditions"]], "Constructing": [[17, "constructing"]], "Constructing a dictionary": [[17, "constructing-a-dictionary"]], "Constructing a list": [[17, "constructing-a-list"]], "Control Structures": [[19, "control-structures"]], "Converting Data Types": [[3, "converting-data-types"], [14, "converting-data-types"]], "Creating and calling a function": [[23, "creating-and-calling-a-function"]], "Creating ndarrays": [[31, "creating-ndarrays"]], "Data Frames": [[33, "data-frames"]], "Data Inspection": [[34, "data-inspection"]], "Data Structures": [[17, "data-structures"]], "Data Structures Exercises": [[18, "data-structures-exercises"]], "Data Types": [[3, "data-types"], [31, "data-types"], [35, "data-types"]], "Dealing with Missing Data": [[34, "dealing-with-missing-data"]], "Default Arguments": [[23, "default-arguments"]], "Defining a class": [[28, "defining-a-class"]], "Dictionaries": [[17, "dictionaries"], [21, "dictionaries"]], "Docstring": [[23, "docstring"]], "Edit mode": [[9, "edit-mode"]], "Errors and Exceptions": [[26, "errors-and-exceptions"], [27, "errors-and-exceptions"]], "Examples": [[21, "examples"], [21, "id2"]], "Exploring its structure": [[34, "exploring-its-structure"]], 
"Expressions": [[3, "expressions"], [15, "expressions"]], "Fancy Indexing": [[32, "fancy-indexing"]], "File Modes": [[29, "file-modes"]], "Functions": [[23, "functions"]], "General Theory": [[21, "general-theory"]], "Getting started": [[1, "getting-started"]], "Guidelines when passing arguments:": [[23, "guidelines-when-passing-arguments"]], "Handling runtime errors with try and except": [[26, "handling-runtime-errors-with-try-and-except"], [27, "handling-runtime-errors-with-try-and-except"]], "How You Will Know You Are Learning": [[8, "how-you-will-know-you-are-learning"]], "How to create a dataframe": [[33, "how-to-create-a-dataframe"]], "How to create a series": [[33, "how-to-create-a-series"]], "How will you succeed in this course?": [[8, "how-will-you-succeed-in-this-course"]], "Identity Operators": [[15, "identity-operators"]], "Images are Numerical Data": [[0, "images-are-numerical-data"], [24, "images-are-numerical-data"]], "Immutability": [[16, "immutability"]], "Import Aliases": [[0, "import-aliases"], [24, "import-aliases"], [31, "import-aliases"]], "Importing": [[0, "importing"], [24, "importing"], [31, "importing"]], "Importing pandas": [[33, "importing-pandas"]], "Indentation": [[19, "indentation"]], "Indexing": [[2, "indexing"], [16, "indexing"], [17, "indexing"]], "Indexing and Slicing": [[32, "indexing-and-slicing"]], "Inheritance": [[28, "inheritance"]], "Initialize classes": [[28, "initialize-classes"]], "Inserting + Dropping Array Values": [[32, "inserting-dropping-array-values"], [35, "inserting-dropping-array-values"]], "Installing": [[0, "installing"], [24, "installing"], [31, "installing"]], "Installing & Importing Packages": [[0, "installing-importing-packages"], [24, "installing-importing-packages"]], "Introduction": [[8, "introduction"], [9, "introduction"], [19, "introduction"], [21, "introduction"], [21, "id1"], [23, "introduction"], [26, "introduction"], [27, "introduction"], [28, "introduction"], [29, "introduction"]], "Introduction 
to Pandas": [[33, "introduction-to-pandas"]], "Introduction to object-oriented programming (OOP)": [[28, "introduction-to-object-oriented-programming-oop"]], "Iterables": [[21, "iterables"]], "Iterables and Iterators": [[21, "iterables-and-iterators"]], "Iterators": [[21, "iterators"]], "Jupyter Notebooks": [[9, "jupyter-notebooks"]], "JupyterLab": [[11, "jupyterlab"]], "Keyboard Navigation": [[9, "keyboard-navigation"]], "Learning Objectives": [[8, "learning-objectives"]], "Lists": [[17, "lists"], [21, "lists"]], "Local versus Global Variables": [[23, "local-versus-global-variables"]], "Logical Operators": [[3, "logical-operators"], [15, "logical-operators"]], "Loops": [[19, "loops"]], "Masking": [[34, "masking"]], "Metadata": [[2, "metadata"], [3, "metadata"]], "Modal editor": [[9, "modal-editor"]], "More Resources": [[9, "more-resources"]], "More useful calculations": [[32, "more-useful-calculations"]], "Mouse navigation": [[9, "mouse-navigation"]], "Mutability": [[17, "mutability"]], "Nested Loops": [[21, "nested-loops"]], "Notebook Basics": [[9, "notebook-basics"]], "Notebook documents": [[9, "notebook-documents"]], "Notes": [[17, "notes"]], "NumPy (Part I)": [[31, "numpy-part-i"]], "NumPy (Part II)": [[32, "numpy-part-ii"]], "Numeric Operators": [[3, "numeric-operators"]], "OBJECTIVES": [[35, "objectives"]], "Object ID": [[3, "object-id"]], "Open OnDemand": [[11, "open-ondemand"]], "Opening a File": [[29, "opening-a-file"]], "Operations on lists": [[17, "operations-on-lists"]], "Operator in: check membership": [[16, "operator-in-check-membership"]], "Operators": [[3, "operators"], [15, "operators"]], "Operators and Expressions": [[15, "operators-and-expressions"]], "PREREQUISITES": [[35, "prerequisites"]], "Packing": [[23, "packing"]], "Packing and Unpacking arguments": [[23, "packing-and-unpacking-arguments"]], "PandasII: Exploration and Manipulation": [[34, "pandasii-exploration-and-manipulation"]], "Practice": [[7, "practice"]], "Practice excersises": 
[[26, "practice-excersises"], [27, "practice-excersises"], [28, "practice-excersises"], [29, "practice-excersises"]], "Practice exercise": [[17, "practice-exercise"], [17, "id1"], [17, "id2"], [17, "id3"], [17, "id4"], [17, "id6"]], "Practice exercises": [[14, "practice-exercises"], [15, "practice-exercises"], [16, "practice-exercises"], [19, "practice-exercises"], [21, "practice-exercises"], [23, "practice-exercises"], [31, "practice-exercises"], [32, "practice-exercises"], [33, "practice-exercises"]], "Programming and Data Science": [[10, "programming-and-data-science"]], "Programming paradigms": [[10, "programming-paradigms"]], "Properties overview": [[33, "properties-overview"]], "Python (Beginner)": [[13, "python-beginner"]], "Python (Intermediate)": [[25, "python-intermediate"]], "Raising exceptions": [[26, "raising-exceptions"], [27, "raising-exceptions"]], "Ranges": [[17, "ranges"], [21, "ranges"]], "Reading and Writing Files": [[29, "reading-and-writing-files"]], "Reading from a File": [[29, "reading-from-a-file"]], "Removing Columns": [[34, "removing-columns"]], "Reserved names (keywords)": [[14, "reserved-names-keywords"]], "Restarting the kernels": [[9, "restarting-the-kernels"]], "Retrieve a value": [[17, "retrieve-a-value"]], "Returning Values": [[23, "returning-values"]], "Running Code (edit mode)": [[9, "running-code-edit-mode"]], "SOURCES": [[35, "sources"]], "Selection": [[34, "selection"]], "Selection and Indexing": [[34, "selection-and-indexing"]], "Series": [[33, "series"]], "Sets": [[17, "sets"], [21, "sets"]], "Slicing": [[16, "slicing"], [17, "slicing"], [35, "slicing"]], "Some best practices": [[26, "some-best-practices"], [27, "some-best-practices"]], "Some methods": [[17, "some-methods"], [17, "id5"]], "Some useful built-in functions with loops": [[19, "some-useful-built-in-functions-with-loops"]], "Some useful methods": [[17, "some-useful-methods"]], "Sorting and Ranking": [[34, "sorting-and-ranking"]], "String Formatting": [[16, 
"string-formatting"]], "String Methods": [[16, "string-methods"]], "String Operators": [[3, "string-operators"], [16, "string-operators"]], "Strings": [[16, "strings"], [21, "strings"]], "Subsetting a string": [[16, "subsetting-a-string"]], "Summarizing data": [[34, "summarizing-data"]], "Summary": [[17, "summary"]], "Tech Stack": [[11, "tech-stack"]], "The ndarray object": [[31, "the-ndarray-object"]], "Tips for creating good functions": [[23, "tips-for-creating-good-functions"]], "Tuples": [[17, "tuples"], [21, "tuples"]], "Unary Operators": [[3, "unary-operators"], [15, "unary-operators"]], "Unpacking": [[23, "unpacking"]], "Using multiple conditions": [[19, "using-multiple-conditions"]], "Variable Names": [[3, "variable-names"]], "Variable Scope": [[23, "variable-scope"]], "Variable naming": [[14, "variable-naming"]], "Variable types": [[14, "variable-types"]], "Variables": [[2, "variables"], [3, "variables"]], "Variables and data types": [[14, "variables-and-data-types"]], "Very common attributes and methods with numpy arrays objects": [[31, "very-common-attributes-and-methods-with-numpy-arrays-objects"]], "Welcome to DS-1002": [[37, "welcome-to-ds-1002"]], "What is NumPy": [[31, "what-is-numpy"]], "What is Pandas?": [[33, "what-is-pandas"]], "What is a string?": [[16, "what-is-a-string"]], "What is a variable?": [[14, "what-is-a-variable"]], "What is self?": [[28, "what-is-self"]], "Writing to a File": [[29, "writing-to-a-file"]], "Your first Python program!": [[12, "your-first-python-program"]], "break - exit the loop": [[19, "break-exit-the-loop"]], "continue - stop the current iteration": [[19, "continue-stop-the-current-iteration"]], "enumerate()": [[19, "enumerate"]], "for loop": [[19, "for-loop"]], "if and else can be used for conditional processing.": [[19, "if-and-else-can-be-used-for-conditional-processing"]], "using if, elif": [[19, "using-if-elif"]], "using if, elif, else": [[19, "using-if-elif-else"]], "while-loop": [[19, "while-loop"]], "writing 
if and else as one-liners": [[19, "writing-if-and-else-as-one-liners"]], "zip()": [[19, "zip"]]}, "docnames": ["06_numpy_intro", "chapters/01-getting_started", "chapters/02-python-basics", "chapters/04-python-basics", "chapters/module-1/012-intro_python", "chapters/module-1/012-intro_python (copia)", "chapters/module-1/013-intro_R", "chapters/module-1/Practice", "chapters/module-1/about_course", "chapters/module-1/jupyter_notebooks", "chapters/module-1/programming", "chapters/module-1/tech_stack", "chapters/module-1/your_first_program", "chapters/module-2/02-cover", "chapters/module-2/021-variables", "chapters/module-2/022-operators", "chapters/module-2/023-strings", "chapters/module-2/024-structures", "chapters/module-2/0241-structures_exercises", "chapters/module-2/025-conditional", "chapters/module-2/0251-conditional_exercises", "chapters/module-2/026-iterables_and_iterators", "chapters/module-2/0261-functions_exercises", "chapters/module-2/027-functions", "chapters/module-3/029-packages", "chapters/module-3/03-cover", "chapters/module-3/031-errors_and_exceptions", "chapters/module-3/031-errors_and_exceptions_w_sols", "chapters/module-3/032-classes", "chapters/module-3/033-reading_writing_files", "chapters/module-3/lab-recursion", "chapters/module-4/041-numpyI", "chapters/module-4/042-numpyII", "chapters/module-4/043-PandasI-Introduction", "chapters/module-4/044-PandasII-Exploration_and_Manipulation", "chapters/module-4/07-numpy-continued", "chapters/module-4/Untitled", "index"], "envversion": {"sphinx": 61, "sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1}, "filenames": ["06_numpy_intro.ipynb", "chapters/01-getting_started.md", "chapters/02-python-basics.ipynb", "chapters/04-python-basics.ipynb", 
"chapters/module-1/012-intro_python.md", "chapters/module-1/012-intro_python (copia).md", "chapters/module-1/013-intro_R.md", "chapters/module-1/Practice.ipynb", "chapters/module-1/about_course.md", "chapters/module-1/jupyter_notebooks.ipynb", "chapters/module-1/programming.ipynb", "chapters/module-1/tech_stack.md", "chapters/module-1/your_first_program.ipynb", "chapters/module-2/02-cover.md", "chapters/module-2/021-variables.ipynb", "chapters/module-2/022-operators.ipynb", "chapters/module-2/023-strings.ipynb", "chapters/module-2/024-structures.ipynb", "chapters/module-2/0241-structures_exercises.ipynb", "chapters/module-2/025-conditional.ipynb", "chapters/module-2/0251-conditional_exercises.ipynb", "chapters/module-2/026-iterables_and_iterators.ipynb", "chapters/module-2/0261-functions_exercises.ipynb", "chapters/module-2/027-functions.ipynb", "chapters/module-3/029-packages.ipynb", "chapters/module-3/03-cover.md", "chapters/module-3/031-errors_and_exceptions.ipynb", "chapters/module-3/031-errors_and_exceptions_w_sols.ipynb", "chapters/module-3/032-classes.ipynb", "chapters/module-3/033-reading_writing_files.ipynb", "chapters/module-3/lab-recursion.ipynb", "chapters/module-4/041-numpyI.ipynb", "chapters/module-4/042-numpyII.ipynb", "chapters/module-4/043-PandasI-Introduction.ipynb", "chapters/module-4/044-PandasII-Exploration_and_Manipulation.ipynb", "chapters/module-4/07-numpy-continued.ipynb", "chapters/module-4/Untitled.ipynb", "index.md"], "indexentries": {}, "objects": {}, "objnames": {}, "objtypes": {}, "terms": {"": [0, 3, 9, 10, 11, 12, 14, 15, 17, 19, 21, 23, 24, 26, 27, 28, 29, 31, 32, 33, 34, 35], "0": [0, 2, 3, 10, 12, 14, 15, 16, 17, 19, 20, 21, 22, 23, 24, 26, 27, 30, 31, 32, 33, 34, 35], "00": [32, 33, 34], "00000": 34, "000000": 34, "00855369e": 32, "00865612": 31, "00950034": 31, "01": 32, "02": 32, "02121473": 32, "02685344": 32, "03": 32, "03255303": 31, "03428793e": 32, "05670664": 32, "057333": 34, "09663316e": 32, "0b100101": 23, "0b1101": 
23, "0x1": 14, "0x7f44905c2350": [], "0x7f44ca0d9310": [], "0x7f6ddd91ff30": 32, "0x7f6ddd948f90": 32, "0x7fc3ff13fe90": 33, "0x7fc438c8a2d0": 33, "0x87f408": 23, "0x87f448": 23, "0x87f508": 14, "0x87f648": 14, "1": [0, 2, 3, 9, 10, 12, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37], "10": [0, 3, 9, 12, 14, 15, 17, 19, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 33, 34, 35], "100": [0, 3, 10, 12, 14, 15, 20, 24, 32], "100000": 34, "1002": [0, 2, 3, 12, 23, 24], "101": 33, "102": 33, "103": 33, "104": 33, "105": 34, "106": 33, "11": [0, 2, 16, 17, 23, 24, 28, 32, 33, 35], "110": 32, "111": [0, 24], "112": [0, 24, 33], "11246125": 32, "113": [0, 24, 33], "115": [0, 24], "115mmhg": 21, "117": [0, 24, 34], "11757": 34, "117570": 34, "118": [0, 24, 34], "1198364157": 32, "12": [14, 21, 28, 30, 32, 35], "120": [21, 32], "122": 34, "123": 17, "1234": [23, 29], "12345": 32, "123456789": 29, "1244706300354": 15, "125": [3, 14, 31, 33], "1271": 33, "1299": 33, "13": [0, 2, 20, 23, 24, 30, 31, 32, 34], "130": [3, 14, 32], "131": [33, 34], "132": 33, "133": 33, "135": [33, 34], "13559351": 32, "139919121178064": 14, "139919121182192": 14, "14": [0, 3, 14, 24, 32], "140": [32, 34], "141": [33, 34], "14112001": [32, 35], "1416": 14, "142": 34, "143": 34, "144": 34, "145": [33, 34], "14550003": 32, "1457317": 31, "146": [33, 34], "147": [33, 34], "148": [33, 34], "149": [33, 34], "15": [0, 10, 15, 21, 23, 24, 32, 33, 35], "150": [32, 33, 34], "151": 33, "153": 34, "156": 33, "157": 33, "16": [0, 19, 23, 24, 30, 31, 32], "160": 32, "162": 33, "16276516": 31, "166": 33, "170": 32, "176": 16, "178": 33, "18": 2, "180": 32, "18177089": 32, "183": 33, "185": 33, "19": [3, 32, 33], "190": 32, "192": 33, "1923875335537315": 31, "1934569051": 34, "198": 33, "199333": 34, "1d": [31, 32], "2": [0, 2, 3, 12, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37], "20": [0, 3, 10, 14, 15, 21, 23, 24, 31, 32, 33, 
35], "200": [0, 24, 32, 33], "201": 33, "20173122": 31, "2018": 9, "2019": [0, 24], "202": [0, 24], "2021": [0, 24], "2022": 10, "203": [0, 24], "205": [0, 24], "206": 33, "21": [0, 23, 24, 30, 33], "21537611": 32, "217": 29, "22": [0, 3, 15, 24, 33, 34], "220": 33, "223": 17, "225": 33, "22615495": 31, "22633085": 31, "23": [17, 33], "23606798": 32, "23998849": 32, "24": [0, 24, 30, 33], "244": [0, 24], "246": [0, 24], "247": [0, 24], "249": [0, 24], "25": [3, 10, 12, 14, 15, 17, 26, 27, 31, 32, 33, 34, 35], "250": [0, 24, 33], "255": [0, 24], "256": [3, 14, 23], "25715276": 32, "26": [0, 16, 17, 23, 24], "26154948": 31, "26507934": 32, "27": [17, 28, 33], "278": [3, 14], "2794155": [32, 35], "28": [17, 33], "28366219": 32, "29": [17, 34], "2905": 33, "29128784747792": 32, "29718677": 31, "29738587": 32, "2d": [31, 32, 35], "3": [0, 2, 3, 12, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37], "30": [0, 17, 22, 23, 24, 32, 33, 34, 35], "300": [0, 15, 24], "300000": 34, "30776945": 31, "31": [0, 24], "31871048": 31, "32": [0, 24, 30, 33], "33": [0, 19, 24], "3301427": 32, "332": 17, "333333": 34, "34": [0, 24, 26, 27], "34562291": 32, "34743931": 32, "34822322": 32, "35": [0, 3, 14, 15, 24, 32, 34, 35], "350000": 34, "356": [0, 24], "357": [0, 24], "359": [0, 24], "36": [0, 24, 33], "360": [0, 24], "362": [0, 24], "366126": 34, "37": [16, 23], "375": 31, "37512356": 31, "37546207": 32, "38": [33, 34], "38456337": 31, "38672696": 31, "38905610e": 32, "38917398": 32, "39639181": 32, "39924804": 31, "3a": 20, "3b": 20, "3d": [31, 32, 35], "3foo": 14, "4": [0, 3, 12, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 26, 27, 29, 30, 31, 32, 33, 34, 35, 36, 37], "40": [0, 17, 21, 24, 32, 33, 35], "400": [0, 24], "400000": 34, "40320": 30, "404": [0, 24], "405": [0, 24], "406": [0, 24], "407": [0, 24], "408": [0, 24], "409": [0, 24], "41": [33, 34], "410": [0, 24], "41211849": 35, "41421356": 32, "41614684": 32, "42": [3, 31, 32, 33, 34, 
35], "42263309": 31, "42844": 34, "428440": 34, "43": [3, 33], "435866": 34, "437": 33, "438": 33, "439": 33, "440": 33, "441": 33, "442": 33, "4427": 33, "44948974": 32, "45": [23, 32, 34, 35, 36], "4523099": 31, "45391605": 32, "456": 17, "45981500e": 32, "46448508": 31, "4685006": 31, "47": [33, 34], "48": 33, "48115035": 32, "48413159e": 32, "49401501": 31, "49876311": 31, "4d": 31, "5": [0, 2, 3, 12, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37], "50": [0, 17, 24, 32, 33, 34, 35], "500": [0, 24, 32, 35], "500000": 34, "50449883": 32, "51": [0, 24], "51174724": 32, "512": 32, "51627652": 31, "52": [0, 24, 33], "53": [0, 17, 23, 24], "54030231": 32, "54402111": 35, "54999924": 32, "55": [0, 17, 24, 32], "55000000074505806": 32, "55350267": 32, "56": [0, 24], "57": 33, "57286585": 31, "5761378385558922": 31, "59": [17, 33], "59213658": 31, "5951": 33, "5dl": 21, "5mg": 21, "6": [0, 3, 9, 14, 15, 17, 18, 19, 20, 21, 22, 23, 24, 30, 31, 32, 33, 34, 35, 37], "60": [23, 32, 33], "600000": 34, "60086561": 31, "61480613": 32, "625": 31, "62547267": 31, "64": [22, 23, 26, 27, 30, 32], "64575131": 32, "65": 32, "65364362": 32, "6569866": [32, 35], "67": [14, 33], "6728": 33, "68": 32, "68456316": 31, "6888893": 31, "69": 33, "7": [0, 17, 19, 21, 23, 24, 26, 27, 31, 32, 33, 34, 35], "70": [32, 33], "71": 33, "71828183e": 32, "72": [32, 33], "7225608": 31, "72865848": 31, "73205081": 32, "73572066": 31, "74": 32, "75": [15, 31, 32, 33, 34], "75390225": 32, "7568025": [32, 35], "75724747": 32, "758000": 34, "762238": 34, "765298": 34, "76912674": 31, "77": 33, "777": 17, "78": 32, "78096262": 31, "79": 33, "8": [0, 2, 3, 14, 15, 19, 20, 21, 22, 23, 24, 26, 27, 29, 30, 31, 32, 33, 34, 35, 36], "80": [17, 32, 33, 34], "800": 28, "8000": 31, "800000": 34, "81": 32, "817941": 34, "81814867": 31, "82": 32, "82630603": 32, "828066": 34, "82842712": 32, "84": 33, "84057254": 31, "84147098": [32, 35], "84327582": 31, "843333": 34, 
"84526617": 31, "8456337": 31, "85": [32, 33], "8598": 33, "86": 32, "87": [32, 33], "871754": 34, "8722813232690143": 35, "875": 31, "88": 32, "89": [32, 33], "8903": 33, "89086505": 31, "8909800": 14, "89121924": 31, "8918": 33, "897": 17, "9": [0, 2, 3, 14, 15, 17, 20, 22, 24, 26, 27, 31, 32, 33, 34, 35], "90": 32, "900000": 34, "90929743": [32, 35], "91": [23, 32], "912": 23, "92": [32, 33], "93": 33, "94": 32, "95": [32, 33], "95892427": [32, 35], "96017029": 32, "962865": 34, "96348811": 32, "97": 33, "98095799e": 32, "9836": 33, "98935825": [32, 35], "9899925": 32, "99": [17, 33], "99217197": 32, "99394529": 31, "A": [2, 3, 9, 10, 12, 14, 15, 17, 23, 28, 29, 30, 31, 32, 33, 35], "And": [12, 16, 23, 28, 32, 34], "As": [8, 9, 10, 12, 16, 21, 26, 27, 28, 33, 34, 37], "At": 23, "Be": [19, 21, 23, 29], "But": [8, 14, 17, 19, 23, 26, 27, 28, 31, 32, 34], "By": [8, 14, 31, 32, 33], "FOR": 19, "For": [2, 3, 8, 9, 10, 12, 14, 15, 17, 18, 19, 21, 23, 28, 29, 30, 31, 32, 33], "IF": 19, "IN": 28, "If": [3, 8, 9, 10, 15, 16, 17, 19, 20, 23, 26, 27, 28, 29, 30, 31, 32, 35], "In": [0, 2, 3, 9, 10, 11, 12, 14, 15, 16, 17, 19, 21, 23, 24, 26, 27, 28, 29, 31, 32, 33, 34, 35, 36], "It": [0, 2, 10, 16, 17, 19, 23, 24, 26, 27, 28, 31, 32, 33, 34], "Its": 33, "NO": 28, "NOT": 17, "No": [0, 3, 17, 24], "On": 10, "One": [16, 23, 35], "Or": [2, 14, 23, 26, 27, 31], "THE": 8, "That": [17, 28, 32], "The": [3, 8, 9, 10, 11, 12, 14, 15, 16, 17, 19, 21, 23, 26, 27, 28, 29, 30, 32, 33, 34, 35, 37], "Their": [11, 21], "Then": [7, 9, 12, 15, 23, 28, 29], "There": [3, 9, 14, 15, 21, 26, 27, 29, 32, 35], "These": [8, 9, 10, 14, 17, 23, 26, 27, 28, 29, 31, 33], "To": [0, 10, 16, 17, 19, 21, 23, 24, 28, 29, 31, 32, 33], "Will": 32, "With": [9, 23, 28, 30, 32], "_": [2, 3, 14, 23, 33], "__abs__": 23, "__add__": 23, "__and__": 23, "__array_function__": 31, "__bool__": 23, "__ceil__": 23, "__divmod__": 23, "__doc__": [23, 33], "__eq__": 23, "__float__": 23, "__floor__": 23, "__floordiv__": 23, 
"__format__": 23, "__ge__": 23, "__getattribute__": 23, "__getnewargs__": 23, "__gt__": 23, "__hash__": 23, "__index__": 23, "__init__": [0, 24, 28, 31], "__int__": 23, "__invert__": 23, "__le__": 23, "__lshift__": 23, "__lt__": 23, "__main__": [23, 28], "__mod__": 23, "__mul__": 23, "__ne__": 23, "__neg__": 23, "__new__": 23, "__or__": 23, "__pos__": 23, "__pow__": 23, "__radd__": 23, "__rand__": 23, "__rdivmod__": 23, "__repr__": 23, "__rfloordiv__": 23, "__rlshift__": 23, "__rmod__": 23, "__rmul__": 23, "__ror__": 23, "__round__": 23, "__rpow__": 23, "__rrshift__": 23, "__rshift__": 23, "__rsub__": 23, "__rtruediv__": 23, "__rxor__": 23, "__setattr__": 28, "__sizeof__": 23, "__sub__": 23, "__truediv__": 23, "__trunc__": 23, "__xor__": 23, "_foo": 14, "_io": [0, 24, 29], "_parse_uri": [0, 24], "_plugin": [0, 24], "a_arrai": [0, 24], "a_list": [0, 24], "ab": 23, "abil": [28, 34], "abl": 9, "about": [8, 10, 23, 28, 29, 33], "abov": [0, 7, 8, 9, 17, 18, 19, 20, 22, 23, 24, 31, 32], "absenc": 17, "absolut": 23, "absolute_valu": 23, "academi": 29, "academia": 10, "accept": [3, 15, 23, 29, 33], "access": [10, 11, 14, 16, 17, 19, 23, 28, 32, 33, 34], "access_mod": 29, "accomod": 10, "accomplish": 31, "accumul": 32, "accur": [23, 32], "achiev": [17, 30], "across": 31, "act": [15, 28, 33], "action": [9, 19, 23, 26, 27, 28], "activ": [8, 9, 29], "actual": [19, 23, 28], "acycl": [32, 35], "ad": [0, 24, 28, 29], "adapt": 12, "add": [2, 15, 17, 18, 19, 20, 23, 28, 29, 33], "add_u": 2, "addit": [3, 8, 12, 14, 15, 23, 26, 27, 28, 31, 32, 33], "addition": [8, 31, 37], "address": [10, 14, 17, 23], "adjac": 29, "administr": 10, "adopt": 29, "advanc": [19, 23, 28, 37], "advantag": [28, 31], "aei": 16, "affect": [23, 28, 32], "aforement": [9, 14], "after": [15, 16, 17, 19, 23, 26, 27, 28, 29, 31], "afton": 11, "ag": [2, 3, 26, 27, 28, 33], "again": [3, 23, 28, 29, 30, 32, 34], "against": [23, 32], "age_data": [26, 27], "agre": 29, "aim": [14, 23, 28], "aka": 12, "alert": 17, 
"algebra": 31, "algorithm": [3, 28, 31], "alia": [0, 24, 31, 33], "alic": 33, "all": [0, 2, 8, 9, 10, 14, 16, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 35], "allevi": 32, "allow": [0, 2, 3, 9, 10, 11, 14, 15, 16, 17, 21, 23, 24, 28, 29, 31, 33], "almost": [10, 19, 21], "along": [8, 10, 14, 32, 33], "alpha": [2, 3], "alreadi": [0, 16, 23, 24, 28, 29], "also": [0, 3, 8, 9, 10, 12, 14, 15, 16, 17, 21, 23, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37], "alt": 9, "altern": [10, 21, 29, 31, 32, 33], "although": [10, 16, 31], "alwai": [3, 14, 17, 23, 26, 27, 31, 32], "am": [12, 16, 21, 28], "ambigu": 31, "among": [0, 10, 24, 31], "amount": 29, "an": [0, 2, 3, 8, 9, 10, 12, 14, 15, 16, 17, 19, 20, 21, 22, 23, 24, 26, 27, 28, 29, 30, 31, 32, 34, 35, 37], "anaconda3": [0, 24], "analys": 10, "analysi": [10, 11, 29, 31, 33], "analyt": [29, 33], "angel": 33, "ani": [0, 2, 7, 9, 10, 14, 16, 17, 19, 21, 23, 24, 27, 28, 29, 31, 32, 33, 34], "anim": 28, "animal_firulai": 28, "animal_kenni": 28, "annoi": 19, "annot": 23, "anonym": 8, "anoth": [9, 10, 14, 17, 19, 21, 23, 28, 30, 31, 33, 35], "another_anim": 28, "answer": [8, 14, 15, 16, 17, 18, 19, 21, 23, 26, 28, 29, 31, 32, 33], "anybodi": 19, "anyon": 23, "anyth": [8, 12, 23, 28], "anywher": [3, 15], "apostroph": 16, "appeal": 10, "appear": [10, 17, 31], "append": [17, 21, 23, 28, 29], "appl": [2, 3, 19], "apple_index": 2, "appli": [0, 9, 10, 16, 17, 18, 24, 31, 32, 34], "applic": [9, 10, 11, 33], "approach": [8, 10, 21, 23, 33], "appropri": [3, 15, 26, 27], "approxim": 31, "ar": [2, 3, 9, 10, 11, 12, 14, 15, 16, 17, 19, 21, 23, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35], "arang": [31, 32, 35], "arbitrari": [19, 21, 23], "area": [9, 10, 29], "areascomput": 29, "arg": [0, 22, 23, 24], "arg1": 12, "arg2": 12, "arg3": 12, "arg_expansion_exampl": 23, "argu": 10, "argument": [12, 14, 17, 22, 27, 29, 31, 32, 34, 35], "argunemnt": 17, "aris": 28, "arithmet": [3, 12, 17, 26, 27, 32], "around": [8, 17], "arr": [9, 32, 35], "arr1": [31, 35], 
"arr1d": 32, "arr2": [31, 32, 35], "arr2_concat": 32, "arr2_copi": 32, "arr2_flip": 32, "arr2_float64": 31, "arr2_int32": 31, "arr2_max": 32, "arr2_mean": 32, "arr2_min": 32, "arr2_std": 32, "arr2d": [32, 35], "arr3": 31, "arr3d": [32, 35], "arr_exec1": 32, "arr_exec2": 32, "arr_slic": 32, "arrai": [14, 23, 33, 34], "array_lik": [31, 32], "arthimet": 20, "arthur": 17, "artifici": 10, "as_grai": [0, 24], "as_integer_ratio": 23, "asarrai": [0, 24], "ascend": 34, "ask": [8, 12, 34], "ask_and_decid": 28, "aspect": 16, "assert": [3, 14], "assign": [3, 14, 15, 16, 17, 23, 28, 31, 32, 33, 34], "assist": 8, "associ": [3, 8, 15, 17, 23, 32], "assum": [3, 31, 32], "assumpt": 8, "asterisk": 23, "astyp": [31, 35], "async": [3, 14], "athlet": 23, "attempt": [8, 26, 27, 32], "attent": [26, 27], "attribut": [16, 17, 18, 23, 34], "attributeerror": 28, "auc": 21, "author": [9, 34], "autom": 10, "automat": [3, 14, 29, 33], "avail": [9, 23, 28, 31], "averag": 32, "avoid": [10, 26, 27, 32], "await": [3, 14], "awar": 23, "ax": [0, 24, 31, 32, 33], "axi": [10, 32, 34, 35], "ayotnom": 16, "b": [3, 9, 14, 15, 17, 23, 26, 27, 28, 30, 31, 32, 33], "b_arrai": [0, 24], "b_list": [0, 24], "back": [9, 12, 16, 17, 31, 32, 35], "background": [8, 9], "bad": [26, 27], "banana": [2, 3, 19], "bar": [17, 23, 36], "bari": 19, "bark": 28, "barplot": 10, "base": [0, 8, 10, 16, 21, 24, 30, 34, 37], "bash": [10, 19], "basic": [2, 3, 8, 15, 28, 31, 37], "baz": 17, "beat": 10, "becaus": [10, 21, 23, 26, 27, 28, 31, 32], "becom": [8, 9, 19, 28], "been": [9, 10, 17, 19, 26, 27, 28, 31], "befor": [7, 8, 9, 10, 17, 23, 26, 27, 28, 29, 31, 32, 33, 35], "beforehand": 12, "begin": [16, 23, 29], "beginn": 10, "behav": [21, 28, 29, 33], "behavior": [19, 23, 28, 30, 32, 33], "being": [3, 10, 19, 20, 32], "believ": [10, 19], "bell": 21, "belong": [19, 23, 28, 29, 32], "below": [2, 7, 9, 17, 20, 23, 28, 31, 32, 33, 34], "berlin": 29, "best": [8, 23, 28, 29], "best_citi": 29, "better": [8, 10, 26, 27, 31], "betwe": 3, 
"between": [3, 12, 14, 16, 23, 31, 33, 34], "bewar": [9, 19], "beyond": [10, 29, 33], "big": [17, 23], "bigger": 34, "bilbao": [19, 29], "bill": 9, "billi": 17, "bin": 23, "binari": [10, 23], "birth": [17, 18], "bit": [23, 28, 29, 32, 35], "bit_count": 23, "bit_length": 23, "black": 28, "blank": 16, "blink": 9, "block": [3, 15, 19, 23, 28, 29], "blog": 9, "blue": [9, 28], "blueberri": 19, "bmi": 33, "bob": [17, 32, 33], "bodi": 23, "boo": 33, "book": [0, 24, 37], "bool": [3, 14, 22, 23, 32, 33, 34], "bool_": 31, "bool_var": [3, 14], "boolean": [2, 3, 14, 15, 16, 17, 18, 23], "boolean_arrai": 31, "border": 9, "borderand": 9, "both": [0, 8, 9, 10, 11, 14, 16, 17, 19, 20, 23, 24, 26, 27, 28, 29, 30, 32, 33, 34], "bottom": [32, 35], "bow": 28, "box": 28, "boxplot": 9, "bp": 33, "brace": [16, 17, 19], "bracket": [16, 17, 34], "break": [3, 14, 16, 17, 23, 28, 29, 33, 34], "breakthrough": 8, "brief": [8, 37], "briefli": 10, "broad": 29, "broadcast": 32, "broken": 30, "browser": 11, "bucket": 19, "budapest": 29, "buddi": 28, "buffer": 23, "build": [3, 15, 19, 21, 28], "built": [0, 14, 24, 28, 31, 32, 34], "builtin": 23, "bulk": 23, "burden": 8, "button": [3, 9, 16], "byte": [0, 23, 24, 33], "bytearrai": 23, "byteord": 23, "bytes_": 31, "c": [9, 10, 17, 19, 23, 31, 32, 33], "calcul": [12, 15, 19], "call": [0, 2, 3, 8, 12, 14, 16, 17, 19, 21, 22, 24, 26, 27, 28, 29, 30, 31, 33, 34, 35, 36], "call_plugin": [0, 24], "camel": 14, "camelcas": 28, "can": [2, 3, 8, 9, 10, 11, 12, 14, 15, 16, 17, 21, 22, 23, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35], "cannot": [2, 3, 16, 23, 26, 27], "canva": 8, "capabl": [8, 21, 33], "capit": 14, "captur": 8, "care": [19, 21], "carefulli": 19, "case": [2, 3, 12, 14, 15, 16, 20, 21, 23, 26, 27, 29, 30, 31, 32], "cast": [3, 14, 31, 32], "catch": [26, 27], "catchal": 19, "categor": [10, 34], "categori": 10, "caus": [19, 23, 26, 27, 32], "cautiou": [23, 29], "ceil": 23, "cell": [0, 2, 3, 7, 12, 14, 16, 17, 18, 19, 20, 21, 23, 24, 26, 27, 28, 34, 36], 
"central": 34, "certain": [12, 14, 17, 23, 26, 27, 28], "challeng": [8, 33], "chang": [7, 9, 10, 17, 18, 19, 23, 28, 32, 33], "channel": [0, 24], "chapter": 24, "charact": [2, 3, 14, 16, 19], "character": 17, "characterist": 16, "charli": 33, "charlottesvil": 19, "chatch": [26, 27], "cheat": 9, "check": [0, 3, 9, 14, 15, 17, 19, 20, 21, 22, 24, 26, 27, 31, 34], "check_positive_numb": [26, 27], "cherri": 19, "chicago": 33, "child": 28, "choic": 10, "chosen": 23, "chunk": [14, 33], "cinderella": 21, "citi": [19, 29, 33], "clariti": [21, 23], "class": [3, 8, 12, 14, 16, 23, 26, 27, 31, 32, 33, 34], "classic": 9, "claus": [26, 27], "clayton": 17, "clean": [8, 10, 29, 33], "clean_word": 21, "cleaner": [26, 27], "cleanup": [26, 27], "clear": 29, "clearli": [23, 26, 27], "cli": [8, 37], "click": 9, "clint": 17, "close": 29, "club": 23, "cluster": 11, "cm": 16, "cnn": 21, "cnn_1": 21, "cnn_2": 21, "co": [9, 32, 35], "coconut": 17, "code": [0, 3, 8, 10, 11, 12, 19, 20, 21, 23, 24, 26, 27, 28, 29, 31, 37], "coff": 33, "coffe": [17, 18], "colab": [0, 24], "collect": [14, 17, 31, 33], "colon": [23, 26, 27, 29], "cols_id": 33, "column": [0, 24, 31, 32, 33, 35], "com": [10, 33, 34], "combin": [0, 3, 7, 8, 9, 11, 15, 16, 17, 19, 22, 23, 24, 29, 32, 33, 35], "come": [0, 8, 10, 12, 16, 21, 23, 24, 28, 31, 33], "comfort": 8, "comma": [14, 17, 33], "command": [0, 8, 11, 24, 31, 37], "comment": [3, 8], "common": [0, 3, 8, 10, 12, 15, 16, 21, 23, 24, 29, 32, 33, 34, 35, 37], "commonli": [8, 10, 31, 37], "commun": [8, 10], "compact": [17, 21], "compar": [10, 21, 23], "comparison": [32, 34], "compat": 31, "competit": 10, "compil": 12, "complement": 23, "complet": [8, 9, 23, 26, 27, 34], "complex": [2, 3, 8, 10, 15, 19, 23, 29, 33], "complic": [15, 21], "compon": [8, 21, 23], "compos": 11, "comprehens": [23, 31], "compris": 29, "comput": [8, 9, 10, 11, 12, 19, 20, 29, 31, 32, 34], "compute_vari": 23, "compute_variances_sort_save_print": 23, "concaten": [3, 16, 17, 26, 27, 32, 35], 
"concentr": 15, "concept": [8, 10, 21, 32, 37], "concis": [10, 21, 23], "conclud": [8, 37], "concret": 28, "cond": 32, "condit": [20, 21, 23, 32, 34], "conditon": 19, "conduct": 10, "confid": 8, "confirm": 30, "confus": [23, 33], "conjug": 23, "conjunct": [3, 15], "connor": 28, "consecut": 16, "consequ": 31, "consid": [21, 23, 28, 32], "consist": [9, 23, 35], "consol": 12, "construct": 21, "consum": 28, "contain": [2, 3, 9, 10, 12, 14, 17, 18, 21, 22, 23, 28, 29, 31, 32, 33, 34, 35], "content": [8, 9, 29, 31], "content_parti": 29, "context": [23, 28, 29], "continu": [3, 8, 14, 30, 32, 35], "contourpi": [0, 24], "contrast": [10, 12, 14, 17, 19, 21, 31, 33], "control": [8, 31, 35, 37], "controversi": 33, "conveni": 34, "convent": [10, 14, 23, 28], "convers": [14, 32], "convert": [9, 10, 16, 21, 23, 31], "convet": 14, "copi": [9, 16, 20, 32, 33, 34, 35], "core": [0, 24, 33, 34], "corr": 34, "correct": [19, 23, 29], "correctli": 32, "correl": 34, "correspond": [16, 17, 29, 31, 32, 33], "costli": 33, "could": [0, 17, 19, 23, 24, 27, 28, 29, 31, 32, 33, 35], "count": [10, 23, 33, 34], "countri": 19, "coupl": [9, 19], "cours": [2, 3, 10, 11, 12, 14, 21, 28, 31, 35, 37], "courses_it": 21, "cover": [8, 10, 14, 17, 21, 37], "creat": [2, 3, 7, 9, 10, 12, 14, 15, 16, 17, 20, 21, 22, 28, 29, 30, 32, 34, 35], "creation": [9, 31], "creativ": 28, "critic": [31, 33], "crucial": [32, 33], "csv": [10, 33, 34], "ctrl": 9, "cube": 31, "curli": 16, "current": [0, 9, 23, 24], "cursor": 9, "curv": 10, "custom": [31, 33], "cut": [3, 15], "cvill": 19, "cyberdyn": 28, "cycler": [0, 24], "d": [0, 2, 3, 9, 12, 15, 17, 21, 23, 24, 28, 31], "dag": [32, 35], "dai": [9, 10, 19, 23], "daili": [8, 37], "danger": 19, "darrel": 17, "dat": 33, "data": [2, 8, 9, 11, 15, 16, 19, 20, 21, 23, 28, 29, 32, 37], "data1": [31, 35], "data2": [31, 32, 35], "data3": 31, "data_dict": 33, "datafram": [10, 34], "datascience_41model": 29, "dataset": [9, 32, 33, 34], "datatyp": 17, "date": 33, "dateutil": [0, 24], 
"datum": 21, "david": 33, "debug": [8, 11, 26, 27, 37], "decid": 28, "decim": [3, 14, 31], "decis": [8, 28, 29], "declar": [0, 24, 31, 35], "decypher_format_arg": [0, 24], "dedic": 9, "deep": [10, 21, 33], "def": [0, 2, 3, 14, 22, 23, 24, 26, 27, 28, 30], "default": [17, 21, 26, 27, 28, 29, 31, 32, 33, 34, 35], "defend": 8, "defin": [2, 3, 14, 19, 20, 21, 22, 23, 26, 27, 29, 30, 31, 34, 36], "definit": [23, 28, 29, 31], "del": [3, 14, 34], "delet": [3, 14, 32, 35], "delight": 10, "delimit": 33, "delin": 14, "denomin": [23, 26, 27], "depend": [9, 10, 19, 31, 32, 34], "deprec": 34, "depth": 21, "descend": 34, "describ": [23, 34], "descript": [2, 3, 23], "descriptor": 23, "design": [10, 23, 28, 29, 31, 32], "desir": [31, 32], "despin": [9, 10], "detail": [9, 11, 14, 16, 21, 23, 31, 32, 34], "determin": [14, 23], "develop": [2, 8, 10, 28, 37], "deviat": [31, 32, 34, 35], "devic": 8, "df": [33, 34], "df_deep": 33, "df_drop": 34, "df_drop_al": 34, "df_drop_x": 34, "df_fill": 34, "df_miss": 34, "df_shallow": 33, "diabet": 33, "dialog": 9, "dict": [17, 19, 20, 21, 23, 33], "dict_item": 17, "dict_kei": 17, "dict_valu": [17, 28], "dictionari": [2, 16, 18, 19, 23, 28, 29, 33], "dictionary1": 17, "dictionary2": 17, "dictionary3": 17, "did": [12, 17, 20, 28], "didn": 23, "die": 16, "diff": 22, "differ": [2, 3, 8, 9, 10, 14, 16, 17, 19, 22, 23, 28, 29, 31, 33], "difficult": [30, 32, 35], "dimens": [0, 24, 31, 32, 35], "dimension": [0, 24, 31, 32, 33, 35], "dir": [0, 24], "direct": [32, 35], "directli": [10, 12, 17, 31, 32], "directori": [0, 24, 31], "dirnam": [0, 24], "disappear": 9, "discuss": [3, 8], "dispers": 34, "distinct": 34, "distinguish": 29, "distribut": [31, 34], "div": 27, "dive": [31, 33], "divers": [8, 10], "divid": [15, 26, 27, 32], "divide_numb": [26, 27], "divis": [3, 12, 15, 20, 26, 27, 32], "divisbl": 20, "divmod": 23, "dl": 21, "dn": [0, 24], "do": [3, 7, 9, 12, 14, 15, 16, 17, 19, 20, 21, 23, 26, 27, 28, 29, 30, 31, 32, 33, 34], "doc": [23, 26, 27, 33, 35], 
"docencia": [0, 24], "docstr": 22, "document": [11, 21, 23, 31, 34], "documento": [0, 24], "doe": [0, 9, 16, 17, 19, 20, 21, 23, 24, 26, 27, 28, 29, 32], "dog": 28, "domain": [10, 29], "don": [8, 9, 16, 17, 19, 21, 23, 28, 29, 34], "done": [9, 12, 28, 29], "door": 8, "dot": [16, 34], "doubl": 34, "down": [9, 12, 18, 23, 28, 29, 30], "dplyr": [10, 33], "draw": [9, 31], "drawn": [29, 31], "drop": [10, 34], "drop_end": 35, "drop_second_index": 35, "drop_start": 35, "ds1002": [0, 24, 35], "dtype": [31, 32, 33, 34, 35], "due": 10, "dummi": [20, 23], "dure": [8, 21, 26, 27, 30, 37], "dynam": [3, 16], "e": [3, 12, 14, 15, 16, 17, 19, 21, 23, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35], "each": [0, 3, 9, 10, 14, 15, 17, 19, 20, 21, 22, 23, 24, 28, 29, 30, 31, 32, 33, 34, 35], "earli": [8, 19], "earlier": [14, 19, 31, 32, 33], "easi": [10, 12, 28], "easier": [10, 23, 26, 27, 28, 33], "easiest": 33, "easili": [10, 33], "economi": 23, "edit": [7, 34], "edu": [11, 33], "educ": 11, "edureka": 9, "effect": [3, 33], "effici": [8, 9, 10, 21, 31, 37], "either": [7, 9, 19, 20, 23, 26, 27, 29, 31], "element": [0, 2, 3, 16, 17, 19, 21, 24, 26, 27, 29, 31, 32, 33, 35], "elementari": 3, "elementwis": 32, "elif": [3, 14, 20], "elimin": 31, "elmnt": 17, "els": [0, 3, 14, 20, 21, 22, 23, 24, 28, 30], "email": 8, "eman": 16, "embed": 9, "emphas": 31, "emploi": [3, 15], "empti": [17, 29, 31, 32], "en": 35, "enabl": [9, 11, 12, 33], "encapsul": 8, "enclos": [16, 21, 23], "encod": 29, "encount": [23, 26, 27, 32], "encourag": 8, "end": [16, 17, 19, 21, 23, 26, 27, 28, 29, 32, 35], "endpoint": 34, "endswith": [3, 16], "engin": 28, "enhanc": 12, "enjoi": 8, "enough": 19, "ensur": [26, 29, 31], "enter": [9, 17], "entir": [9, 10, 29, 31, 32, 35], "entiti": [12, 31], "entri": [23, 26, 27, 33, 34], "enumer": [21, 23, 26, 27], "environ": [8, 9, 11, 14, 34], "equal": [3, 15, 22, 23, 31, 32], "equat": [9, 11], "equival": 31, "error": [22, 23, 28, 33, 37], "esc": 9, "especi": [32, 33], "essai": 29, 
"essenti": [8, 10, 21, 33, 34, 37], "esssenti": 29, "etc": [2, 16, 28, 29], "eval": 12, "evalu": [3, 12, 15, 17, 19, 20, 21, 34], "even": [3, 8, 10, 14, 15, 19, 20, 23, 26, 27, 28, 29, 32, 34], "evenli": 31, "everi": [0, 3, 8, 10, 14, 16, 19, 24, 29, 32, 35], "every_oth": 35, "everyth": [9, 10, 16, 28, 31], "evolv": 29, "exactli": [23, 34], "exampl": [2, 3, 9, 10, 12, 14, 15, 19, 20, 23, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35], "excel": [10, 33], "except": [0, 3, 14, 16, 21, 24, 29, 31, 32], "exceptiontyp": [26, 27], "exceptiontype1": [26, 27], "exceptiontype2": [26, 27], "exceptiontype3": [26, 27], "excercis": 17, "exclam": 31, "exclud": [21, 32], "exclus": 34, "execut": [8, 10, 11, 12, 19, 21, 26, 27, 30], "exercis": 28, "exhibit": 23, "exist": [0, 16, 21, 24, 28, 29, 34], "exp": 32, "expect": [8, 19, 26, 27, 30, 32, 33], "experi": [8, 10, 34], "expertis": 29, "explain": [23, 28], "explan": [8, 11], "explicit": 10, "explicitli": 32, "explor": [10, 11, 23], "expon": 32, "exponenti": [3, 15], "express": [10, 16, 17, 19, 21, 34, 37], "expresss": [3, 15], "extend": 28, "extens": [0, 9, 10, 24, 28, 31], "extent": 8, "extern": [0, 24, 29, 31], "extra": [21, 32], "extract": [3, 16, 29, 32, 34], "ey": 28, "f": [0, 16, 17, 19, 21, 23, 24, 26, 27, 28, 29, 30, 31], "facilit": 11, "fact": [10, 23, 28, 33], "factori": 30, "fail": [23, 26, 27, 29], "fair": 10, "fake": 31, "fall": 21, "fals": [0, 3, 10, 14, 15, 16, 17, 21, 22, 23, 24, 28, 31, 32, 33, 34], "familiar": [9, 10, 11, 19, 31], "fan": 21, "far": [14, 16, 19, 21, 28, 29, 33, 34], "fast": [0, 24, 32], "faster": 28, "fastx": 11, "father": 16, "favorite_numb": 2, "fcn": 23, "fcn_bad_arg": 23, "fcn_force_keyword": 23, "fcn_nothing_to_return": 23, "fcn_swapped_arg": 23, "featur": [8, 10, 16, 17, 28, 31, 32, 33, 34, 37], "feed": 29, "feedback": 8, "feel": [8, 10], "fetch": 34, "few": [9, 28, 37], "fibonacci": 30, "field": [28, 29, 31, 33, 34], "fifth": 29, "figsiz": 36, "figur": 3, "file": [0, 9, 10, 11, 12, 23, 24, 31, 33, 
37], "file_or_url_context": [0, 24], "filenam": 29, "filenotfounderror": [0, 24], "filepath_or_buff": 33, "filesystem": 9, "fill": 31, "fillna": 10, "film": 21, "filter": [0, 10, 21, 24], "final": [3, 10, 12, 14, 16, 28, 29, 30, 31], "find": [0, 2, 3, 8, 10, 17, 23, 24, 26, 27, 32, 33, 35], "finish": 29, "first": [0, 2, 3, 10, 16, 17, 18, 19, 23, 24, 29, 30, 31, 32, 33, 34, 35], "firstval": 2, "firulai": 28, "fit": 28, "fix": 23, "flag": [14, 32], "flatten": [31, 32], "flattened_gam": 31, "flexibl": [29, 31, 33], "flip": [32, 35], "float": [3, 14, 17, 23, 28, 31, 32, 33, 35], "float128": 31, "float16": 32, "float32": [31, 32], "float64": [31, 32, 34, 35], "float_arr": 35, "float_var": [3, 14], "floor": [3, 15, 23], "flow": 19, "flush": 23, "fn": [0, 24], "fname": [0, 24], "focu": [8, 10], "focus": [8, 10, 37], "folder": [9, 31], "follow": [3, 8, 9, 10, 12, 14, 15, 17, 19, 20, 23, 26, 27, 28, 29, 31, 32], "fonttool": [0, 24], "foo": [17, 23, 35], "foomax": 35, "foomean": 35, "foomin": 35, "foosin": 35, "foostd": 35, "forc": [23, 28], "forcibli": 23, "forget": [26, 27], "form": [12, 21, 23, 28, 32, 34, 35], "formal": 10, "format": [0, 8, 9, 11, 19, 23, 24], "format_hint": [0, 24], "format_spec": 23, "formatt": 23, "forth": [3, 15], "fortran": [10, 19, 31], "found": [19, 33], "foundat": 31, "four": [3, 15, 29, 31], "fourth": 32, "frame": [10, 34], "free": 8, "freq": 34, "frequenc": 34, "from": [0, 3, 8, 9, 10, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24, 26, 27, 28, 30, 31, 32, 33, 34], "from_byt": 23, "fruit": 2, "ftp": 33, "full": 31, "func": [0, 24], "funciton": 34, "function": [2, 3, 8, 10, 12, 14, 16, 17, 21, 22, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37], "function_nam": 12, "fundament": [8, 10, 21, 33, 37], "further": [21, 30, 31, 34], "futur": [8, 11, 14, 23, 26, 27, 34], "futurewarn": 34, "g": [9, 16, 17, 19, 21, 23, 26, 27, 28, 29, 30, 31, 33, 34], "game": [10, 31], "game_and_solut": 31, "games_and_solut": 31, "gaussian": 31, "gave": 23, "gbm": 23, "gener": [10, 
16, 19, 23, 28, 29, 31, 34, 35], "get": [0, 2, 3, 9, 10, 11, 12, 14, 16, 17, 19, 21, 24, 29, 31, 35, 37], "getattr": 23, "ggplot2": 10, "git": 8, "github": [8, 37], "githubusercont": [33, 34], "give": [3, 8, 12, 14, 15, 16, 19, 23, 26, 27, 28, 30, 31, 32, 33, 34, 35], "given": [0, 3, 8, 9, 14, 16, 17, 18, 19, 21, 23, 24, 28, 30, 31, 32, 37], "glimps": [31, 33], "global": [3, 14], "go": [0, 8, 11, 12, 16, 21, 24, 28, 29], "goal": 8, "goe": [19, 29], "gone": 8, "goo": 35, "good": [3, 8, 15, 19, 28], "goodby": 3, "googl": [0, 24], "got": 19, "grammar": 10, "grape": 19, "graph": [32, 35], "graphic": [10, 11], "great": [10, 12, 23], "greater": [3, 10, 15, 32], "green": 9, "greet": [12, 28], "grei": 9, "group": [3, 8, 15, 23, 28], "grouped_boxplot": 9, "guess": 8, "guid": 8, "h": 10, "ha": [3, 9, 10, 12, 14, 15, 19, 21, 22, 26, 27, 28, 31, 32, 33, 34, 35], "habit": 8, "had": 20, "half": 2, "hand": [8, 10, 17], "handi": [17, 28], "handl": [8, 9, 10, 32, 33, 37], "handler": [26, 27], "happen": [2, 9, 26, 27, 32], "hard": [26, 27], "hasattr": [0, 24], "hash": [17, 23], "haskel": 10, "have": [0, 2, 3, 8, 9, 10, 11, 14, 15, 16, 17, 19, 23, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35], "head": [33, 34], "header": 28, "heard": 28, "heavi": 29, "height": 28, "hello": [3, 12, 14, 16, 23, 28], "help": [8, 9, 10, 23, 28, 29, 31, 32, 33], "here": [0, 3, 8, 9, 10, 12, 14, 15, 16, 17, 19, 21, 23, 24, 26, 27, 28, 29, 31, 32, 33, 34, 35], "hesit": 8, "hex": [14, 23, 32, 33], "hexadecim": 14, "hide": 28, "hierarchi": [32, 35], "high": 10, "higher": [10, 32, 35], "highli": 10, "highlight": 31, "hint": [19, 20, 28], "hit": [3, 16], "hold": [3, 14, 23, 31, 32, 33], "home": [0, 24, 28], "horizont": 33, "host": [23, 33], "hotel": [0, 24], "hour": 8, "houston": 33, "how": [3, 10, 12, 14, 15, 19, 20, 21, 23, 26, 27, 28, 31, 32, 34], "howev": [0, 10, 14, 15, 17, 19, 23, 24, 28, 29, 31, 32, 33], "hpc": 11, "html": [9, 23, 26, 27, 33, 35], "http": [9, 10, 12, 17, 18, 23, 26, 27, 33, 34, 35], "hue": 
9, "human": 28, "hundr": 31, "i": [0, 2, 3, 8, 9, 10, 11, 12, 15, 17, 18, 19, 20, 21, 22, 23, 24, 26, 27, 29, 30, 32, 34, 35, 36, 37], "i4": 31, "id": [14, 15, 23, 32, 33], "idea": [8, 9, 16, 23], "ideal": 11, "ident": 3, "identif": 33, "identifi": [8, 26, 27, 28], "ii": [21, 26, 27, 37], "iloc": 34, "imag": [9, 23], "imageio": [0, 24], "imageio_imread": [0, 24], "imageio_plugin": [0, 24], "imagin": 20, "imaginari": 23, "img": [0, 24], "immut": [3, 17], "imopen": [0, 24], "imopen_arg": [0, 24], "imper": 19, "implement": [10, 12, 21, 28, 32], "impli": 3, "implicitli": [3, 21], "import": [3, 8, 9, 10, 14, 16, 17, 18, 19, 21, 23, 28, 29, 32, 34, 35, 37], "importantli": [9, 21], "impos": 19, "improv": 21, "imput": 34, "imread": [0, 24], "imshow": [0, 24], "inaccur": 32, "includ": [8, 9, 10, 16, 17, 19, 20, 21, 23, 26, 27, 28, 29, 31, 32, 33, 37], "inclus": 31, "incomplet": 8, "incorpor": [16, 26, 27], "incorrect": [8, 23, 26, 27], "increas": 20, "increasingli": 8, "incred": 29, "increment": [3, 15], "indent": [23, 26, 27, 28, 29], "indentationerror": [26, 27], "indenten": [26, 27], "independ": [31, 32], "index": [0, 10, 18, 19, 21, 23, 24, 33, 35], "indexerror": [0, 24], "indic": [2, 9, 16, 21, 23, 26, 27, 32, 34, 35], "individu": [9, 16], "industri": 10, "ineffici": 28, "inequ": [3, 15], "infer": [23, 29], "infinit": 27, "info": [3, 9, 16, 17, 23, 33, 34], "inform": [8, 14, 16, 23, 28, 29, 33], "inher": 16, "inherit": 23, "initi": 31, "initil": 28, "inplac": 34, "input": [9, 12, 22, 23, 26, 27, 28, 29, 30, 31, 32, 33], "insert": [9, 17, 23, 29], "insid": [9, 16, 19, 23, 26, 28, 30], "inspir": 33, "instal": 8, "instanc": [23, 28, 31], "instanti": 28, "instead": [2, 14, 30, 31, 32, 34], "instruct": [10, 12], "instructor": [8, 12, 23], "int": [3, 14, 15, 17, 23, 26, 27, 29, 31, 32], "int16": 31, "int32": [31, 35], "int64": [31, 33, 34, 35], "int8": 31, "integ": [2, 3, 14, 15, 17, 19, 20, 22, 23, 28, 31, 32, 33, 35], "integer_var": 14, "integr": [11, 23, 29, 33], 
"intellig": 10, "intens": 10, "inter": [19, 21], "interact": [9, 11, 12, 14], "interchang": 23, "interdisciplinari": 29, "interest": 19, "interfac": [8, 9, 11, 28], "intermedi": [8, 32, 37], "intern": 33, "internat": 12, "interpet": 21, "interpret": [3, 8, 12], "intersect": 17, "interv": 31, "introduc": [7, 8, 9, 19, 21, 31, 33, 34, 37], "introduct": [16, 35, 37], "introductori": [8, 37], "introspect": 9, "intuit": 10, "invalid": [0, 2, 14, 24, 26, 27, 32, 34], "invalu": 8, "invers": [26, 27], "involv": 10, "io": [0, 9, 24, 33], "io_mod": [0, 24], "ipykernel_105905": 32, "ipykernel_105955": 34, "ipynb": 9, "ipython": 12, "iri": [33, 34, 36], "iris_df": 34, "is_label": [33, 34], "is_odd": 21, "is_read_request": [0, 24], "isinst": [0, 3, 14, 23, 24], "isn": [0, 23, 24], "issu": 32, "itali": 19, "item": [9, 16, 17, 19, 21, 23, 35], "iter": [17, 20, 23, 29, 32, 33], "its": [0, 2, 3, 8, 9, 10, 14, 15, 16, 17, 19, 21, 23, 24, 26, 27, 28, 29, 30, 31, 32, 33, 37], "itself": [2, 23, 28, 30, 31], "ix": 19, "i\u00f1igo": 16, "j": [9, 19, 21], "java": 10, "javascript": 10, "javi": [0, 16, 24], "javier": [12, 16, 23], "job": 11, "joe": 32, "john": [17, 28], "join": [14, 16], "jpg": [0, 24], "judgment": 8, "jupyt": [8, 11, 12, 31], "jupyter_notebook_cheatsheet_edureka": 9, "just": [3, 9, 12, 14, 15, 16, 17, 23, 26, 27, 28, 29, 30, 31, 32, 33], "k": [9, 21, 31], "kaggl": 10, "kaggle_survey_2022_respons": 10, "kb": 34, "keep": [2, 8, 17, 19, 21, 23, 26, 27, 34], "keepdim": 32, "kei": [2, 16, 17, 19, 21, 23, 33], "kenni": 28, "key_express": 21, "keyowrd": [26, 27], "keyworad": [7, 9], "keyword": [3, 15, 23, 26, 27, 28, 29, 32, 34, 35], "keywork": 23, "kg": 28, "kill": 16, "kind": [0, 12, 20, 21, 24, 26, 27, 28, 29, 30, 33], "kiwi": 19, "kiwisolv": [0, 24], "know": [2, 14, 19, 23, 28, 34], "knowledg": 29, "known": [10, 12, 19, 23, 32], "kumquat": 19, "kwarg": [0, 23, 24], "labels": 10, "lack": 28, "lambda": [3, 10, 14], "languag": [3, 8, 9, 11, 12, 19, 23, 28, 29, 33, 37], 
"languages_dat": 10, "larg": [21, 29, 32], "largest": [3, 15], "last": [0, 3, 14, 16, 17, 19, 21, 23, 24, 26, 27, 28, 31, 32, 34, 35, 36], "lastli": 23, "later": [3, 14, 17, 23, 28, 31, 32, 35], "latex": [9, 11], "launch": 12, "laundri": 29, "layer": 21, "layman": 14, "lazili": 21, "lazy_load": [0, 24], "lead": [16, 32, 33], "learn": [9, 10, 11, 14, 15, 16, 17, 19, 21, 23, 26, 27, 28, 29, 31, 32, 33, 34], "learnbyexampl": [17, 18], "least": [22, 23, 31], "left": [3, 9, 17, 32, 34], "legaci": [0, 24], "legacy_mod": [0, 24], "len": [16, 17, 22, 23, 34, 35], "length": [16, 22, 23, 34], "lengthi": 21, "less": [3, 10, 15, 19, 20], "lesser": 8, "lesson": [11, 12, 14, 15, 16, 17, 19, 21, 23, 26, 27, 28, 29, 31, 32, 33], "let": [9, 11, 12, 17, 19, 21, 23, 26, 27, 28, 29, 31, 32, 34], "letter": [2, 3, 14, 17, 18, 21], "level": [10, 32, 35], "leverag": 10, "lib": [0, 24], "librari": [8, 10, 23, 26, 27, 31, 32, 33, 37], "lifetim": 23, "like": [0, 2, 3, 8, 9, 10, 11, 12, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24, 28, 29, 31, 32, 33], "likewis": 16, "line": [0, 2, 3, 8, 10, 11, 12, 14, 16, 17, 19, 21, 23, 24, 26, 27, 28, 29, 31, 34, 36, 37], "linear": [9, 31], "linearli": 31, "linspac": [0, 24, 31], "lisp": 10, "list": [2, 3, 9, 14, 15, 16, 18, 19, 20, 23, 26, 27, 28, 29, 31, 32, 33, 34, 35], "list1": 17, "list2": 17, "list3": 17, "list_iter": 21, "list_str": 29, "liter": [3, 14, 15, 16], "literari": 29, "littl": 23, "live": [9, 11, 21], "ll": [8, 31], "llm": 29, "lo": 33, "load": [9, 12, 34], "load_dataset": 9, "loc": [31, 34], "local": [8, 9, 33], "localhost": 33, "locat": 14, "log": [26, 27], "logic": [8, 10, 34], "logreg": 23, "logspac": 31, "london": 29, "long": [8, 14, 20], "longer": 9, "look": [10, 11, 12, 19, 21, 23, 31, 32, 34, 35], "lookfor": 19, "loop": [12, 17, 20, 22, 23, 29, 31], "lose": 29, "lot": [3, 10], "lover": 33, "low_memori": 10, "lower": [10, 14, 16, 19, 21, 32, 35], "lowercas": [14, 21], "lowest": 23, "m": [3, 9, 15, 16, 21, 31, 32], "machin": [10, 11, 12, 
28], "made": [9, 32], "mai": [8, 10, 19, 21, 23, 26, 27, 28, 29, 31, 32], "main": [9, 10, 16, 31], "maintain": [8, 9, 23, 28], "major": 31, "make": [3, 8, 10, 11, 15, 18, 19, 20, 23, 28, 29, 31, 33], "maker": 29, "manag": [8, 10, 29, 31], "manage_plugin": [0, 24], "mango": 19, "mani": [3, 8, 9, 10, 12, 14, 15, 16, 19, 20, 23, 28, 31, 32, 33, 34], "manipul": [3, 8, 10, 15, 28, 32, 33], "manual": [21, 28], "manufactur": 28, "map": [9, 17, 28], "mapper": 33, "mardown": 9, "margin": 9, "mark": 31, "markdown": [7, 9, 11, 18], "mask": [0, 24, 32], "master": [33, 34], "match": [26, 27], "mate": [26, 27], "materi": 8, "math": [12, 29, 35], "mathemat": [3, 10, 12, 15, 31], "mathemt": 3, "matlab": [10, 19], "matplotlib": [0, 10, 24, 31], "matter": 23, "max": [0, 19, 24, 28, 32, 34, 35], "max_it": 20, "max_val": [19, 20], "maximum": [19, 20], "maximun": 32, "mayb": [19, 26, 27], "me": [10, 19, 28], "mea": 21, "mean": [0, 3, 9, 10, 12, 14, 15, 16, 17, 23, 24, 28, 31, 32, 33, 34, 35], "meaning": [23, 26, 27], "meas_mmhg": 21, "meas_mmhg_dl": 21, "measur": 21, "mechan": 28, "media": 9, "median": 34, "meet": 21, "membership": [3, 15], "memori": [3, 10, 14, 15, 21, 23, 31, 32, 33, 34], "mention": [12, 14, 16, 17, 21, 28, 33], "menu": 7, "menubar": 9, "messag": [12, 16, 19, 26, 27, 28], "met": [19, 21], "method": [8, 10, 18, 21, 23, 29, 32, 34], "metric": 21, "mg": 21, "microcosm": 3, "might": [2, 12, 19, 31, 33], "min": [0, 24, 32, 34, 35], "mind": [2, 21], "minim": 8, "minu": [17, 30], "miscellan": 37, "miss": [17, 23, 26, 27], "mistak": [26, 27], "mix": [16, 17, 21, 28, 34], "ml": 21, "mmhg": 21, "mmm": 28, "mod": 23, "mode": [0, 7, 12, 24], "model": [19, 21, 23, 29], "model_arch": 21, "modern": 10, "modif": 9, "modifi": [9, 14, 17, 28, 30, 32, 35], "modul": [8, 23, 24, 28, 32, 35, 37], "module1": 31, "module2": 31, "modulo": 20, "modulu": [3, 15], "moment": 31, "monitor": 11, "month": [17, 18], "montoya": 16, "mordisquito": 28, "more": [2, 3, 8, 10, 14, 15, 16, 17, 18, 21, 23, 
26, 27, 28, 29, 30, 33, 34, 37], "moreov": 10, "most": [0, 3, 8, 9, 10, 11, 14, 15, 16, 17, 19, 21, 23, 24, 26, 27, 28, 29, 31, 32, 33, 34, 35, 36, 37], "mostli": 29, "mous": 7, "move": [3, 16], "mtrand": 31, "mu": 31, "much": [8, 10, 21, 23, 28], "multi": [10, 31, 33], "multidimension": [31, 32, 35], "multipl": [3, 10, 11, 12, 14, 15, 17, 23, 26, 27, 29, 30, 31, 32], "multipli": [15, 17, 18, 30], "must": [0, 2, 3, 14, 17, 23, 24, 26, 27, 29, 31, 32, 33, 34], "mutabl": 10, "mwaskom": [33, 34], "my": [12, 16, 28, 34], "my_anim": 28, "my_arg": 23, "my_args2": 23, "my_args_dict": 23, "my_dict": 28, "my_dog": 28, "my_nam": 16, "my_rang": 21, "my_str": 28, "my_var": [3, 15], "myarr": [32, 35], "myit": 21, "mylist": 17, "myset": 21, "mystr": 16, "myvar": 14, "n": [3, 16, 23, 28, 29, 30, 31, 33, 36], "n_ob": 10, "naive_bay": 23, "name": [2, 8, 12, 16, 17, 18, 19, 22, 23, 26, 27, 28, 29, 31, 32, 33, 34, 36, 37], "nameerror": [3, 14, 23, 26, 27, 34, 36], "nameoftheclass": 28, "nan": [32, 34], "nanmean": 32, "nanstd": 32, "nanvar": 32, "narr": [9, 11], "nativ": 23, "natur": [10, 29, 33], "navig": 8, "nbconvert": 9, "ncsu": 33, "ndarrai": [0, 24, 32, 33, 35], "ndigit": 23, "ndim": [0, 24, 31, 35], "necessari": [23, 28, 32], "need": [3, 8, 9, 10, 14, 16, 17, 21, 23, 26, 27, 28, 29, 30, 31, 32, 33, 34], "neg": [14, 16, 23, 26, 27, 32], "negat": 32, "negate_coord": 23, "neglig": 10, "nest": [9, 17, 32, 35], "networkx": [0, 24], "nevertheless": 31, "new": [2, 8, 9, 10, 14, 16, 21, 23, 28, 29, 31, 32, 33, 34, 35], "new_anim": 28, "new_foo": 35, "new_game_and_solut": 31, "new_sudoku_gam": 31, "new_sudoku_solut": 31, "newer": 28, "newlin": 23, "next": [3, 12, 16, 17, 21, 23, 28, 29], "nice": [16, 32, 35], "nine": 31, "nlp": 21, "nobel": 8, "non": [17, 23, 32, 33, 34], "none": [0, 3, 14, 22, 23, 24, 31, 32], "nonloc": [3, 14], "nonneg": 22, "normal": [9, 26, 27, 31, 34], "notabl": [11, 33], "notat": [32, 34, 35], "note": [3, 8, 10, 15, 21, 28, 29, 31, 32, 33, 34], "notebook": [8, 11, 
12, 31], "noth": [16, 19, 23, 26, 27, 29], "notic": 19, "notion": 21, "noun": [3, 15], "now": [2, 7, 9, 12, 16, 17, 19, 20, 23, 26, 27, 28, 29, 30, 32, 34], "np": [0, 24, 31, 32, 34, 35], "null": [33, 34], "num": [23, 26, 27], "num1": 14, "num2": 14, "number": [2, 3, 7, 9, 10, 14, 16, 17, 18, 19, 20, 23, 26, 27, 29, 30, 31, 32, 33], "numbers2": 17, "numbers4": 17, "numer": [2, 15, 17, 18, 19, 22, 23, 31, 34], "numeric_onli": 34, "numeric_str": [31, 35], "numpi": [8, 33, 34, 35, 37], "o": [0, 3, 16, 24, 29], "object": [9, 10, 14, 15, 16, 17, 19, 21, 23, 26, 27, 29, 32, 33, 34, 37], "object_": 31, "obs1": 33, "obs2": 33, "obs3": [33, 34], "obs4": 33, "obs_id": 33, "observ": [30, 32, 33, 34], "obtain": [19, 34], "obviou": 35, "obvious": 28, "occur": [23, 26, 27, 30], "odd": [19, 20, 21], "offer": [3, 10, 15, 31], "offic": 8, "offset": 9, "often": [8, 10, 17, 23, 29, 31], "ogi\u00f1i": 16, "ok": 28, "old": 28, "old_valu": [32, 35], "omit": [8, 16, 32, 35], "onc": [0, 17, 23, 24, 28, 29, 31, 33], "ondemand": 8, "ondex": 33, "one": [7, 8, 9, 16, 17, 18, 20, 21, 22, 23, 26, 27, 28, 31, 32, 33, 34, 35], "one_to_ten": 31, "ones": [0, 3, 15, 16, 21, 23, 24, 28, 31, 34], "onli": [2, 3, 14, 16, 17, 21, 23, 26, 27, 28, 29, 31, 32, 33, 34, 35], "onlin": 33, "ood": 11, "oof": 35, "open": 8, "oper": [8, 12, 14, 19, 20, 23, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35], "operand": [17, 26, 27], "optim": [3, 9, 14, 31], "option": [14, 16, 21, 23, 26, 27, 31, 32, 33], "orang": 2, "order": [9, 17, 19, 21, 23, 28, 31, 32, 33], "org": [9, 10, 12, 17, 18, 23, 26, 27, 33, 35], "organ": [8, 9, 10, 17, 31, 37], "orient": [8, 10, 37], "origin": [17, 23, 28, 31, 32, 33], "other": [0, 3, 8, 9, 10, 15, 16, 22, 23, 24, 26, 27, 28, 29, 31, 32, 35], "otherwis": [3, 14, 19, 20, 23, 30, 32, 33], "oti": 16, "ouput": 14, "our": [8, 11, 12, 16, 23, 28, 29, 31, 32, 33], "out": [2, 3, 17, 18, 21, 22, 23, 31, 32], "outcom": 29, "output": [9, 12, 14, 17, 21, 22, 23, 29, 31, 32], "outsid": [9, 14, 22, 23, 28], 
"over": [10, 19, 20, 21, 23, 29, 32, 34, 35], "overal": 32, "overflowerror": 23, "overlai": 34, "overrid": 28, "overwrit": [17, 29], "own": [2, 8, 9, 10, 28, 31, 33], "p": [3, 15], "packag": [8, 10, 23, 32, 33, 35, 37], "package_nam": 31, "packet": 28, "page": 9, "pair": [2, 17, 21, 23, 31], "palett": 9, "panda": [8, 10, 31, 32, 34, 37], "paradigm": 28, "parallel": 19, "paramet": [28, 31, 32, 33, 34], "parametr": 23, "parent": 28, "parenthes": [3, 12, 15, 17, 19, 23, 28], "parenthesi": [12, 14, 19, 23, 28], "pari": 29, "pars": [0, 24, 32, 35], "part": [2, 16, 23, 26, 27, 29, 30], "partial": 29, "particip": 8, "particular": [14, 16, 17, 28, 32, 34], "particularli": [10, 17], "pascal": 10, "pass": [2, 3, 14, 16, 17, 19, 22, 26, 27, 28, 31, 32, 33, 34], "pastel": 9, "path": [0, 24, 29, 33], "pattern": 9, "pd": [10, 33, 34], "pdf": 9, "peanut": 17, "peer": 8, "peform": 34, "peopl": [10, 23], "per": 10, "percentag": 34, "perfectli": [8, 10], "perform": [9, 14, 15, 16, 17, 23, 26, 27, 28, 29, 31, 32, 33, 35], "perhap": 31, "person": [8, 19, 28], "petal_length": [33, 34], "petal_width": [33, 34], "phonelist": 17, "photo": [0, 24], "photo_mask": [0, 24], "photo_sin": [0, 24], "phrase": 28, "physicist": 31, "piec": [12, 21, 23, 26, 27], "pillow": [0, 24], "pip": [0, 24, 31], "pitt": 19, "pixel": [0, 24], "place": [17, 21, 32], "placehold": [26, 27], "plai": 33, "plan": [26, 27], "platform": 12, "pleas": [8, 31], "plot": [9, 10, 23, 36], "plt": [0, 10, 24], "plugin": [0, 24], "plugin_arg": [0, 24], "po": 17, "point": [10, 11, 14, 17, 31, 32, 33], "polici": 8, "popul": [2, 20, 23, 31], "popular": 8, "portabl": 11, "posit": [2, 14, 17, 19, 21, 23, 26, 27, 32, 34], "possibl": [9, 23, 29, 31], "post": 8, "post0": [0, 24], "potenti": 8, "pow": 23, "power": [10, 16, 33], "pr": 23, "practic": [0, 8, 24], "pre": [0, 24, 31], "preced": [3, 15, 23], "precis": [21, 23, 32], "predefin": 28, "predominantli": 10, "prefer": 14, "prefix": 16, "prepar": [8, 16, 33], "press": 9, "pretti": 28, 
"pretzel": 28, "prevent": [26, 27], "previou": [8, 10, 12, 15, 16, 17, 19, 21, 28, 29, 30, 31, 32], "primari": 8, "primarili": [8, 10, 34, 37], "primit": 17, "princess": 21, "principl": 10, "print": [0, 2, 3, 12, 14, 15, 16, 17, 19, 20, 21, 22, 23, 24, 26, 27, 28, 29, 30, 31, 32, 33, 35], "prize": 8, "probabl": 33, "problem": [8, 10, 30, 31], "procedur": [10, 28], "process": [8, 9, 10, 31], "prod": [0, 24], "produc": [19, 21, 22, 23], "product": [10, 15, 23, 30], "profession": 10, "program": [3, 8, 11, 19, 20, 23, 29, 30, 33, 37], "progress": 8, "project": 29, "prolog": 10, "promot": 10, "prompt": 9, "proper": 29, "properli": [22, 31], "properti": [17, 28, 34], "propos": 29, "prorivd": 18, "protocol": [23, 31], "prototyp": 10, "provid": [8, 10, 17, 21, 23, 26, 27, 28, 29, 31, 32, 33, 37], "public": 10, "pull": [17, 18], "purpos": [3, 10, 14, 22, 23, 28, 29], "put": [3, 15, 21, 32, 35], "py": [0, 24, 31, 32, 34], "pydata": [9, 10, 33], "pylab": 10, "pypars": [0, 24], "pyplot": [0, 24], "pyspark": 21, "python": [0, 2, 3, 8, 10, 11, 14, 15, 16, 17, 18, 19, 20, 21, 23, 24, 26, 27, 28, 29, 30, 31, 32, 33, 37], "python3": [0, 24], "pythonist": 28, "pytorch": 10, "pywavelet": [0, 24], "q12": 10, "question": [3, 8, 10, 15, 17, 18, 29], "quick": [10, 11, 31], "quicker": 21, "quietli": 8, "quit": [2, 19, 20, 21, 28, 29], "quot": [16, 17, 23], "quotat": 3, "r": [8, 10, 11, 17, 19, 23, 29, 32, 33, 37], "rais": [0, 3, 14, 23, 24, 32], "randint": 35, "random": [31, 32, 35], "random_arrai": 31, "randomst": 31, "rang": [14, 19, 29, 31, 32], "rangeindex": [33, 34], "rangi": 17, "rapidli": 10, "rapunzel": 21, "rasero": 16, "rather": [10, 17], "ratio": 23, "ration": 23, "raw": [9, 33, 34], "re": [8, 10, 17, 23, 28], "reach": [19, 21, 23, 30], "read": [0, 3, 8, 10, 12, 21, 23, 24, 26, 27, 33, 37], "read_csv": [10, 33, 34], "readabl": [10, 23], "readlin": 29, "readthedoc": 9, "real": [23, 31], "realli": [26, 27, 28], "reason": [26, 27], "reassign": [3, 14, 16, 17, 23], "recal": 21, 
"recent": [0, 3, 14, 16, 17, 19, 21, 23, 24, 26, 27, 28, 34, 36], "recip": 28, "recogn": 23, "recommend": 9, "record": 34, "recurs": 30, "redefin": [28, 33], "reduc": 32, "ref": [31, 32, 33, 34], "refer": [2, 3, 10, 12, 14, 15, 16, 23, 28, 31, 32, 33, 34, 35], "referenc": [0, 23, 24, 31], "regardless": 26, "regex": 10, "regress": 21, "regular": 10, "reiter": 28, "rel": 9, "relat": 32, "relationship": 3, "remain": 10, "remaind": [3, 15, 21], "rememb": [3, 10, 14, 16, 18, 21, 28, 32], "remov": [16, 23, 29, 35], "renam": 33, "repeat": [14, 16, 17, 19], "repeatedli": 31, "repetit": [3, 16, 30], "repl": 12, "replac": [0, 16, 24, 28, 34], "repli": 12, "repr": 23, "repres": [17, 21, 23, 31, 32, 33, 34], "represent": [9, 14, 23, 29], "reproduc": 11, "request": [0, 23, 24, 31], "requir": [0, 8, 10, 12, 19, 22, 23, 24, 33], "rerun": 31, "research": [10, 11, 29], "reserv": [3, 34], "reserverd": 34, "reset": 9, "reset_index": 10, "reshap": 31, "reshaped_gam": 31, "resolut": 23, "resourc": [8, 10, 29], "respect": [10, 15, 17, 34], "respons": 10, "rest": 16, "restrict": [3, 14], "result": [0, 3, 8, 10, 12, 14, 15, 16, 17, 19, 21, 23, 24, 26, 27, 29, 30, 31, 32, 34], "retain": 21, "retriev": [21, 33], "retriv": 31, "return": [0, 3, 9, 14, 15, 16, 17, 21, 22, 24, 26, 27, 29, 30, 31, 32, 33, 34, 35], "reus": 28, "reusabl": [10, 23, 28], "revers": [16, 32, 35], "reward": 8, "rhetor": 29, "ri": [0, 24], "rich": [9, 11], "ride": 8, "right": [3, 8, 17, 19, 22, 23, 29, 34], "risk": [8, 29], "rivanna": [8, 11], "rn": 17, "rng": 17, "rnn": 21, "road": 28, "robin": 17, "role": 33, "rom": 9, "room": 21, "rot": 36, "rough": 9, "round": 23, "row": [0, 24, 31, 32, 33, 34, 35], "rstudio": 11, "rubi": 10, "rule": [2, 3, 9, 10, 14, 28, 32], "run": [7, 12, 20, 26, 27, 29, 31], "runtimeerror": [0, 24], "runtimewarn": 32, "s1": 33, "s2": 33, "s3": 33, "s4": [31, 33], "s5": 33, "s6": 33, "sai": [12, 16, 23, 28, 31], "said": 10, "salut": 28, "sam": 17, "samantha": 17, "same": [3, 7, 9, 14, 15, 16, 17, 
21, 23, 26, 27, 28, 30, 31, 32, 33, 34], "sampl": 31, "sarah": 17, "satisfi": [0, 24, 32], "save": [9, 14, 16, 23, 28, 31, 32, 34, 35], "saw": [15, 16, 17], "scala": 10, "scalabl": 28, "scalar": [17, 18, 32], "scalat": 28, "scale": 31, "scenario": 32, "scheme": [17, 28, 33], "scienc": [8, 11, 29, 31, 32, 33, 37], "scientif": [11, 31], "scientist": [8, 37], "scikit": [0, 10, 24, 31], "scipi": [0, 24, 31, 35], "score": 32, "scratch": 28, "screen": 12, "script": [10, 12], "seaborn": [9, 10, 33, 34], "search": [0, 24], "searchin": 19, "second": [14, 16, 17, 23, 26, 27, 28, 30, 32], "secondv": 2, "secong": 16, "section": [8, 9, 26, 27], "see": [2, 8, 10, 14, 15, 16, 17, 18, 19, 21, 23, 26, 27, 28, 30, 31, 32, 33, 34], "seem": [26, 27], "seen": [12, 23, 30], "select": [9, 16, 28, 32, 33], "selector": 34, "self": [0, 9, 23, 24], "semest": [8, 12, 21], "sens": [3, 15], "sensibl": [22, 23], "sensit": [2, 3, 14], "sep": [23, 33], "sepal": 34, "sepal_length": [33, 34, 36], "sepal_width": [33, 34], "separ": [9, 10, 14, 17, 28, 29, 32, 33, 35], "seq": 19, "sequenc": [9, 16, 17, 21, 30], "sequenci": 31, "sequenti": [3, 15, 33, 37], "seri": [20, 21, 29, 30, 34], "series_dict": 33, "serv": [8, 31], "server": 11, "servic": 8, "session": [0, 8, 24], "set": [0, 3, 8, 9, 10, 11, 15, 19, 20, 23, 24, 28, 31, 32, 33, 34], "set1": 17, "set2": 17, "set_them": 9, "setosa": [33, 34], "sever": [8, 14, 16, 17, 19, 23, 31], "sex": 33, "shallow": 33, "shape": [0, 10, 24, 31, 32, 34, 35], "share": [9, 11, 21, 33], "sheet": 9, "shell": 12, "shift": 9, "short": [2, 3, 15, 23, 31], "shortchang": 8, "shortcut": [7, 9], "shorter": 21, "shot": 8, "should": [2, 3, 8, 9, 14, 19, 20, 21, 23, 26, 27, 28, 30, 31, 34, 35], "show": [9, 14, 22, 23, 28, 32, 34, 35], "show_arg_expans": 23, "show_entri": 23, "show_result": 23, "show_scop": 23, "shown": [7, 9, 23], "si": 16, "side": 34, "sigma": 31, "sign": 23, "signal": 31, "signatur": 23, "signific": 23, "silenc": 34, "silo": 29, "similar": [10, 16, 17, 31, 33], 
"similarli": [9, 16, 17, 26, 27, 32, 33, 35], "simpl": [2, 3, 9, 14, 23, 29, 32, 34], "simplest": 33, "simpli": [12, 23, 31], "simplifi": [9, 30, 32, 35], "sin": [0, 24, 32, 35], "sinc": [8, 17, 23, 26, 27, 32, 34], "singl": [3, 14, 15, 16, 17, 19, 23, 29, 31, 32, 33, 34, 35], "sir": 17, "sit": 8, "site": [0, 3, 8, 15, 24], "situat": [21, 26, 27], "six": [0, 24, 31], "size": [10, 23, 31, 32], "skill": [8, 23, 37], "skimag": [0, 24], "skip": [19, 26, 27], "slice": [18, 33, 34], "small": [8, 22], "smaller": 30, "smoker": 9, "sn": [9, 10], "snake": 14, "snoopi": 17, "so": [2, 3, 8, 9, 10, 12, 14, 15, 16, 17, 19, 21, 23, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35], "socioeconom": 29, "softwar": [8, 10, 28], "solut": [10, 31], "solv": [10, 30, 31], "some": [3, 7, 8, 9, 10, 12, 14, 15, 16, 20, 23, 28, 29, 31, 32, 34, 35, 37], "somebodi": 23, "someth": [19, 20, 26, 27], "sometim": [2, 8, 19, 26, 27, 31, 32], "soon": [12, 15, 16, 26, 27], "sort": [0, 2, 17, 24], "sort_index": 36, "sort_valu": 34, "sourc": [0, 8, 10, 11, 12, 24, 28], "space": [3, 14, 16, 19, 23, 31, 33], "spain": 19, "speci": [33, 34], "special": [15, 20, 28, 31, 32], "specif": [2, 3, 10, 14, 16, 21, 23, 26, 27, 28, 29, 31, 32, 34, 35], "specifi": [3, 14, 16, 17, 21, 23, 26, 27, 28, 31, 32, 33, 34], "speed": 10, "spell": 10, "spend": 8, "split": [16, 17, 33], "spoken": 8, "spring": 21, "sql": [10, 17], "sqrt": 32, "squar": [22, 23], "square_arg": 22, "st": 21, "stabl": [10, 33, 35], "stack": 31, "stakehold": 29, "standard": [31, 32, 34, 35], "standard_norm": [31, 32], "start": [2, 3, 8, 9, 14, 15, 16, 17, 19, 21, 23, 28, 30, 31, 32, 33, 35, 37], "startswith": [3, 16], "stat": 33, "state": [9, 10, 28, 31], "statement": [0, 19, 20, 23, 24, 26, 27, 29, 31], "static": 23, "statist": [0, 10, 24, 29], "statsmodel": 10, "std": [0, 24, 32, 34, 35], "stdev": 31, "stdout": 23, "stem": 33, "step": [11, 15, 16, 17, 22, 30, 33], "stick": 28, "still": [14, 28], "stop": [16, 17, 21, 26, 27, 32, 33], "stop_word": 21, 
"stopiter": 21, "storag": 31, "store": [3, 10, 14, 16, 17, 19, 20, 21, 28, 29, 31, 33], "str": [3, 14, 16, 19, 22, 26, 27, 29, 33], "str_": 31, "straightforward": 32, "stream": 23, "strictli": 28, "string": [2, 14, 17, 19, 20, 22, 23, 26, 27, 29, 31, 33, 34], "string1": [3, 16], "string2": [3, 16], "string_": [31, 35], "string_var": [3, 14, 16], "strip": 16, "strn": 21, "structru": 14, "structur": [2, 3, 8, 10, 14, 15, 16, 21, 28, 31, 32, 33, 37], "student": [8, 32, 37], "studi": [14, 16, 19, 21, 23], "style": [9, 10, 31], "sub": 32, "subarrai": 32, "subclass": 23, "subject": [3, 15, 17], "subject_id": [2, 3], "submit": 11, "subscript": 17, "subset": [3, 31, 34], "substr": 16, "substract": 12, "subtract": [3, 15, 22, 32], "successfulli": 8, "sucess": [26, 27], "sudoku": 31, "sudoku_arrai": 31, "sudoku_gam": 31, "sudoku_solut": 31, "suggest": [16, 23], "suitabl": [10, 23], "sum": [0, 10, 14, 15, 24, 30, 32, 33, 34], "suma": 23, "summar": 21, "summari": [3, 15], "super": 19, "superior": 10, "support": [3, 8, 11, 15, 16, 17, 21, 23, 26, 27, 31, 33], "suppos": 28, "sure": [9, 20, 29], "survei": 10, "survey_data": 10, "switch": [0, 24], "sy": 23, "symbol": [14, 15, 17], "syntax": [2, 3, 10, 15, 21, 26, 27, 28, 29, 34], "syntaxerror": [2, 14, 19, 23, 26, 27, 34], "system": [0, 10, 23, 24, 28, 29, 31], "t": [0, 8, 9, 16, 17, 19, 21, 23, 24, 28, 29, 31, 33, 34], "tab": [9, 19, 33, 34], "tabl": [17, 31, 32, 33], "tabular": [29, 33], "tail": 34, "tailor": [8, 31], "take": [2, 8, 12, 14, 17, 19, 21, 22, 23, 26, 28, 29, 30, 31, 32, 34, 35], "taken": 32, "talk": 28, "tall": 16, "tan": 35, "target": 28, "task": [8, 10, 23, 28, 30], "team": 23, "technic": [8, 23, 31], "techniqu": [8, 10, 30, 37], "technologi": [28, 29], "tell": 19, "templat": 28, "ten": 31, "tendenc": 34, "tensorflow": 10, "term": [10, 14, 23, 31, 33], "termin": [28, 31, 37], "test": [3, 15, 20, 22, 23, 28, 29, 30, 32], "test2": 29, "test3": 29, "test4": 29, "test5": 29, "test6": 29, "text": [7, 9, 11, 16, 22, 
23, 29, 33], "textiowrapp": 29, "than": [3, 9, 10, 15, 17, 19, 20, 21, 23, 32, 33], "thei": [3, 10, 11, 14, 15, 17, 19, 21, 23, 28, 29, 31, 33, 34], "them": [2, 8, 9, 11, 12, 14, 15, 16, 17, 19, 21, 23, 26, 27, 28, 31, 32, 33, 34], "theori": 23, "therefor": [3, 15, 17, 19, 23, 26, 27, 28, 33], "thi": [0, 2, 3, 9, 10, 11, 12, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 37], "thing": [3, 9, 15, 16, 19, 21, 23, 26, 27, 28], "think": [8, 10, 28, 29, 32, 33], "third": [26, 27, 32], "this_set": 19, "this_tupl": 19, "this_var": 14, "thisvar": 14, "thorugh": 9, "those": [2, 10, 16, 31, 34], "though": [14, 21, 23, 26, 27, 34], "thought": [2, 28], "thr": 23, "three": [0, 2, 3, 9, 15, 23, 24, 29, 31, 32, 34, 35], "thresh": 23, "threshold": 23, "through": [3, 8, 9, 10, 12, 14, 19, 20, 21, 23, 28, 29, 31, 32, 33], "throughout": [8, 12, 14, 17, 28, 31], "throuhg": 19, "throw": 23, "thu": 9, "tick": 9, "tick_param": 10, "tidyvers": [8, 10], "tifffil": [0, 24], "tild": 32, "time": [8, 9, 10, 17, 21, 23, 26, 27, 28, 29, 31, 32, 35], "tip": 9, "titl": 10, "tmp": [32, 34], "to_byt": 23, "to_fram": 36, "togeth": [14, 28, 29], "tok": 21, "token": 21, "tom": 17, "too": [16, 23, 26, 27, 28, 29, 34], "tool": [8, 11, 31, 33, 37], "toolbar": 9, "top": [31, 32, 34, 35], "topic": [2, 3, 35, 37], "total": [2, 33, 34], "total_bil": 9, "total_volum": [2, 3], "traceback": [0, 3, 14, 16, 17, 19, 21, 23, 24, 26, 27, 28, 34, 36], "track": [19, 21], "traffic": 29, "trail": 16, "transform": [21, 32], "translat": 10, "transpar": 8, "transpos": [0, 24, 31], "treat": [23, 29, 31], "tree": [32, 35], "tri": [26, 27], "trim": 9, "tripl": 23, "troubl": 23, "troubleshoot": 8, "true": [0, 3, 9, 10, 14, 15, 16, 17, 19, 21, 22, 23, 24, 26, 27, 28, 31, 32, 33, 34], "truncat": [23, 29], "try": [2, 3, 7, 9, 14, 19, 20, 22, 23, 28, 29, 31], "tup_metr": 21, "tupl": [2, 14, 16, 18, 19, 20, 23, 26, 27, 31, 32], "tuple0": 17, "tuple1": 17, "turn": [31, 35], "tutori": 8, "two": [2, 3, 8, 
9, 10, 14, 15, 16, 17, 22, 23, 26, 27, 29, 30, 31, 32, 33, 34, 35, 37], "txt": [22, 29, 33], "type": [0, 2, 7, 8, 9, 10, 12, 15, 16, 17, 21, 23, 24, 26, 27, 28, 29, 32, 33, 34], "type1": 14, "type2": 14, "type3": 14, "typeerror": [16, 17, 23, 26, 27, 29], "typic": [10, 14, 28, 31, 32, 34], "u": [21, 23, 28, 29], "ufunc": 32, "uhoh": 23, "unboundlocalerror": 23, "uncomfort": 8, "unconnect": 29, "underscor": [2, 3, 14], "understand": [8, 10, 16, 19, 21, 23, 31, 33, 37], "undoubtedli": 8, "unend": 19, "unexpect": [9, 23], "unformat": 9, "unhandl": [26, 27], "uniniti": 31, "union": 17, "uniqu": [3, 14, 17, 28, 34], "unit": 21, "units1": 21, "units2": 21, "univers": 32, "unknown": 8, "unless": [10, 26, 27, 28], "unlik": [10, 28], "unnecessarili": 32, "unord": [17, 19], "unpack": 19, "unprepar": 8, "unspecifi": 23, "unsupport": [17, 26, 27], "unsur": 8, "until": [19, 28, 30], "up": [8, 9, 16, 26, 27], "updat": [3, 15, 17, 23, 29, 32, 33, 35], "upload": 9, "upon": 35, "upper": [16, 21, 28], "uppercas": [14, 21], "uri": [0, 24], "url": 33, "us": [0, 2, 3, 7, 8, 9, 10, 11, 12, 14, 15, 16, 18, 20, 21, 22, 23, 24, 26, 27, 28, 29, 30, 31, 33, 34, 35, 37], "usa": 19, "usabl": 23, "usag": [9, 33, 34], "user": [0, 9, 11, 24, 31, 33], "user_guid": 33, "usual": [0, 9, 10, 17, 21, 23, 24, 26, 27, 31], "utf": 29, "util": [10, 19], "uva": 8, "v": [9, 19, 21], "v2": [0, 24], "vagu": [26, 27], "val": [19, 20, 21, 22, 23], "valencia": 19, "valid": [23, 33, 34], "vals_greater_than_or_equal_to_threshold": 23, "valu": [0, 2, 3, 14, 15, 19, 20, 21, 22, 24, 28, 29, 31, 33, 34], "valuabl": 11, "value_count": [10, 34, 36], "value_express": 21, "valueerror": [0, 19, 24, 26, 27], "var": [0, 19, 21, 22, 24, 32, 33], "var_float": [3, 14], "var_int": [3, 14], "var_str": [3, 14], "vari": 32, "variabl": [15, 16, 17, 19, 20, 21, 22, 26, 27, 31, 32, 33, 34, 35, 37], "varieti": 10, "variou": [8, 10, 21, 34], "ve": [28, 30, 31], "vector": [31, 32], "verb": [3, 15], "veri": [3, 10, 15, 21, 33, 34], 
"verifi": [22, 23], "verify_string_length": 22, "versatil": 10, "versicolor": 34, "version": [12, 23, 34, 35], "versionad": [31, 32], "via": [11, 31], "video": [9, 10], "view": [3, 14, 32], "virginia": [3, 11, 14], "virginica": [33, 34], "visibl": [9, 23], "visit": 9, "visual": [8, 10, 11, 21, 29, 32, 33, 35], "vital": 8, "vowel": 21, "w": 29, "w3school": [3, 15], "wa": [0, 10, 17, 21, 23, 24, 26, 27, 28, 29, 31], "wahoo": 3, "wai": [2, 3, 8, 9, 10, 14, 15, 16, 17, 19, 21, 23, 26, 27, 28, 29, 31, 32, 33, 35], "wait": [26, 27], "walk": 28, "want": [9, 12, 14, 16, 17, 19, 23, 26, 27, 28, 29, 31, 32], "warn": [16, 28, 34], "wd": 21, "we": [0, 2, 3, 8, 10, 11, 12, 14, 15, 16, 17, 19, 20, 21, 23, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34], "weather": 29, "web": [9, 10, 11], "week": [2, 3, 8, 17], "weight": [28, 32], "welcom": [8, 9], "well": [8, 10, 21, 23, 37], "went": [26, 27], "were": [3, 20, 21, 23, 33], "what": [0, 2, 3, 8, 12, 15, 17, 19, 20, 21, 23, 24, 26, 27, 29, 30, 32, 34], "whatev": [28, 34], "when": [0, 3, 9, 12, 14, 15, 16, 17, 19, 20, 21, 24, 26, 27, 28, 29, 30, 31, 33, 34, 35], "where": [0, 3, 8, 10, 14, 17, 21, 23, 24, 26, 27, 28, 29, 30, 32, 33], "wherea": [16, 21, 23, 33], "whether": [9, 16, 19, 23, 26, 27, 28, 31, 32], "which": [2, 8, 9, 10, 11, 12, 14, 16, 17, 19, 21, 23, 26, 27, 28, 29, 31, 32, 33, 34, 35], "while": [3, 8, 10, 14, 20, 21, 23, 28, 33], "white": [9, 14], "whitespac": 16, "who": [8, 32], "whole": [9, 16, 28, 32], "whose": [23, 29, 31, 32, 35], "why": [10, 19, 21, 23, 26, 27, 33], "wide": [10, 11, 33], "widget": 9, "wiki": 35, "wikipedia": 35, "wil": 21, "wild": 28, "wise": 32, "wish": 23, "within": [2, 8, 11, 14, 16, 20, 21, 23, 26, 27, 28, 30, 31, 32, 35], "without": [9, 12, 14, 17, 21, 28, 29], "won": 23, "wonder": 12, "woodstock": 17, "woof": 28, "word": [14, 15, 19, 21, 23, 28, 34], "work": [8, 10, 11, 16, 17, 19, 20, 21, 22, 23, 26, 27, 28, 29, 30, 31, 33, 34, 37], "workflow": 9, "workhors": 35, "world": [3, 12, 31], "worri": [23, 
28, 29], "would": [2, 9, 10, 14, 16, 17, 23, 26, 27, 29, 31], "wow": 28, "wp": 9, "wrangl": 29, "wrap": [0, 24], "wrapper": 32, "write": [0, 8, 9, 10, 11, 12, 14, 18, 20, 21, 22, 23, 24, 26, 28, 37], "writelin": 29, "writen": 12, "written": [10, 23, 29], "wrong": [8, 26, 27], "www": [9, 10, 17, 18, 35], "www4": 33, "x": [0, 2, 3, 9, 10, 14, 15, 17, 19, 22, 23, 24, 26, 27, 31, 32, 33, 34, 35], "x1": 23, "x_plus_i": 34, "xlabel": 10, "xx": [19, 20], "y": [2, 3, 9, 10, 14, 15, 16, 17, 23, 31, 33, 34], "y1": 23, "ye": [17, 28], "year": [26, 27, 28], "yield": [3, 12, 14, 17, 21], "ylabel": 10, "ym": 16, "york": [10, 33], "you": [2, 3, 9, 10, 11, 12, 14, 15, 16, 17, 19, 21, 22, 23, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35], "your": [0, 2, 3, 8, 9, 10, 14, 15, 16, 17, 18, 19, 21, 23, 24, 26, 27, 28, 29, 31, 32, 33], "yourself": [8, 26, 27, 28, 31], "z": [0, 2, 3, 9, 14, 15, 17, 19, 20, 23, 24, 33], "zero": [16, 17, 26, 27, 31, 32, 35], "zero_arrai": [31, 35], "zero_int_arrai": 35, "zerodivisionerror": [26, 27], "zeros_lik": 31, "zip": 23, "\u00f1g": 16, "\u00f1igo": 16}, "titles": ["Installing & Importing Packages", "Getting started", "Metadata", "Metadata", "<no title>", "<no title>", "<no title>", "Practice", "Introduction", "Jupyter Notebooks", "Brief introduction to programming languages", "Tech Stack", "Your first Python program!", "Python (Beginner)", "Variables and data types", "Operators and Expressions", "Strings", "Data Structures", "Data Structures Exercises", "Control Structures", "<no title>", "Iterables and Iterators", "<no title>", "Functions", "Installing & Importing Packages", "Python (Intermediate)", "Errors and Exceptions", "Errors and Exceptions", "Introduction to object-oriented programming (OOP)", "Reading and Writing Files", "<no title>", "NumPy (Part I)", "NumPy (Part II)", "Introduction to Pandas", "PandasII: Exploration and Manipulation", "PREREQUISITES", "<no title>", "Welcome to DS-1002"], "titleterms": {"1002": 37, "By": 34, "The": 31, "Will": 
8, "ad": [17, 26, 27], "addit": 34, "alias": [0, 24, 31], "an": 33, "ar": [0, 8, 24], "argument": [2, 23], "arithmet": 15, "arrai": [0, 24, 31, 32, 35], "attribut": [28, 31, 33], "axi": 33, "basic": [0, 9, 24, 32, 35], "beginn": 13, "best": [26, 27], "block": [26, 27], "boolean": [32, 34], "break": 19, "brief": [10, 31], "built": [19, 23], "calcul": [32, 35], "call": 23, "can": 19, "canva": 11, "cell": 9, "check": 16, "class": 28, "code": 9, "column": 34, "command": 9, "common": 31, "comparison": [3, 15], "compil": 10, "compon": 9, "comprehens": 21, "concept": 35, "condit": 19, "construct": 17, "continu": 19, "control": 19, "convert": [3, 14], "cours": 8, "creat": [23, 31, 33], "current": 19, "d": 37, "data": [0, 3, 10, 14, 17, 18, 24, 31, 33, 34, 35], "datafram": 33, "deal": 34, "default": 23, "defin": 28, "delet": 34, "dictionari": [17, 21], "docstr": 23, "document": 9, "drop": [32, 35], "dropna": 34, "edit": 9, "editor": 9, "elif": 19, "els": [19, 26, 27], "entri": 17, "enumer": 19, "error": [26, 27], "exampl": 21, "except": [26, 27], "excersis": [26, 27, 28, 29], "exercis": [14, 15, 16, 17, 18, 19, 21, 23, 31, 32, 33], "exit": 19, "explor": 34, "express": [3, 15], "fanci": 32, "file": 29, "fillna": 34, "filter": 34, "final": [26, 27], "first": 12, "format": 16, "frame": 33, "from": 29, "function": [0, 19, 23, 24], "gener": 21, "get": 1, "global": 23, "good": 23, "guidelin": 23, "handl": [26, 27], "how": [8, 33], "i": [14, 16, 28, 31, 33], "id": 3, "ident": 15, "ii": 32, "imag": [0, 24], "immut": 16, "import": [0, 24, 31, 33], "indent": 19, "index": [2, 16, 17, 32, 34], "inherit": 28, "initi": 28, "insert": [32, 35], "inspect": 34, "instal": [0, 24, 31], "intermedi": 25, "interpret": 10, "introduct": [8, 9, 10, 19, 21, 23, 26, 27, 28, 29, 31, 33], "iter": [19, 21], "its": 34, "jupyt": 9, "jupyterlab": 11, "kernel": 9, "keyboard": 9, "keyword": 14, "know": 8, "label": [33, 34], "languag": 10, "learn": 8, "liner": 19, "list": [17, 21], "local": 23, "logic": [3, 
15], "loop": [19, 21], "manipul": [34, 35], "mask": 34, "membership": 16, "menu": 9, "metadata": [2, 3], "method": [16, 17, 28, 31, 33], "miss": 34, "modal": 9, "mode": [9, 29], "modul": 31, "more": [9, 32], "mous": 9, "multipl": 19, "mutabl": 17, "name": [3, 14], "navig": 9, "ndarrai": 31, "nest": 21, "new": 17, "note": 17, "notebook": 9, "numer": [0, 3, 24], "numpi": [0, 24, 31, 32], "object": [3, 8, 28, 31, 35], "ondemand": 11, "one": 19, "oop": 28, "open": [11, 29], "oper": [3, 15, 16, 17], "orient": 28, "overview": 33, "pack": 23, "packag": [0, 24, 31], "panda": 33, "pandasii": 34, "paradigm": 10, "paramet": 23, "part": [31, 32], "pass": 23, "practic": [7, 14, 15, 16, 17, 19, 21, 23, 26, 27, 28, 29, 31, 32, 33], "prerequisit": 35, "process": 19, "program": [10, 12, 28], "properti": 33, "python": [12, 13, 25], "rais": [26, 27], "rang": [17, 21], "rank": 34, "read": 29, "remov": 34, "reserv": 14, "resourc": 9, "restart": 9, "retriev": 17, "return": 23, "run": 9, "runtim": [26, 27], "scienc": 10, "scope": 23, "select": 34, "self": 28, "seri": 33, "set": [17, 21], "slice": [16, 17, 32, 35], "some": [17, 19, 26, 27, 33], "sort": 34, "sort_index": 34, "sourc": 35, "stack": 11, "start": 1, "stop": 19, "string": [3, 16, 21], "structur": [17, 18, 19, 34], "subset": 16, "succe": 8, "summar": 34, "summari": 17, "tech": 11, "theori": 21, "thi": 8, "tip": 23, "try": [26, 27], "tupl": [17, 21], "type": [3, 14, 31, 35], "unari": [3, 15], "unpack": 23, "us": [17, 19, 32], "v": 10, "valu": [17, 23, 32, 35], "variabl": [2, 3, 14, 23], "veri": 31, "versu": 23, "welcom": 37, "what": [14, 16, 28, 31, 33], "when": 23, "while": 19, "write": [19, 29], "you": 8, "your": 12, "zip": 19}}) \ No newline at end of file +Search.setIndex({"alltitles": {"": [[9, "Practice1"], [12, "Practice2"], [14, "variables1"], [14, "variables2"], [14, "variables3"], [15, "operators1"], [15, "operators2"], [16, "strings1"], [16, "strings2"], [17, "structures1"], [17, "structures2"], [17, "structures3"], 
[17, "structures4"], [17, "structures5"], [17, "structures6"], [19, "conditional1"], [19, "conditional2"], [19, "conditional3"], [21, "iterables1"], [21, "iterables2"], [23, "functions1"], [23, "functions2"], [26, "exceptions1"], [26, "exceptions2"], [28, "classes1"], [28, "classes2"], [29, "files1"], [29, "files2"], [31, "numpy1"], [31, "numpy2"], [31, "numpy3"], [31, "numpy4"], [32, "numpy6"], [32, "numpy7"], [32, "numpy8"], [33, "pandas1"], [33, "pandas2"], [35, "pandas3"], [35, "pandas4"], [35, "pandas5"], [35, "pandas6"], [35, "pandas7"], [35, "pandas8"]], ".groupby()": [[37, "groupby"]], "Adding a new entry": [[17, "adding-a-new-entry"]], "Adding finally and else blocks": [[26, "adding-finally-and-else-blocks"], [27, "adding-finally-and-else-blocks"]], "Advanced Filtering and Subsetting": [[36, "advanced-filtering-and-subsetting"]], "Aggregation": [[37, "aggregation"]], "An introduction to some attributes and methods": [[33, "an-introduction-to-some-attributes-and-methods"]], "Arguments": [[2, "arguments"]], "Arguments and parameters": [[23, "arguments-and-parameters"]], "Arithmetic Operators": [[15, "arithmetic-operators"]], "Attributes and methods": [[28, "attributes-and-methods"]], "Axis Labels": [[33, "axis-labels"]], "Basic Array Manipulations + Calculations": [[38, "basic-array-manipulations-calculations"]], "Basic Indexing and Selection": [[35, "basic-indexing-and-selection"]], "Basic NumPy Array Functionality": [[0, "basic-numpy-array-functionality"], [24, "basic-numpy-array-functionality"]], "Basic calculations": [[32, "basic-calculations"]], "Basic data cleaning": [[35, "basic-data-cleaning"]], "Boolean Filtering": [[34, "boolean-filtering"], [36, "boolean-filtering"]], "Boolean slicing": [[32, "boolean-slicing"]], "Brief Introduction to Modules and Packages": [[31, "brief-introduction-to-modules-and-packages"]], "Brief introduction to programming languages": [[10, "brief-introduction-to-programming-languages"]], "Built-in functions": [[23, 
"built-in-functions"]], "By index: sort_index()": [[36, "by-index-sort-index"]], "By values: sort_values()": [[36, "by-values-sort-values"]], "CONCEPTS": [[38, "concepts"]], "Canvas": [[11, "canvas"]], "Cell menu": [[9, "cell-menu"]], "Classes and Objects": [[28, "classes-and-objects"]], "Command mode": [[9, "command-mode"]], "Comparison Operators": [[3, "comparison-operators"], [15, "comparison-operators"]], "Compiled vs Interpreted languages": [[10, "compiled-vs-interpreted-languages"]], "Components": [[9, "components"]], "Comprehension": [[21, "comprehension"]], "Concatenating and Merging": [[37, "concatenating-and-merging"]], "Conditions": [[19, "conditions"]], "Constructing": [[17, "constructing"]], "Constructing a dictionary": [[17, "constructing-a-dictionary"]], "Constructing a list": [[17, "constructing-a-list"]], "Control Structures": [[19, "control-structures"]], "Converting Data Types": [[3, "converting-data-types"], [14, "converting-data-types"]], "Creating and calling a function": [[23, "creating-and-calling-a-function"]], "Creating columns": [[36, "creating-columns"]], "Creating ndarrays": [[31, "creating-ndarrays"]], "Data Frames": [[33, "data-frames"]], "Data Inspection": [[34, "data-inspection"], [35, "data-inspection"]], "Data Structures": [[17, "data-structures"]], "Data Structures Exercises": [[18, "data-structures-exercises"]], "Data Types": [[3, "data-types"], [31, "data-types"], [38, "data-types"]], "Dealing with Missing Data": [[34, "dealing-with-missing-data"]], "Default Arguments": [[23, "default-arguments"]], "Defining a class": [[28, "defining-a-class"]], "Dictionaries": [[17, "dictionaries"], [21, "dictionaries"]], "Docstring": [[23, "docstring"]], "Drop missing data": [[35, "drop-missing-data"]], "Edit mode": [[9, "edit-mode"]], "Errors and Exceptions": [[26, "errors-and-exceptions"], [27, "errors-and-exceptions"]], "Examples": [[21, "examples"], [21, "id2"]], "Exploring dataframe\u2019s structure": [[35, 
"exploring-dataframe-s-structure"]], "Exploring its structure": [[34, "exploring-its-structure"]], "Expressions": [[3, "expressions"], [15, "expressions"]], "Fancy Indexing": [[32, "fancy-indexing"]], "File Modes": [[29, "file-modes"]], "Functions": [[23, "functions"]], "General Theory": [[21, "general-theory"]], "Getting started": [[1, "getting-started"]], "Guidelines when passing arguments:": [[23, "guidelines-when-passing-arguments"]], "Handling runtime errors with try and except": [[26, "handling-runtime-errors-with-try-and-except"], [27, "handling-runtime-errors-with-try-and-except"]], "How You Will Know You Are Learning": [[8, "how-you-will-know-you-are-learning"]], "How to create a dataframe": [[33, "how-to-create-a-dataframe"]], "How to create a series": [[33, "how-to-create-a-series"]], "How will you succeed in this course?": [[8, "how-will-you-succeed-in-this-course"]], "Identity Operators": [[15, "identity-operators"]], "Images are Numerical Data": [[0, "images-are-numerical-data"], [24, "images-are-numerical-data"]], "Immutability": [[16, "immutability"]], "Import Aliases": [[0, "import-aliases"], [24, "import-aliases"], [31, "import-aliases"]], "Importing": [[0, "importing"], [24, "importing"], [31, "importing"]], "Importing pandas": [[33, "importing-pandas"]], "Impute missing values": [[35, "impute-missing-values"]], "Indentation": [[19, "indentation"]], "Indexing": [[2, "indexing"], [16, "indexing"], [17, "indexing"]], "Indexing and Slicing": [[32, "indexing-and-slicing"]], "Inheritance": [[28, "inheritance"]], "Initialize classes": [[28, "initialize-classes"]], "Inserting + Dropping Array Values": [[32, "inserting-dropping-array-values"], [38, "inserting-dropping-array-values"]], "Installing": [[0, "installing"], [24, "installing"], [31, "installing"]], "Installing & Importing Packages": [[0, "installing-importing-packages"], [24, "installing-importing-packages"]], "Introduction": [[8, "introduction"], [9, "introduction"], [19, "introduction"], [21, 
"introduction"], [21, "id1"], [23, "introduction"], [26, "introduction"], [27, "introduction"], [28, "introduction"], [29, "introduction"]], "Introduction to Pandas": [[33, "introduction-to-pandas"]], "Introduction to object-oriented programming (OOP)": [[28, "introduction-to-object-oriented-programming-oop"]], "Iterables": [[21, "iterables"]], "Iterables and Iterators": [[21, "iterables-and-iterators"]], "Iterators": [[21, "iterators"]], "Jupyter Notebooks": [[9, "jupyter-notebooks"]], "JupyterLab": [[11, "jupyterlab"]], "Keyboard Navigation": [[9, "keyboard-navigation"]], "Learning Objectives": [[8, "learning-objectives"]], "Lists": [[17, "lists"], [21, "lists"]], "Local versus Global Variables": [[23, "local-versus-global-variables"]], "Logical Operators": [[3, "logical-operators"], [15, "logical-operators"]], "Loops": [[19, "loops"]], "Masking": [[34, "masking"], [36, "masking"]], "Metadata": [[2, "metadata"], [3, "metadata"]], "Modal editor": [[9, "modal-editor"]], "More Resources": [[9, "more-resources"]], "More useful calculations": [[32, "more-useful-calculations"]], "Mouse navigation": [[9, "mouse-navigation"]], "Mutability": [[17, "mutability"]], "Nested Loops": [[21, "nested-loops"]], "Notebook Basics": [[9, "notebook-basics"]], "Notebook documents": [[9, "notebook-documents"]], "Notes": [[17, "notes"]], "NumPy (Part I)": [[31, "numpy-part-i"]], "NumPy (Part II)": [[32, "numpy-part-ii"]], "Numeric Operators": [[3, "numeric-operators"]], "OBJECTIVES": [[38, "objectives"]], "Object ID": [[3, "object-id"]], "Open OnDemand": [[11, "open-ondemand"]], "Opening a File": [[29, "opening-a-file"]], "Operations on lists": [[17, "operations-on-lists"]], "Operator in: check membership": [[16, "operator-in-check-membership"]], "Operators": [[3, "operators"], [15, "operators"]], "Operators and Expressions": [[15, "operators-and-expressions"]], "PREREQUISITES": [[38, "prerequisites"]], "Packing": [[23, "packing"]], "Packing and Unpacking arguments": [[23, 
"packing-and-unpacking-arguments"]], "Pandas: Data Exploration": [[35, "pandas-data-exploration"]], "PandasII: Exploration": [[34, "pandasii-exploration"]], "PandasIII: Data Manipulation": [[36, "pandasiii-data-manipulation"]], "Practice": [[7, "practice"]], "Practice excersises": [[26, "practice-excersises"], [27, "practice-excersises"], [28, "practice-excersises"], [29, "practice-excersises"]], "Practice exercise": [[17, "practice-exercise"], [17, "id1"], [17, "id2"], [17, "id3"], [17, "id4"], [17, "id6"]], "Practice exercises": [[14, "practice-exercises"], [15, "practice-exercises"], [16, "practice-exercises"], [19, "practice-exercises"], [21, "practice-exercises"], [23, "practice-exercises"], [31, "practice-exercises"], [32, "practice-exercises"], [33, "practice-exercises"], [35, "practice-exercises"], [36, "practice-exercises"]], "Programming and Data Science": [[10, "programming-and-data-science"]], "Programming paradigms": [[10, "programming-paradigms"]], "Properties overview": [[33, "properties-overview"]], "Python (Beginner)": [[13, "python-beginner"]], "Python (Intermediate)": [[25, "python-intermediate"]], "Quick access to columns by name": [[35, "quick-access-to-columns-by-name"]], "Raising exceptions": [[26, "raising-exceptions"], [27, "raising-exceptions"]], "Ranges": [[17, "ranges"], [21, "ranges"]], "Reading and Writing Files": [[29, "reading-and-writing-files"]], "Reading from a File": [[29, "reading-from-a-file"]], "Removing": [[34, "removing"]], "Removing columns": [[36, "removing-columns"]], "Replace missing values": [[34, "replace-missing-values"]], "Reserved names (keywords)": [[14, "reserved-names-keywords"]], "Restarting the kernels": [[9, "restarting-the-kernels"]], "Retrieve a value": [[17, "retrieve-a-value"]], "Returning Values": [[23, "returning-values"]], "Running Code (edit mode)": [[9, "running-code-edit-mode"]], "SOURCES": [[38, "sources"]], "Selecting Data by Label: loc[]": [[35, "selecting-data-by-label-loc"]], "Selecting Data by 
Position: iloc[]": [[35, "selecting-data-by-position-iloc"]], "Selection": [[34, "selection"]], "Series": [[33, "series"]], "Sets": [[17, "sets"], [21, "sets"]], "Slicing": [[16, "slicing"], [17, "slicing"], [38, "slicing"]], "Some best practices": [[26, "some-best-practices"], [27, "some-best-practices"]], "Some methods": [[17, "some-methods"], [17, "id5"]], "Some useful built-in functions with loops": [[19, "some-useful-built-in-functions-with-loops"]], "Some useful methods": [[17, "some-useful-methods"]], "Sorting": [[34, "sorting"]], "Sorting Data": [[36, "sorting-data"]], "String Formatting": [[16, "string-formatting"]], "String Methods": [[16, "string-methods"]], "String Operators": [[3, "string-operators"], [16, "string-operators"]], "Strings": [[16, "strings"], [21, "strings"]], "Subsetting a string": [[16, "subsetting-a-string"]], "Summarizing data": [[34, "summarizing-data"], [35, "summarizing-data"]], "Summary": [[17, "summary"], [37, "summary"]], "Tech Stack": [[11, "tech-stack"]], "The ndarray object": [[31, "the-ndarray-object"]], "Tips for creating good functions": [[23, "tips-for-creating-good-functions"]], "Transforming your data": [[36, "transforming-your-data"]], "Tuples": [[17, "tuples"], [21, "tuples"]], "Unary Operators": [[3, "unary-operators"], [15, "unary-operators"]], "Unpacking": [[23, "unpacking"]], "Using assign": [[36, "using-assign"]], "Using brackets []": [[36, "using-brackets"]], "Using multiple conditions": [[19, "using-multiple-conditions"]], "Variable Names": [[3, "variable-names"]], "Variable Scope": [[23, "variable-scope"]], "Variable naming": [[14, "variable-naming"]], "Variable types": [[14, "variable-types"]], "Variables": [[2, "variables"], [3, "variables"]], "Variables and data types": [[14, "variables-and-data-types"]], "Very common attributes and methods with numpy arrays objects": [[31, "very-common-attributes-and-methods-with-numpy-arrays-objects"]], "Visualizing data": [[35, "visualizing-data"]], "Welcome to DS-1002": 
[[40, "welcome-to-ds-1002"]], "What is NumPy": [[31, "what-is-numpy"]], "What is Pandas?": [[33, "what-is-pandas"]], "What is a string?": [[16, "what-is-a-string"]], "What is a variable?": [[14, "what-is-a-variable"]], "What is self?": [[28, "what-is-self"]], "Working with columns": [[34, "working-with-columns"]], "Working with the dataframe as a whole": [[34, "working-with-the-dataframe-as-a-whole"]], "Writing to a File": [[29, "writing-to-a-file"]], "Your first Python program!": [[12, "your-first-python-program"]], "break - exit the loop": [[19, "break-exit-the-loop"]], "continue - stop the current iteration": [[19, "continue-stop-the-current-iteration"]], "dropping missing data": [[34, "dropping-missing-data"]], "enumerate()": [[19, "enumerate"]], "for loop": [[19, "for-loop"]], "if and else can be used for conditional processing.": [[19, "if-and-else-can-be-used-for-conditional-processing"]], "iloc[]: Selection by index": [[34, "iloc-selection-by-index"]], "loc[]: Selection by label": [[34, "loc-selection-by-label"]], "merge()": [[37, "merge"]], "pd.concat()": [[37, "pd-concat"]], "pd.pivot_table()": [[37, "pd-pivot-table"]], "using if, elif": [[19, "using-if-elif"]], "using if, elif, else": [[19, "using-if-elif-else"]], "while-loop": [[19, "while-loop"]], "writing if and else as one-liners": [[19, "writing-if-and-else-as-one-liners"]], "zip()": [[19, "zip"]]}, "docnames": ["06_numpy_intro", "chapters/01-getting_started", "chapters/02-python-basics", "chapters/04-python-basics", "chapters/module-1/012-intro_python", "chapters/module-1/012-intro_python (copia)", "chapters/module-1/013-intro_R", "chapters/module-1/Practice", "chapters/module-1/about_course", "chapters/module-1/jupyter_notebooks", "chapters/module-1/programming", "chapters/module-1/tech_stack", "chapters/module-1/your_first_program", "chapters/module-2/02-cover", "chapters/module-2/021-variables", "chapters/module-2/022-operators", "chapters/module-2/023-strings", 
"chapters/module-2/024-structures", "chapters/module-2/0241-structures_exercises", "chapters/module-2/025-conditional", "chapters/module-2/0251-conditional_exercises", "chapters/module-2/026-iterables_and_iterators", "chapters/module-2/0261-functions_exercises", "chapters/module-2/027-functions", "chapters/module-3/029-packages", "chapters/module-3/03-cover", "chapters/module-3/031-errors_and_exceptions", "chapters/module-3/031-errors_and_exceptions_w_sols", "chapters/module-3/032-classes", "chapters/module-3/033-reading_writing_files", "chapters/module-3/lab-recursion", "chapters/module-4/041-numpyI", "chapters/module-4/042-numpyII", "chapters/module-4/043-PandasI-Introduction", "chapters/module-4/044-PandasII-Exploration_and_Manipulation-Copy1", "chapters/module-4/044-PandasII-exploration", "chapters/module-4/045-PandasIII-manipulation", "chapters/module-4/046-PandasIII-Merging_Concatenating_Aggregating", "chapters/module-4/07-numpy-continued", "chapters/module-4/Untitled", "index"], "envversion": {"sphinx": 61, "sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1}, "filenames": ["06_numpy_intro.ipynb", "chapters/01-getting_started.md", "chapters/02-python-basics.ipynb", "chapters/04-python-basics.ipynb", "chapters/module-1/012-intro_python.md", "chapters/module-1/012-intro_python (copia).md", "chapters/module-1/013-intro_R.md", "chapters/module-1/Practice.ipynb", "chapters/module-1/about_course.md", "chapters/module-1/jupyter_notebooks.ipynb", "chapters/module-1/programming.ipynb", "chapters/module-1/tech_stack.md", "chapters/module-1/your_first_program.ipynb", "chapters/module-2/02-cover.md", "chapters/module-2/021-variables.ipynb", "chapters/module-2/022-operators.ipynb", "chapters/module-2/023-strings.ipynb", 
"chapters/module-2/024-structures.ipynb", "chapters/module-2/0241-structures_exercises.ipynb", "chapters/module-2/025-conditional.ipynb", "chapters/module-2/0251-conditional_exercises.ipynb", "chapters/module-2/026-iterables_and_iterators.ipynb", "chapters/module-2/0261-functions_exercises.ipynb", "chapters/module-2/027-functions.ipynb", "chapters/module-3/029-packages.ipynb", "chapters/module-3/03-cover.md", "chapters/module-3/031-errors_and_exceptions.ipynb", "chapters/module-3/031-errors_and_exceptions_w_sols.ipynb", "chapters/module-3/032-classes.ipynb", "chapters/module-3/033-reading_writing_files.ipynb", "chapters/module-3/lab-recursion.ipynb", "chapters/module-4/041-numpyI.ipynb", "chapters/module-4/042-numpyII.ipynb", "chapters/module-4/043-PandasI-Introduction.ipynb", "chapters/module-4/044-PandasII-Exploration_and_Manipulation-Copy1.ipynb", "chapters/module-4/044-PandasII-exploration.ipynb", "chapters/module-4/045-PandasIII-manipulation.ipynb", "chapters/module-4/046-PandasIII-Merging_Concatenating_Aggregating.ipynb", "chapters/module-4/07-numpy-continued.ipynb", "chapters/module-4/Untitled.ipynb", "index.md"], "indexentries": {}, "objects": {}, "objnames": {}, "objtypes": {}, "terms": {"": [0, 3, 9, 10, 11, 12, 14, 15, 17, 19, 21, 23, 24, 26, 27, 28, 29, 31, 32, 33, 34, 36, 38], "0": [0, 2, 3, 10, 12, 14, 15, 16, 17, 19, 20, 21, 22, 23, 24, 26, 27, 30, 31, 32, 33, 34, 35, 36, 37, 38], "00": [32, 33, 34, 35, 36], "000": 36, "00000": [34, 35], "000000": [34, 35], "006": 37, "00855369e": 32, "00950034": 31, "01": 32, "017400": 37, "02": [32, 34, 36], "026": 37, "02729858": 31, "03": 32, "033": 36, "03428793e": 32, "03742102": 32, "04": [34, 36], "05": 36, "05459716": 31, "057": 36, "057333": [34, 35], "058524": 37, "07586576": 32, "07873659": 31, "08": [34, 36], "08108815": 32, "09383095": 31, "09663316e": 32, "0b100101": 23, "0b1101": 23, "0th": 35, "0x1": 14, "0x7f995b817510": 33, "0x7f995b833ed0": 33, "0x7fc546428810": 32, "0x7fc546428d50": 32, 
"0x87f408": 23, "0x87f448": 23, "0x87f508": 14, "0x87f648": 14, "1": [0, 2, 3, 9, 10, 12, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], "10": [0, 3, 9, 12, 14, 15, 17, 19, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 33, 34, 35, 36, 38], "100": [0, 3, 10, 12, 14, 15, 20, 24, 32], "1000": 35, "100000": [34, 35], "1002": [0, 2, 3, 12, 23, 24], "101": 33, "102": 33, "103": 33, "104": 33, "105": [34, 36], "106": 33, "1065": 35, "1066": 35, "1067": 35, "1068": 35, "1069": 35, "1070": 35, "11": [0, 2, 16, 17, 23, 24, 28, 31, 32, 33, 35, 38], "110": 32, "111": [0, 24, 36], "112": [0, 24, 33], "11208023": 31, "113": [0, 24, 33], "115": [0, 24], "115mmhg": 21, "117": [0, 24, 34, 36], "11757": [34, 35], "117570": [34, 35], "118": [0, 24, 34, 36], "11959951": 32, "1198364157": 32, "12": [14, 21, 28, 30, 32, 34, 36, 38], "120": [21, 32], "1208023": 31, "122": [34, 36], "123": 17, "1234": [23, 29], "12345": 32, "123456789": 29, "123941": 37, "1244706300354": 15, "12457198": 32, "125": [3, 14, 31, 33], "1253": 35, "1254": 35, "1256": 35, "1271": 33, "127125": 37, "1288": 35, "1289": 35, "1290": 35, "1291": 35, "1292": 35, "1299": 33, "13": [0, 2, 20, 23, 24, 30, 32, 34, 36], "130": [3, 14, 32], "130858": 37, "131": [33, 34, 36], "131013": 37, "132": 33, "1321": 35, "1323": 35, "1324": 35, "1326": 35, "1327": 35, "133": 33, "135": [33, 34, 36], "139919121178064": 14, "139919121182192": 14, "14": [0, 3, 14, 24, 32, 34, 36], "140": [32, 34, 35], "141": [33, 34, 35], "14112001": [32, 38], "1416": 14, "142": [34, 35], "143": [34, 35], "144": [34, 35], "145": [33, 34, 35, 36, 37], "14550003": 32, "146": [33, 34, 35, 36, 37], "147": [33, 34, 35, 36, 37], "148": [33, 34, 35, 36, 37], "149": [33, 34, 35, 36, 37], "15": [0, 10, 15, 21, 23, 24, 32, 33, 34, 36, 37, 38], "150": [32, 33, 34, 35, 36, 37], "151": 33, "156": 33, "157": 33, "16": [0, 19, 23, 24, 30, 32, 34], "160": 32, "162": 33, "166": 33, "17": [34, 36], "170": 32, "173": 
36, "174": 36, "176": 16, "178": 33, "18": [2, 34, 36], "180": 32, "183": 33, "185": 33, "19": [3, 32, 33, 34, 36], "190": 32, "192": 33, "1923875335537315": 31, "1934569051": [34, 35], "198": 33, "199333": [34, 35], "1d": [31, 32], "2": [0, 2, 3, 12, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], "20": [0, 3, 10, 14, 15, 21, 23, 24, 31, 32, 33, 34, 36, 38], "200": [0, 24, 32, 33], "201": 33, "20167438": 32, "2018": 9, "2019": [0, 24], "202": [0, 24], "2021": [0, 24], "2022": 10, "203": [0, 24, 36], "205": [0, 24], "206": 33, "21": [0, 23, 24, 30, 33, 34, 36], "21027777": 32, "214": 36, "217": 29, "2179810851": [34, 36], "22": [0, 3, 15, 24, 33, 34, 36, 37], "220": 33, "223": 17, "22416046": 31, "225": 33, "23": [17, 33, 34, 36], "235": 36, "23606798": 32, "24": [0, 24, 30, 33], "24326299558377007": 31, "2433657": 32, "244": [0, 24], "245466": 37, "246": [0, 24, 37], "247": [0, 24], "249": [0, 24], "25": [3, 10, 12, 14, 15, 17, 26, 27, 31, 32, 33, 34, 35, 36, 38], "250": [0, 24, 33], "255": [0, 24], "256": [3, 14, 23], "257931": 37, "259": 36, "26": [0, 16, 17, 23, 24, 34, 36], "260": 37, "261011": 37, "27": [17, 28, 33], "27298579": 31, "278": [3, 14], "2794155": [32, 38], "28": [17, 33, 34], "28328225": 31, "28366219": 32, "29": [17, 34, 36], "2905": 33, "29128784747792": 32, "29293041": 32, "29718677": 31, "2d": [31, 32, 38], "3": [0, 2, 3, 12, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], "30": [0, 17, 22, 23, 24, 32, 33, 34, 35, 36, 38], "300": [0, 15, 24], "300000": [34, 35], "30677793": 31, "309773": 37, "31": [0, 24], "32": [0, 24, 30, 33], "326": 37, "33": [0, 19, 24], "332": 17, "333333": [34, 35], "33339607": 32, "34": [0, 24, 26, 27], "35": [0, 3, 14, 15, 24, 32, 34, 35, 36, 38], "350000": [34, 35], "350267": 37, "35331011": 32, "356": [0, 24], "357": [0, 24], "359": [0, 24], "36": [0, 24, 33], "360": [0, 24], "362": [0, 24], "36320733": 
32, "366126": [34, 35], "37": [16, 23], "375": 31, "38": [33, 34, 36], "38672696": 31, "38709055": 32, "38905610e": 32, "39141742": 32, "39906275": 32, "39924804": 31, "3a": 20, "3b": 20, "3d": [31, 32, 38], "3foo": 14, "4": [0, 3, 12, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 26, 27, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], "40": [0, 17, 21, 24, 32, 33, 36, 38], "400": [0, 24], "400000": [34, 35], "40320": 30, "404": [0, 24], "405": [0, 24], "406": [0, 24], "407": [0, 24], "408": [0, 24], "409": [0, 24], "409508": 37, "41": [33, 34, 35, 36], "410": [0, 24], "41211849": 38, "41421356": 32, "41614684": 32, "41720455": 31, "42": [3, 31, 32, 33, 34, 36, 38], "42051231": 32, "428": 37, "42844": [34, 35], "428440": [34, 35], "43": [3, 33], "435866": [34, 35], "437": 33, "438": 33, "43879725": 32, "439": 33, "440": 33, "441": 33, "44108587": 31, "44189276": 32, "442": 33, "4427": 33, "448256": 37, "44948974": 32, "45": [23, 32, 36, 37, 38, 39], "456": 17, "45981500e": 32, "46135559": 31, "462": 37, "4685006": 31, "46915473": 31, "47": [33, 34, 36], "47968728": 32, "48": 33, "48344091": 31, "48413159e": 32, "49401501": 31, "49876311": 31, "4d": 31, "5": [0, 2, 3, 12, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40], "50": [0, 17, 24, 32, 33, 34, 35, 36, 38], "500": [0, 24, 32, 38], "500000": [34, 35], "51": [0, 24], "512": 32, "52": [0, 24, 33], "53": [0, 17, 23, 24], "54030231": 32, "541": 36, "541922": 37, "54402111": 38, "54691547": 31, "54999924": 32, "55": [0, 17, 24, 32, 36], "55000000074505806": 32, "552": 37, "56": [0, 24, 34, 36], "56656449": 31, "57": 33, "576": 36, "57904328": 31, "588": 37, "588745": 37, "59": [17, 33], "5951": 33, "5dl": 21, "5mg": 21, "6": [0, 3, 9, 14, 15, 17, 18, 19, 20, 21, 22, 23, 24, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40], "60": [23, 32, 33, 36], "600000": [34, 35], "618569": 37, "625": 31, "63226768": 31, "64": [22, 23, 26, 27, 30, 32, 34], "64575131": 32, "64752804": 32, 
"65": [32, 36], "6516": 35, "6517": 35, "6518": 35, "65364362": 32, "6555": 35, "6556": 35, "6557": 35, "6559": 35, "6561": 35, "6562": 35, "6569866": [32, 38], "66158729": 32, "6618": 35, "6619": 35, "661988": 37, "6620": 35, "6621": 35, "6622": 35, "6623": 35, "6625": 35, "6672": 35, "6674": 35, "6675": 35, "6676": 35, "6678": 35, "6679": 35, "669": 36, "67": [14, 33], "6728": 33, "6765": 35, "6766": 35, "6767": 35, "6768": 35, "6769": 35, "68": 32, "68456316": 31, "688": 36, "6888893": 31, "69": 33, "693795": 37, "7": [0, 17, 19, 21, 23, 24, 26, 27, 31, 32, 33, 34, 35, 36, 37, 38], "70": [32, 33, 34, 36], "7099215": 32, "71": 33, "71828183e": 32, "71985611": 31, "72": [32, 33, 34], "73067779": 31, "73205081": 32, "739": 36, "74": 32, "74172046": 31, "74408967": 31, "74936841": 31, "75": [15, 31, 32, 33, 34, 35, 36], "75328513": 32, "75390225": 32, "756023": 37, "7568025": [32, 38], "758000": [34, 35, 36], "76": [34, 36], "762238": [34, 35], "765298": [34, 35], "76942668": 31, "77": 33, "770": 37, "777": 17, "78": 32, "78096262": 31, "79": 33, "8": [0, 2, 3, 14, 15, 19, 20, 21, 22, 23, 24, 26, 27, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39], "80": [17, 32, 33, 34, 35, 36], "800": 28, "8000": 31, "800000": [34, 35], "81": [32, 36], "817941": [34, 35], "817985": 37, "81814867": 31, "82": 32, "828066": [34, 35], "82842712": 32, "83282247": 31, "84": 33, "84057254": 31, "84147098": [32, 38], "843333": [34, 35, 36], "85": [32, 33, 34, 36], "85886751": 32, "8598": 33, "86": 32, "86425065": 32, "865679": 37, "87": [32, 33, 36], "871754": [34, 35], "8722813232690143": 38, "873551": 37, "875": [31, 36], "88": 32, "886349": 37, "89": [32, 33], "8903": 33, "89086505": 31, "8909800": 14, "8918": 33, "897": 17, "9": [0, 2, 3, 14, 15, 17, 20, 22, 24, 26, 27, 31, 32, 33, 34, 35, 36, 37, 38], "90": [32, 34, 36], "900000": [34, 35], "90929743": [32, 38], "91": [23, 32], "912": 23, "91276971": 32, "92": [32, 33], "921": 35, "922": 35, "923815": 37, "924": 35, "925": 35, "926": 35, 
"927": 35, "92955365": 32, "93": 33, "936": 37, "94": 32, "94355901": 31, "94769757": 32, "94781372": 31, "95": [32, 33, 36], "95892427": [32, 38], "96017029": 32, "962865": [34, 35], "97": 33, "974": 37, "98": 36, "98095799e": 32, "9836": 33, "98935825": [32, 38], "9899925": 32, "99": [17, 33], "99394529": 31, "A": [2, 3, 9, 10, 12, 14, 15, 17, 23, 28, 29, 30, 31, 32, 33, 38], "And": [12, 16, 23, 28, 32, 34, 36], "As": [8, 9, 10, 12, 16, 21, 26, 27, 28, 33, 34, 35, 40], "At": 23, "Be": [19, 21, 23, 29, 35], "But": [8, 14, 17, 19, 23, 26, 27, 28, 31, 32, 34, 35, 36], "By": [8, 14, 31, 32, 33, 34, 35], "FOR": 19, "For": [2, 3, 8, 9, 10, 12, 14, 15, 17, 18, 19, 21, 23, 28, 29, 30, 31, 32, 33, 34, 35], "IF": 19, "IN": 28, "If": [3, 8, 9, 10, 15, 16, 17, 19, 20, 23, 26, 27, 28, 29, 30, 31, 32, 35, 38], "In": [0, 2, 3, 9, 10, 11, 12, 14, 15, 16, 17, 19, 21, 23, 24, 26, 27, 28, 29, 31, 32, 33, 34, 35, 36, 38, 39], "It": [0, 2, 10, 16, 17, 19, 23, 24, 26, 27, 28, 31, 32, 33, 34, 35, 36], "Its": 33, "NO": 28, "NOT": 17, "No": [0, 3, 17, 24], "On": 10, "One": [16, 23, 38], "Or": [2, 14, 23, 26, 27, 31], "THE": 8, "That": [17, 28, 32], "The": [3, 8, 9, 10, 11, 12, 14, 15, 16, 17, 19, 21, 23, 26, 27, 28, 29, 30, 32, 33, 34, 35, 36, 37, 38, 40], "Their": [11, 21], "Then": [7, 9, 12, 15, 23, 28, 29, 35, 37], "There": [3, 9, 14, 15, 21, 26, 27, 29, 32, 38], "These": [8, 9, 10, 14, 17, 23, 26, 27, 28, 29, 31, 33], "To": [0, 10, 16, 17, 19, 21, 23, 24, 28, 29, 31, 32, 33, 35], "Will": 32, "With": [9, 23, 28, 30, 32, 35], "_": [2, 3, 14, 23, 33], "__abs__": 23, "__add__": 23, "__and__": 23, "__array_function__": 31, "__bool__": 23, "__ceil__": 23, "__divmod__": 23, "__doc__": [23, 33], "__eq__": 23, "__float__": 23, "__floor__": 23, "__floordiv__": 23, "__format__": 23, "__ge__": 23, "__getattribute__": 23, "__getitem__": 35, "__getnewargs__": 23, "__gt__": 23, "__hash__": 23, "__index__": 23, "__init__": [0, 24, 28, 31], "__int__": 23, "__invert__": 23, "__le__": 23, "__lshift__": 
23, "__lt__": 23, "__main__": [23, 28], "__mod__": 23, "__mul__": 23, "__ne__": 23, "__neg__": 23, "__new__": 23, "__or__": 23, "__pos__": 23, "__pow__": 23, "__radd__": 23, "__rand__": 23, "__rdivmod__": 23, "__repr__": 23, "__rfloordiv__": 23, "__rlshift__": 23, "__rmod__": 23, "__rmul__": 23, "__ror__": 23, "__round__": 23, "__rpow__": 23, "__rrshift__": 23, "__rshift__": 23, "__rsub__": 23, "__rtruediv__": 23, "__rxor__": 23, "__setattr__": 28, "__sizeof__": 23, "__sub__": 23, "__truediv__": 23, "__trunc__": 23, "__xor__": 23, "_deprecated_arg": 35, "_foo": 14, "_get_axi": 35, "_get_slice_axi": 35, "_get_valu": 35, "_getbool_axi": 35, "_getitem_axi": 35, "_getitem_lowerdim": 35, "_getitem_tupl": 35, "_getitem_tuple_same_dim": 35, "_invalid_index": 35, "_io": [0, 24, 29], "_is_scalar_access": 35, "_locationindex": 35, "_locindex": 35, "_maybe_cast_slice_bound": 35, "_multi_tak": 35, "_multi_take_opportun": 35, "_parse_uri": [0, 24], "_plugin": [0, 24], "_slice": 35, "_takeabl": 35, "_validate_kei": 35, "a_arrai": [0, 24], "a_list": [0, 24], "ab": 23, "abil": [28, 34, 35], "abl": 9, "about": [8, 10, 23, 28, 29, 33, 34, 35, 36], "abov": [0, 7, 8, 9, 17, 18, 19, 20, 22, 23, 24, 31, 32, 35, 36], "absenc": 17, "absolut": 23, "absolute_valu": 23, "academi": 29, "academia": 10, "accept": [3, 15, 23, 29, 33], "access": [10, 11, 14, 16, 17, 19, 23, 28, 32, 33, 34, 36], "access_mod": 29, "accomod": 10, "accomplish": 31, "accord": 35, "accumul": 32, "accur": [23, 32], "achiev": [17, 30], "acquisit": 35, "across": 31, "act": [15, 28, 33], "action": [9, 19, 23, 26, 27, 28], "activ": [8, 9, 29], "actual": [19, 23, 28], "acycl": [32, 38], "ad": [0, 24, 28, 29], "adapt": 12, "add": [2, 15, 17, 18, 19, 20, 23, 28, 29, 33, 35], "add_u": 2, "addit": [3, 8, 12, 14, 15, 23, 26, 27, 28, 31, 32, 33], "addition": [8, 31, 40], "address": [10, 14, 17, 23], "adjac": 29, "administr": 10, "adopt": 29, "advanc": [19, 23, 28, 40], "advantag": [28, 31], "aei": 16, "affect": [23, 28, 32, 35], 
"aforement": [9, 14], "after": [15, 16, 17, 19, 23, 26, 27, 28, 29, 31], "afton": 11, "ag": [2, 3, 26, 27, 28, 33, 35], "again": [3, 23, 28, 29, 30, 32, 34, 35], "against": [23, 32], "age_data": [26, 27], "aggfunc": 37, "agre": 29, "aim": [14, 23, 28], "aka": 12, "alert": 17, "algebra": 31, "algorithm": [3, 28, 31], "alia": [0, 24, 31, 33], "alic": 33, "all": [0, 2, 8, 9, 10, 14, 16, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 35, 37, 38], "allevi": 32, "allow": [0, 2, 3, 9, 10, 11, 14, 15, 16, 17, 21, 23, 24, 28, 29, 31, 33, 34, 35, 36], "almost": [10, 19, 21], "along": [8, 10, 14, 32, 33, 34, 36, 37], "alpha": [2, 3], "alreadi": [0, 16, 23, 24, 28, 29], "also": [0, 3, 8, 9, 10, 12, 14, 15, 16, 17, 21, 23, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 38, 40], "alt": 9, "altern": [10, 21, 29, 31, 32, 33], "although": [10, 16, 31], "alwai": [3, 14, 17, 23, 26, 27, 31, 32], "am": [12, 16, 21, 28], "ambigu": 31, "among": [0, 10, 24, 31], "amount": 29, "an": [0, 2, 3, 8, 9, 10, 12, 14, 15, 16, 17, 19, 20, 21, 22, 23, 24, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 40], "anaconda3": [0, 24, 35], "analys": 10, "analysi": [10, 11, 29, 31, 33, 35, 36], "analyt": [29, 33], "angel": 33, "ani": [0, 2, 7, 9, 10, 14, 16, 17, 19, 21, 23, 24, 27, 28, 29, 31, 32, 33, 34, 35, 36], "anim": 28, "animal_firulai": 28, "animal_kenni": 28, "annoi": 19, "annot": 23, "anonym": [8, 36], "anoth": [9, 10, 14, 17, 19, 21, 23, 28, 30, 31, 33, 35, 38], "another_anim": 28, "answer": [8, 14, 15, 16, 17, 18, 19, 21, 23, 26, 28, 29, 31, 32, 33, 35], "anybodi": 19, "anyon": 23, "anyth": [8, 12, 23, 28], "anywher": [3, 15], "apostroph": 16, "appar": 35, "appeal": [10, 35], "appear": [10, 17, 31], "append": [17, 21, 23, 28, 29], "appl": [2, 3, 19], "apple_index": 2, "appli": [0, 9, 10, 16, 17, 18, 24, 31, 32, 34, 35, 36, 37], "applic": [9, 10, 11, 33], "approach": [8, 10, 21, 23, 33, 35], "appropri": [3, 15, 26, 27], "approxim": 31, "ar": [2, 3, 9, 10, 11, 12, 14, 15, 16, 17, 19, 21, 23, 26, 27, 28, 
29, 30, 31, 32, 33, 34, 35, 36, 38], "arang": [31, 32, 38], "arbitrari": [19, 21, 23], "area": [9, 10, 29], "areascomput": 29, "arg": [0, 22, 23, 24], "arg1": 12, "arg2": 12, "arg3": 12, "arg_expansion_exampl": 23, "argu": 10, "argument": [12, 14, 17, 22, 27, 29, 31, 32, 34, 35, 36, 38], "argunemnt": 17, "aris": [28, 36], "arithmet": [3, 12, 17, 26, 27, 32], "around": [8, 17, 35], "arr": [9, 32, 38], "arr1": [31, 38], "arr1d": 32, "arr2": [31, 32, 38], "arr2_concat": 32, "arr2_copi": 32, "arr2_flip": 32, "arr2_float64": 31, "arr2_int32": 31, "arr2_max": 32, "arr2_mean": 32, "arr2_min": 32, "arr2_std": 32, "arr2d": [32, 38], "arr3": 31, "arr3d": [32, 38], "arr_exec1": 32, "arr_exec2": 32, "arr_slic": 32, "arrai": [14, 23, 33, 34, 36], "array_lik": [31, 32], "arthimet": 20, "arthur": 17, "artifici": 10, "as_grai": [0, 24], "as_integer_ratio": 23, "asarrai": [0, 24], "ascend": [34, 36], "asher": 37, "ask": [8, 12, 34, 35], "ask_and_decid": 28, "aspect": 16, "assert": [3, 14, 35], "assign": [3, 14, 15, 16, 17, 23, 28, 31, 32, 33, 34], "assist": 8, "associ": [3, 8, 15, 17, 23, 32], "assum": [3, 31, 32, 37], "assumpt": 8, "asterisk": 23, "astyp": [31, 38], "async": [3, 14], "athlet": 23, "attempt": [8, 26, 27, 32, 36], "attent": [26, 27], "attribut": [16, 17, 18, 23, 34, 35, 36], "attributeerror": 28, "auc": 21, "author": [9, 34], "autom": 10, "automat": [3, 14, 29, 33], "avail": [9, 23, 28, 31], "averag": 32, "avg_length": 36, "avg_width": 36, "avoid": [10, 26, 27, 32], "await": [3, 14], "awar": 23, "ax": [0, 24, 31, 32, 33, 35], "axi": [10, 32, 34, 35, 36, 37, 38], "ayotnom": 16, "b": [3, 9, 14, 15, 17, 23, 26, 27, 28, 30, 31, 32, 33], "b_arrai": [0, 24], "b_list": [0, 24], "back": [9, 12, 16, 17, 31, 32, 38], "background": [8, 9], "bad": [26, 27], "banana": [2, 3, 19], "bar": [17, 23, 35, 37, 39], "bari": 19, "bark": 28, "barplot": [10, 35], "base": [0, 8, 10, 16, 21, 24, 30, 34, 35, 36, 37, 40], "bash": [10, 19], "basic": [2, 3, 8, 15, 28, 31, 36, 40], "baz": 17, 
"beat": 10, "becaus": [10, 21, 23, 26, 27, 28, 31, 32], "becom": [8, 9, 19, 28], "been": [9, 10, 17, 19, 26, 27, 28, 31], "befor": [7, 8, 9, 10, 17, 23, 26, 27, 28, 29, 31, 32, 33, 38], "beforehand": 12, "begin": [16, 23, 29], "beginn": 10, "behav": [21, 28, 29, 33], "behavior": [19, 23, 28, 30, 32, 33], "being": [3, 10, 19, 20, 32], "believ": [10, 19], "bell": 21, "belong": [19, 23, 28, 29, 32], "below": [2, 7, 9, 17, 20, 23, 28, 31, 32, 33, 34, 35], "berlin": 29, "best": [8, 23, 28, 29], "best_citi": 29, "better": [8, 10, 26, 27, 31], "betwe": 3, "between": [3, 12, 14, 16, 23, 31, 33, 34, 35], "bewar": [9, 19], "beyond": [10, 29, 33], "big": [17, 23], "bilbao": [19, 29], "bill": [9, 37], "billi": 17, "bin": 23, "binari": [10, 23], "birth": [17, 18], "bit": [23, 28, 29, 32, 38], "bit_count": 23, "bit_length": 23, "black": 28, "blank": [16, 35], "blink": 9, "block": [3, 15, 19, 23, 28, 29], "blog": 9, "blue": [9, 28], "blueberri": 19, "bmi": 33, "bob": [17, 32, 33], "bodi": 23, "boo": 33, "book": [0, 24, 40], "bool": [3, 14, 22, 23, 32, 33, 34, 36], "bool_": 31, "bool_var": [3, 14], "boolean": [2, 3, 14, 15, 16, 17, 18, 23], "boolean_arrai": 31, "border": 9, "borderand": 9, "both": [0, 8, 9, 10, 11, 14, 16, 17, 19, 20, 23, 24, 26, 27, 28, 29, 30, 32, 33], "bottom": [32, 38], "boundari": 35, "bow": 28, "box": [28, 35], "boxplot": [9, 35], "bp": 33, "brace": [16, 17, 19], "bracket": [16, 17, 34, 35], "break": [3, 14, 16, 17, 23, 28, 29, 33, 34, 36], "breakthrough": 8, "brief": [8, 40], "briefli": 10, "broad": 29, "broadcast": 32, "broken": 30, "browser": 11, "bucket": 19, "budapest": 29, "buddi": 28, "buffer": 23, "build": [3, 15, 19, 21, 28], "built": [0, 14, 24, 28, 31, 32, 34, 35], "builtin": 23, "bulk": 23, "burden": 8, "button": [3, 9, 16], "byte": [0, 23, 24, 33], "bytearrai": 23, "byteord": 23, "bytes_": 31, "c": [9, 10, 17, 19, 23, 31, 32, 33], "calcul": [12, 15, 19], "call": [0, 2, 3, 8, 12, 14, 16, 17, 19, 21, 22, 24, 26, 27, 28, 29, 30, 31, 33, 35, 38, 
39], "call_plugin": [0, 24], "camel": 14, "camelcas": 28, "can": [2, 3, 8, 9, 10, 11, 12, 14, 15, 16, 17, 21, 22, 23, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 38], "cannot": [2, 3, 16, 23, 26, 27, 35], "canva": 8, "capabl": [8, 21, 33], "capit": 14, "captur": 8, "care": [19, 21], "carefulli": 19, "case": [2, 3, 12, 14, 15, 16, 20, 21, 23, 26, 27, 29, 30, 31, 32], "cast": [3, 14, 31, 32], "catch": [26, 27], "catchal": 19, "categor": [10, 34, 35], "categori": 10, "caus": [19, 23, 26, 27, 32], "cautiou": [23, 29], "ceil": 23, "cell": [0, 2, 3, 7, 12, 14, 16, 17, 18, 19, 20, 21, 23, 24, 26, 27, 28, 34, 35, 36, 39], "center": 35, "central": [34, 35], "certain": [12, 14, 17, 23, 26, 27, 28], "challeng": [8, 33], "chang": [7, 9, 10, 17, 18, 19, 23, 28, 32, 33, 36], "channel": [0, 24], "chapter": 24, "charact": [2, 3, 14, 16, 19], "character": 17, "characterist": 16, "charli": 33, "charlottesvil": 19, "chart": 35, "chatch": [26, 27], "cheat": 9, "check": [0, 3, 9, 14, 15, 17, 19, 20, 21, 22, 24, 26, 27, 31, 34, 35], "check_positive_numb": [26, 27], "cherri": 19, "chicago": 33, "child": 28, "choic": 10, "chosen": 23, "chunk": [14, 33], "cinderella": 21, "citi": [19, 29, 33], "clariti": [21, 23], "class": [3, 8, 12, 14, 16, 23, 26, 27, 31, 32, 33, 34, 35], "classic": 9, "claus": [26, 27], "clayton": 17, "clean": [8, 10, 29, 33], "clean_word": 21, "cleaner": [26, 27], "cleanup": [26, 27], "clear": 29, "clearli": [23, 26, 27], "cli": [8, 40], "click": 9, "clint": 17, "close": 29, "club": 23, "cluster": 11, "cm": 16, "cnn": 21, "cnn_1": 21, "cnn_2": 21, "co": [9, 32, 38], "coconut": 17, "code": [0, 3, 8, 10, 11, 12, 19, 20, 21, 23, 24, 26, 27, 28, 29, 31, 40], "coff": 33, "coffe": [17, 18], "colab": [0, 24], "collect": [14, 17, 31, 33], "colon": [23, 26, 27, 29], "cols_id": 33, "column": [0, 24, 31, 32, 33, 37, 38], "com": [10, 33, 34, 35, 36, 37], "combin": [0, 3, 7, 8, 9, 11, 15, 16, 17, 19, 22, 23, 24, 29, 32, 33, 36, 37, 38], "come": [0, 8, 10, 12, 16, 21, 23, 24, 28, 
31, 33], "comfort": 8, "comma": [14, 17, 33], "command": [0, 8, 11, 24, 31, 40], "comment": [3, 8], "common": [0, 3, 8, 10, 12, 15, 16, 21, 23, 24, 29, 32, 33, 34, 36, 38, 40], "commonli": [8, 10, 31, 40], "commun": [8, 10], "compact": [17, 21], "compar": [10, 21, 23], "comparison": [32, 34, 36], "compat": 31, "competit": 10, "compil": 12, "complement": 23, "complet": [8, 9, 23, 26, 27, 34, 35], "complex": [2, 3, 8, 10, 15, 19, 23, 29, 33, 36], "complic": [15, 21], "compon": [8, 21, 23], "compos": 11, "comprehens": [23, 31], "compris": 29, "comput": [8, 9, 10, 11, 12, 19, 20, 29, 31, 32, 34, 35, 37], "compute_vari": 23, "compute_variances_sort_save_print": 23, "concaten": [3, 16, 17, 26, 27, 32, 38], "concentr": 15, "concept": [8, 10, 21, 32, 40], "concis": [10, 21, 23], "conclud": [8, 40], "concret": 28, "cond": 32, "condit": [20, 21, 23, 32, 34, 36], "conditon": 19, "conduct": 10, "confid": 8, "confirm": 30, "confus": [23, 33], "conjug": 23, "conjunct": [3, 15], "connor": 28, "consecut": 16, "consequ": 31, "consid": [21, 23, 28, 32], "consist": [9, 23, 38], "consol": 12, "construct": 21, "consum": 28, "contain": [2, 3, 9, 10, 12, 14, 17, 18, 21, 22, 23, 28, 29, 31, 32, 33, 34, 35, 36, 38], "content": [8, 9, 29, 31], "content_parti": 29, "context": [23, 28, 29], "continu": [3, 8, 14, 30, 32, 35, 38], "contourpi": [0, 24], "contrast": [10, 12, 14, 17, 19, 21, 31, 33], "control": [8, 31, 38, 40], "controversi": 33, "conveni": [34, 35], "convent": [10, 14, 23, 28], "convers": [14, 32], "convert": [9, 10, 16, 21, 23, 31, 35], "convet": 14, "copi": [9, 16, 20, 32, 33, 34, 35, 36, 38], "core": [0, 24, 33, 34, 35], "corr": [34, 35], "correct": [19, 23, 29], "correctli": 32, "correl": [34, 35], "correspond": [16, 17, 29, 31, 32, 33], "costli": 33, "could": [0, 17, 19, 23, 24, 27, 28, 29, 31, 32, 33, 38], "count": [10, 23, 33, 34, 35], "countri": 19, "coupl": [9, 19], "cours": [2, 3, 10, 11, 12, 14, 21, 28, 31, 38, 40], "courses_it": 21, "cover": [8, 10, 14, 17, 21, 35, 
36, 40], "creat": [2, 3, 7, 9, 10, 12, 14, 15, 16, 17, 20, 21, 22, 28, 29, 30, 32, 34, 35, 37, 38], "creation": [9, 31, 36], "creativ": 28, "critic": [31, 33], "cross": 37, "crucial": [32, 33], "csv": [10, 33, 34, 35, 36, 37], "ctrl": 9, "cube": 31, "curli": 16, "current": [0, 9, 23, 24], "cursor": 9, "curv": 10, "custom": [31, 33, 34, 36], "cut": [3, 15], "cvill": 19, "cyberdyn": 28, "cycler": [0, 24], "d": [0, 2, 3, 9, 12, 15, 17, 21, 23, 24, 28, 31], "dag": [32, 38], "dai": [9, 10, 19, 23], "daili": [8, 40], "danger": 19, "darrel": 17, "dat": 33, "data": [2, 8, 9, 11, 15, 16, 19, 20, 21, 23, 28, 29, 32, 37, 40], "data1": [31, 38], "data2": [31, 32, 38], "data3": 31, "data_dict": 33, "databas": 36, "datafram": [10, 36, 37], "datascience_41model": 29, "dataset": [9, 32, 33, 34, 35, 36], "datatyp": 17, "date": 33, "datetim": 35, "datetimelik": 35, "dateutil": [0, 24], "datum": 21, "david": 33, "debug": [8, 11, 26, 27, 40], "decid": 28, "decim": [3, 14, 31], "decis": [8, 28, 29], "declar": [0, 24, 31, 38], "decypher_format_arg": [0, 24], "dedic": 9, "deep": [10, 21, 33, 35], "def": [0, 2, 3, 14, 22, 23, 24, 26, 27, 28, 30, 36], "default": [17, 21, 26, 27, 28, 29, 31, 32, 33, 34, 35, 36, 37, 38], "defend": 8, "defin": [2, 3, 14, 19, 20, 21, 22, 23, 26, 27, 29, 30, 31, 39], "definit": [23, 28, 29, 31, 35], "del": [3, 14, 34, 36], "delet": [3, 14, 32, 34, 36, 38], "delight": 10, "delimit": 33, "delin": 14, "denomin": [23, 26, 27], "depend": [9, 10, 19, 31, 32, 34, 35], "deprec": [34, 35], "depth": 21, "describ": [23, 34, 35], "descript": [2, 3, 23], "descriptor": 23, "design": [10, 23, 28, 29, 31, 32], "desir": [31, 32], "despin": [9, 10], "detail": [9, 11, 14, 16, 21, 23, 31, 32, 34, 35, 36, 37], "determin": [14, 23], "develop": [2, 8, 10, 28, 40], "deviat": [31, 32, 34, 35, 38], "devic": 8, "df": [33, 34, 36, 37], "df1": 37, "df2": 37, "df3": 37, "df4": 37, "df_deep": 33, "df_drop_al": [34, 35], "df_drop_x": [34, 35], "df_fill": [34, 35], "df_miss": [34, 35], 
"df_shallow": 33, "diabet": 33, "dialog": 9, "dict": [17, 19, 20, 21, 23, 33], "dict_item": 17, "dict_kei": 17, "dict_valu": [17, 28], "dictionari": [2, 16, 18, 19, 23, 28, 29, 33, 35, 36], "dictionary1": 17, "dictionary2": 17, "dictionary3": 17, "did": [12, 17, 20, 28], "didn": 23, "die": 16, "diff": 22, "differ": [2, 3, 8, 9, 10, 14, 16, 17, 19, 22, 23, 28, 29, 31, 33, 35], "difficult": [30, 32, 38], "dimens": [0, 24, 31, 32, 38], "dimension": [0, 24, 31, 32, 33, 38], "dir": [0, 24], "direct": [32, 38], "directli": [10, 12, 17, 31, 32], "directori": [0, 24, 31], "dirnam": [0, 24], "disappear": 9, "discuss": [3, 8], "dispers": [34, 35], "displai": 35, "distinct": [34, 35], "distinguish": 29, "distribut": [31, 34, 35], "div": 27, "dive": [31, 33], "divers": [8, 10], "divid": [15, 26, 27, 32], "divide_numb": [26, 27], "divis": [3, 12, 15, 20, 26, 27, 32], "divisbl": 20, "divmod": 23, "dl": 21, "dn": [0, 24], "do": [3, 7, 9, 12, 14, 15, 16, 17, 19, 20, 21, 23, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37], "doc": [23, 26, 27, 33, 34, 36, 38], "docencia": [0, 24], "docstr": 22, "document": [11, 21, 23, 31, 34, 35, 36], "documento": [0, 24], "doe": [0, 9, 16, 17, 19, 20, 21, 23, 24, 26, 27, 28, 29, 32, 35], "doesn": [34, 36], "dog": 28, "domain": [10, 29], "don": [8, 9, 16, 17, 19, 21, 23, 28, 29, 34, 35], "done": [9, 12, 28, 29], "door": 8, "dot": [16, 34, 35], "doubl": [34, 35], "down": [9, 12, 18, 23, 28, 29, 30], "dplyr": [10, 33], "draw": [9, 31], "drawn": [29, 31], "drop": [10, 36], "drop_end": 38, "drop_second_index": 38, "drop_start": 38, "dropna": [34, 35], "ds1002": [0, 24, 38], "dtype": [31, 32, 33, 34, 35, 36, 38], "due": 10, "dummi": [20, 23], "dure": [8, 21, 26, 27, 30, 40], "dynam": [3, 16], "e": [3, 12, 14, 15, 16, 17, 19, 21, 23, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 38], "each": [0, 3, 9, 10, 14, 15, 17, 19, 20, 21, 22, 23, 24, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38], "earli": [8, 19], "earlier": [14, 19, 31, 32, 33], "easi": [10, 12, 28], 
"easier": [10, 23, 26, 27, 28, 33], "easiest": 33, "easili": [10, 33], "economi": 23, "edit": [7, 34, 35], "edu": [11, 33], "educ": 11, "edureka": 9, "effect": [3, 33], "effici": [8, 9, 10, 21, 31, 40], "either": [7, 9, 19, 20, 23, 26, 27, 29, 31], "element": [0, 2, 3, 16, 17, 19, 21, 24, 26, 27, 29, 31, 32, 33, 38], "elementari": 3, "elementwis": 32, "elif": [3, 14, 20, 35], "elimin": 31, "elmnt": 17, "els": [0, 3, 14, 20, 21, 22, 23, 24, 28, 30, 35], "email": 8, "eman": 16, "embed": 9, "emphas": 31, "emploi": [3, 15], "empti": [17, 29, 31, 32], "en": 38, "enabl": [9, 11, 12, 33, 35], "encapsul": 8, "enclos": [16, 21, 23], "encod": 29, "encount": [23, 26, 27, 32], "encourag": 8, "end": [16, 17, 19, 21, 23, 26, 27, 28, 29, 32, 35, 38], "end_slic": 35, "endpoint": [34, 35], "endswith": [3, 16], "engin": 28, "enhanc": 12, "enjoi": 8, "enough": 19, "ensur": [26, 29, 31, 35], "enter": [9, 17], "entir": [9, 10, 29, 31, 32, 38], "entiti": [12, 31], "entri": [23, 26, 27, 33, 34, 35, 36], "enumer": [21, 23, 26, 27], "environ": [8, 9, 11, 14, 34, 35], "equal": [3, 15, 22, 23, 31, 32, 34, 36], "equat": [9, 11], "equival": 31, "error": [22, 23, 28, 33, 35, 36, 40], "esc": 9, "especi": [32, 33], "essai": 29, "essenti": [8, 10, 21, 33, 35, 40], "esssenti": 29, "etc": [2, 16, 28, 29], "eval": 12, "evalu": [3, 12, 15, 17, 19, 20, 21], "even": [3, 8, 10, 14, 15, 19, 20, 23, 26, 27, 28, 29, 32], "evenli": 31, "event": 37, "everi": [0, 3, 8, 10, 14, 16, 19, 24, 29, 32, 38], "every_oth": 38, "everyth": [9, 10, 16, 28, 31], "evolv": 29, "exactli": [23, 34], "exampl": [2, 3, 9, 10, 12, 14, 15, 19, 20, 23, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 38], "excel": [10, 33], "except": [0, 3, 14, 16, 21, 24, 29, 31, 32], "exceptiontyp": [26, 27], "exceptiontype1": [26, 27], "exceptiontype2": [26, 27], "exceptiontype3": [26, 27], "excercis": 17, "exclam": 31, "exclud": [21, 32], "exclus": [34, 35], "execut": [8, 10, 11, 12, 19, 21, 26, 27, 30], "exercis": 28, "exhibit": 23, "exist": [0, 16, 
21, 24, 28, 29, 36], "exp": 32, "expect": [8, 19, 26, 27, 30, 32, 33, 35], "experi": [8, 10, 34], "expertis": 29, "explain": [23, 28], "explan": [8, 11], "explicit": 10, "explicitli": 32, "explor": [10, 11, 23, 36], "expon": 32, "exponenti": [3, 15], "express": [10, 16, 17, 19, 21, 34, 36, 40], "expresss": [3, 15], "extend": 28, "extens": [0, 9, 10, 24, 28, 31], "extent": 8, "extern": [0, 24, 29, 31], "extra": [21, 32], "extract": [3, 16, 29, 32, 34, 35], "extrem": 35, "ey": 28, "f": [0, 16, 17, 19, 21, 23, 24, 26, 27, 28, 29, 30, 31, 35], "facilit": 11, "fact": [10, 23, 28, 33], "factori": 30, "fail": [23, 26, 27, 29], "fair": 10, "fake": 31, "fall": 21, "fals": [0, 3, 10, 14, 15, 16, 17, 21, 22, 23, 24, 28, 31, 32, 33, 34, 35, 36], "familiar": [9, 10, 11, 19, 31], "fan": 21, "far": [14, 16, 19, 21, 28, 29, 33, 34], "fast": [0, 24, 32], "faster": 28, "fastx": 11, "father": 16, "favorite_numb": 2, "fcn": 23, "fcn_bad_arg": 23, "fcn_force_keyword": 23, "fcn_nothing_to_return": 23, "fcn_swapped_arg": 23, "featur": [8, 10, 16, 17, 28, 31, 32, 33, 34, 36, 40], "feed": 29, "feedback": 8, "feel": [8, 10], "fetch": [34, 35], "few": [9, 28, 40], "fibonacci": 30, "field": [28, 29, 31, 33, 34, 35, 36], "fifth": 29, "figsiz": 39, "figur": 3, "file": [0, 9, 10, 11, 12, 23, 24, 31, 33, 35, 40], "file_or_url_context": [0, 24], "filenam": 29, "filenotfounderror": [0, 24], "filepath_or_buff": 33, "filesystem": 9, "fill": 31, "fillna": [10, 34, 35], "film": 21, "filter": [0, 10, 21, 24], "final": [3, 10, 12, 14, 16, 28, 29, 30, 31], "find": [0, 2, 3, 8, 10, 17, 23, 24, 26, 27, 32, 33, 38], "finish": 29, "first": [0, 2, 3, 10, 16, 17, 18, 19, 23, 24, 29, 30, 31, 32, 33, 34, 35, 38], "firstval": 2, "firulai": 28, "fit": 28, "fix": 23, "flag": [14, 32], "flatten": [31, 32], "flattened_gam": 31, "flexibl": [29, 31, 33], "flip": [32, 38], "float": [3, 14, 17, 23, 28, 31, 32, 33, 35, 38], "float128": 31, "float16": 32, "float32": [31, 32], "float64": [31, 32, 34, 35, 36, 38], 
"float64index": 35, "float_arr": 38, "float_var": [3, 14], "floor": [3, 15, 23], "flow": 19, "flush": 23, "fn": [0, 24], "fname": [0, 24], "focu": [8, 10], "focus": [8, 10, 40], "folder": [9, 31], "follow": [3, 8, 9, 10, 12, 14, 15, 17, 19, 20, 23, 26, 27, 28, 29, 31, 32, 34, 35, 36], "fonttool": [0, 24], "foo": [17, 23, 37, 38], "foomax": 38, "foomean": 38, "foomin": 38, "foosin": 38, "foostd": 38, "forc": [23, 28], "forcibli": 23, "forget": [26, 27], "form": [12, 21, 23, 28, 32, 34, 36, 38], "formal": 10, "format": [0, 8, 9, 11, 19, 23, 24, 35], "format_hint": [0, 24], "format_spec": 23, "formatt": 23, "forth": [3, 15], "fortran": [10, 19, 31], "found": [19, 33], "foundat": 31, "four": [3, 15, 29, 31], "fourth": 32, "frame": [10, 34, 35], "free": 8, "freq": [34, 35], "frequenc": [34, 35], "from": [0, 3, 8, 9, 10, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24, 26, 27, 28, 30, 31, 32, 33, 34, 35, 36, 37], "from_byt": 23, "fruit": 2, "ftp": 33, "full": 31, "func": [0, 24], "funciton": [34, 35], "function": [2, 3, 8, 10, 12, 14, 16, 17, 21, 22, 26, 27, 28, 29, 30, 31, 32, 33, 36, 37, 38, 40], "function_nam": 12, "fundament": [8, 10, 21, 33, 40], "further": [21, 30, 31, 34, 35, 36], "futur": [8, 11, 14, 23, 26, 27, 34, 35], "futurewarn": [34, 35], "g": [9, 16, 17, 19, 21, 23, 26, 27, 28, 29, 30, 31, 33, 34, 35], "game": [10, 31], "game_and_solut": 31, "games_and_solut": 31, "gaussian": 31, "gave": 23, "gbm": 23, "gener": [10, 16, 19, 23, 28, 29, 31, 34, 35, 36, 38], "get": [0, 2, 3, 9, 10, 11, 12, 14, 16, 17, 19, 21, 24, 29, 31, 35, 38, 40], "get_slice_bound": 35, "getattr": [23, 35], "ggplot2": 10, "git": 8, "github": [8, 40], "githubusercont": [33, 34, 35, 36, 37], "give": [3, 8, 12, 14, 15, 16, 19, 23, 26, 27, 28, 30, 31, 32, 33, 34, 35, 38], "given": [0, 3, 8, 9, 14, 16, 17, 18, 19, 21, 23, 24, 28, 30, 31, 32, 36, 40], "glimps": [31, 33], "global": [3, 14], "go": [0, 8, 11, 12, 16, 21, 24, 28, 29, 35], "goal": 8, "goe": [19, 29], "gone": 8, "goo": 38, "good": [3, 8, 15, 
19, 28], "goodby": 3, "googl": [0, 24], "got": 19, "grammar": 10, "grape": 19, "graph": [32, 38], "graphic": [10, 11], "great": [10, 12, 23], "greater": [3, 10, 15, 32], "green": 9, "greet": [12, 28], "grei": 9, "group": [3, 8, 15, 23, 28, 37], "grouped_boxplot": 9, "guess": 8, "guid": 8, "h": 10, "ha": [3, 9, 10, 12, 14, 15, 19, 21, 22, 26, 27, 28, 31, 32, 33, 34, 35, 36, 38], "habit": 8, "had": 20, "half": 2, "hand": [8, 10, 17], "handi": [17, 28], "handl": [8, 9, 10, 32, 33, 35, 40], "handler": [26, 27], "happen": [2, 9, 26, 27, 32, 35], "hard": [26, 27], "hasattr": [0, 24], "hash": [17, 23], "haskel": 10, "have": [0, 2, 3, 8, 9, 10, 11, 14, 15, 16, 17, 19, 23, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38], "head": [33, 34, 35, 36, 37], "header": [28, 35], "heard": 28, "heavi": 29, "height": 28, "hello": [3, 12, 14, 16, 23, 28], "help": [8, 9, 10, 23, 28, 29, 31, 32, 33, 36], "here": [0, 3, 8, 9, 10, 12, 14, 15, 16, 17, 19, 21, 23, 24, 26, 27, 28, 29, 31, 32, 33, 34, 35, 36, 38], "hesit": 8, "hex": [14, 23, 32, 33], "hexadecim": 14, "hide": 28, "hierarchi": [32, 38], "high": 10, "higher": [10, 32, 38], "highli": 10, "highlight": 31, "hint": [19, 20, 28], "hist": 35, "histori": 36, "hit": [3, 16, 36], "hold": [3, 14, 23, 31, 32, 33], "home": [0, 24, 28], "horizont": 33, "host": [23, 33], "hotel": [0, 24], "hour": 8, "houston": 33, "how": [3, 10, 12, 14, 15, 19, 20, 21, 23, 26, 27, 28, 31, 32, 34, 35, 36, 37], "howev": [0, 10, 14, 15, 17, 19, 23, 24, 28, 29, 31, 32, 33], "hpc": 11, "html": [9, 23, 26, 27, 33, 34, 36, 38], "http": [9, 10, 12, 17, 18, 23, 26, 27, 33, 34, 35, 36, 37, 38], "hue": 9, "human": 28, "hundr": 31, "i": [0, 2, 3, 8, 9, 10, 11, 12, 15, 17, 18, 19, 20, 21, 22, 23, 24, 26, 27, 29, 30, 32, 34, 35, 36, 37, 38, 39, 40], "i4": 31, "id": [14, 15, 23, 32, 33, 37], "idea": [8, 9, 16, 23, 35], "ideal": 11, "ident": 3, "identif": 33, "identifi": [8, 26, 27, 28, 34, 36], "ii": [21, 26, 27, 35, 40], "imag": [9, 23], "imageio": [0, 24], 
"imageio_imread": [0, 24], "imageio_plugin": [0, 24], "imagin": 20, "imaginari": 23, "img": [0, 24], "immut": [3, 17], "imopen": [0, 24], "imopen_arg": [0, 24], "imper": 19, "implement": [10, 12, 21, 28, 32], "impli": 3, "implicitli": [3, 21], "import": [3, 8, 9, 10, 14, 16, 17, 18, 19, 21, 23, 28, 29, 32, 34, 35, 36, 37, 38, 40], "importantli": [9, 21], "impos": 19, "improv": 21, "imput": 34, "imread": [0, 24], "imshow": [0, 24], "inaccur": 32, "includ": [8, 9, 10, 16, 17, 19, 20, 21, 23, 26, 27, 28, 29, 31, 32, 33, 34, 35, 37, 40], "inclus": 31, "incomplet": 8, "incorpor": [16, 26, 27], "incorrect": [8, 23, 26, 27], "increas": 20, "increasingli": 8, "incred": 29, "increment": [3, 15], "indent": [23, 26, 27, 28, 29], "indentationerror": [26, 27], "indenten": [26, 27], "independ": [31, 32], "index": [0, 10, 18, 19, 21, 23, 24, 33, 37, 38], "indexerror": [0, 24], "indic": [2, 9, 16, 21, 23, 26, 27, 32, 34, 35, 38], "individu": [9, 16], "industri": 10, "ineffici": 28, "inequ": [3, 15], "infer": [23, 29], "infinit": 27, "info": [3, 9, 16, 17, 23, 33, 34, 35], "inform": [8, 14, 16, 23, 28, 29, 33, 34, 35], "inher": 16, "inherit": 23, "initi": 31, "initil": 28, "inner": 37, "inplac": [34, 36], "input": [9, 12, 22, 23, 26, 27, 28, 29, 30, 31, 32, 33, 35], "insert": [9, 17, 23, 29], "insid": [9, 16, 19, 23, 26, 28, 30, 35], "insight": 35, "inspect": 36, "inspir": 33, "instal": 8, "instanc": [23, 28, 31], "instanti": 28, "instead": [2, 14, 30, 31, 32, 34, 35], "instruct": [10, 12], "instructor": [8, 12, 23], "int": [3, 14, 15, 17, 23, 26, 27, 29, 31, 32, 35], "int16": 31, "int32": [31, 38], "int64": [31, 33, 34, 35, 38], "int8": 31, "integ": [2, 3, 14, 15, 17, 19, 20, 22, 23, 28, 31, 32, 33, 38], "integer_var": 14, "integr": [11, 23, 29, 33], "intellig": 10, "intens": 10, "inter": [19, 21], "interact": [9, 11, 12, 14], "interchang": 23, "interdisciplinari": 29, "interest": 19, "interfac": [8, 9, 11, 28], "intermedi": [8, 32, 40], "intern": 33, "internat": 12, "interpet": 
21, "interpret": [3, 8, 12], "intersect": 17, "interv": 31, "introduc": [7, 8, 9, 19, 21, 31, 33, 35, 40], "introduct": [16, 38, 40], "introductori": [8, 40], "introspect": 9, "intuit": 10, "invalid": [0, 2, 14, 24, 26, 27, 32, 34, 36], "invalu": 8, "invers": [26, 27], "involv": [10, 35, 37], "io": [0, 9, 24, 33], "io_mod": [0, 24], "ip_address": 35, "ipykernel_14437": 32, "ipykernel_15133": 34, "ipykernel_15214": [], "ipykernel_17310": [], "ipykernel_17540": 35, "ipykernel_21005": 36, "ipykernel_26349": 34, "ipynb": 9, "ipython": 12, "iri": [33, 34, 35, 36, 37, 39], "iris_copi": 35, "iris_df": [34, 35, 36, 37], "iris_df_drop": [34, 36], "is_bool_index": 35, "is_float": 35, "is_integ": 35, "is_label": 33, "is_null_slic": 35, "is_odd": 21, "is_read_request": [0, 24], "is_scalar": 35, "isinst": [0, 3, 14, 23, 24, 35], "isn": [0, 23, 24], "issu": 32, "itali": 19, "item": [9, 16, 17, 19, 21, 23, 38], "iter": [17, 20, 23, 29, 32, 33], "its": [0, 2, 3, 8, 9, 10, 14, 15, 16, 17, 19, 21, 23, 24, 26, 27, 28, 29, 30, 31, 32, 33, 35, 36, 40], "itself": [2, 23, 28, 30, 31], "ix": 19, "i\u00f1igo": 16, "j": [9, 19, 21], "jami": 37, "java": 10, "javascript": 10, "javi": [0, 16, 24], "javier": [12, 16, 23], "job": 11, "joe": 32, "john": [17, 28], "join": [14, 16, 37], "jpg": [0, 24], "judgment": 8, "jupyt": [8, 11, 12, 31], "jupyter_notebook_cheatsheet_edureka": 9, "just": [3, 9, 12, 14, 15, 16, 17, 23, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35], "k": [9, 21, 31], "kaggl": 10, "kaggle_survey_2022_respons": 10, "kb": [34, 35], "keep": [2, 8, 17, 19, 21, 23, 26, 27, 34, 36], "keepdim": 32, "kei": [2, 16, 17, 19, 21, 23, 33, 35, 37], "kenni": 28, "key_express": 21, "keyowrd": [26, 27], "keyworad": [7, 9], "keyword": [3, 15, 23, 26, 27, 28, 29, 32, 34, 35, 36, 38], "keywork": 23, "kg": 28, "kill": 16, "kind": [0, 12, 20, 21, 24, 26, 27, 28, 29, 30, 33, 35], "kiwi": 19, "kiwisolv": [0, 24], "know": [2, 14, 19, 23, 28, 34, 36], "knowledg": 29, "known": [10, 12, 19, 23, 32], "kumquat": 19, 
"kwarg": [0, 23, 24], "labels": 10, "lack": 28, "lambda": [3, 10, 14, 36], "languag": [3, 8, 9, 11, 12, 19, 23, 28, 29, 33, 40], "languages_dat": 10, "larg": [21, 29, 32], "largest": [3, 15], "last": [0, 3, 14, 16, 17, 19, 21, 23, 24, 26, 27, 28, 31, 32, 34, 35, 38, 39], "lastli": 23, "later": [3, 14, 17, 23, 28, 31, 32, 38], "latex": [9, 11], "launch": 12, "laundri": 29, "layer": 21, "layman": 14, "lazili": 21, "lazy_load": [0, 24], "lead": [16, 32, 33], "learn": [9, 10, 11, 14, 15, 16, 17, 19, 21, 23, 26, 27, 28, 29, 31, 32, 33, 34, 35, 36], "learnbyexampl": [17, 18], "least": [22, 23, 31], "left": [3, 9, 17, 32, 34, 35, 36, 37], "legaci": [0, 24], "legacy_mod": [0, 24], "len": [16, 17, 22, 23, 34, 35, 38], "length": [16, 22, 23, 34, 35, 36, 37], "lengthi": 21, "less": [3, 10, 15, 19, 20], "lesser": 8, "lesson": [11, 12, 14, 15, 16, 17, 19, 21, 23, 26, 27, 28, 29, 31, 32, 33, 34, 35, 36], "let": [9, 11, 12, 17, 19, 21, 23, 26, 27, 28, 29, 31, 32, 35, 36], "letter": [2, 3, 14, 17, 18, 21], "level": [10, 32, 38], "leverag": 10, "lib": [0, 24, 35], "librari": [8, 10, 23, 26, 27, 31, 32, 33, 35, 40], "lifetim": 23, "like": [0, 2, 3, 8, 9, 10, 11, 12, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24, 28, 29, 31, 32, 33, 34, 35, 36], "likewis": 16, "line": [0, 2, 3, 8, 10, 11, 12, 14, 16, 17, 19, 21, 23, 24, 26, 27, 28, 29, 31, 34, 35, 36, 39, 40], "linear": [9, 31], "linearli": 31, "linspac": [0, 24, 31], "lisp": 10, "list": [2, 3, 9, 14, 15, 16, 18, 19, 20, 23, 26, 27, 28, 29, 31, 32, 33, 34, 35, 36, 38], "list1": 17, "list2": 17, "list3": 17, "list_iter": 21, "list_str": 29, "liter": [3, 14, 15, 16], "literari": 29, "littl": 23, "live": [9, 11, 21], "ll": [8, 31, 36], "llm": 29, "lo": 33, "load": [9, 12], "load_dataset": 9, "loc": [31, 36], "local": [8, 9, 33], "localhost": 33, "locat": 14, "log": [26, 27], "logic": [8, 10, 34, 36], "logreg": 23, "logspac": 31, "london": 29, "long": [8, 14, 20], "longer": 9, "look": [10, 11, 12, 19, 21, 23, 31, 32, 34, 35, 36, 38], "lookfor": 
19, "loop": [12, 17, 20, 22, 23, 29, 31], "lose": 29, "lot": [3, 10], "lover": 33, "low_memori": 10, "lower": [10, 14, 16, 19, 21, 32, 38], "lowercas": [14, 21], "lowest": 23, "lval": 37, "m": [3, 9, 15, 16, 21, 31, 32], "machin": [10, 11, 12, 28], "made": [9, 32], "mai": [8, 10, 19, 21, 23, 26, 27, 28, 29, 31, 32, 35, 36], "main": [9, 10, 16, 31], "maintain": [8, 9, 23, 28, 36], "major": 31, "make": [3, 8, 10, 11, 15, 18, 19, 20, 23, 28, 29, 31, 33, 35], "maker": 29, "manag": [8, 10, 29, 31], "manage_plugin": [0, 24], "mango": 19, "mani": [3, 8, 9, 10, 12, 14, 15, 16, 19, 20, 23, 28, 31, 32, 33, 34, 35], "manipul": [3, 8, 10, 15, 28, 32, 33, 34], "manual": [21, 28], "manufactur": 28, "map": [9, 17, 28], "mapper": 33, "mardown": 9, "margin": 9, "mark": 31, "markdown": [7, 9, 11, 18], "mask": [0, 24, 32], "master": [33, 34, 35, 36, 37], "match": [26, 27, 37], "mate": [26, 27], "materi": 8, "math": [12, 29, 38], "mathemat": [3, 10, 12, 15, 31], "mathemt": 3, "matlab": [10, 19], "matplotlib": [0, 10, 24, 31, 35], "matter": 23, "max": [0, 19, 24, 28, 32, 34, 35, 38], "max_it": 20, "max_val": [19, 20], "maximum": [19, 20], "maximun": 32, "mayb": [19, 26, 27], "me": [10, 19, 28], "mea": 21, "mean": [0, 3, 9, 10, 12, 14, 15, 16, 17, 23, 24, 28, 31, 32, 33, 34, 35, 36, 37, 38], "meaning": [23, 26, 27, 35], "meas_mmhg": 21, "meas_mmhg_dl": 21, "measur": 21, "mechan": 28, "media": 9, "median": [34, 35], "meet": 21, "membership": [3, 15], "memori": [3, 10, 14, 15, 21, 23, 31, 32, 33, 34, 35], "mention": [12, 14, 16, 17, 21, 28, 33], "menu": 7, "menubar": 9, "messag": [12, 16, 19, 26, 27, 28], "met": [19, 21], "method": [8, 10, 18, 21, 23, 29, 32, 34, 35, 36], "metric": 21, "mg": 21, "microcosm": 3, "might": [2, 12, 19, 31, 33], "min": [0, 24, 32, 34, 35, 38], "mind": [2, 21], "minim": 8, "minu": [17, 30], "miscellan": 40, "miss": [17, 23, 26, 27], "mistak": [26, 27], "mix": [16, 17, 21, 28, 34, 35], "ml": 21, "mmhg": 21, "mmm": 28, "mock_df": 35, "mockaroo": 35, "mod": 23, 
"mode": [0, 7, 12, 24], "model": [19, 21, 23, 29], "model_arch": 21, "modern": 10, "modif": 9, "modifi": [9, 14, 17, 28, 30, 32, 38], "modul": [8, 23, 24, 28, 32, 38, 40], "module1": 31, "module2": 31, "modulo": 20, "modulu": [3, 15], "moment": 31, "monitor": 11, "month": [17, 18], "montoya": 16, "mordisquito": 28, "more": [2, 3, 8, 10, 14, 15, 16, 17, 18, 21, 23, 26, 27, 28, 29, 30, 33, 34, 35, 36, 37, 40], "moreov": 10, "most": [0, 3, 8, 9, 10, 11, 14, 15, 16, 17, 19, 21, 23, 24, 26, 27, 28, 29, 31, 32, 33, 35, 36, 38, 39, 40], "mostli": 29, "mous": 7, "move": [3, 16], "mtrand": 31, "mu": 31, "much": [8, 10, 21, 23, 28], "multi": [10, 31, 33], "multidimension": [31, 32, 38], "multipl": [3, 10, 11, 12, 14, 15, 17, 23, 26, 27, 29, 30, 31, 32, 35], "multipli": [15, 17, 18, 30, 34, 36], "must": [0, 2, 3, 14, 17, 23, 24, 26, 27, 29, 31, 32, 33, 34, 35, 36], "mutabl": 10, "mwaskom": [33, 34, 35, 36, 37], "my": [12, 16, 28, 34], "my_anim": 28, "my_arg": 23, "my_args2": 23, "my_args_dict": 23, "my_dict": 28, "my_dog": 28, "my_func": 36, "my_nam": 16, "my_rang": 21, "my_str": 28, "my_var": [3, 15], "myarr": [32, 38], "myit": 21, "mylist": 17, "myset": 21, "mystr": 16, "myvar": 14, "n": [3, 16, 23, 28, 29, 30, 31, 33, 37, 39], "n_ob": 10, "naive_bay": 23, "name": [2, 8, 12, 16, 17, 18, 19, 22, 23, 26, 27, 28, 29, 31, 32, 33, 34, 36, 37, 39, 40], "nameerror": [3, 14, 23, 26, 27, 39], "nameoftheclass": 28, "nan": [32, 34, 35, 37], "nanmean": 32, "nanstd": 32, "nanvar": 32, "narr": [9, 11], "nativ": 23, "natur": [10, 29, 33], "navig": 8, "nbconvert": 9, "ncsu": 33, "ndarrai": [0, 24, 32, 33, 38], "ndigit": 23, "ndim": [0, 24, 31, 35, 38], "necessari": [23, 28, 32], "need": [3, 8, 9, 10, 14, 16, 17, 21, 23, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36], "neg": [14, 16, 23, 26, 27, 32], "negat": 32, "negate_coord": 23, "neglig": 10, "nest": [9, 17, 32, 38], "networkx": [0, 24], "never": 35, "nevertheless": 31, "new": [2, 8, 9, 10, 14, 16, 21, 23, 28, 29, 31, 32, 33, 34, 35, 36, 
38], "new_anim": 28, "new_foo": 38, "new_game_and_solut": 31, "new_sudoku_gam": 31, "new_sudoku_solut": 31, "newer": 28, "newlin": 23, "next": [3, 12, 16, 17, 21, 23, 28, 29], "nice": [16, 32, 38], "nine": 31, "nlp": 21, "nobel": 8, "non": [17, 23, 32, 33, 34, 35], "none": [0, 3, 14, 22, 23, 24, 31, 32, 35], "nonloc": [3, 14], "nonneg": 22, "normal": [9, 26, 27, 31, 34, 35], "notabl": [11, 33], "notat": [32, 34, 35, 36, 38], "note": [3, 8, 10, 15, 21, 28, 29, 31, 32, 33, 34, 36], "notebook": [8, 11, 12, 31, 35], "noth": [16, 19, 23, 26, 27, 29], "notic": 19, "notion": 21, "noun": [3, 15], "now": [2, 7, 9, 12, 16, 17, 19, 20, 23, 26, 27, 28, 29, 30, 32, 34, 36], "np": [0, 24, 31, 32, 34, 35, 36, 37, 38], "null": [33, 34, 35], "num": [23, 26, 27], "num1": 14, "num2": 14, "number": [2, 3, 7, 9, 10, 14, 16, 17, 18, 19, 20, 23, 26, 27, 29, 30, 31, 32, 33, 34, 35, 36], "numbers2": 17, "numbers4": 17, "numer": [2, 15, 17, 18, 19, 22, 23, 31, 34, 35, 36], "numeric_onli": [34, 35], "numeric_str": [31, 38], "numpi": [8, 33, 34, 35, 36, 37, 38, 40], "o": [0, 3, 16, 24, 29], "ob": 35, "obj": 35, "object": [9, 10, 14, 15, 16, 17, 19, 21, 23, 26, 27, 29, 32, 33, 34, 35, 37, 40], "object_": 31, "obs0": 35, "obs1": [33, 35], "obs2": [33, 35], "obs3": [33, 35], "obs4": [33, 35], "obs_id": 33, "observ": [30, 32, 33, 34, 35, 36], "obtain": [19, 34, 35], "obviou": 38, "obvious": 28, "occur": [23, 26, 27, 30], "odd": [19, 20, 21], "offer": [3, 10, 15, 31], "offic": 8, "offset": 9, "often": [8, 10, 17, 23, 29, 31, 35], "ogi\u00f1i": 16, "ok": [28, 35], "old": 28, "old_valu": [32, 38], "omit": [8, 16, 32, 38], "onc": [0, 17, 23, 24, 28, 29, 31, 33], "ondemand": 8, "ondex": 33, "one": [7, 8, 9, 16, 17, 18, 20, 21, 22, 23, 26, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38], "one_to_ten": 31, "ones": [0, 3, 15, 16, 21, 23, 24, 28, 31, 34, 36], "onli": [2, 3, 14, 16, 17, 21, 23, 26, 27, 28, 29, 31, 32, 33, 34, 35, 36, 38], "onlin": 33, "ood": 11, "oof": 38, "open": 8, "oper": [8, 12, 14, 19, 20, 
23, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 38], "operand": [17, 26, 27], "operationalerror": 36, "optim": [3, 9, 14, 31], "option": [14, 16, 21, 23, 26, 27, 31, 32, 33], "orang": 2, "order": [9, 17, 19, 21, 23, 28, 31, 32, 33], "org": [9, 10, 12, 17, 18, 23, 26, 27, 33, 34, 36, 38], "organ": [8, 9, 10, 17, 31, 40], "orient": [8, 10, 40], "origin": [17, 23, 28, 31, 32, 33, 35, 36], "original_label": 35, "other": [0, 3, 8, 9, 10, 15, 16, 22, 23, 24, 26, 27, 28, 29, 31, 32, 38], "otherwis": [3, 14, 19, 20, 23, 30, 32, 33], "oti": 16, "ouput": 14, "our": [8, 11, 12, 16, 23, 28, 29, 31, 32, 33], "out": [2, 3, 17, 18, 21, 22, 23, 31, 32], "outcom": 29, "outer": 37, "output": [9, 12, 14, 17, 21, 22, 23, 29, 31, 32], "outsid": [9, 14, 22, 23, 28], "over": [10, 19, 20, 21, 23, 29, 32, 34, 36, 38], "overal": 32, "overflowerror": 23, "overlai": [34, 36], "overrid": [28, 35], "overwrit": [17, 29], "own": [2, 8, 9, 10, 28, 31, 33], "p": [3, 15], "packag": [8, 10, 23, 32, 33, 35, 38, 40], "package_nam": 31, "packet": 28, "page": 9, "pair": [2, 17, 21, 23, 31], "palett": 9, "panda": [8, 10, 31, 32, 34, 36, 37, 40], "paradigm": 28, "parallel": 19, "paramet": [28, 31, 32, 33, 34, 35, 36, 37], "parametr": 23, "parent": 28, "parenthes": [3, 12, 15, 17, 19, 23, 28], "parenthesi": [12, 14, 19, 23, 28], "pari": 29, "pars": [0, 24, 32, 38], "part": [2, 16, 23, 26, 27, 29, 30, 36], "partial": 29, "particip": 8, "particular": [14, 16, 17, 28, 32, 34, 36], "particularli": [10, 17, 36], "pascal": 10, "pass": [2, 3, 14, 16, 17, 19, 22, 26, 27, 28, 31, 32, 33, 34, 35, 36], "pastel": 9, "path": [0, 24, 29, 33], "pattern": 9, "pd": [10, 33, 34, 35, 36], "pdf": 9, "peanut": 17, "peer": 8, "peform": [34, 35], "peopl": [10, 23], "per": 10, "percentag": [34, 35], "perfectli": [8, 10], "perform": [9, 14, 15, 16, 17, 23, 26, 27, 28, 29, 31, 32, 33, 38], "perhap": 31, "person": [8, 19, 28], "petal_length": [33, 34, 35, 36, 37], "petal_width": [33, 34, 35, 36, 37], "phonelist": 17, "photo": [0, 
24], "photo_mask": [0, 24], "photo_sin": [0, 24], "phrase": 28, "physicist": 31, "pie": 35, "piec": [12, 21, 23, 26, 27], "pillow": [0, 24], "pip": [0, 24, 31], "pitt": 19, "pixel": [0, 24], "place": [17, 21, 32], "placehold": [26, 27], "plai": [33, 35], "plain": 35, "plan": [26, 27], "platform": 12, "pleas": [8, 31], "plot": [9, 10, 23, 35, 39], "plt": [0, 10, 24], "plugin": [0, 24], "plugin_arg": [0, 24], "po": 17, "point": [10, 11, 14, 17, 31, 32, 33], "polici": 8, "popul": [2, 20, 23, 31, 35], "popular": 8, "portabl": 11, "posit": [2, 14, 17, 19, 21, 23, 26, 27, 32, 34], "possibl": [9, 23, 29, 31], "post": 8, "post0": [0, 24], "potenti": 8, "pow": 23, "power": [10, 16, 33], "pr": 23, "practic": [0, 8, 24], "pre": [0, 24, 31], "preced": [3, 15, 23], "precis": [21, 23, 32], "predefin": 28, "predominantli": 10, "prefer": [14, 35], "prefix": 16, "prepar": [8, 16, 33, 36], "preprocess": 36, "press": 9, "pretti": 28, "pretzel": 28, "prevent": [26, 27], "previou": [8, 10, 12, 15, 16, 17, 19, 21, 28, 29, 30, 31, 32, 36], "primari": [8, 35], "primarili": [8, 10, 34, 35, 40], "primit": 17, "princess": 21, "principl": 10, "print": [0, 2, 3, 12, 14, 15, 16, 17, 19, 20, 21, 22, 23, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38], "prize": 8, "probabl": 33, "problem": [8, 10, 30, 31], "procedur": [10, 28], "process": [8, 9, 10, 31, 35, 36], "prod": [0, 24], "produc": [19, 21, 22, 23, 35], "product": [10, 15, 23, 30], "profession": [10, 35], "program": [3, 8, 11, 19, 20, 23, 29, 30, 33, 40], "progress": 8, "project": 29, "prolog": 10, "promot": 10, "prompt": 9, "proper": 29, "properli": [22, 31], "properti": [17, 28], "propos": 29, "prorivd": 18, "protocol": [23, 31], "prototyp": 10, "provid": [8, 10, 17, 21, 23, 26, 27, 28, 29, 31, 32, 33, 40], "public": 10, "pull": [17, 18], "purpos": [3, 10, 14, 22, 23, 28, 29, 35], "put": [3, 15, 21, 32, 38], "py": [0, 24, 31, 32, 34, 35, 36], "pydata": [9, 10, 33, 34, 36], "pylab": 10, "pypars": [0, 24], "pyplot": [0, 24], "pyspark": 
21, "python": [0, 2, 3, 8, 10, 11, 14, 15, 16, 17, 18, 19, 20, 21, 23, 24, 26, 27, 28, 29, 30, 31, 32, 33, 35, 40], "python3": [0, 24, 35], "pythonist": 28, "pytorch": 10, "pywavelet": [0, 24], "q12": 10, "question": [3, 8, 10, 15, 17, 18, 29], "quick": [10, 11, 31], "quicker": 21, "quietli": 8, "quit": [2, 19, 20, 21, 28, 29], "quot": [16, 17, 23], "quotat": 3, "r": [8, 10, 11, 17, 19, 23, 29, 32, 33, 40], "rais": [0, 3, 14, 23, 24, 32, 35], "randint": 38, "randn": 37, "random": [31, 32, 37, 38], "random_arrai": 31, "randomst": 31, "rang": [14, 19, 29, 31, 32], "rangeindex": [33, 34, 35], "rangi": 17, "rank": 34, "rapidli": 10, "rapunzel": 21, "rasero": 16, "rather": [10, 17], "ratio": 23, "ration": 23, "raw": [9, 33, 34, 35, 36, 37], "re": [8, 10, 17, 23, 28], "reach": [19, 21, 23, 30], "read": [0, 3, 8, 10, 12, 21, 23, 24, 26, 27, 33, 40], "read_csv": [10, 33, 34, 35, 36, 37], "readabl": [10, 23], "readi": 35, "readlin": 29, "readonli": 36, "readthedoc": 9, "real": [23, 31], "realli": [26, 27, 28], "reason": [26, 27], "reassign": [3, 14, 16, 17, 23], "recal": 21, "recent": [0, 3, 14, 16, 17, 19, 21, 23, 24, 26, 27, 28, 35, 39], "recip": 28, "recogn": 23, "recommend": 9, "record": [34, 35, 37], "recurs": 30, "redefin": [28, 33, 35], "reduc": 32, "ref": [31, 32, 33, 34, 35, 36, 37], "refer": [2, 3, 10, 12, 14, 15, 16, 23, 28, 31, 32, 33, 38], "referenc": [0, 23, 24, 31], "regardless": 26, "regex": 10, "regress": 21, "regular": 10, "reiter": 28, "reject": 35, "rel": 9, "relat": [32, 35], "relationship": [3, 35], "remain": 10, "remaind": [3, 15, 21], "rememb": [3, 10, 14, 16, 18, 21, 28, 32], "remov": [16, 23, 29, 38], "renam": 33, "repeat": [14, 16, 17, 19], "repeatedli": 31, "repetit": [3, 16, 30], "repl": 12, "replac": [0, 16, 24, 28, 35], "repli": 12, "repr": 23, "repres": [17, 21, 23, 31, 32, 33, 34, 35, 37], "represent": [9, 14, 23, 29], "reproduc": 11, "request": [0, 23, 24, 31], "requir": [0, 8, 10, 12, 19, 22, 23, 24, 33], "rerun": 31, "research": [10, 11, 
29], "reserv": [3, 34, 35], "reserverd": [34, 36], "reset": 9, "reset_index": 10, "reshap": 31, "reshaped_gam": 31, "resolut": [23, 35], "resourc": [8, 10, 29], "respect": [10, 15, 17, 34, 35], "respons": 10, "rest": 16, "restrict": [3, 14], "result": [0, 3, 8, 10, 12, 14, 15, 16, 17, 19, 21, 23, 24, 26, 27, 29, 30, 31, 32, 34, 35, 37], "retain": 21, "retriev": [21, 33], "retriv": 31, "return": [0, 3, 9, 14, 15, 16, 17, 21, 22, 24, 26, 27, 29, 30, 31, 32, 33, 34, 35, 36, 38], "retval": 35, "reus": 28, "reusabl": [10, 23, 28], "revers": [16, 32, 38], "reward": 8, "rhetor": 29, "ri": [0, 24], "rich": [9, 11], "ride": 8, "right": [3, 8, 17, 19, 22, 23, 29, 34, 35, 36, 37], "risk": [8, 29], "rivanna": [8, 11, 35], "rn": 17, "rng": 17, "rnn": 21, "road": 28, "robin": 17, "role": 33, "rom": 9, "room": 21, "rot": 39, "rough": 9, "round": 23, "row": [0, 24, 31, 32, 33, 34, 35, 36, 37, 38], "rstudio": 11, "rubi": 10, "rule": [2, 3, 9, 10, 14, 28, 32], "run": [7, 12, 20, 26, 27, 29, 31], "runtimeerror": [0, 24], "runtimewarn": 32, "rval": 37, "s1": 33, "s2": 33, "s3": 33, "s4": [31, 33], "s5": 33, "s6": 33, "sai": [12, 16, 23, 28, 31], "said": 10, "salut": 28, "sam": 17, "samantha": 17, "same": [3, 7, 9, 14, 15, 16, 17, 21, 23, 26, 27, 28, 30, 31, 32, 33, 34, 36], "sampl": 31, "sarah": 17, "satisfi": [0, 24, 32, 36], "save": [9, 14, 16, 23, 28, 31, 32, 34, 36, 38], "saw": [15, 16, 17], "scala": 10, "scalabl": 28, "scalar": [17, 18, 32], "scalat": 28, "scale": 31, "scatter": 35, "scatterplot": 35, "scenario": 32, "scheme": [17, 28, 33], "scienc": [8, 11, 29, 31, 32, 33, 40], "scientif": [11, 31], "scientist": [8, 40], "scikit": [0, 10, 24, 31], "scipi": [0, 24, 31, 38], "score": 32, "scratch": 28, "screen": 12, "script": [10, 12], "seaborn": [9, 10, 33, 34, 35, 36, 37], "seamlessli": 35, "search": [0, 24], "searchin": 19, "second": [14, 16, 17, 23, 26, 27, 28, 30, 32], "secondv": 2, "secong": 16, "section": [8, 9, 26, 27], "see": [2, 8, 10, 14, 15, 16, 17, 18, 19, 21, 23, 26, 
27, 28, 30, 31, 32, 33, 34, 35, 36], "seem": [26, 27], "seen": [12, 23, 30], "select": [9, 16, 28, 32, 33, 37], "selector": [34, 36], "self": [0, 9, 23, 24, 35], "semest": [8, 12, 21], "sens": [3, 15], "sensibl": [22, 23], "sensit": [2, 3, 14], "sep": [23, 33], "sepal": [34, 36, 37], "sepal_length": [33, 34, 35, 36, 37, 39], "sepal_volum": [34, 36], "sepal_volume_2": [34, 36], "sepal_volume_3": [34, 36], "sepal_volume_4": 36, "sepal_width": [33, 34, 35, 36, 37], "separ": [9, 10, 14, 17, 28, 29, 32, 33, 35, 37, 38], "seq": 19, "sequenc": [9, 16, 17, 21, 30], "sequenci": 31, "sequenti": [3, 15, 33, 40], "seri": [20, 21, 29, 30, 34, 35], "series_dict": 33, "serv": [8, 31], "server": 11, "servic": 8, "session": [0, 8, 24], "set": [0, 3, 8, 9, 10, 11, 15, 19, 20, 23, 24, 28, 31, 32, 33, 34, 35, 36], "set1": 17, "set2": 17, "set_them": 9, "setosa": [33, 34, 35, 36, 37], "sever": [8, 14, 16, 17, 19, 23, 31], "sex": 33, "shallow": 33, "shape": [0, 10, 24, 31, 32, 34, 35, 36, 38], "share": [9, 11, 21, 33, 37], "sheet": 9, "shell": 12, "shift": 9, "short": [2, 3, 15, 23, 31], "shortchang": 8, "shortcut": [7, 9], "shorter": 21, "shot": 8, "should": [2, 3, 8, 9, 14, 19, 20, 21, 23, 26, 27, 28, 30, 31, 34, 35, 38], "show": [9, 14, 22, 23, 28, 32, 34, 35, 38], "show_arg_expans": 23, "show_entri": 23, "show_result": 23, "show_scop": 23, "shown": [7, 9, 23], "si": 16, "side": [34, 35, 36], "sigma": 31, "sign": 23, "signal": 31, "signatur": 23, "signific": 23, "silenc": [34, 35], "silo": 29, "similar": [10, 16, 17, 31, 33, 34, 35], "similarli": [9, 16, 17, 26, 27, 32, 33, 36, 38], "simpl": [2, 3, 9, 14, 23, 29, 32, 34, 35], "simplest": 33, "simpli": [12, 23, 31], "simplifi": [9, 30, 32, 38], "sin": [0, 24, 32, 38], "sinc": [8, 17, 23, 26, 27, 32, 34, 35], "singl": [3, 14, 15, 16, 17, 19, 23, 29, 31, 32, 33, 34, 35, 36, 38], "sir": 17, "sit": 8, "site": [0, 3, 8, 15, 24, 35], "situat": [21, 26, 27], "six": [0, 24, 31], "size": [10, 23, 31, 32], "skill": [8, 23, 40], "skimag": [0, 
24], "skip": [19, 26, 27], "slice": [18, 33, 34, 35], "slice_index": 35, "slice_loc": 35, "slice_obj": 35, "small": [8, 22, 36], "smaller": 30, "smoker": 9, "sn": [9, 10], "snake": 14, "snoopi": 17, "so": [2, 3, 8, 9, 10, 12, 14, 15, 16, 17, 19, 21, 23, 26, 27, 28, 29, 30, 31, 32, 33, 38], "socioeconom": 29, "softwar": [8, 10, 28], "solut": [10, 31], "solv": [10, 30, 31], "some": [3, 7, 8, 9, 10, 12, 14, 15, 16, 20, 23, 28, 29, 31, 32, 34, 35, 36, 38, 40], "somebodi": 23, "someth": [19, 20, 26, 27], "sometim": [2, 8, 19, 26, 27, 31, 32, 36], "soon": [12, 15, 16, 26, 27], "sort": [0, 2, 17, 24, 35], "sort_index": [34, 39], "sort_valu": 34, "sourc": [0, 8, 10, 11, 12, 24, 28], "space": [3, 14, 16, 19, 23, 31, 33, 35], "spain": 19, "speci": [33, 34, 35, 36, 37], "special": [15, 20, 28, 31, 32, 35], "specif": [2, 3, 10, 14, 16, 21, 23, 26, 27, 28, 29, 31, 32, 34, 35, 36, 37, 38], "specifi": [3, 14, 16, 17, 21, 23, 26, 27, 28, 31, 32, 33, 34, 35, 36], "speed": 10, "spell": 10, "spend": 8, "split": [16, 17, 33, 37], "spoken": 8, "spring": 21, "sql": [10, 17, 37], "sqrt": 32, "squar": [22, 23], "square_arg": 22, "st": 21, "stabl": [10, 33, 34, 36, 38], "stack": [31, 37], "stage": 35, "stakehold": 29, "standard": [31, 32, 34, 35, 38], "standard_norm": [31, 32], "start": [2, 3, 8, 9, 14, 15, 16, 17, 19, 21, 23, 28, 30, 31, 32, 33, 35, 38, 40], "start_slic": 35, "startswith": [3, 16], "stat": 33, "state": [9, 10, 28, 31], "statement": [0, 19, 20, 23, 24, 26, 27, 29, 31], "static": 23, "statist": [0, 10, 24, 29], "statsmodel": 10, "std": [0, 24, 32, 34, 35, 38], "stdev": 31, "stdout": 23, "stem": 33, "step": [11, 15, 16, 17, 22, 30, 33, 35], "stick": 28, "still": [14, 28, 35], "stop": [16, 17, 21, 26, 27, 32, 33, 35], "stop_word": 21, "stopiter": 21, "storag": 31, "store": [3, 10, 14, 16, 17, 19, 20, 21, 28, 29, 31, 33, 35], "str": [3, 14, 16, 19, 22, 26, 27, 29, 33], "str_": 31, "straightforward": [32, 36], "stream": 23, "strictli": 28, "string": [2, 14, 17, 19, 20, 22, 23, 
26, 27, 29, 31, 33, 34, 35, 36], "string1": [3, 16], "string2": [3, 16], "string_": [31, 38], "string_var": [3, 14, 16], "strip": 16, "strn": 21, "structru": 14, "structur": [2, 3, 8, 10, 14, 15, 16, 21, 28, 31, 32, 33, 36, 40], "student": [8, 32, 40], "studi": [14, 16, 19, 21, 23, 36], "style": [9, 10, 31, 37], "sub": [32, 35], "subarrai": 32, "subclass": 23, "subject": [3, 15, 17], "subject_id": [2, 3], "submit": 11, "subscript": 17, "subset": [3, 31, 34, 35], "substr": 16, "substract": 12, "subtract": [3, 15, 22, 32], "successfulli": 8, "sucess": [26, 27], "sudoku": 31, "sudoku_arrai": 31, "sudoku_gam": 31, "sudoku_solut": 31, "suggest": [16, 23], "suitabl": [10, 23], "sum": [0, 10, 14, 15, 24, 30, 32, 33, 36], "suma": 23, "summar": 21, "summari": [3, 15], "super": 19, "superior": 10, "support": [3, 8, 11, 15, 16, 17, 21, 23, 26, 27, 31, 33], "suppos": 28, "sure": [9, 20, 29, 35], "survei": 10, "survey_data": 10, "switch": [0, 24], "sy": 23, "symbol": [14, 15, 17], "syntax": [2, 3, 10, 15, 21, 26, 27, 28, 29, 34, 36], "syntaxerror": [2, 14, 19, 23, 26, 27, 34, 36], "system": [0, 10, 23, 24, 28, 29, 31], "t": [0, 8, 9, 16, 17, 19, 21, 23, 24, 28, 29, 31, 33, 34, 35, 36], "tab": [9, 19, 33, 34, 35], "tabl": [17, 31, 32, 33, 37], "tabular": [29, 33], "tail": [34, 35], "tailor": [8, 31], "take": [2, 8, 12, 14, 17, 19, 21, 22, 23, 26, 28, 29, 30, 31, 32, 34, 35, 36, 38], "takeabl": 35, "taken": 32, "talk": 28, "tall": 16, "tan": 38, "target": 28, "task": [8, 10, 23, 28, 30], "team": 23, "technic": [8, 23, 31], "techniqu": [8, 10, 30, 40], "technologi": [28, 29], "tell": 19, "templat": 28, "ten": 31, "tendenc": [34, 35], "tensorflow": 10, "term": [10, 14, 23, 31, 33], "termin": [28, 31, 40], "test": [3, 15, 20, 22, 23, 28, 29, 30, 32], "test2": 29, "test3": 29, "test4": 29, "test5": 29, "test6": 29, "text": [7, 9, 11, 16, 22, 23, 29, 33], "textiowrapp": 29, "than": [3, 9, 10, 15, 17, 19, 20, 21, 23, 32, 33, 34, 36], "thei": [3, 10, 11, 14, 15, 17, 19, 21, 23, 28, 29, 
31, 33, 34, 35], "them": [2, 8, 9, 11, 12, 14, 15, 16, 17, 19, 21, 23, 26, 27, 28, 31, 32, 33, 34, 35, 36, 37], "theori": 23, "therefor": [3, 15, 17, 19, 23, 26, 27, 28, 33], "thi": [0, 2, 3, 9, 10, 11, 12, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 40], "thing": [3, 9, 15, 16, 19, 21, 23, 26, 27, 28, 37], "think": [8, 10, 28, 29, 32, 33], "third": [26, 27, 32], "this_set": 19, "this_tupl": 19, "this_var": 14, "thisvar": 14, "thorugh": 9, "those": [2, 10, 16, 31, 34, 36], "though": [14, 21, 23, 26, 27], "thought": [2, 28], "thr": 23, "thread": 36, "three": [0, 2, 3, 9, 15, 23, 24, 29, 31, 32, 34, 35, 38], "thresh": 23, "threshold": 23, "through": [3, 8, 9, 10, 12, 14, 19, 20, 21, 23, 28, 29, 31, 32, 33, 36], "throughout": [8, 12, 14, 17, 28, 31], "throuhg": 19, "throw": 23, "thu": 9, "tick": 9, "tick_param": 10, "tidyvers": [8, 10], "tifffil": [0, 24], "tild": 32, "time": [8, 9, 10, 17, 21, 23, 26, 27, 28, 29, 31, 32, 38], "tip": 9, "titl": [10, 35], "tmp": [32, 34, 35, 36], "to_byt": 23, "to_fram": 39, "togeth": [14, 28, 29, 34, 36], "tok": 21, "token": 21, "tom": 17, "too": [16, 23, 26, 27, 28, 29, 34, 35], "tool": [8, 11, 31, 33, 40], "toolbar": 9, "top": [31, 32, 34, 35, 38], "topic": [2, 3, 38, 40], "total": [2, 33, 34, 35], "total_bil": 9, "total_volum": [2, 3], "traceback": [0, 3, 14, 16, 17, 19, 21, 23, 24, 26, 27, 28, 35, 39], "track": [19, 21], "traffic": 29, "trail": 16, "transform": [21, 32], "translat": 10, "transpar": 8, "transpos": [0, 24, 31], "treat": [23, 29, 31], "treatment": 35, "tree": [32, 38], "tri": [26, 27], "trim": 9, "tripl": 23, "troubl": 23, "troubleshoot": 8, "true": [0, 3, 9, 10, 14, 15, 16, 17, 19, 21, 22, 23, 24, 26, 27, 28, 31, 32, 33, 34, 35, 36], "truncat": [23, 29], "try": [2, 3, 7, 9, 14, 19, 20, 22, 23, 28, 29, 31, 35], "tup": 35, "tup_metr": 21, "tupl": [2, 14, 16, 18, 19, 20, 23, 26, 27, 31, 32], "tuple0": 17, "tuple1": 17, "turn": [31, 38], "tutori": 8, "two": [2, 3, 8, 9, 10, 
14, 15, 16, 17, 22, 23, 26, 27, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40], "txt": [22, 29, 33], "type": [0, 2, 7, 8, 9, 10, 12, 15, 16, 17, 21, 23, 24, 26, 27, 28, 29, 32, 33, 34, 35, 36, 37], "type1": 14, "type2": 14, "type3": 14, "typeerror": [16, 17, 23, 26, 27, 29, 35], "typic": [10, 14, 28, 31, 32, 36], "u": [21, 23, 28, 29], "ufunc": 32, "uhoh": 23, "unboundlocalerror": 23, "uncomfort": 8, "unconnect": 29, "underscor": [2, 3, 14], "understand": [8, 10, 16, 19, 21, 23, 31, 33, 40], "undoubtedli": 8, "unend": 19, "unexpect": [9, 23, 36], "unformat": 9, "unhandl": [26, 27], "uniniti": 31, "union": 17, "uniqu": [3, 14, 17, 28, 34, 35], "unit": 21, "units1": 21, "units2": 21, "univers": 32, "unknown": 8, "unless": [10, 26, 27, 28], "unlik": [10, 28], "unnecessarili": 32, "unord": [17, 19], "unpack": 19, "unprepar": 8, "unspecifi": 23, "unsupport": [17, 26, 27], "unsur": 8, "until": [19, 28, 30], "up": [8, 9, 16, 26, 27, 35, 37], "updat": [3, 15, 17, 23, 29, 32, 33, 38], "upload": [9, 35], "upon": 38, "upper": [16, 21, 28], "uppercas": [14, 21], "uri": [0, 24], "url": 33, "us": [0, 2, 3, 7, 8, 9, 10, 11, 12, 14, 15, 16, 18, 20, 21, 22, 23, 24, 26, 27, 28, 29, 30, 31, 33, 34, 35, 37, 38, 40], "usa": 19, "usabl": 23, "usag": [9, 33, 34, 35], "user": [0, 9, 11, 24, 31, 33], "user_guid": 33, "userwarn": [34, 36], "usual": [0, 9, 10, 17, 21, 23, 24, 26, 27, 31], "utf": 29, "util": [10, 19], "uva": 8, "v": [9, 19, 21], "v2": [0, 24], "vagu": [26, 27], "val": [19, 20, 21, 22, 23], "valencia": 19, "valid": [23, 33, 34, 35], "vals_greater_than_or_equal_to_threshold": 23, "valu": [0, 2, 3, 14, 15, 19, 20, 21, 22, 24, 28, 29, 31, 33, 37], "valuabl": 11, "value_count": [10, 34, 35, 39], "value_express": 21, "valueerror": [0, 19, 24, 26, 27], "var": [0, 19, 21, 22, 24, 32, 33], "var_float": [3, 14], "var_int": [3, 14], "var_str": [3, 14], "vari": 32, "variabl": [15, 16, 17, 19, 20, 21, 22, 26, 27, 31, 32, 33, 34, 35, 38, 40], "varieti": 10, "variou": [8, 10, 21, 34, 35], 
"ve": [28, 30, 31], "vector": [31, 32], "verb": [3, 15], "veri": [3, 10, 15, 21, 33, 34, 35, 36, 37], "verifi": [22, 23], "verify_string_length": 22, "versatil": 10, "versicolor": [34, 35, 37], "version": [12, 23, 34, 35, 38], "versionad": [31, 32], "vertic": 37, "via": [11, 31, 34, 36], "video": [9, 10], "view": [3, 14, 32], "virginia": [3, 11, 14], "virginica": [33, 34, 35, 36, 37], "visibl": [9, 23], "visit": 9, "visual": [8, 10, 11, 21, 29, 32, 33, 38], "vital": 8, "vowel": 21, "w": 29, "w3school": [3, 15], "wa": [0, 10, 17, 21, 23, 24, 26, 27, 28, 29, 31, 36], "wahoo": 3, "wai": [2, 3, 8, 9, 10, 14, 15, 16, 17, 19, 21, 23, 26, 27, 28, 29, 31, 32, 33, 35, 36, 38], "wait": [26, 27], "walk": 28, "want": [9, 12, 14, 16, 17, 19, 23, 26, 27, 28, 29, 31, 32, 35, 36], "warn": [16, 28, 34, 35], "wd": 21, "we": [0, 2, 3, 8, 10, 11, 12, 14, 15, 16, 17, 19, 20, 21, 23, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36], "weather": 29, "web": [9, 10, 11], "week": [2, 3, 8, 17], "weight": [28, 32], "welcom": [8, 9], "well": [8, 10, 21, 23, 40], "went": [26, 27], "were": [3, 20, 21, 23, 33], "what": [0, 2, 3, 8, 12, 15, 17, 19, 20, 21, 23, 24, 26, 27, 29, 30, 32, 34, 36], "whatev": [28, 34, 35], "when": [0, 3, 9, 12, 14, 15, 16, 17, 19, 20, 21, 24, 26, 27, 28, 29, 30, 31, 33, 34, 35, 36, 38], "where": [0, 3, 8, 10, 14, 17, 21, 23, 24, 26, 27, 28, 29, 30, 32, 33], "wherea": [16, 21, 23, 33], "whether": [9, 16, 19, 23, 26, 27, 28, 31, 32], "which": [2, 8, 9, 10, 11, 12, 14, 16, 17, 19, 21, 23, 26, 27, 28, 29, 31, 32, 33, 34, 35, 36, 38], "while": [3, 8, 10, 14, 20, 21, 23, 28, 33, 36], "white": [9, 14], "whitespac": 16, "who": [8, 32], "whole": [9, 16, 28, 32, 35], "whose": [23, 29, 31, 32, 38], "why": [10, 19, 21, 23, 26, 27, 33], "wide": [10, 11, 33], "widget": 9, "wiki": 38, "wikipedia": 38, "wil": 21, "wild": 28, "wise": 32, "wish": [23, 35], "within": [2, 8, 11, 14, 16, 20, 21, 23, 26, 27, 28, 30, 31, 32, 38], "without": [9, 12, 14, 17, 21, 28, 29], "won": 23, "wonder": 12, 
"woodstock": 17, "woof": 28, "word": [14, 15, 19, 21, 23, 28, 34], "work": [8, 10, 11, 16, 17, 19, 20, 21, 22, 23, 26, 27, 28, 29, 30, 31, 33, 35, 36, 40], "workflow": 9, "workhors": 38, "world": [3, 12, 31], "worri": [23, 28, 29], "would": [2, 9, 10, 14, 16, 17, 23, 26, 27, 29, 31], "wow": 28, "wp": 9, "wrangl": 29, "wrap": [0, 24], "wrapper": 32, "write": [0, 8, 9, 10, 11, 12, 14, 18, 20, 21, 22, 23, 24, 26, 28, 36, 40], "writelin": 29, "writen": 12, "written": [10, 23, 29, 36], "wrong": [8, 26, 27], "www": [9, 10, 17, 18, 38], "www4": 33, "x": [0, 2, 3, 9, 10, 14, 15, 17, 19, 22, 23, 24, 26, 27, 31, 32, 33, 34, 35, 36, 38], "x1": 23, "xlabel": [10, 35], "xx": [19, 20], "y": [2, 3, 9, 10, 14, 15, 16, 17, 23, 31, 33, 34, 35], "y1": 23, "ye": [17, 28], "year": [26, 27, 28], "yield": [3, 12, 14, 17, 21], "ylabel": [10, 35], "ym": 16, "york": [10, 33], "you": [2, 3, 9, 10, 11, 12, 14, 15, 16, 17, 19, 21, 22, 23, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38], "your": [0, 2, 3, 8, 9, 10, 14, 15, 16, 17, 18, 19, 21, 23, 24, 26, 27, 28, 29, 31, 32, 33, 35], "yourself": [8, 26, 27, 28, 31, 35], "z": [0, 2, 3, 9, 14, 15, 17, 19, 20, 23, 24, 33], "zero": [16, 17, 26, 27, 31, 32, 38], "zero_arrai": [31, 38], "zero_int_arrai": 38, "zerodivisionerror": [26, 27], "zeros_lik": 31, "zip": 23, "\u00f1g": 16, "\u00f1igo": 16}, "titles": ["Installing & Importing Packages", "Getting started", "Metadata", "Metadata", "<no title>", "<no title>", "<no title>", "Practice", "Introduction", "Jupyter Notebooks", "Brief introduction to programming languages", "Tech Stack", "Your first Python program!", "Python (Beginner)", "Variables and data types", "Operators and Expressions", "Strings", "Data Structures", "Data Structures Exercises", "Control Structures", "<no title>", "Iterables and Iterators", "<no title>", "Functions", "Installing & Importing Packages", "Python (Intermediate)", "Errors and Exceptions", "Errors and Exceptions", "Introduction to object-oriented programming (OOP)", 
"Reading and Writing Files", "<no title>", "NumPy (Part I)", "NumPy (Part II)", "Introduction to Pandas", "PandasII: Exploration", "Pandas: Data Exploration", "PandasIII: Data Manipulation", "Concatenating and Merging", "PREREQUISITES", "<no title>", "Welcome to DS-1002"], "titleterms": {"": 35, "1002": 40, "By": 36, "The": 31, "Will": 8, "access": 35, "ad": [17, 26, 27], "advanc": 36, "aggreg": 37, "alias": [0, 24, 31], "an": 33, "ar": [0, 8, 24], "argument": [2, 23], "arithmet": 15, "arrai": [0, 24, 31, 32, 38], "assign": 36, "attribut": [28, 31, 33], "axi": 33, "basic": [0, 9, 24, 32, 35, 38], "beginn": 13, "best": [26, 27], "block": [26, 27], "boolean": [32, 34, 36], "bracket": 36, "break": 19, "brief": [10, 31], "built": [19, 23], "calcul": [32, 38], "call": 23, "can": 19, "canva": 11, "cell": 9, "check": 16, "class": 28, "clean": 35, "code": 9, "column": [34, 35, 36], "command": 9, "common": 31, "comparison": [3, 15], "compil": 10, "compon": 9, "comprehens": 21, "concat": 37, "concaten": 37, "concept": 38, "condit": 19, "construct": 17, "continu": 19, "control": 19, "convert": [3, 14], "cours": 8, "creat": [23, 31, 33, 36], "current": 19, "d": 40, "data": [0, 3, 10, 14, 17, 18, 24, 31, 33, 34, 35, 36, 38], "datafram": [33, 34, 35], "deal": 34, "default": 23, "defin": 28, "dictionari": [17, 21], "docstr": 23, "document": 9, "drop": [32, 34, 35, 38], "edit": 9, "editor": 9, "elif": 19, "els": [19, 26, 27], "entri": 17, "enumer": 19, "error": [26, 27], "exampl": 21, "except": [26, 27], "excersis": [26, 27, 28, 29], "exercis": [14, 15, 16, 17, 18, 19, 21, 23, 31, 32, 33, 35, 36], "exit": 19, "explor": [34, 35], "express": [3, 15], "fanci": 32, "file": 29, "filter": [34, 36], "final": [26, 27], "first": 12, "format": 16, "frame": 33, "from": 29, "function": [0, 19, 23, 24], "gener": 21, "get": 1, "global": 23, "good": 23, "groupbi": 37, "guidelin": 23, "handl": [26, 27], "how": [8, 33], "i": [14, 16, 28, 31, 33], "id": 3, "ident": 15, "ii": 32, "iloc": [34, 35], 
"imag": [0, 24], "immut": 16, "import": [0, 24, 31, 33], "imput": 35, "indent": 19, "index": [2, 16, 17, 32, 34, 35, 36], "inherit": 28, "initi": 28, "insert": [32, 38], "inspect": [34, 35], "instal": [0, 24, 31], "intermedi": 25, "interpret": 10, "introduct": [8, 9, 10, 19, 21, 23, 26, 27, 28, 29, 31, 33], "iter": [19, 21], "its": 34, "jupyt": 9, "jupyterlab": 11, "kernel": 9, "keyboard": 9, "keyword": 14, "know": 8, "label": [33, 34, 35], "languag": 10, "learn": 8, "liner": 19, "list": [17, 21], "loc": [34, 35], "local": 23, "logic": [3, 15], "loop": [19, 21], "manipul": [36, 38], "mask": [34, 36], "membership": 16, "menu": 9, "merg": 37, "metadata": [2, 3], "method": [16, 17, 28, 31, 33], "miss": [34, 35], "modal": 9, "mode": [9, 29], "modul": 31, "more": [9, 32], "mous": 9, "multipl": 19, "mutabl": 17, "name": [3, 14, 35], "navig": 9, "ndarrai": 31, "nest": 21, "new": 17, "note": 17, "notebook": 9, "numer": [0, 3, 24], "numpi": [0, 24, 31, 32], "object": [3, 8, 28, 31, 38], "ondemand": 11, "one": 19, "oop": 28, "open": [11, 29], "oper": [3, 15, 16, 17], "orient": 28, "overview": 33, "pack": 23, "packag": [0, 24, 31], "panda": [33, 35], "pandasii": 34, "pandasiii": 36, "paradigm": 10, "paramet": 23, "part": [31, 32], "pass": 23, "pd": 37, "pivot_t": 37, "posit": 35, "practic": [7, 14, 15, 16, 17, 19, 21, 23, 26, 27, 28, 29, 31, 32, 33, 35, 36], "prerequisit": 38, "process": 19, "program": [10, 12, 28], "properti": 33, "python": [12, 13, 25], "quick": 35, "rais": [26, 27], "rang": [17, 21], "read": 29, "remov": [34, 36], "replac": 34, "reserv": 14, "resourc": 9, "restart": 9, "retriev": 17, "return": 23, "run": 9, "runtim": [26, 27], "scienc": 10, "scope": 23, "select": [34, 35], "self": 28, "seri": 33, "set": [17, 21], "slice": [16, 17, 32, 38], "some": [17, 19, 26, 27, 33], "sort": [34, 36], "sort_index": 36, "sort_valu": 36, "sourc": 38, "stack": 11, "start": 1, "stop": 19, "string": [3, 16, 21], "structur": [17, 18, 19, 34, 35], "subset": [16, 36], "succe": 
8, "summar": [34, 35], "summari": [17, 37], "tech": 11, "theori": 21, "thi": 8, "tip": 23, "transform": 36, "try": [26, 27], "tupl": [17, 21], "type": [3, 14, 31, 38], "unari": [3, 15], "unpack": 23, "us": [17, 19, 32, 36], "v": 10, "valu": [17, 23, 32, 34, 35, 36, 38], "variabl": [2, 3, 14, 23], "veri": 31, "versu": 23, "visual": 35, "welcom": 40, "what": [14, 16, 28, 31, 33], "when": 23, "while": 19, "whole": 34, "work": 34, "write": [19, 29], "you": 8, "your": [12, 36], "zip": 19}}) \ No newline at end of file