diff --git a/weekly_task_2_M_Hazmi_Cokro_Mandiri.ipynb b/weekly_task_2_M_Hazmi_Cokro_Mandiri.ipynb new file mode 100644 index 0000000..474df49 --- /dev/null +++ b/weekly_task_2_M_Hazmi_Cokro_Mandiri.ipynb @@ -0,0 +1,1025 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "weekly_task_2_M Hazmi Cokro Mandiri.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true, + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "55nDd6qHBX8x" + }, + "source": [ + "# SF Salaries Exercise\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ohprVbEsCPOB" + }, + "source": [ + "Selamat datang di latihan untuk melatih keterampilan pandas Anda! Kami menggunakan Dataset SF Salaries dari [Kaggle!](https://www.kaggle.com/kaggle/sf-salaries) Cukup ikuti dan selesaikan tugas yang diuraikan di bawah ini. Tugas akan semakin sulit seiring berjalannya waktu." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LRcRePdACo9h" + }, + "source": [ + "**import library pandas as pd**" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "RiIRhjEl842B" + }, + "source": [ + "import pandas as pd" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "vcJ83G_DKIG2", + "outputId": "d993a8ca-7854-4a6e-e06c-6c0497c00eb0" + }, + "source": [ + "from google.colab import drive\r\n", + "drive.mount('/content/drive')" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Mounted at /content/drive\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "6AulZq_2Kcmn", + "outputId": "2052b19c-095b-47d2-fe3f-41a1a181bbb0" + }, + "source": [ + "%cd '/content/drive/MyDrive/weekly_task_2_M Hazmi Cokro Mandiri'" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "/content/drive/MyDrive/weekly_task_2_M Hazmi Cokro Mandiri\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "bhNHc4Y0il2D", + "outputId": "37de83ca-3242-49fc-f962-2e74d86d6ac5" + }, + "source": [ + "!unzip 'Lemonade Final - Hazmi Cokro Weekly 2.zip' &> /dev/null && rm *.zip" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Archive: Lemonade Final - Hazmi Cokro Weekly 2.zip\n", + " inflating: Lemonade Final - Hazmi Cokro Weekly 2.xlsx \n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "DUofQr2iL7Nj", + "outputId": "7636bb37-86a5-45db-f7a3-e73a0b22565b" + }, + "source": [ + "!unzip Salaries.zip &> /dev/null && rm *.zip" + ], + "execution_count": null, + 
# %% Read Salaries.csv into a DataFrame named `sal`.
sal = pd.read_csv(
    '/content/drive/MyDrive/weekly_task_2_M Hazmi Cokro Mandiri/Salaries.csv'
)

# %% Check the head of the DataFrame (first five rows).
sal.head()
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IdEmployeeNameJobTitleBasePayOvertimePayOtherPayBenefitsTotalPayTotalPayBenefitsYearNotesAgencyStatus
01NATHANIEL FORDGENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY167411.180.00400184.25NaN567595.43567595.432011NaNSan FranciscoNaN
12GARY JIMENEZCAPTAIN III (POLICE DEPARTMENT)155966.02245131.88137811.38NaN538909.28538909.282011NaNSan FranciscoNaN
23ALBERT PARDINICAPTAIN III (POLICE DEPARTMENT)212739.13106088.1816452.60NaN335279.91335279.912011NaNSan FranciscoNaN
34CHRISTOPHER CHONGWIRE ROPE CABLE MAINTENANCE MECHANIC77916.0056120.71198306.90NaN332343.61332343.612011NaNSan FranciscoNaN
45PATRICK GARDNERDEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT)134401.609737.00182234.59NaN326373.19326373.192011NaNSan FranciscoNaN
\n", + "
# %% Use .info() to find out how many entries there are.
# Prints the index range plus the non-null count and dtype of every column.
sal.info()
# %% What is the average BasePay?
sal['BasePay'].mean()

# %% What is the highest OvertimePay?
sal['OvertimePay'].max()

# %% What is the job title of JOSEPH DRISCOLL?
# The name must be upper case: the data also holds a mixed-case "Joseph Driscoll".
sal.loc[sal['EmployeeName'] == 'JOSEPH DRISCOLL', 'JobTitle']
# %% How much does JOSEPH DRISCOLL earn, benefits included?
sal.loc[sal['EmployeeName'] == 'JOSEPH DRISCOLL', 'TotalPayBenefits']

# %% Who is the highest-paid person (benefits included)?
# Keeps every row whose TotalPayBenefits equals the column maximum.
sal.loc[sal['TotalPayBenefits'].eq(sal['TotalPayBenefits'].max())]
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IdEmployeeNameJobTitleBasePayOvertimePayOtherPayBenefitsTotalPayTotalPayBenefitsYearNotesAgencyStatustitle_length
01NATHANIEL FORDGENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY167411.180.0400184.25NaN567595.43567595.432011NaNSan FranciscoNaN46
\n", + "
# %% Who is the lowest-paid person (benefits included)?
# Is there anything odd about how much they were paid?
# Keeps every row whose TotalPayBenefits equals the column minimum.
sal.loc[sal['TotalPayBenefits'].eq(sal['TotalPayBenefits'].min())]
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IdEmployeeNameJobTitleBasePayOvertimePayOtherPayBenefitsTotalPayTotalPayBenefitsYearNotesAgencyStatustitle_length
148653148654Joe LopezCounselor, Log Cabin Ranch0.00.0-618.130.0-618.13-618.132014NaNSan FranciscoNaN26
\n", + "
# %% What is the average BasePay of all employees per year?
# Select BasePay *before* aggregating. `groupby('Year').mean()` would average
# every column first, which raises TypeError on the text columns under
# pandas >= 2.0 and wastes work on older versions.
sal.groupby('Year')['BasePay'].mean()

# %% How many unique job titles are there?
sal['JobTitle'].nunique()

# %% What are the five most common job titles?
sal['JobTitle'].value_counts().head(5)
# %% How many job titles were held by only one person in 2013?
# Count the titles that occur exactly once among the 2013 rows.
(sal.loc[sal['Year'] == 2013, 'JobTitle'].value_counts() == 1).sum()

# %% How many people have "Chief" in their job title?
def kata_chief(kc):
    """Tell whether a job title contains the text "chief", ignoring case.

    Parameters
    ----------
    kc : object
        A single job title. Anything that is not a string (for example a
        missing value) is treated as not matching instead of raising.

    Returns
    -------
    bool
        True when "chief" occurs anywhere in the lower-cased title.
    """
    return isinstance(kc, str) and 'chief' in kc.lower()

# Pass the function directly (no lambda wrapper) and let pandas add up the flags.
total_Chief = sal['JobTitle'].apply(kata_chief).sum()
print(f"Jumlah Chief = {total_Chief}")
# %% Bonus: is there a correlation between job-title length and pay?
# Build the two-column frame on the fly instead of writing a `title_length`
# column into `sal`: mutating the shared frame makes every other cell's output
# depend on whether this cell has already been run.
title_pay = sal[['TotalPayBenefits']].assign(
    title_length=sal['JobTitle'].str.len()  # character count of each title
)
title_pay.corr()
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TotalPayBenefitstitle_length
TotalPayBenefits1.000000-0.036878
title_length-0.0368781.000000
\n", + "
" + ], + "text/plain": [ + " TotalPayBenefits title_length\n", + "TotalPayBenefits 1.000000 -0.036878\n", + "title_length -0.036878 1.000000" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 24 + } + ] + } + ] +} \ No newline at end of file