From 18b249f9f4fcf311d71d8ac80ce1e8d5671d865b Mon Sep 17 00:00:00 2001 From: Gracechung-sw Date: Wed, 26 Jun 2024 17:10:20 +0900 Subject: [PATCH] add code about clarify fairness bias and explanability of sagemaker --- .../clip07_fairness_bias_clarify.ipynb | 1067 +++++++++++++++++ 1 file changed, 1067 insertions(+) create mode 100644 aws-sagemaker/clarify_fairness_explainability/clip07_fairness_bias_clarify.ipynb diff --git a/aws-sagemaker/clarify_fairness_explainability/clip07_fairness_bias_clarify.ipynb b/aws-sagemaker/clarify_fairness_explainability/clip07_fairness_bias_clarify.ipynb new file mode 100644 index 0000000..c240069 --- /dev/null +++ b/aws-sagemaker/clarify_fairness_explainability/clip07_fairness_bias_clarify.ipynb @@ -0,0 +1,1067 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "5a5cce68-132e-459a-8b5c-5e16d2d4607f", + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "!{sys.executable} -m pip uninstall -y sagemaker\n", + "!{sys.executable} -m pip install --upgrade pip\n", + "!{sys.executable} -m pip install --upgrade boto3 --no-cache-dir\n", + "!{sys.executable} -m pip install --upgrade sagemaker==2.123.0 --no-cache-dir" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "713eba58-5769-44f4-bc0e-dda7ebdce879", + "metadata": {}, + "outputs": [], + "source": [ + "from sagemaker import Session\n", + "from sagemaker import get_execution_role\n", + "import pandas as pd\n", + "import numpy as np\n", + "import os\n", + "import boto3\n", + "from datetime import datetime\n", + "\n", + "\n", + "session = Session()\n", + "default_bucket = session.default_bucket()\n", + "default_prefix = \"sagemaker/fastcampus-sagemaker-clarify-bias-practice\"\n", + "region = session.boto_region_name\n", + "\n", + "role = get_execution_role()\n", + "s3_client = boto3.client(\"s3\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "1463073d-f37d-4f07-9d44-238184ca998c", + "metadata": {}, + 
"outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "us-east-2\n", + "arn:aws:iam::767397847434:role/service-role/AmazonSageMaker-ExecutionRole-20240111T201222\n" + ] + } + ], + "source": [ + "print(session)\n", + "print(region)\n", + "print(role)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "fb5ff674-2e9f-489e-af8c-1a705bacc8fa", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "adult.data saved!\n", + "adult.test saved!\n" + ] + } + ], + "source": [ + "adult_columns = [\n", + " \"Age\",\n", + " \"Workclass\",\n", + " \"fnlwgt\",\n", + " \"Education\",\n", + " \"Education-Num\",\n", + " \"Marital Status\",\n", + " \"Occupation\",\n", + " \"Relationship\",\n", + " \"Ethnic group\",\n", + " \"Sex\",\n", + " \"Capital Gain\",\n", + " \"Capital Loss\",\n", + " \"Hours per week\",\n", + " \"Country\",\n", + " \"Target\",\n", + "]\n", + "if not os.path.isfile(\"adult.data\"):\n", + " s3_client.download_file(\n", + " f\"sagemaker-example-files-prod-{session.boto_region_name}\",\n", + " \"datasets/tabular/uci_adult/adult.data\",\n", + " \"adult.data\",\n", + " )\n", + " print(\"adult.data saved!\")\n", + "else:\n", + " print(\"adult.data already on disk.\")\n", + "\n", + "if not os.path.isfile(\"adult.test\"):\n", + " s3_client.download_file(\n", + " f\"sagemaker-example-files-prod-{session.boto_region_name}\",\n", + " \"datasets/tabular/uci_adult/adult.test\",\n", + " \"adult.test\",\n", + " )\n", + " print(\"adult.test saved!\")\n", + "else:\n", + " print(\"adult.test already on disk.\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "d6e8d2da-2fdb-4548-a44e-7e653f1be0e8", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from sagemaker.session import Session\n", + "from sagemaker import get_execution_role\n", + "from sagemaker.experiments.run import Run\n", + "from sagemaker.utils import unique_name_from_base\n", + "\n", + 
"role = get_execution_role()\n", + "sagemaker_session = Session()\n", + "\n", + "experiment_name = \"fc-bias-fairness-clarify-practice-{}\".format(\n", + " datetime.now().strftime(\"%d-%m-%Y-%H-%M-%S\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "73a85060-e223-4ba3-9cb8-8cc058950b82", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AgeWorkclassfnlwgtEducationEducation-NumMarital StatusOccupationRelationshipEthnic groupSexCapital GainCapital LossHours per weekCountryTarget
039State-gov77516Bachelors13Never-marriedAdm-clericalNot-in-familyWhiteMale2174040United-States<=50K
150Self-emp-not-inc83311Bachelors13Married-civ-spouseExec-managerialHusbandWhiteMale0013United-States<=50K
238Private215646HS-grad9DivorcedHandlers-cleanersNot-in-familyWhiteMale0040United-States<=50K
353Private23472111th7Married-civ-spouseHandlers-cleanersHusbandBlackMale0040United-States<=50K
428Private338409Bachelors13Married-civ-spouseProf-specialtyWifeBlackFemale0040Cuba<=50K
\n", + "
" + ], + "text/plain": [ + " Age Workclass fnlwgt Education Education-Num \\\n", + "0 39 State-gov 77516 Bachelors 13 \n", + "1 50 Self-emp-not-inc 83311 Bachelors 13 \n", + "2 38 Private 215646 HS-grad 9 \n", + "3 53 Private 234721 11th 7 \n", + "4 28 Private 338409 Bachelors 13 \n", + "\n", + " Marital Status Occupation Relationship Ethnic group Sex \\\n", + "0 Never-married Adm-clerical Not-in-family White Male \n", + "1 Married-civ-spouse Exec-managerial Husband White Male \n", + "2 Divorced Handlers-cleaners Not-in-family White Male \n", + "3 Married-civ-spouse Handlers-cleaners Husband Black Male \n", + "4 Married-civ-spouse Prof-specialty Wife Black Female \n", + "\n", + " Capital Gain Capital Loss Hours per week Country Target \n", + "0 2174 0 40 United-States <=50K \n", + "1 0 0 13 United-States <=50K \n", + "2 0 0 40 United-States <=50K \n", + "3 0 0 40 United-States <=50K \n", + "4 0 0 40 Cuba <=50K " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "training_data = pd.read_csv(\n", + " \"adult.data\", names=adult_columns, sep=r\"\\s*,\\s*\", engine=\"python\", na_values=\"?\"\n", + ").dropna()\n", + "\n", + "testing_data = pd.read_csv(\n", + " \"adult.test\", names=adult_columns, sep=r\"\\s*,\\s*\", engine=\"python\", na_values=\"?\", skiprows=1\n", + ").dropna()\n", + "\n", + "training_data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "cc962d4a-8d59-4d17-9768-54398fcdc84d", + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn import preprocessing\n", + "\n", + "\n", + "def number_encode_features(df):\n", + " result = df.copy()\n", + " encoders = {}\n", + " for column in result.columns:\n", + " if result.dtypes[column] == object:\n", + " encoders[column] = preprocessing.LabelEncoder()\n", + " result[column] = encoders[column].fit_transform(\n", + " result[column].fillna(\"None\"))\n", + " return result, encoders\n", + "\n", + "\n", + "training_data = 
pd.concat(\n", + " [training_data[\"Target\"], training_data.drop([\"Target\"], axis=1)], axis=1)\n", + "training_data, _ = number_encode_features(training_data)\n", + "training_data.to_csv(\"train_data.csv\", index=False, header=False)\n", + "\n", + "testing_data, _ = number_encode_features(testing_data)\n", + "test_features = testing_data.drop([\"Target\"], axis=1)\n", + "test_target = testing_data[\"Target\"]\n", + "test_features.to_csv(\"test_features.csv\", index=False, header=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "bd13da22-a648-442e-bd5b-3d79bdf1b00a", + "metadata": {}, + "outputs": [], + "source": [ + "from sagemaker.s3 import S3Uploader\n", + "from sagemaker.inputs import TrainingInput\n", + "\n", + "train_uri = S3Uploader.upload(\n", + " \"train_data.csv\", \"s3://{}/{}\".format(default_bucket, default_prefix))\n", + "train_input = TrainingInput(train_uri, content_type=\"text/csv\")\n", + "test_uri = S3Uploader.upload(\n", + " \"test_features.csv\", \"s3://{}/{}\".format(default_bucket, default_prefix)\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "a9a1243b-9038-45c6-9416-126b832e241a", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.\n" + ] + }, + { + "data": { + "text/plain": [ + "'257758044811.dkr.ecr.us-east-2.amazonaws.com/sagemaker-xgboost:1.2-1'" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Estimator 기반의 XGBoost 모델 생성\n", + "\n", + "from sagemaker.image_uris import retrieve\n", + "from sagemaker.estimator import Estimator\n", + "\n", + "# Creates an XGBoost estimator based on the provided region.\n", + "# region (str): The AWS region to retrieve the XGBoost container for.\n", + "container = retrieve(\"xgboost\", region, version=\"1.2-1\")\n", + "container # 
'257758044811.dkr.ecr.us-east-2.amazonaws.com/sagemaker-xgboost:1.2-1' 이렇게 ECR 에서 xgboost SageMaker 이미지를 가져옴" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "1e87f62b-b3da-44ad-8b52-84c69f81b40a", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.\n", + "INFO:sagemaker:Creating training-job with name: sagemaker-xgboost-2024-01-17-10-09-29-060\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "2024-01-17 10:09:29 Starting - Starting the training job..\n", + "2024-01-17 10:09:44 Starting - Preparing the instances for training...........\n", + "2024-01-17 10:10:45 Downloading - Downloading input data.....\n", + "2024-01-17 10:11:15 Downloading - Downloading the training image.....\n", + "2024-01-17 10:11:40 Training - Training image download completed. Training in progress.....\n", + "2024-01-17 10:12:06 Uploading - Uploading generated training model..\n", + "2024-01-17 10:12:22 Completed - Training job completed\n" + ] + } + ], + "source": [ + "container = retrieve(\"xgboost\", region, version=\"1.2-1\")\n", + "\n", + "xgb = Estimator(\n", + " container,\n", + " role,\n", + " instance_count=1,\n", + " instance_type=\"ml.m5.xlarge\",\n", + " disable_profiler=True,\n", + " sagemaker_session=session,\n", + ")\n", + "\n", + "# Hyperparameters 설정\n", + "xgb.set_hyperparameters(\n", + " max_depth=5,\n", + " eta=0.2,\n", + " gamma=4,\n", + " min_child_weight=6,\n", + " subsample=0.8,\n", + " objective=\"binary:logistic\",\n", + " num_round=800,\n", + ")\n", + "\n", + "with Run(\n", + " experiment_name=experiment_name,\n", + " # create a experiment run with only the model training on it\n", + " run_name=\"Lecture-model-train-only\",\n", + " sagemaker_session=sagemaker_session,\n", + ") as run:\n", + " xgb.fit({\"train\": train_input}, logs=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 
14, + "id": "d17e0f65-21e4-4510-87ee-9987aa812cde", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:sagemaker:Creating model with name: Lecture-DEMO-clarify-bias-model-17-01-2024-10-12-26\n" + ] + }, + { + "data": { + "text/plain": [ + "'Lecture-DEMO-clarify-bias-model-17-01-2024-10-12-26'" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "# model 생성하고, SageMaker에 저장\n", + "model_name = \"Lecture-DEMO-clarify-bias-model-{}\".format(\n", + " datetime.now().strftime(\"%d-%m-%Y-%H-%M-%S\"))\n", + "model = xgb.create_model(name=model_name)\n", + "container_def = model.prepare_container_def()\n", + "session.create_model(model_name, role, container_def)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "0ebbaa3b-5002-44be-969a-86177809900a", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:sagemaker.image_uris:Defaulting to the only supported framework/algorithm version: 1.0.\n", + "INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.\n" + ] + } + ], + "source": [ + "# Clarify 정의해서 Fairness와 Explainability 분석\n", + "from sagemaker import clarify\n", + "\n", + "clarify_processor = clarify.SageMakerClarifyProcessor(\n", + " role=role,\n", + " instance_count=1,\n", + " instance_type=\"ml.m5.xlarge\",\n", + " sagemaker_session=session\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "6e7b68e7-21b1-4990-9bce-0686da69eb1c", + "metadata": {}, + "outputs": [], + "source": [ + "bias_report_output_path = \"s3://{}/{}/clarify-bias\".format(\n", + " default_bucket, default_prefix)\n", + "\n", + "# DataConfig: 어떤 데이터를 써서 Fairness를 판별할지 정의\n", + "bias_data_config = clarify.DataConfig(\n", + " s3_data_input_path=train_uri,\n", + " s3_output_path=bias_report_output_path,\n", + " label=\"Target\",\n", + " 
headers=training_data.columns.to_list(),\n", + " dataset_type=\"text/csv\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "746a05f1-6bf6-41e5-9af3-80fc3267234f", + "metadata": {}, + "outputs": [], + "source": [ + "# ModelConfig: 어떤 모델의 Fairness를 판단할지 정의\n", + "model_config = clarify.ModelConfig(\n", + " model_name=model_name,\n", + " instance_type=\"ml.m5.xlarge\",\n", + " instance_count=1,\n", + " accept_type=\"text/csv\",\n", + " content_type=\"text/csv\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "1b97f08b-617f-4be3-a84d-94729d6d935c", + "metadata": {}, + "outputs": [], + "source": [ + "# Prediction을 하는데, 어느 값을 기준으로 probability를 0 or 1로 볼지 정의\n", + "predictions_config = clarify.ModelPredictedLabelConfig(\n", + " probability_threshold=0.75)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "45a152c7-b83d-43d9-8b2c-92baebacd885", + "metadata": {}, + "outputs": [], + "source": [ + "# BiasConfig: Bias는 pre-training bias(학습 전 데이터만으로 bias를 판별하는 것), post-training bias(training후 모델의 추론 결과를 통해 bias를 측정하는 것) 를 판별할 수 있는데,\n", + "\n", + "\n", + "bias_config = clarify.BiasConfig(\n", + " label_values_or_threshold=[1],\n", + " facet_name=\"Sex\",\n", + " facet_values_or_threshold=[0],\n", + " group_name=\"Age\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "003e6676-c8af-4ef1-b2c7-57fbce292f52", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:sagemaker.clarify:Analysis Config: {'dataset_type': 'text/csv', 'headers': ['Target', 'Age', 'Workclass', 'fnlwgt', 'Education', 'Education-Num', 'Marital Status', 'Occupation', 'Relationship', 'Ethnic group', 'Sex', 'Capital Gain', 'Capital Loss', 'Hours per week', 'Country'], 'label': 'Target', 'label_values_or_threshold': [1], 'facet': [{'name_or_index': 'Sex', 'value_or_threshold': [0]}], 'group_variable': 'Age', 'methods': {'report': {'name': 'report', 
'title': 'Analysis Report'}, 'pre_training_bias': {'methods': 'all'}, 'post_training_bias': {'methods': 'all'}}, 'predictor': {'model_name': 'Lecture-DEMO-clarify-bias-model-17-01-2024-10-12-26', 'instance_type': 'ml.m5.xlarge', 'initial_instance_count': 1, 'accept_type': 'text/csv', 'content_type': 'text/csv'}, 'probability_threshold': 0.75}\n", + "INFO:sagemaker:Creating processing-job with name Clarify-Bias-2024-01-17-10-18-43-197\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Job Name: Clarify-Bias-2024-01-17-10-18-43-197\n", + "Inputs: [{'InputName': 'dataset', 'AppManaged': False, 'S3Input': {'S3Uri': 's3://sagemaker-us-east-2-767397847434/sagemaker/fastcampus-sagemaker-clarify-bias-practice/train_data.csv', 'LocalPath': '/opt/ml/processing/input/data', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File', 'S3DataDistributionType': 'FullyReplicated', 'S3CompressionType': 'None'}}, {'InputName': 'analysis_config', 'AppManaged': False, 'S3Input': {'S3Uri': 's3://sagemaker-us-east-2-767397847434/sagemaker/fastcampus-sagemaker-clarify-bias-practice/clarify-bias/analysis_config.json', 'LocalPath': '/opt/ml/processing/input/config', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File', 'S3DataDistributionType': 'FullyReplicated', 'S3CompressionType': 'None'}}]\n", + "Outputs: [{'OutputName': 'analysis_result', 'AppManaged': False, 'S3Output': {'S3Uri': 's3://sagemaker-us-east-2-767397847434/sagemaker/fastcampus-sagemaker-clarify-bias-practice/clarify-bias', 'LocalPath': '/opt/ml/processing/output', 'S3UploadMode': 'EndOfJob'}}]\n", + "..............................\u001b[34m2024-01-17 10:23:34,743 logging.conf not found when configuring logging, using default logging configuration.\u001b[0m\n", + "\u001b[34m2024-01-17 10:23:34,743 Starting SageMaker Clarify Processing job\u001b[0m\n", + "\u001b[34m2024-01-17 10:23:34,744 Analysis config path: /opt/ml/processing/input/config/analysis_config.json\u001b[0m\n", + "\u001b[34m2024-01-17 
10:23:34,744 Analysis result path: /opt/ml/processing/output\u001b[0m\n", + "\u001b[34m2024-01-17 10:23:34,745 This host is algo-1.\u001b[0m\n", + "\u001b[34m2024-01-17 10:23:34,745 This host is the leader.\u001b[0m\n", + "\u001b[34m2024-01-17 10:23:34,745 Number of hosts in the cluster is 1.\u001b[0m\n", + "\u001b[34m2024-01-17 10:23:35,002 Running Python / Pandas based analyzer.\u001b[0m\n", + "\u001b[34m2024-01-17 10:23:35,002 Dataset type: text/csv uri: /opt/ml/processing/input/data\u001b[0m\n", + "\u001b[34m2024-01-17 10:23:35,012 Loading dataset...\u001b[0m\n", + "\u001b[34m/usr/local/lib/python3.9/site-packages/analyzer/data_loading/csv_data_loader.py:336: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n", + " df = df.append(df_tmp, ignore_index=True)\u001b[0m\n", + "\u001b[34m2024-01-17 10:23:35,047 Loaded dataset. Dataset info:\u001b[0m\n", + "\u001b[34m\u001b[0m\n", + "\u001b[34mRangeIndex: 30162 entries, 0 to 30161\u001b[0m\n", + "\u001b[34mData columns (total 14 columns):\n", + " # Column Non-Null Count Dtype\u001b[0m\n", + "\u001b[34m--- ------ -------------- -----\n", + " 0 Age 30162 non-null int64\n", + " 1 Workclass 30162 non-null int64\n", + " 2 fnlwgt 30162 non-null int64\n", + " 3 Education 30162 non-null int64\n", + " 4 Education-Num 30162 non-null int64\n", + " 5 Marital Status 30162 non-null int64\n", + " 6 Occupation 30162 non-null int64\n", + " 7 Relationship 30162 non-null int64\n", + " 8 Ethnic group 30162 non-null int64\n", + " 9 Sex 30162 non-null int64\n", + " 10 Capital Gain 30162 non-null int64\n", + " 11 Capital Loss 30162 non-null int64\n", + " 12 Hours per week 30162 non-null int64\n", + " 13 Country 30162 non-null int64\u001b[0m\n", + "\u001b[34mdtypes: int64(14)\u001b[0m\n", + "\u001b[34mmemory usage: 3.2 MB\u001b[0m\n", + "\u001b[34m2024-01-17 10:23:35,137 Spinning up shadow endpoint\u001b[0m\n", + "\u001b[34m2024-01-17 10:23:35,137 Creating 
endpoint-config with name sm-clarify-config-1705487015-d3ea\u001b[0m\n", + "\u001b[34m2024-01-17 10:23:35,501 Creating endpoint: 'sm-clarify-Lecture-DEMO-clarify-bias-model-17-0-1705487015-4ff9'\u001b[0m\n", + "\u001b[34m2024-01-17 10:23:35,559 No endpoints ruleset found for service sagemaker-internal, falling back to legacy endpoint routing.\u001b[0m\n", + "\u001b[34m2024-01-17 10:23:35,988 Using endpoint name: sm-clarify-Lecture-DEMO-clarify-bias-model-17-0-1705487015-4ff9\u001b[0m\n", + "\u001b[34m2024-01-17 10:23:35,988 Waiting for endpoint ...\u001b[0m\n", + "\u001b[34m2024-01-17 10:23:35,988 Checking endpoint status:\u001b[0m\n", + "\u001b[34mLegend:\u001b[0m\n", + "\u001b[34m(OutOfService: x, Creating: -, Updating: -, InService: !, RollingBack: <, Deleting: o, Failed: *)\u001b[0m\n", + "\u001b[34m2024-01-17 10:26:36,488 Endpoint is in service after 180 seconds\u001b[0m\n", + "\u001b[34m2024-01-17 10:26:36,488 Endpoint ready.\u001b[0m\n", + "\u001b[34m2024-01-17 10:26:36,488 ======================================\u001b[0m\n", + "\u001b[34m2024-01-17 10:26:36,488 Calculating post-training bias metrics\u001b[0m\n", + "\u001b[34m2024-01-17 10:26:36,488 ======================================\u001b[0m\n", + "\u001b[34m2024-01-17 10:26:36,488 Getting predictions from the endpoint\u001b[0m\n", + "\u001b[34m2024-01-17 10:26:38,991 We assume a prediction above 0.750 indicates 1 and below or equal indicates 0.\u001b[0m\n", + "\u001b[34m2024-01-17 10:26:38,991 Column Target with data uniqueness fraction 6.630860022544923e-05 is classifed as a CATEGORICAL column\u001b[0m\n", + "\u001b[34m2024-01-17 10:26:38,994 Column Sex with data uniqueness fraction 6.630860022544923e-05 is classifed as a CATEGORICAL column\u001b[0m\n", + "\u001b[34m/usr/local/lib/python3.9/site-packages/smclarify/bias/report.py:591: FutureWarning: In a future version of pandas all arguments of DataFrame.drop except for the argument 'labels' will be keyword-only.\n", + " df = df.drop(facet_column.name, 
1)\u001b[0m\n", + "\u001b[34m2024-01-17 10:26:38,997 Column Target with data uniqueness fraction 6.630860022544923e-05 is classifed as a CATEGORICAL column\u001b[0m\n", + "\u001b[34m2024-01-17 10:26:38,999 Column None with data uniqueness fraction 6.630860022544923e-05 is classifed as a CATEGORICAL column\u001b[0m\n", + "\u001b[34m2024-01-17 10:26:40,070 Calculated global analysis with predictor\u001b[0m\n", + "\u001b[34m2024-01-17 10:26:40,070 Stop using endpoint: sm-clarify-Lecture-DEMO-clarify-bias-model-17-0-1705487015-4ff9\u001b[0m\n", + "\u001b[34m2024-01-17 10:26:40,070 Deleting endpoint configuration with name: sm-clarify-config-1705487015-d3ea\u001b[0m\n", + "\u001b[34m2024-01-17 10:26:40,208 Deleting endpoint with name: sm-clarify-Lecture-DEMO-clarify-bias-model-17-0-1705487015-4ff9\u001b[0m\n", + "\u001b[34m2024-01-17 10:26:40,315 Model endpoint delivered 0.55831 requests per second and a total of 2 requests over 4 seconds\u001b[0m\n", + "\u001b[34m2024-01-17 10:26:40,315 =====================================\u001b[0m\n", + "\u001b[34m2024-01-17 10:26:40,315 Calculating pre-training bias metrics\u001b[0m\n", + "\u001b[34m2024-01-17 10:26:40,315 =====================================\u001b[0m\n", + "\u001b[34m2024-01-17 10:26:40,316 Column Target with data uniqueness fraction 6.630860022544923e-05 is classifed as a CATEGORICAL column\u001b[0m\n", + "\u001b[34m2024-01-17 10:26:40,318 Column Sex with data uniqueness fraction 6.630860022544923e-05 is classifed as a CATEGORICAL column\u001b[0m\n", + "\u001b[34m/usr/local/lib/python3.9/site-packages/smclarify/bias/report.py:591: FutureWarning: In a future version of pandas all arguments of DataFrame.drop except for the argument 'labels' will be keyword-only.\n", + " df = df.drop(facet_column.name, 1)\u001b[0m\n", + "\u001b[34m2024-01-17 10:26:40,321 Column Target with data uniqueness fraction 6.630860022544923e-05 is classifed as a CATEGORICAL column\u001b[0m\n", + "\u001b[34m2024-01-17 10:26:40,569 
======================================\u001b[0m\n", + "\u001b[34m2024-01-17 10:26:40,569 Calculating bias statistics for report\u001b[0m\n", + "\u001b[34m2024-01-17 10:26:40,569 ======================================\u001b[0m\n", + "\u001b[34m2024-01-17 10:26:40,569 Column Target with data uniqueness fraction 6.630860022544923e-05 is classifed as a CATEGORICAL column\u001b[0m\n", + "\u001b[34m2024-01-17 10:26:40,571 Column Sex with data uniqueness fraction 6.630860022544923e-05 is classifed as a CATEGORICAL column\u001b[0m\n", + "\u001b[34m/usr/local/lib/python3.9/site-packages/smclarify/bias/report.py:591: FutureWarning: In a future version of pandas all arguments of DataFrame.drop except for the argument 'labels' will be keyword-only.\n", + " df = df.drop(facet_column.name, 1)\u001b[0m\n", + "\u001b[34m2024-01-17 10:26:40,574 Column Target with data uniqueness fraction 6.630860022544923e-05 is classifed as a CATEGORICAL column\u001b[0m\n", + "\u001b[34m2024-01-17 10:26:40,576 Column None with data uniqueness fraction 6.630860022544923e-05 is classifed as a CATEGORICAL column\u001b[0m\n", + "\u001b[34m2024-01-17 10:26:40,584 Column Target with data uniqueness fraction 6.630860022544923e-05 is classifed as a CATEGORICAL column\u001b[0m\n", + "\u001b[34m2024-01-17 10:26:40,586 Column None with data uniqueness fraction 6.630860022544923e-05 is classifed as a CATEGORICAL column\u001b[0m\n", + "\u001b[34m2024-01-17 10:26:40,601 Converting Pandas DataFrame to SparkDataFrame for computing report metadata\u001b[0m\n", + "\u001b[34m--!10:26:42.247 [main] WARN o.a.hadoop.util.NativeCodeLoader - Unable to load native-hadoop library for your platform... 
using builtin-java classes where applicable\u001b[0m\n", + "\u001b[34m#015[Stage 0:> (0 + 4) / 4]#015#015 #015#015[Stage 3:> (0 + 4) / 4]#015#015 #0152024-01-17 10:26:54,103 Calculated global analysis without predictor\u001b[0m\n", + "\u001b[34m2024-01-17 10:26:54,103 Stop using endpoint: None\u001b[0m\n", + "\u001b[34m2024-01-17 10:26:56,379 ['jupyter', 'nbconvert', '--to', 'html', '--output', '/opt/ml/processing/output/report.html', '/opt/ml/processing/output/report.ipynb', '--template', 'sagemaker-xai']\u001b[0m\n", + "\u001b[34m[NbConvertApp] Converting notebook /opt/ml/processing/output/report.ipynb to html\u001b[0m\n", + "\u001b[34m[NbConvertApp] Writing 823004 bytes to /opt/ml/processing/output/report.html\u001b[0m\n", + "\u001b[34m2024-01-17 10:26:57,456 ['wkhtmltopdf', '-q', '--enable-local-file-access', '/opt/ml/processing/output/report.html', '/opt/ml/processing/output/report.pdf']\u001b[0m\n", + "\u001b[34m2024-01-17 10:26:58,555 Collected analyses: \u001b[0m\n", + "\u001b[34m{\n", + " \"version\": \"1.0\",\n", + " \"post_training_bias_metrics\": {\n", + " \"label\": \"Target\",\n", + " \"facets\": {\n", + " \"Sex\": [\n", + " {\n", + " \"value_or_threshold\": \"0\",\n", + " \"metrics\": [\n", + " {\n", + " \"name\": \"AD\",\n", + " \"description\": \"Accuracy Difference (AD)\",\n", + " \"value\": -0.10472421457047243\n", + " },\n", + " {\n", + " \"name\": \"CDDPL\",\n", + " \"description\": \"Conditional Demographic Disparity in Predicted Labels (CDDPL)\",\n", + " \"value\": 0.17524085448088114\n", + " },\n", + " {\n", + " \"name\": \"DAR\",\n", + " \"description\": \"Difference in Acceptance Rates (DAR)\",\n", + " \"value\": -0.004519732836182344\n", + " },\n", + " {\n", + " \"name\": \"DCA\",\n", + " \"description\": \"Difference in Conditional Acceptance (DCA)\",\n", + " \"value\": -0.15231911077048244\n", + " },\n", + " {\n", + " \"name\": \"DCR\",\n", + " \"description\": \"Difference in Conditional Rejection (DCR)\",\n", + " \"value\": 
0.12028471242426919\n", + " },\n", + " {\n", + " \"name\": \"DI\",\n", + " \"description\": \"Disparate Impact (DI)\",\n", + " \"value\": 0.33622152955999146\n", + " },\n", + " {\n", + " \"name\": \"DPPL\",\n", + " \"description\": \"Difference in Positive Proportions in Predicted Labels (DPPL)\",\n", + " \"value\": 0.10575508800386199\n", + " },\n", + " {\n", + " \"name\": \"DRR\",\n", + " \"description\": \"Difference in Rejection Rates (DRR)\",\n", + " \"value\": 0.12672179028324737\n", + " },\n", + " {\n", + " \"name\": \"FT\",\n", + " \"description\": \"Flip Test (FT)\",\n", + " \"value\": -0.0014312001635657329\n", + " },\n", + " {\n", + " \"name\": \"GE\",\n", + " \"description\": \"Generalized Entropy (GE)\",\n", + " \"value\": 0.07818223296686777\n", + " },\n", + " {\n", + " \"name\": \"RD\",\n", + " \"description\": \"Recall Difference (RD)\",\n", + " \"value\": 0.0326136839121573\n", + " },\n", + " {\n", + " \"name\": \"SD\",\n", + " \"description\": \"Specificity Difference (SD)\",\n", + " \"value\": 0.008260569337440238\n", + " },\n", + " {\n", + " \"name\": \"TE\",\n", + " \"description\": \"Treatment Equality (TE)\",\n", + " \"value\": 5.872968091511137\n", + " }\n", + " ]\n", + " }\n", + " ]\n", + " },\n", + " \"label_value_or_threshold\": \"1\"\n", + " },\n", + " \"pre_training_bias_metrics\": {\n", + " \"label\": \"Target\",\n", + " \"facets\": {\n", + " \"Sex\": [\n", + " {\n", + " \"value_or_threshold\": \"0\",\n", + " \"metrics\": [\n", + " {\n", + " \"name\": \"CDDL\",\n", + " \"description\": \"Conditional Demographic Disparity in Labels (CDDL)\",\n", + " \"value\": 0.214915908649356\n", + " },\n", + " {\n", + " \"name\": \"CI\",\n", + " \"description\": \"Class Imbalance (CI)\",\n", + " \"value\": 0.3513692725946555\n", + " },\n", + " {\n", + " \"name\": \"DPL\",\n", + " \"description\": \"Difference in Positive Proportions in Labels (DPL)\",\n", + " \"value\": 0.20015891077100018\n", + " },\n", + " {\n", + " \"name\": \"JS\",\n", + " 
\"description\": \"Jensen-Shannon Divergence (JS)\",\n", + " \"value\": 0.03075614465977302\n", + " },\n", + " {\n", + " \"name\": \"KL\",\n", + " \"description\": \"Kullback-Liebler Divergence (KL)\",\n", + " \"value\": 0.14306865156306428\n", + " },\n", + " {\n", + " \"name\": \"KS\",\n", + " \"description\": \"Kolmogorov-Smirnov Distance (KS)\",\n", + " \"value\": 0.20015891077100018\n", + " },\n", + " {\n", + " \"name\": \"LP\",\n", + " \"description\": \"L-p Norm (LP)\",\n", + " \"value\": 0.2830674462421746\n", + " },\n", + " {\n", + " \"name\": \"TVD\",\n", + " \"description\": \"Total Variation Distance (TVD)\",\n", + " \"value\": 0.20015891077100015\n", + " }\n", + " ]\n", + " }\n", + " ]\n", + " },\n", + " \"label_value_or_threshold\": \"1\"\n", + " }\u001b[0m\n", + "\u001b[34m}\u001b[0m\n", + "\u001b[34m2024-01-17 10:26:58,556 exit_message: Completed: SageMaker XAI Analyzer ran successfully\u001b[0m\n", + "\n" + ] + } + ], + "source": [ + "with Run(\n", + " experiment_name=experiment_name,\n", + " run_name=\"bias-only\", # create an experiment run with only the bias analysis on it\n", + " sagemaker_session=sagemaker_session,\n", + ") as run:\n", + " clarify_processor.run_bias(\n", + " data_config=bias_data_config,\n", + " bias_config=bias_config,\n", + " model_config=model_config,\n", + " model_predicted_label_config=predictions_config,\n", + " pre_training_methods=\"all\",\n", + " post_training_methods=\"all\",\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "47890439-ca47-4fea-99ae-501ede19aa3b", + "metadata": {}, + "outputs": [], + "source": [ + "shap_config = clarify.SHAPConfig(\n", + " baseline=[test_features.iloc[0].values.tolist()],\n", + " num_samples=15,\n", + " agg_method=\"mean_abs\",\n", + " save_local_shap_values=True,\n", + ")\n", + "\n", + "explainability_output_path = \"s3://{}/{}/clarify-explainability\".format(\n", + " default_bucket, default_prefix\n", + ")\n", + "explainability_data_config = 
clarify.DataConfig(\n", + " s3_data_input_path=train_uri,\n", + " s3_output_path=explainability_output_path,\n", + " label=\"Target\",\n", + " headers=training_data.columns.to_list(),\n", + " dataset_type=\"text/csv\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "4eb95897-a60a-4416-b49d-cd539c043fdd", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:sagemaker.clarify:Analysis Config: {'dataset_type': 'text/csv', 'headers': ['Target', 'Age', 'Workclass', 'fnlwgt', 'Education', 'Education-Num', 'Marital Status', 'Occupation', 'Relationship', 'Ethnic group', 'Sex', 'Capital Gain', 'Capital Loss', 'Hours per week', 'Country'], 'label': 'Target', 'predictor': {'model_name': 'Lecture-DEMO-clarify-bias-model-17-01-2024-10-12-26', 'instance_type': 'ml.m5.xlarge', 'initial_instance_count': 1, 'accept_type': 'text/csv', 'content_type': 'text/csv'}, 'methods': {'report': {'name': 'report', 'title': 'Analysis Report'}, 'shap': {'use_logit': False, 'save_local_shap_values': True, 'baseline': [[25, 2, 226802, 1, 7, 4, 6, 3, 2, 1, 0, 0, 40, 37]], 'num_samples': 15, 'agg_method': 'mean_abs'}}}\n", + "INFO:sagemaker:Creating processing-job with name Clarify-Explainability-2024-01-17-10-27-29-944\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Job Name: Clarify-Explainability-2024-01-17-10-27-29-944\n", + "Inputs: [{'InputName': 'dataset', 'AppManaged': False, 'S3Input': {'S3Uri': 's3://sagemaker-us-east-2-767397847434/sagemaker/fastcampus-sagemaker-clarify-bias-practice/train_data.csv', 'LocalPath': '/opt/ml/processing/input/data', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File', 'S3DataDistributionType': 'FullyReplicated', 'S3CompressionType': 'None'}}, {'InputName': 'analysis_config', 'AppManaged': False, 'S3Input': {'S3Uri': 
's3://sagemaker-us-east-2-767397847434/sagemaker/fastcampus-sagemaker-clarify-bias-practice/clarify-explainability/analysis_config.json', 'LocalPath': '/opt/ml/processing/input/config', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File', 'S3DataDistributionType': 'FullyReplicated', 'S3CompressionType': 'None'}}]\n", + "Outputs: [{'OutputName': 'analysis_result', 'AppManaged': False, 'S3Output': {'S3Uri': 's3://sagemaker-us-east-2-767397847434/sagemaker/fastcampus-sagemaker-clarify-bias-practice/clarify-explainability', 'LocalPath': '/opt/ml/processing/output', 'S3UploadMode': 'EndOfJob'}}]\n", + ".............................\u001b[34m2024-01-17 10:32:05,324 logging.conf not found when configuring logging, using default logging configuration.\u001b[0m\n", + "\u001b[34m2024-01-17 10:32:05,325 Starting SageMaker Clarify Processing job\u001b[0m\n", + "\u001b[34m2024-01-17 10:32:05,325 Analysis config path: /opt/ml/processing/input/config/analysis_config.json\u001b[0m\n", + "\u001b[34m2024-01-17 10:32:05,325 Analysis result path: /opt/ml/processing/output\u001b[0m\n", + "\u001b[34m2024-01-17 10:32:05,326 This host is algo-1.\u001b[0m\n", + "\u001b[34m2024-01-17 10:32:05,326 This host is the leader.\u001b[0m\n", + "\u001b[34m2024-01-17 10:32:05,326 Number of hosts in the cluster is 1.\u001b[0m\n", + "\u001b[34m2024-01-17 10:32:05,594 Running Python / Pandas based analyzer.\u001b[0m\n", + "\u001b[34m2024-01-17 10:32:05,594 Dataset type: text/csv uri: /opt/ml/processing/input/data\u001b[0m\n", + "\u001b[34m2024-01-17 10:32:05,604 Loading dataset...\u001b[0m\n", + "\u001b[34m/usr/local/lib/python3.9/site-packages/analyzer/data_loading/csv_data_loader.py:336: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n", + " df = df.append(df_tmp, ignore_index=True)\u001b[0m\n", + "\u001b[34m2024-01-17 10:32:05,639 Loaded dataset. 
Dataset info:\u001b[0m\n", + "\u001b[34m\u001b[0m\n", + "\u001b[34mRangeIndex: 30162 entries, 0 to 30161\u001b[0m\n", + "\u001b[34mData columns (total 14 columns):\n", + " # Column Non-Null Count Dtype\u001b[0m\n", + "\u001b[34m--- ------ -------------- -----\n", + " 0 Age 30162 non-null int64\n", + " 1 Workclass 30162 non-null int64\n", + " 2 fnlwgt 30162 non-null int64\n", + " 3 Education 30162 non-null int64\n", + " 4 Education-Num 30162 non-null int64\n", + " 5 Marital Status 30162 non-null int64\n", + " 6 Occupation 30162 non-null int64\n", + " 7 Relationship 30162 non-null int64\n", + " 8 Ethnic group 30162 non-null int64\n", + " 9 Sex 30162 non-null int64\n", + " 10 Capital Gain 30162 non-null int64\n", + " 11 Capital Loss 30162 non-null int64\n", + " 12 Hours per week 30162 non-null int64\n", + " 13 Country 30162 non-null int64\u001b[0m\n", + "\u001b[34mdtypes: int64(14)\u001b[0m\n", + "\u001b[34mmemory usage: 3.2 MB\u001b[0m\n", + "\u001b[34m2024-01-17 10:32:05,730 Spinning up shadow endpoint\u001b[0m\n", + "\u001b[34m2024-01-17 10:32:05,730 Creating endpoint-config with name sm-clarify-config-1705487525-78e8\u001b[0m\n", + "\u001b[34m2024-01-17 10:32:06,100 Creating endpoint: 'sm-clarify-Lecture-DEMO-clarify-bias-model-17-0-1705487526-7abe'\u001b[0m\n", + "\u001b[34m2024-01-17 10:32:06,158 No endpoints ruleset found for service sagemaker-internal, falling back to legacy endpoint routing.\u001b[0m\n", + "\u001b[34m2024-01-17 10:32:06,621 Using endpoint name: sm-clarify-Lecture-DEMO-clarify-bias-model-17-0-1705487526-7abe\u001b[0m\n", + "\u001b[34m2024-01-17 10:32:06,621 Waiting for endpoint ...\u001b[0m\n", + "\u001b[34m2024-01-17 10:32:06,621 Checking endpoint status:\u001b[0m\n", + "\u001b[34mLegend:\u001b[0m\n", + "\u001b[34m(OutOfService: x, Creating: -, Updating: -, InService: !, RollingBack: <, Deleting: o, Failed: *)\u001b[0m\n", + "\u001b[34m2024-01-17 10:35:07,138 Endpoint is in service after 181 seconds\u001b[0m\n", + "\u001b[34m2024-01-17 
10:35:07,138 Endpoint ready.\u001b[0m\n", + "\u001b[34m2024-01-17 10:35:07,238 Clarify Kernel SHAP n_coalitions: 15, n_instances: 1, n_features_to_explain: 14, model_output_size: 1\u001b[0m\n", + "\u001b[34m2024-01-17 10:35:07,238 =====================================================\u001b[0m\n", + "\u001b[34m2024-01-17 10:35:07,238 Shap analyzer: explaining 30162 rows, 14 columns...\u001b[0m\n", + "\u001b[34m2024-01-17 10:35:07,238 =====================================================\n", + " 0% (0 of 30162) | | Elapsed Time: 0:00:00 ETA: --:--:--\u001b[0m\n", + "\u001b[34m 10% (3201 of 30162) |## | Elapsed Time: 0:00:30 ETA: 0:04:12\u001b[0m\n", + "\u001b[34m 22% (6656 of 30162) |#### | Elapsed Time: 0:01:00 ETA: 0:03:24\u001b[0m\n", + "\u001b[34m 33% (10190 of 30162) |###### | Elapsed Time: 0:01:30 ETA: 0:02:49\u001b[0m\n", + "\u001b[34m 45% (13642 of 30162) |######## | Elapsed Time: 0:02:00 ETA: 0:02:23\u001b[0m\n", + "\u001b[34m 56% (17137 of 30162) |########## | Elapsed Time: 0:02:30 ETA: 0:01:51\u001b[0m\n", + "\u001b[34m 68% (20561 of 30162) |############ | Elapsed Time: 0:03:00 ETA: 0:01:24\u001b[0m\n", + "\u001b[34m 79% (23975 of 30162) |############## | Elapsed Time: 0:03:30 ETA: 0:00:54\u001b[0m\n", + "\u001b[34m 90% (27407 of 30162) |################ | Elapsed Time: 0:04:00 ETA: 0:00:24\u001b[0m\n", + "\u001b[34m100% (30162 of 30162) |##################| Elapsed Time: 0:04:23 Time: 0:04:23\u001b[0m\n", + "\u001b[34m2024-01-17 10:39:31,250 getting explanations took 264.01 seconds.\u001b[0m\n", + "\u001b[34m2024-01-17 10:39:31,251 ===================================================\u001b[0m\n", + "\u001b[34m2024-01-17 10:39:31,251 Falling back to generic labels: label0, label1, ...\u001b[0m\n", + "\u001b[34m2024-01-17 10:39:33,195 converting explanations to tabular took 1.94 seconds.\u001b[0m\n", + "\u001b[34m2024-01-17 10:39:33,195 ===================================================\u001b[0m\n", + "\u001b[34m2024-01-17 10:39:33,198 Wrote baseline used 
to compute explanations to: /opt/ml/processing/output/explanations_shap/baseline.csv\u001b[0m\n", + "\u001b[34m2024-01-17 10:39:33,880 Wrote 30162 local explanations to: /opt/ml/processing/output/explanations_shap/out.csv\u001b[0m\n", + "\u001b[34m2024-01-17 10:39:33,880 writing local explanations took 0.68 seconds.\u001b[0m\n", + "\u001b[34m2024-01-17 10:39:33,880 ===================================================\u001b[0m\n", + "\u001b[34m/usr/local/lib/python3.9/site-packages/numpy/core/fromnumeric.py:3430: FutureWarning: In a future version, DataFrame.mean(axis=None) will return a scalar mean over the entire DataFrame. To retain the old behavior, use 'frame.mean(axis=0)' or just 'frame.mean()'\n", + " return mean(axis=axis, dtype=dtype, out=out, **kwargs)\u001b[0m\n", + "\u001b[34m2024-01-17 10:39:33,884 aggregating local explanations took 0.00 seconds.\u001b[0m\n", + "\u001b[34m2024-01-17 10:39:33,884 ===================================================\u001b[0m\n", + "\u001b[34m2024-01-17 10:39:33,884 Shap analysis finished.\u001b[0m\n", + "\u001b[34m2024-01-17 10:39:33,885 Calculated global analysis with predictor\u001b[0m\n", + "\u001b[34m2024-01-17 10:39:33,885 Stop using endpoint: sm-clarify-Lecture-DEMO-clarify-bias-model-17-0-1705487526-7abe\u001b[0m\n", + "\u001b[34m2024-01-17 10:39:33,885 Deleting endpoint configuration with name: sm-clarify-config-1705487525-78e8\u001b[0m\n", + "\u001b[34m2024-01-17 10:39:34,073 Deleting endpoint with name: sm-clarify-Lecture-DEMO-clarify-bias-model-17-0-1705487526-7abe\u001b[0m\n", + "\u001b[34m2024-01-17 10:39:34,191 Model endpoint delivered 113.08124 requests per second and a total of 30164 requests over 267 seconds\u001b[0m\n", + "\u001b[34m2024-01-17 10:39:34,191 Calculated global analysis without predictor\u001b[0m\n", + "\u001b[34m2024-01-17 10:39:52,900 Stop using endpoint: None\u001b[0m\n", + "\u001b[34m2024-01-17 10:40:01,343 ['jupyter', 'nbconvert', '--to', 'html', '--output', 
'/opt/ml/processing/output/report.html', '/opt/ml/processing/output/report.ipynb', '--template', 'sagemaker-xai']\u001b[0m\n", + "\u001b[34m[NbConvertApp] Converting notebook /opt/ml/processing/output/report.ipynb to html\u001b[0m\n", + "\u001b[34m[NbConvertApp] Writing 535378 bytes to /opt/ml/processing/output/report.html\u001b[0m\n", + "\u001b[34m2024-01-17 10:40:02,325 ['wkhtmltopdf', '-q', '--enable-local-file-access', '/opt/ml/processing/output/report.html', '/opt/ml/processing/output/report.pdf']\u001b[0m\n", + "\u001b[34m2024-01-17 10:40:02,991 Collected analyses: \u001b[0m\n", + "\u001b[34m{\n", + " \"version\": \"1.0\",\n", + " \"explanations\": {\n", + " \"kernel_shap\": {\n", + " \"label0\": {\n", + " \"global_shap_values\": {\n", + " \"Age\": 0.03641889311978478,\n", + " \"Workclass\": 0.018266321647830524,\n", + " \"fnlwgt\": 0.021253751560726748,\n", + " \"Education\": 0.018540646066814045,\n", + " \"Education-Num\": 0.037158876677162146,\n", + " \"Marital Status\": 0.028987967748786588,\n", + " \"Occupation\": 0.026774353559974554,\n", + " \"Relationship\": 0.03638060910768331,\n", + " \"Ethnic group\": 0.019755784723812633,\n", + " \"Sex\": 0.017970951336745932,\n", + " \"Capital Gain\": 0.03342374795175269,\n", + " \"Capital Loss\": 0.0196018076330827,\n", + " \"Hours per week\": 0.021397514319347735,\n", + " \"Country\": 0.04666602078225621\n", + " },\n", + " \"expected_value\": 0.0006380207487381995\n", + " }\n", + " }\n", + " }\u001b[0m\n", + "\u001b[34m}\u001b[0m\n", + "\u001b[34m2024-01-17 10:40:02,992 exit_message: Completed: SageMaker XAI Analyzer ran successfully\u001b[0m\n", + "\u001b[34m--!\u001b[0m\n", + "\n" + ] + } + ], + "source": [ + "with Run(\n", + " experiment_name=experiment_name,\n", + " # create an experiment run with only the model explainability on it\n", + " run_name=\"explainabilit-only\",\n", + " sagemaker_session=sagemaker_session,\n", + ") as run:\n", + " clarify_processor.run_explainability(\n", + " 
data_config=explainability_data_config,\n", + " model_config=model_config,\n", + " explainability_config=shap_config,\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3a9192ba-52f8-4a43-aabd-5ff618fee629", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}