Skip to content

Commit

Permalink
update analysis tools
Browse files Browse the repository at this point in the history
  • Loading branch information
DonaldLamNL committed Sep 5, 2024
1 parent fafd225 commit 9df1b61
Show file tree
Hide file tree
Showing 4 changed files with 835 additions and 12 deletions.
314 changes: 314 additions & 0 deletions Analysis/plot.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,314 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Settings"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import pandas as pd\n",
"\n",
"from visualize import *\n",
"\n",
"my_dict = {'t1': 'T1','t2': 'T2','t3': 'T3','t4': 'T4','t5': 'T5',\n",
" 'v1': 'V1','v2': 'V2','v3': 'V3','v4': 'V4','v5': 'V5',\n",
" 'Zh':'Zh', 'Ko':'Ko', 'Es':'Es', 'Fr':'Fr', 'De':'De',\n",
" 'It':'It', 'Ar':'Ar', 'Ru':'Ru', 'Ja':'Ja', 'En':'En',\n",
" 'n':'Arabic Numeral', 'al':'Lowercase Latin', 'au':'Uppercase Latin',\n",
" 'rl':'Lowercase Roman', 'ru':'Uppercase Roman',\n",
" 'f':'Ascending', 'r':'Descending',\n",
" 'a person of routine and familiarity': 'Routine',\n",
" 'a more spontaneous and less reliable person': 'Spontaneous',\n",
" 'a person with reserved and lower energy levels': 'Reserved',\n",
" \"a competitive person, sometimes skeptical of others' intentions\": 'Competitive',\n",
" 'a person with emotional stability and consistent moods': 'Stability',\n",
" 'an adventurous and creative person': 'Adventurous',\n",
" 'an organized person, mindful of details': 'Organized',\n",
" 'a person full of energy and positive emotions': 'Energy',\n",
" 'a compassionate and cooperative person': 'Compassionate',\n",
" 'a person with emotional instability and diverse negative feelings': 'Instability',\n",
" }\n",
"\n",
"my_colors = ['#e6194B', '#42d4f4', '#ffe119', '#3cb44b', '#f032e6', '#fabed4', '#469990', '#dcbeff',\n",
" '#9A6324', '#fffac8', '#800000', '#aaffc3', '#000075', '#a9a9a9', '#000000']\n",
"\n",
"markers = [\"^\", \"o\", \"s\", \"p\", \"*\", \"P\", \"X\", \"h\", \"8\", \"d\"]\n",
"\n",
"default_color = '#D9DDDC'\n",
"\n",
"def cstm_color(x, min_x, max_x):\n",
" return plt.cm.plasma_r((np.clip(x,min_x,max_x)-min_x)/(max_x - min_x))\n",
"\n",
"def get_colors(n):\n",
" colors = [\"white\"] + [cstm_color(x, 1, n) for x in range(1,n+1)]\n",
" return colors"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Load the saved documents"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Section 3 - Default Experiments\n",
"chatgpt_data, info = extract_data('save/gpt-3.5-turbo-1106.json')\n",
"gpt4_data, _ = extract_data('save/gpt-4-1106.json')\n",
"gemini_data, _ = extract_data('save/gemini-1.0-pro.json')\n",
"llama_data, _ = extract_data('save/llama-3.1-8b.json')\n",
"\n",
"# Section 4 - Representing Diverse Groups\n",
"## Create an Environment (low directive)\n",
"environment, _ = extract_data('save/environment.json')\n",
"\n",
"## Embodying a Character (high directive)\n",
"character_cot, _ = extract_data('save/character_cot.json')\n",
"character_no, _ = extract_data('save/character.json')\n",
"character = pd.concat([character_cot, character_no], ignore_index=True)\n",
"\n",
"## Assigning a Personality (moderate directive)\n",
"### Biography\n",
"biography_cot, _ = extract_data('save/personality_biography_cot.json')\n",
"biography_no, _ = extract_data('save/personality_biography.json')\n",
"biography = pd.concat([biography_cot, biography_no], ignore_index=True)\n",
"### Portray\n",
"portray_cot, _ = extract_data('save/personality_portray_cot.json')\n",
"portray_no, _ = extract_data('save/personality_portray.json')\n",
"portray = pd.concat([portray_cot, portray_no], ignore_index=True)\n",
"### Question Answering\n",
"qa_cot, _ = extract_data('save/personality_qa_cot.json')\n",
"qa_no, _ = extract_data('save/personality_qa.json')\n",
"qa = pd.concat([qa_cot, qa_no], ignore_index=True)\n",
"personality = pd.concat([biography, portray, qa], ignore_index=True)\n",
"\n",
"# CoT\n",
"cot = pd.concat([character_cot, biography_cot, portray_cot, qa_cot])\n",
"no_cot = pd.concat([character_no, biography_no, portray_no, qa_no])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Create PCA Reference"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"conner = extract_reference('reference/conner.json')\n",
"reference = pd.concat([gemini_data, gpt4_data, llama_data, chatgpt_data], ignore_index=True)\n",
"reference = reference[[\"Openness\",\"Conscientiousness\",\"Extraversion\",\"Agreeableness\",\"Neuroticism\"]]\n",
"reference = pd.concat([reference, conner], ignore_index=True)\n",
"vis = Visualize('BFI', reference)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Plot Prompt Sensitivity Results"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"models = [chatgpt_data, gpt4_data, gemini_data, llama_data]\n",
"model_names = [\"gpt-3.5-turbo\", \"gpt-4-turbo\", \"gemini-1.0-pro\", \"llama-3.1-8b\"]\n",
"\n",
"for model_idx, model in enumerate(models):\n",
" for aspect in info:\n",
" vis.add(conner, color=\"white\", marker=markers[4], alpha=0)\n",
" for index, value in enumerate(model[aspect].unique()):\n",
" vis.add(model[model[aspect] == value], my_colors[index], my_dict[value], marker=markers[index])\n",
" vis.plot(f'{model_names[model_idx]}-{aspect}', format=\"svg\", random_zorder=True, plot_reference=True, rotation_angle=52.5)\n",
" vis.clean()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"from matplotlib.colors import LinearSegmentedColormap\n",
"\n",
"models = [chatgpt_data, gpt4_data, gemini_data, llama_data]\n",
"model_names = [\"gpt-3.5-turbo\", \"gpt-4-turbo\", \"gemini-1.0-pro\", \"llama-3.1-8b\"]\n",
"\n",
"def rotate(x, y, angle):\n",
" angle = np.deg2rad(angle)\n",
" x_rotated = x * np.cos(angle) - y * np.sin(angle)\n",
" y_rotated = x * np.sin(angle) + y * np.cos(angle)\n",
" return x_rotated, y_rotated\n",
"\n",
"for index, model in enumerate(models):\n",
" pca_data = vis.pca.transform(vis.extract(model))[:, :2]\n",
" x, y = pca_data[:, 0], pca_data[:, 1]\n",
" \n",
" data_outliers = vis.detect_outlier(model, 0.30, 20)\n",
" outliers_data = vis.pca.transform(vis.extract(data_outliers[model[\"Label\"] == -1]))[:, :2]\n",
" inliers_data = vis.pca.transform(vis.extract(data_outliers[model[\"Label\"] != -1]))[:, :2]\n",
" reference_data = vis.pca.transform(vis.extract(reference))[:, :2]\n",
"\n",
" pca_x, pca_y = rotate(pca_data[:, 0], pca_data[:, 1], 52.5)\n",
" outlier_x, outlier_y = rotate(outliers_data[:, 0], outliers_data[:, 1], 52.5)\n",
" inlier_x, inlier_y = rotate(inliers_data[:, 0], inliers_data[:, 1], 52.5)\n",
" reference_x, reference_y = rotate(reference_data[:, 0], reference_data[:, 1], 52.5)\n",
"\n",
" smooth_gradient = LinearSegmentedColormap.from_list(\"smooth_gradient\", [\"white\"] + get_colors(20))\n",
"\n",
" plt.style.use('default')\n",
" plt.scatter(reference_x, reference_y, color=\"white\", s=0, marker=markers[1])\n",
" plt.scatter(outlier_x, outlier_y, color=my_colors[1], facecolors=(1, 1, 1, 0.1), marker=markers[0], label=\"Outliers\")\n",
" plt.scatter(inlier_x, inlier_y, color=my_colors[0], facecolors=(1, 1, 1, 0.1), marker=markers[1], label=\"Inliers\")\n",
" sns.kdeplot(x=pca_x, y=pca_y, cmap=smooth_gradient, fill=True, thresh=0, levels=100, alpha=0.5) \n",
" plt.xticks([])\n",
" plt.yticks([])\n",
" plt.xlim(-3, 5)\n",
" plt.ylim(-2, 6)\n",
" plt.legend(loc=\"upper left\")\n",
" plt.savefig(f'figures/{model_names[index]}-density.svg', format=\"svg\", dpi=300)\n",
" plt.clf()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Plot Environment Appending Tests\n",
"with open('dataset/environment.json') as f:\n",
" env_conf = json.load(f)\n",
"\n",
"for key in env_conf:\n",
" vis.add(chatgpt_data, default_color, 'Default')\n",
" for index, emotion in enumerate(env_conf[key].keys()):\n",
" vis.add(environment[environment[\"append_label\"] == emotion], my_colors[index+1], emotion.capitalize(), marker=markers[index+1])\n",
" vis.plot(f\"env-{key}\", random_zorder=True, exclude=[0], format=\"svg\", rotation_angle=52.5)\n",
" vis.clean()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Plot Character Appending Tests\n",
"with open('dataset/character.json') as f:\n",
" chara_conf = json.load(f)\n",
"\n",
"for key in chara_conf:\n",
" vis.add(chatgpt_data, default_color, 'Default')\n",
" for index, chara in enumerate(chara_conf[key].keys()):\n",
" vis.add(character[character[\"append_label\"] == chara], my_colors[index], chara, marker=markers[index+1])\n",
" vis.plot(f\"chara-{key}\", random_zorder=True, exclude=[0], format=\"svg\", rotation_angle=52.5)\n",
" vis.clean()\n",
" "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Plot Personality Tests\n",
"# Plot Personality Tests\n",
"personality_list = []\n",
"for file in ['personality_portray_cot', 'personality_portray', 'personality_biography_cot',\n",
" 'personality_biography', 'personality_qa_cot', 'personality_qa']:\n",
" personality_list.append(extract_data(f'save/{file}.json')[0])\n",
"pers_data = pd.concat(personality_list)\n",
"\n",
"with open('dataset/personality.json') as f:\n",
" pers_conf = json.load(f)\n",
"\n",
"for key in pers_conf:\n",
" vis.add(chatgpt_data, default_color, 'Default')\n",
" for index, pers in enumerate(pers_conf[key].keys()):\n",
" vis.add(pers_data[pers_data[\"append_label\"] == pers], my_colors[index], my_dict[pers].capitalize(), marker=markers[index+1])\n",
" vis.plot(f\"pers-{key}\", random_zorder=True, exclude=[0], format=\"svg\", rotation_angle=52.5)\n",
" vis.clean()\n",
" \n",
" \n",
"# Plot Personalities Comparisons\n",
"for min_label, max_label in zip(list(pers_conf[\"minimum\"].keys()), list(pers_conf[\"maximum\"].keys())):\n",
" vis.add(chatgpt_data, default_color, 'Default', marker=markers[0])\n",
" vis.add(pers_data[pers_data[\"append_label\"] == min_label], my_colors[0], my_dict[min_label].capitalize(), marker=markers[1])\n",
" vis.add(pers_data[pers_data[\"append_label\"] == max_label], my_colors[1], my_dict[max_label].capitalize(), marker=markers[2])\n",
" vis.plot(f'{my_dict[min_label]}-{my_dict[max_label]}', exclude=[0], format=\"svg\", random_zorder=True, rotation_angle=52.5)\n",
" vis.clean()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Plot COT Results\n",
"for file in ['personality_portray', 'personality_biography', 'personality_qa', 'character']:\n",
" cot, _ = extract_data(f'save/{file}_cot.json')\n",
" no_cot, _ = extract_data(f'save/{file}.json')\n",
" vis.add(chatgpt_data, default_color, 'Default', marker=markers[0])\n",
" vis.add(cot, my_colors[0], 'With COT', marker=markers[1])\n",
" vis.add(no_cot, my_colors[1], 'Without COT', marker=markers[2])\n",
" vis.plot(f\"cot-{file}\", format=\"svg\", random_zorder=True, exclude=[0], rotation_angle=52.5)\n",
" vis.clean()\n",
" "
]
}
],
"metadata": {
"kernelspec": {
"display_name": "env_python",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading

0 comments on commit 9df1b61

Please sign in to comment.