diff --git a/Analysis/README.md b/Analysis/README.md new file mode 100644 index 0000000..ac2a573 --- /dev/null +++ b/Analysis/README.md @@ -0,0 +1,2 @@ +## To-Do +Write the README. \ No newline at end of file diff --git a/visualization/basis/full.json b/Analysis/basis/full.json similarity index 100% rename from visualization/basis/full.json rename to Analysis/basis/full.json diff --git a/visualization/dataset/character.json b/Analysis/dataset/character.json similarity index 100% rename from visualization/dataset/character.json rename to Analysis/dataset/character.json diff --git a/visualization/dataset/environment.json b/Analysis/dataset/environment.json similarity index 100% rename from visualization/dataset/environment.json rename to Analysis/dataset/environment.json diff --git a/visualization/dataset/personality.json b/Analysis/dataset/personality.json similarity index 100% rename from visualization/dataset/personality.json rename to Analysis/dataset/personality.json diff --git a/visualization/dataset/questionnaires.json b/Analysis/dataset/questionnaires.json similarity index 100% rename from visualization/dataset/questionnaires.json rename to Analysis/dataset/questionnaires.json diff --git a/visualization/figures/append/chara-heros.png b/Analysis/figures/append/chara-heros.png similarity index 100% rename from visualization/figures/append/chara-heros.png rename to Analysis/figures/append/chara-heros.png diff --git a/visualization/figures/append/chara-villain.png b/Analysis/figures/append/chara-villain.png similarity index 100% rename from visualization/figures/append/chara-villain.png rename to Analysis/figures/append/chara-villain.png diff --git a/visualization/figures/append/env-negative.png b/Analysis/figures/append/env-negative.png similarity index 100% rename from visualization/figures/append/env-negative.png rename to Analysis/figures/append/env-negative.png diff --git a/visualization/figures/append/env-positive.png b/Analysis/figures/append/env-positive.png similarity index 100% rename from visualization/figures/append/env-positive.png rename to Analysis/figures/append/env-positive.png diff --git a/visualization/figures/append/pers-maximum.png b/Analysis/figures/append/pers-maximum.png similarity index 100% rename from visualization/figures/append/pers-maximum.png rename to Analysis/figures/append/pers-maximum.png diff --git a/visualization/figures/append/pers-minimum.png b/Analysis/figures/append/pers-minimum.png similarity index 100% rename from visualization/figures/append/pers-minimum.png rename to Analysis/figures/append/pers-minimum.png diff --git a/visualization/figures/cot/cot-character.png b/Analysis/figures/cot/cot-character.png similarity index 100% rename from visualization/figures/cot/cot-character.png rename to Analysis/figures/cot/cot-character.png diff --git a/visualization/figures/cot/cot-personality_biography.png b/Analysis/figures/cot/cot-personality_biography.png similarity index 100% rename from visualization/figures/cot/cot-personality_biography.png rename to Analysis/figures/cot/cot-personality_biography.png diff --git a/visualization/figures/cot/cot-personality_portray.png b/Analysis/figures/cot/cot-personality_portray.png similarity index 100% rename from visualization/figures/cot/cot-personality_portray.png rename to Analysis/figures/cot/cot-personality_portray.png diff --git a/visualization/figures/cot/cot-personality_qa.png b/Analysis/figures/cot/cot-personality_qa.png similarity index 100% rename from visualization/figures/cot/cot-personality_qa.png rename to Analysis/figures/cot/cot-personality_qa.png diff --git a/visualization/figures/default/bfi-label.png b/Analysis/figures/default/bfi-label.png similarity index 100% rename from visualization/figures/default/bfi-label.png rename to Analysis/figures/default/bfi-label.png diff --git a/visualization/figures/default/bfi-language.png b/Analysis/figures/default/bfi-language.png similarity index 100% rename from visualization/figures/default/bfi-language.png rename to Analysis/figures/default/bfi-language.png diff --git a/visualization/figures/default/bfi-order.png b/Analysis/figures/default/bfi-order.png similarity index 100% rename from visualization/figures/default/bfi-order.png rename to Analysis/figures/default/bfi-order.png diff --git a/visualization/figures/default/bfi-template.png b/Analysis/figures/default/bfi-template.png similarity index 100% rename from visualization/figures/default/bfi-template.png rename to Analysis/figures/default/bfi-template.png diff --git a/visualization/figures/default/bfi-version.png b/Analysis/figures/default/bfi-version.png similarity index 100% rename from visualization/figures/default/bfi-version.png rename to Analysis/figures/default/bfi-version.png diff --git a/visualization/figures/default/outliers.png b/Analysis/figures/default/outliers.png similarity index 100% rename from visualization/figures/default/outliers.png rename to Analysis/figures/default/outliers.png diff --git a/visualization/main.ipynb b/Analysis/main.ipynb similarity index 74% rename from visualization/main.ipynb rename to Analysis/main.ipynb index e15f12e..51d9c70 100644 --- a/visualization/main.ipynb +++ b/Analysis/main.ipynb @@ -8,9 +8,9 @@ "source": [ "import json\n", "import pandas as pd\n", - "from sklearn.cluster import DBSCAN\n", "\n", "from visualize import *\n", + "from utils import *\n", "\n", "my_dict = {'t1': 'T1','t2': 'T2','t3': 'T3','t4': 'T4','t5': 'T5',\n", " 'v1': 'V1','v2': 'V2','v3': 'V3','v4': 'V4','v5': 'V5',\n", @@ -44,7 +44,8 @@ "outputs": [], "source": [ "basis = extract_basis('basis/full.json')\n", - "vis = Visualize('BFI', basis)\n" + "vis = Visualize('BFI', basis)\n", + "data, info = extract_data('save/save.json')\n" ] }, { @@ -54,8 +55,6 @@ "outputs": [], "source": [ "# Plot Prompt Sensitivity Results\n", - "data, info = extract_data('save/save.json')\n", - "\n", "for aspect in info:\n", " for index, value in enumerate(data[aspect].unique()):\n", " vis.add(data[data[aspect] == value], my_colors[index], my_dict[value])\n", @@ -66,49 +65,16 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Number of Inliers: 2439\n", - "Number of Outliers: 61\n", - "2500\n", - "Saved \"figures/outliers-0\".\n" - ] - }, - { - "data": { - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "# Plot Outliers\n", - "def detect_outlier(df, eps, min_samples):\n", - " pca_data = vis.pca_transform(df)\n", - " dbscan = DBSCAN(eps=eps, min_samples=min_samples)\n", - " labels = dbscan.fit_predict(pca_data)\n", - " print(f\"Number of Inliers: {len(labels[labels != -1])}\")\n", - " print(f\"Number of Outliers: {len(labels[labels == -1])}\")\n", - " print(f\"{len(labels[labels != -1]) + len(labels[labels == -1])}\")\n", - " df['Label'] = labels\n", - " return df\n", - "\n", - "test_cases = [(0.302, 20)]\n", - "for i, (eps, min_samples) in enumerate(test_cases):\n", - " data_outliers = detect_outlier(data, eps, min_samples)\n", - " vis.add(data_outliers[data[\"Label\"] == -1], my_colors[0], 'Outliers')\n", - " vis.add(data_outliers[data[\"Label\"] != -1], my_colors[1], 'Inliers')\n", - " vis.plot(f'outliers-{i}')\n", - " vis.clean()\n", - " " + "data_outliers = vis.detect_outlier(data, 0.302, 20)\n", + "vis.add(data_outliers[data[\"Label\"] == -1], my_colors[0], 'Outliers')\n", + "vis.add(data_outliers[data[\"Label\"] != -1], my_colors[1], 'Inliers')\n", + "vis.plot(f'outliers')\n", + "vis.clean()\n" ] }, { @@ -206,6 +172,51 @@ " vis.clean()\n", " " ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Statistical Analysis\n", + "filename = 'comp.md'\n", + "questionnaire = get_questionnaire('BFI')\n", + "categories = list(questionnaire['categories'].keys())\n", + "\n", + "# data, info = extract_data('save/save.json')\n", + "\n", + "analysis_data = data\n", + "\n", + "# Remove outliers\n", + "analysis_data = vis.detect_outlier(data, 0.302, 20)[data[\"Label\"] != -1]\n", + "print(analysis_data.shape)\n", + "\n", + "with open(filename, 'w') as output_file:\n", + " for aspect in info:\n", + " write_df = pd.DataFrame(columns=categories)\n", + " \n", + " for index, value in enumerate(data[aspect].unique()):\n", + " records = list()\n", + " x = data[data[aspect] == value]\n", + " y = data[data[aspect] != value]\n", + " \n", + " for cat in categories:\n", + " sym, diff, _, p_val = hypothesis_testing(x[cat].tolist(), y[cat].tolist())\n", + " \n", + " \"==================== Output Template ====================\"\n", + " records.append(f'${sym} {diff:.2f}_{{{p_val:.2f}}}$')\n", + " \"=======================================================\"\n", + " \n", + " write_df.loc[my_dict[value]] = records\n", + "\n", + " output_file.write(f'### {aspect.capitalize()}\\n')\n", + " \"==================== Output Format ====================\"\n", + " output_file.write(write_df.to_markdown())\n", + " # output_file.write(write_df.to_latex(escape=False))\n", + " \"=======================================================\"\n", + " output_file.write('\\n\\n')\n" + ] } ], "metadata": { diff --git a/visualization/save/character.json b/Analysis/save/character.json similarity index 100% rename from visualization/save/character.json rename to Analysis/save/character.json diff --git a/visualization/save/character_cot.json b/Analysis/save/character_cot.json similarity index 100% rename from visualization/save/character_cot.json rename to Analysis/save/character_cot.json diff --git a/visualization/save/environment.json b/Analysis/save/environment.json similarity index 100% rename from visualization/save/environment.json rename to Analysis/save/environment.json diff --git a/visualization/save/personality_biography.json b/Analysis/save/personality_biography.json similarity index 100% rename from visualization/save/personality_biography.json rename to Analysis/save/personality_biography.json diff --git a/visualization/save/personality_biography_cot.json b/Analysis/save/personality_biography_cot.json similarity index 100% rename from visualization/save/personality_biography_cot.json rename to Analysis/save/personality_biography_cot.json diff --git a/visualization/save/personality_portray.json b/Analysis/save/personality_portray.json similarity index 100% rename from visualization/save/personality_portray.json rename to Analysis/save/personality_portray.json diff --git a/visualization/save/personality_portray_cot.json b/Analysis/save/personality_portray_cot.json similarity index 100% rename from visualization/save/personality_portray_cot.json rename to Analysis/save/personality_portray_cot.json diff --git a/visualization/save/personality_qa.json b/Analysis/save/personality_qa.json similarity index 100% rename from visualization/save/personality_qa.json rename to Analysis/save/personality_qa.json diff --git a/visualization/save/personality_qa_cot.json b/Analysis/save/personality_qa_cot.json similarity index 100% rename from visualization/save/personality_qa_cot.json rename to Analysis/save/personality_qa_cot.json diff --git a/visualization/save/save.json b/Analysis/save/save.json similarity index 100% rename from visualization/save/save.json rename to Analysis/save/save.json diff --git a/Analysis/tools.py b/Analysis/tools.py new file mode 100644 index 0000000..463de05 --- /dev/null +++ b/Analysis/tools.py @@ -0,0 +1,95 @@ +""" +Author: LAM Man Ho (mhlam@link.cuhk.edu.hk) +""" + +import json +import numpy as np +import pandas as pd +import scipy.stats as stats + +from itertools import product + +''' +Get corresponding questionnaire + name (str) +''' +def get_questionnaire(name): + try: + with open('dataset/questionnaires.json') as dataset: + data = json.load(dataset) + try: + return data[name] + except ValueError: raise ValueError("Questionnaire not found.") + except FileNotFoundError: raise FileNotFoundError("The 'questionnaires.json' file does not exist.") + +''' +Construct the basis and fit to PCA to extract the projection matrix for dimensional reduction +''' +def construct_basis(questionnaire_name, savefile, mode='full'): + basis = list() + questionnaire = get_questionnaire(questionnaire_name) + scales = questionnaire["scales"] + categories = list(questionnaire["categories"].keys()) + combinations = list(product(scales, repeat=len(categories))) + for item in combinations: + basis.append(dict(zip(categories, item))) + with open(savefile, 'w') as f: + json.dump(basis, f, indent=4) + +''' +Extract the fitting basis +''' +def extract_basis(filename): + with open(filename, 'r') as f: + basis = json.load(f) + df = pd.DataFrame(basis) + return df + +''' +Extract the save data as DataFrame +''' +def extract_data(filename): + try: + with open(filename, 'r') as f: + data = json.load(f) + except FileNotFoundError: + raise FileExistsError + + info = tuple(data["data"][0]["info"].keys()) + data = [{**d["info"], **d["data"]} for d in data["data"]] + df = pd.DataFrame(data) + return df, info + + +''' +Conduct hypothesis testing + x, y (list) +''' +def hypothesis_testing(x, y, significant_level=0.001): + mean1, std1, n1 = np.mean(x), np.std(x), len(x) + mean2, std2, n2 = np.mean(y), np.std(y), len(y) + + # Add an epsilon to prevent the zero standard deviarion + epsilon = 1e-8 + std1 += epsilon + std2 += epsilon + + # Perform F-test + if std1 > std2: + f_value = std1 ** 2 / std2 ** 2 + df1, df2 = n1 - 1, n2 - 1 + else: + f_value = std2 ** 2 / std1 ** 2 + df1, df2 = n2 - 1, n1 - 1 + + p_value = (1 - stats.f.cdf(f_value, df1, df2)) * 2 + equal_var = True if p_value > significant_level else False + + # Performing T-test + df = n1 + n2 - 2 if equal_var else ((std1**2 / n1 + std2**2 / n2)**2) / ((std1**2 / n1)**2 / (n1 - 1) + (std2**2 / n2)**2 / (n2 - 1)) + t_value, p_value = stats.ttest_ind_from_stats(mean1, std1, n1, mean2, std2, n2, equal_var=equal_var) + + diff = mean1 - mean2 + symbol = '=' if p_value > significant_level else '>' if t_value > 0 else '<' + + return symbol, diff, t_value, p_value diff --git a/visualization/visualize.py b/Analysis/visualize.py similarity index 63% rename from visualization/visualize.py rename to Analysis/visualize.py index 6da576f..245652b 100644 --- a/visualization/visualize.py +++ b/Analysis/visualize.py @@ -1,14 +1,14 @@ """ Author: LAM Man Ho (mhlam@link.cuhk.edu.hk) """ + import os -import json import random -import numpy as np -import pandas as pd import matplotlib.pyplot as plt -from itertools import product + +from sklearn.cluster import DBSCAN from sklearn.decomposition import PCA +from Analysis.tools import * class Visualize: ''' @@ -86,54 +86,14 @@ def plot(self, savename=None, random_zorder=False, exclude=[]): ''' def clean(self): self.data = list() - -''' -Get corresponding questionnaire - name (str) -''' -def get_questionnaire(name): - try: - with open('dataset/questionnaires.json') as dataset: - data = json.load(dataset) - try: - return data[name] - except ValueError: raise ValueError("Questionnaire not found.") - except FileNotFoundError: raise FileNotFoundError("The 'questionnaires.json' file does not exist.") - -''' -Construct the basis and fit to PCA to extract the projection matrix for dimensional reduction -''' -def construct_basis(questionnaire_name, savefile, mode='full'): - basis = list() - questionnaire = get_questionnaire(questionnaire_name) - scales = questionnaire["scales"] - categories = list(questionnaire["categories"].keys()) - combinations = list(product(scales, repeat=len(categories))) - for item in combinations: - basis.append(dict(zip(categories, item))) - with open(savefile, 'w') as f: - json.dump(basis, f, indent=4) -''' -Extract the fitting basis -''' -def extract_basis(filename): - with open(filename, 'r') as f: - basis = json.load(f) - df = pd.DataFrame(basis) - return df - -''' -Extract the save data as DataFrame -''' -def extract_data(filename): - try: - with open(filename, 'r') as f: - data = json.load(f) - except FileNotFoundError: - raise FileExistsError - info = tuple(data["data"][0]["info"].keys()) - data = [{**d["info"], **d["data"]} for d in data["data"]] - df = pd.DataFrame(data) - return df, info + def detect_outlier(self, df, eps, min_samples): + pca_data = self.pca_transform(df) + dbscan = DBSCAN(eps=eps, min_samples=min_samples) + labels = dbscan.fit_predict(pca_data) + print(f"Number of Inliers: {len(labels[labels != -1])}") + print(f"Number of Outliers: {len(labels[labels == -1])}") + print(f"{len(labels[labels != -1]) + len(labels[labels == -1])}") + df['Label'] = labels + return df diff --git a/dataset/add.ipynb b/dataset/add.ipynb deleted file mode 100644 index d1de9f9..0000000 --- a/dataset/add.ipynb +++ /dev/null @@ -1,722 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "\n", - "version = 'v5'\n", - "\n", - "with open('questionnaires.json', 'r') as f:\n", - " questionnaire = json.load(f)\n", - "\n", - "with open('statements5.txt', 'w') as f:\n", - " for s in questionnaire[\"BFI\"][\"questions\"][\"En\"][version][\"statements\"].values():\n", - " f.write(s + '\\n')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# En English\n", - "en_questions = '''Tends to chat a lot.\n", - "Often spots mistakes or flaws in others.\n", - "Accomplishes tasks in a detailed and complete manner.\n", - "Experiences a sense of desolation or unhappiness.\n", - "Possesses originality and consistently produces innovative ideas.\n", - "Typically maintains a quiet demeanor.\n", - "Shows generosity and service to others without being self-centered.\n", - "Tends to make some oversights now and then.\n", - "Shows serenity and is skilled in managing tense situations.\n", - "Displays an interest in exploring a diverse range of topics.\n", - "Is bursting with liveliness.\n", - "Engages in altercations with others.\n", - "Is seen as a steadfast and reliable individual in the workplace.\n", - "May sometimes be in a state of high-strain or nerves.\n", - "Displays ingenuity and engages in deep cognitive analysis.\n", - "Produces a substantial amount of eagerness and interest.\n", - "Often displays a tendency to pardon mistakes\n", - "Usually has difficulty keeping things in order.\n", - "Regularly plagued by worries.\n", - "Has a vivid and energetic imagination.\n", - "Inclined to be taciturn.\n", - "Typically places trust in other people.\n", - "Usually shows a propensity for inactivity.\n", - "Exhibits emotional steadiness and is not prone to sudden emotional disturbances.\n", - "Shows a talent for creating original ideas.\n", - "Demonstrates a bold and authoritative character.\n", - "Might come across as unemotional and remote.\n", - "Does not stop working until the task is finalized.\n", - "May experience varying emotional states.\n", - "Assigns significance to encounters with art and aesthetics.\n", - "Can be occasionally bashful or reserved.\n", - "Demonstrates compassion and courtesy to almost all individuals.\n", - "Accomplishes tasks productively.\n", - "Retains tranquility during periods of high stress.\n", - "Favors engaging in jobs that have a regular routine.\n", - "Displays an outgoing character and is comfortable interacting with people.\n", - "Sometimes engages in disrespectful behavior towards others.\n", - "Establishes agendas and invariably completes them.\n", - "Usually feels uneasy rapidly.\n", - "Appreciates musing over and interacting with thoughts.\n", - "Maintains limited engagement in artistic endeavors.\n", - "Appreciates joint efforts with colleagues.\n", - "Struggles with staying attentive on tasks.\n", - "Demonstrates advanced knowledge and refinement in either art, music, or literature.'''\n", - "\n", - "en_system_prompt = 'You are a helpful assistant.'\n", - "\n", - "en_scales = '''strongly disagree\n", - "a little disagree\n", - "neither agree nor disagree\n", - "little agree\n", - "strongly agree'''\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Zh Chinese\n", - "zh_questions = '''倾向于聊天很多。\n", - "经常发现别人的错误或缺陷。\n", - "认真、完整地完成任务。\n", - "体验到一种荒凉或不快乐的感觉。\n", - "拥有独创性并不断产生创新的想法。\n", - "通常保持安静的举止。\n", - "表现出慷慨和为他人服务,但不以自我为中心。\n", - "往往会时不时地做出一些疏忽。\n", - "表现出平静并善于处理紧张的情况。\n", - "表现出对探索各种主题的兴趣。\n", - "充满了生机。\n", - "与他人发生争执。\n", - "在工作场所被视为坚定可靠的人。\n", - "有时可能处于高度紧张或紧张的状态。\n", - "表现出独创性并进行深入的认知分析。\n", - "产生大量的渴望和兴趣。\n", - "经常表现出原谅错误的倾向\n", - "通常很难让事情保持井然有序。\n", - "经常被忧虑所困扰。\n", - "具有生动而充满活力的想象力。\n", - "倾向于沉默寡言。\n", - "通常信任他人。\n", - "通常表现出不活动的倾向。\n", - "表现出情绪稳定,不容易出现突然的情绪困扰。\n", - "表现出创造原创想法的天赋。\n", - "表现出大胆和权威的性格。\n", - "可能会给人留下冷漠、冷漠的印象。\n", - "直到任务完成才停止工作。\n", - "可能会经历不同的情绪状态。\n", - "赋予与艺术和美学的接触以意义。\n", - "偶尔会害羞或矜持。\n", - "对几乎所有人表现出同情心和礼貌。\n", - "高效地完成任务。\n", - "在高压时期保持平静。\n", - "喜欢从事有规律的工作。\n", - "性格开朗,善于与人交往。\n", - "有时会对他人做出不尊重的行为。\n", - "制定议程并始终完成它们。\n", - "通常会很快感到不安。\n", - "欣赏沉思并与思想互动。\n", - "对艺术活动的参与有限。\n", - "赞赏与同事的共同努力。\n", - "难以集中注意力完成任务。\n", - "展现出艺术、音乐或文学方面的先进知识和修养。'''\n", - "\n", - "zh_system_prompt = '你是一个有用的助手。'\n", - "\n", - "zh_scales = '''强烈反对\n", - "有点不同意\n", - "既不同意也不反对\n", - "很少同意\n", - "非常同意'''" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Ko Korean\n", - "ko_questions = '''대화를 많이 하는 경향이 있다.\n", - "종종 다른 사람의 실수나 결점을 발견합니다.\n", - "세부적이고 완전한 방식으로 작업을 수행합니다.\n", - "황폐함이나 불행감을 경험합니다.\n", - "독창성을 가지고 끊임없이 혁신적인 아이디어를 만들어냅니다.\n", - "평소에는 조용한 태도를 유지합니다.\n", - "자기 중심적이지 않고 다른 사람에게 관대함과 봉사를 보여줍니다.\n", - "때때로 약간의 감독을 하는 경향이 있습니다.\n", - "평온함을 보여주며 긴장된 상황을 관리하는 데 능숙합니다.\n", - "다양한 주제를 탐구하는 데 관심을 보입니다.\n", - "생기가 넘칩니다.\n", - "다른 사람들과 언쟁을 벌입니다.\n", - "직장에서는 확고하고 신뢰할 수 있는 사람으로 여겨집니다.\n", - "때로는 긴장이나 신경이 과민한 상태일 수도 있습니다.\n", - "독창성을 발휘하고 심층적인 인지 분석에 참여합니다.\n", - "상당한 열의와 관심을 불러일으킵니다.\n", - "종종 실수를 용서하는 경향을 보임\n", - "일반적으로 일을 정리하는 데 어려움을 겪습니다.\n", - "정기적으로 걱정에 시달립니다.\n", - "생생하고 활력 넘치는 상상력을 가지고 있습니다.\n", - "과묵한 경향이 있다.\n", - "일반적으로 다른 사람을 신뢰합니다.\n", - "일반적으로 활동하지 않는 경향을 나타냅니다.\n", - "정서적 안정감을 나타내며 갑작스러운 정서적 장애를 일으키지 않습니다.\n", - "독창적인 아이디어를 창조하는 재능을 보여줍니다.\n", - "강인하고 권위적인 성격을 보여줍니다.\n", - "감정이 없고 외진 것처럼 보일 수도 있습니다.\n", - "작업이 완료될 때까지 작업을 중단하지 않습니다.\n", - "다양한 감정 상태를 경험할 수 있습니다.\n", - "예술과 미학의 만남에 의미를 부여합니다.\n", - "때때로 수줍어하거나 내성적일 수 있습니다.\n", - "거의 모든 개인에게 동정심과 예의를 보여줍니다.\n", - "작업을 생산적으로 수행합니다.\n", - "스트레스가 심한 기간에도 평온함을 유지합니다.\n", - "규칙적인 일상이 있는 직업에 참여하는 것을 선호합니다.\n", - "외향적인 성격을 보이며 사람들과 편안하게 교류합니다.\n", - "때로는 다른 사람에게 무례한 행동을 하기도 합니다.\n", - "의제를 설정하고 변함없이 완료합니다.\n", - "일반적으로 불안감을 빠르게 느낍니다.\n", - "생각에 대해 숙고하고 상호작용하는 것을 높이 평가합니다.\n", - "예술적 노력에 제한적으로 참여합니다.\n", - "동료와의 공동 노력에 감사드립니다.\n", - "작업에 계속 주의를 기울이는 데 어려움을 겪습니다.\n", - "미술, 음악 또는 문학에 대한 고급 지식과 세련미를 보여줍니다.'''\n", - "\n", - "ko_system_prompt = '당신은 도움이 되는 조수입니다.'\n", - "\n", - "ko_scales = '''강하게 동의\n", - "약간 동의하지 않는다\n", - "동의하지도 반대하지도 않는다\n", - "거의 동의하지 않는다\n", - "강력히 동의한다'''" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Es Spanish\n", - "es_questions = '''Suele charlar mucho.\n", - "A menudo detecta errores o defectos en los demás.\n", - "Realiza tareas de manera detallada y completa.\n", - "Experimenta una sensación de desolación o infelicidad.\n", - "Posee originalidad y produce consistentemente ideas innovadoras.\n", - "Normalmente mantiene una conducta tranquila.\n", - "Muestra generosidad y servicio a los demás sin ser egocéntrico.\n", - "Tiende a cometer algunos descuidos de vez en cuando.\n", - "Muestra serenidad y es hábil en el manejo de situaciones tensas.\n", - "Muestra interés en explorar una amplia gama de temas.\n", - "Está lleno de vivacidad.\n", - "Se involucra en altercados con otros.\n", - "Es visto como una persona firme y confiable en el lugar de trabajo.\n", - "A veces puede estar en un estado de mucha tensión o nervios.\n", - "Muestra ingenio y participa en un análisis cognitivo profundo.\n", - "Produce una cantidad sustancial de entusiasmo e interés.\n", - "A menudo muestra una tendencia a perdonar errores.\n", - "Generalmente tiene dificultades para mantener las cosas en orden.\n", - "Regularmente plagado de preocupaciones.\n", - "Tiene una imaginación vívida y enérgica.\n", - "Inclinado a ser taciturno.\n", - "Generalmente confía en otras personas.\n", - "Suele mostrar propensión a la inactividad.\n", - "Demuestra estabilidad emocional y no es propenso a sufrir alteraciones emocionales repentinas.\n", - "Demuestra talento para crear ideas originales.\n", - "Demuestra un carácter audaz y autoritario.\n", - "Puede parecer impasible y remoto.\n", - "No deja de trabajar hasta finalizar la tarea.\n", - "Puede experimentar diversos estados emocionales.\n", - "Asigna significado a los encuentros con el arte y la estética.\n", - "Puede ser ocasionalmente tímido o reservado.\n", - "Demuestra compasión y cortesía hacia casi todas las personas.\n", - "Realiza tareas productivamente.\n", - "Mantiene la tranquilidad durante periodos de alto estrés.\n", - "Prefiere realizar trabajos que tengan una rutina regular.\n", - "Muestra un carácter extrovertido y se siente cómodo interactuando con la gente.\n", - "A veces se comporta de manera irrespetuosa hacia los demás.\n", - "Establece agendas e invariablemente las completa.\n", - "Generalmente se siente incómodo rápidamente.\n", - "Aprecia reflexionar e interactuar con los pensamientos.\n", - "Mantiene un compromiso limitado en actividades artísticas.\n", - "Aprecia los esfuerzos conjuntos con los colegas.\n", - "Le cuesta mantenerse atento a las tareas.\n", - "Demuestra conocimiento avanzado y refinamiento en arte, música o literatura.'''\n", - "es_system_prompt = 'Eres un asistente útil.'\n", - "es_scales = '''muy en desacuerdo\n", - "un poco en desacuerdo\n", - "ni de acuerdo ni en desacuerdo\n", - "poco de acuerdo\n", - "Totalmente de acuerdo'''" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Fr French\n", - "fr_questions = '''A tendance à beaucoup discuter.\n", - "Repère souvent les erreurs ou les défauts chez les autres.\n", - "Accomplit les tâches de manière détaillée et complète.\n", - "Éprouve un sentiment de désolation ou de malheur.\n", - "Possède de l'originalité et produit constamment des idées innovantes.\n", - "Maintient généralement une attitude calme.\n", - "Fait preuve de générosité et de service envers les autres sans être égocentrique.\n", - "A tendance à faire quelques oublis de temps en temps.\n", - "Fait preuve de sérénité et sait gérer les situations tendues.\n", - "Montre un intérêt pour l’exploration d’un large éventail de sujets.\n", - "Est plein de vivacité.\n", - "S'engage dans des altercations avec les autres.\n", - "Est considéré comme une personne fidèle et fiable sur le lieu de travail.\n", - "Peut parfois être dans un état de forte tension ou de nervosité.\n", - "Fait preuve d’ingéniosité et s’engage dans une analyse cognitive approfondie.\n", - "Produit une quantité substantielle d’empressement et d’intérêt.\n", - "Affiche souvent une tendance à pardonner les erreurs\n", - "A généralement du mal à garder les choses en ordre.\n", - "Régulièrement en proie à des soucis.\n", - "A une imagination vive et énergique.\n", - "Enclin à être taciturne.\n", - "Fait généralement confiance aux autres.\n", - "Montre généralement une propension à l’inactivité.\n", - "Fait preuve d’une stabilité émotionnelle et n’est pas sujet à des perturbations émotionnelles soudaines.\n", - "Montre un talent pour créer des idées originales.\n", - "Démontre un caractère audacieux et autoritaire.\n", - "Cela pourrait paraître sans émotion et distant.\n", - "N'arrête pas de travailler jusqu'à ce que la tâche soit finalisée.\n", - "Peut éprouver divers états émotionnels.\n", - "Accorde une importance aux rencontres avec l’art et l’esthétique.\n", - "Peut être parfois timide ou réservé.\n", - "Fait preuve de compassion et de courtoisie envers presque toutes les personnes.\n", - "Accomplit les tâches de manière productive.\n", - "Conserve la tranquillité pendant les périodes de stress élevé.\n", - "Favorise l'engagement dans des emplois qui ont une routine régulière.\n", - "Affiche un caractère extraverti et est à l’aise pour interagir avec les gens.\n", - "Adopte parfois un comportement irrespectueux envers les autres.\n", - "Établit les ordres du jour et les complète invariablement.\n", - "Se sent généralement rapidement mal à l’aise.\n", - "Apprécie réfléchir et interagir avec ses pensées.\n", - "Maintient un engagement limité dans les activités artistiques.\n", - "Apprécie les efforts conjoints avec ses collègues.\n", - "A du mal à rester attentif aux tâches.\n", - "Démontre des connaissances avancées et un raffinement dans les domaines de l’art, de la musique ou de la littérature.'''\n", - "fr_system_prompt = 'Vous êtes un assistant utile.'\n", - "fr_scales = '''fortement en désaccord\n", - "un peu en désaccord\n", - "Ni d'accord ni en désaccord\n", - "peu d'accord\n", - "tout à fait d'accord'''\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# De Deutsch\n", - "de_questions = '''Neigt dazu, viel zu chatten.\n", - "Erkennt häufig Fehler oder Mängel bei anderen.\n", - "Erledigt Aufgaben detailliert und vollständig.\n", - "Erlebt ein Gefühl der Trostlosigkeit oder des Unglücks.\n", - "Besitzt Originalität und bringt stets innovative Ideen hervor.\n", - "Behält normalerweise ein ruhiges Verhalten bei.\n", - "Zeigt Großzügigkeit und Hilfsbereitschaft gegenüber anderen, ohne egozentrisch zu sein.\n", - "Neigt dazu, hin und wieder ein paar Fehler zu machen.\n", - "Zeigt Gelassenheit und ist geschickt im Umgang mit angespannten Situationen.\n", - "Zeigt Interesse an der Auseinandersetzung mit einem vielfältigen Themenspektrum.\n", - "Strotzt nur so vor Lebendigkeit.\n", - "Beteiligt sich an Auseinandersetzungen mit anderen.\n", - "Wird am Arbeitsplatz als standhafte und zuverlässige Person angesehen.\n", - "Kann manchmal in einem Zustand hoher Anspannung oder Nervosität sein.\n", - "Zeigt Einfallsreichtum und führt eine tiefgreifende kognitive Analyse durch.\n", - "Erzeugt ein erhebliches Maß an Eifer und Interesse.\n", - "Zeigt oft die Tendenz, Fehler zu verzeihen\n", - "Hat normalerweise Schwierigkeiten, die Dinge in Ordnung zu halten.\n", - "Regelmäßig von Sorgen geplagt.\n", - "Hat eine lebhafte und energiegeladene Vorstellungskraft.\n", - "Neigt dazu, wortkarg zu sein.\n", - "Schenkt typischerweise Vertrauen in andere Menschen.\n", - "Zeigt normalerweise eine Neigung zur Inaktivität.\n", - "Zeigt emotionale Stabilität und neigt nicht zu plötzlichen emotionalen Störungen.\n", - "Zeigt Talent für die Entwicklung origineller Ideen.\n", - "Zeigt einen mutigen und maßgeblichen Charakter.\n", - "Könnte emotionslos und distanziert wirken.\n", - "Hört nicht auf zu arbeiten, bis die Aufgabe abgeschlossen ist.\n", - "Kann unterschiedliche emotionale Zustände erleben.\n", - "Weist der Begegnung mit Kunst und Ästhetik Bedeutung zu.\n", - "Kann gelegentlich schüchtern oder zurückhaltend sein.\n", - "Zeigt Mitgefühl und Höflichkeit gegenüber fast allen Menschen.\n", - "Erledigt Aufgaben produktiv.\n", - "Bewahrt die Ruhe in Zeiten hoher Belastung.\n", - "Bevorzugt die Ausübung von Tätigkeiten mit geregeltem Ablauf.\n", - "Zeigt einen aufgeschlossenen Charakter und fühlt sich im Umgang mit Menschen wohl.\n", - "Verhält sich manchmal anderen gegenüber respektlos.\n", - "Legt Tagesordnungen fest und führt diese stets aus.\n", - "Fühlt sich normalerweise schnell unwohl.\n", - "Schätzt es, über Gedanken nachzudenken und mit ihnen zu interagieren.\n", - "Beteiligt sich nur begrenzt an künstlerischen Unternehmungen.\n", - "Schätzt gemeinsame Anstrengungen mit Kollegen.\n", - "Es fällt ihm schwer, bei Aufgaben aufmerksam zu bleiben.\n", - "Zeigt fortgeschrittene Kenntnisse und Verfeinerung in Kunst, Musik oder Literatur.'''\n", - "de_system_prompt = '''Sie sind ein hilfreicher Assistent.'''\n", - "de_scales = '''entschieden widersprechen\n", - "bin ein wenig anderer Meinung\n", - "weder zustimmen noch abstreiten\n", - "stimme kaum zu\n", - "stimme voll und ganz zu'''\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# It Italian\n", - "it_questions = '''Tende a chiacchierare molto.\n", - "Spesso individua errori o difetti negli altri.\n", - "Porta a termine i compiti in modo dettagliato e completo.\n", - "Prova un senso di desolazione o infelicità.\n", - "Possiede originalità e produce costantemente idee innovative.\n", - "Mantiene in genere un atteggiamento tranquillo.\n", - "Mostra generosità e servizio agli altri senza essere egocentrico.\n", - "Tende a fare qualche svista di tanto in tanto.\n", - "Mostra serenità ed è abile nel gestire situazioni di tensione.\n", - "Mostra interesse nell'esplorare una vasta gamma di argomenti.\n", - "È pieno di vivacità.\n", - "Coinvolge in alterchi con gli altri.\n", - "È visto come una persona seria e affidabile sul posto di lavoro.\n", - "A volte può essere in uno stato di forte tensione o nervosismo.\n", - "Mostra ingegno e si impegna in una profonda analisi cognitiva.\n", - "Produce una notevole quantità di entusiasmo e interesse.\n", - "Spesso mostra una tendenza a perdonare gli errori\n", - "Di solito ha difficoltà a tenere le cose in ordine.\n", - "Regolarmente tormentato dalle preoccupazioni.\n", - "Ha un'immaginazione vivida ed energica.\n", - "Propenso a essere taciturno.\n", - "In genere ripone fiducia nelle altre persone.\n", - "Di solito mostra una propensione all'inattività.\n", - "Mostra stabilità emotiva e non è soggetto a disturbi emotivi improvvisi.\n", - "Mostra un talento nel creare idee originali.\n", - "Dimostra un carattere audace e autorevole.\n", - "Potrebbe sembrare privo di emozioni e remoto.\n", - "Non smette di funzionare finché l'attività non viene completata.\n", - "Può sperimentare diversi stati emotivi.\n", - "Assegna significato agli incontri con l'arte e l'estetica.\n", - "Può essere occasionalmente timido o riservato.\n", - "Dimostra compassione e cortesia verso quasi tutti gli individui.\n", - "Porta a termine i compiti in modo produttivo.\n", - "Mantiene la tranquillità durante i periodi di forte stress.\n", - "Preferisce impegnarsi in lavori che abbiano una routine regolare.\n", - "Mostra un carattere estroverso e si sente a suo agio nell'interagire con le persone.\n", - "A volte assume comportamenti irrispettosi verso gli altri.\n", - "Stabilisce gli ordini del giorno e invariabilmente li completa.\n", - "Di solito si sente a disagio rapidamente.\n", - "Apprezza la riflessione e l'interazione con i pensieri.\n", - "Mantiene un impegno limitato negli sforzi artistici.\n", - "Apprezza gli sforzi congiunti con i colleghi.\n", - "Fatica a rimanere attento ai compiti.\n", - "Dimostra conoscenza avanzata e raffinatezza nell'arte, nella musica o nella letteratura.'''\n", - "\n", - "it_system_prompt = '''Sei un assistente utile.'''\n", - "it_scales = '''fortemente in disaccordo\n", - "un po' in disaccordo\n", - "Nè d'accordo né in disaccordo\n", - "poco d'accordo\n", - "fortemente d'accordo'''" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Ar Arabic\n", - "ar_questions = '''يميل إلى الدردشة كثيرًا.\n", - "كثيرًا ما يكتشف الأخطاء أو العيوب في الآخرين.\n", - "ينجز المهام بطريقة مفصلة وكاملة.\n", - "يشعر بالحزن أو التعاسة.\n", - "يمتلك الأصالة وينتج أفكارًا مبتكرة باستمرار.\n", - "عادة ما يحافظ على سلوك هادئ.\n", - "يُظهر الكرم وخدمة الآخرين دون أن يكون أنانيًا.\n", - "يميل إلى القيام ببعض الأخطاء بين الحين والآخر.\n", - "يظهر الهدوء ويتمتع بمهارة في إدارة المواقف المتوترة.\n", - "يُظهر اهتمامًا باستكشاف مجموعة متنوعة من المواضيع.\n", - "يفيض بالحيوية.\n", - "يدخل في مشاجرات مع الآخرين.\n", - "يُنظر إليه على أنه فرد ثابت وموثوق في مكان العمل.\n", - "قد يكون في بعض الأحيان في حالة من التوتر الشديد أو الأعصاب.\n", - "يظهر البراعة ويشارك في التحليل المعرفي العميق.\n", - "تنتج قدرًا كبيرًا من الشغف والاهتمام.\n", - "غالبًا ما يظهر ميلًا إلى العفو عن الأخطاء\n", - "عادة ما يجد صعوبة في حفظ الأمور في مكانها الصحيح.\n", - "تعاني بانتظام من المخاوف.\n", - "يتمتع بخيال حيوي وحيوي.\n", - "يميل إلى أن يكون قليل الكلام.\n", - "عادة ما يضع الثقة في الآخرين.\n", - "عادة ما يظهر الميل إلى الخمول.\n", - "يُظهر ثباتًا عاطفيًا وليس عرضة للاضطرابات العاطفية المفاجئة.\n", - "يُظهر موهبة خلق الأفكار الأصلية.\n", - "يظهر شخصية جريئة وموثوقة.\n", - "قد يبدو الأمر غير عاطفي وبعيد.\n", - "لا يتوقف عن العمل حتى يتم الانتهاء من المهمة.\n", - "قد يواجه حالات عاطفية مختلفة.\n", - "يعطي أهمية للقاءات مع الفن وعلم الجمال.\n", - "يمكن أن يكون خجولًا أو متحفظًا في بعض الأحيان.\n", - "يُظهر التعاطف واللطف مع جميع الأفراد تقريبًا.\n", - "ينجز المهام بشكل منتج.\n", - "يحافظ على الهدوء خلال فترات التوتر الشديد.\n", - "يفضل الانخراط في الوظائف التي لها روتين منتظم.\n", - "يظهر شخصية منفتحة ويتفاعل بشكل مريح مع الناس.\n", - "في بعض الأحيان ينخرط في سلوك غير محترم تجاه الآخرين.\n", - "يضع جداول الأعمال ويكملها دائمًا.\n", - "عادة ما يشعر بعدم الارتياح بسرعة.\n", - "يقدر التأمل والتفاعل مع الأفكار.\n", - "يحافظ على مشاركة محدودة في المساعي الفنية.\n", - "يقدر الجهود المشتركة مع الزملاء.\n", - "يواجه صعوبة في البقاء منتبهًا في المهام.\n", - "يُظهر المعرفة المتقدمة والصقل في الفن أو الموسيقى أو الأدب.'''\n", - "ar_system_prompt = '''أنت مساعد مفيد.'''\n", - "ar_scales = '''لا أوافق بشدة\n", - "لا أتفق قليلا\n", - "لا أوافق ولا أرفض\n", - "أتفق قليلا\n", - "موافق بشدة'''" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Ru Russian\n", - "ru_questions = '''Склонен много болтать.\n", - "Часто замечает ошибки и недостатки других.\n", - "Подробно и полно выполняет поставленные задачи.\n", - "Испытывает чувство опустошения или несчастья.\n", - "Обладает оригинальностью и последовательно выдвигает новаторские идеи.\n", - "Обычно ведет себя тихо.\n", - "Проявляет щедрость и служение другим, не будучи эгоцентричным.\n", - "Склонен время от времени допускать какие-то оплошности.\n", - "Демонстрирует спокойствие и умеет справляться с напряженными ситуациями.\n", - "Проявляет интерес к изучению разнообразных тем.\n", - "Разливается живостью.\n", - "Вступает в ссоры с другими.\n", - "На рабочем месте воспринимается как стойкий и надежный человек.\n", - "Иногда может находиться в состоянии повышенного напряжения или нервозности.\n", - "Проявляет изобретательность и занимается глубоким когнитивным анализом.\n", - "Вызывает значительное рвение и интерес.\n", - "Часто проявляет склонность прощать ошибки.\n", - "Обычно ему трудно поддерживать порядок.\n", - "Регулярно терзают переживания.\n", - "Обладает ярким и энергичным воображением.\n", - "Склонен к молчаливости.\n", - "Обычно доверяет другим людям.\n", - "Обычно проявляет склонность к бездействию.\n", - "Проявляет эмоциональную уравновешенность и не склонен к внезапным эмоциональным потрясениям.\n", - "Проявляет талант к созданию оригинальных идей.\n", - "Демонстрирует смелый и авторитетный характер.\n", - "Может показаться бесстрастным и отстраненным.\n", - "Не прекращает работу, пока задача не будет завершена.\n", - "Может испытывать различные эмоциональные состояния.\n", - "Придает значение встречам с искусством и эстетикой.\n", - "Иногда может быть застенчивым или сдержанным.\n", - "Демонстрирует сострадание и вежливость практически ко всем людям.\n", - "Продуктивно выполняет поставленные задачи.\n", - "Сохраняет спокойствие в периоды сильного стресса.\n", - "Предпочитает заниматься работой, которая имеет регулярный распорядок дня.\n", - "Обладает общительным характером и комфортно общается с людьми.\n", - "Иногда проявляет неуважительное поведение по отношению к другим.\n", - "Устанавливает планы и неизменно выполняет их.\n", - "Обычно быстро чувствует себя неловко.\n", - "Ценит размышления и взаимодействие с мыслями.\n", - "Сохраняет ограниченное участие в творческой деятельности.\n", - "Ценит совместные усилия с коллегами.\n", - "Трудно сохранять внимание при выполнении задач.\n", - "Демонстрирует передовые знания и изысканность в искусстве, музыке или литературе.'''\n", - "ru_system_prompt = '''Вы полезный помощник.'''\n", - "ru_scales = '''категорически не согласен\n", - "немного не согласен\n", - "Ни согласен, ни несогласен\n", - "немного согласен\n", - "полностью согласен'''" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Ja Japanese\n", - "ja_questions = '''よくおしゃべりする傾向があります。\n", - "他人の間違いや欠点を見つけることがよくあります。\n", - "詳細かつ完全な方法でタスクを実行します。\n", - "孤独感や不幸感を経験します。\n", - "オリジナリティがあり、常に革新的なアイデアを生み出します。\n", - "基本的には物静かな態度を保ちます。\n", - "自己中心的になることなく、他人に寛大さと奉仕を示します。\n", - "時々見落としをする傾向があります。\n", - "冷静さを示し、緊迫した状況を管理するのが得意です。\n", - "さまざまなトピックを探求することに興味を示します。\n", - "活気に溢れています。\n", - "他人と口論になる。\n", - "職場では堅実で信頼できる人物として見られています。\n", - "時には極度の緊張や神経質な状態になることもあります。\n", - "創意工夫を発揮し、深い認知分析に取り組みます。\n", - "かなりの熱意と関心を生み出します。\n", - "間違いを許す傾向があることが多い\n", - "通常、物事を整理整頓するのが困難です。\n", - "定期的に心配事に悩まされる。\n", - "鮮やかでエネルギッシュな想像力を持っています。\n", - "寡黙になりがち。\n", - "通常、他人を信頼します。\n", - "通常、非活動的な傾向を示します。\n", - "精神的に安定しており、突然感情が乱れる傾向はありません。\n", - "独創的なアイデアを生み出す才能を発揮します。\n", - "大胆で権威ある性格を示します。\n", - "無感情でよそよそしいと思われるかもしれません。\n", - "タスクが完了するまで作業は停止しません。\n", - "さまざまな感情状態を経験する可能性があります。\n", - "芸術や美学との出会いに意義を与える。\n", - "時々照れたり、遠慮したりすることもあります。\n", - "ほぼすべての人に対して思いやりと礼儀正しさを示します。\n", - "タスクを生産的に遂行します。\n", - "ストレスの高い時期でも平穏を保ちます。\n", - "規則正しいルーチンのある仕事に従事することを好みます。\n", - "社交的な性格を示し、人々との交流を快適にします。\n", - "時には他人に対して失礼な態度をとることもあります。\n", - "議題を設定し、必ずそれを完了します。\n", - "通常、急速に不安を感じます。\n", - "思索したり、考えと対話したりすることに感謝します。\n", - "芸術活動への参加は限定的に維持されます。\n", - "同僚との共同の努力に感謝します。\n", - "タスクに注意を払い続けるのに苦労している。\n", - "芸術、音楽、文学のいずれかにおいて高度な知識と洗練を実証します。'''\n", - "ja_system_prompt = '''あなたは役に立つアシスタントです。'''\n", - "ja_scales = '''強く同意しない\n", - "少し同意しません\n", - "同意も反対もしない\n", - "少し同意\n", - "強く同意します'''" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "\n", - "def add_scales(language, system_prompt, scales_str):\n", - " with open('questionnaires.json', 'r') as f:\n", - " questionnaire = json.load(f)\n", - " \n", - " scales = {}\n", - " for i, q in enumerate(scales_str.split('\\n')):\n", - " scales[str(i+1)] = q\n", - " \n", - " if language not in questionnaire[\"BFI\"][\"questions\"]:\n", - " questionnaire[\"BFI\"][\"questions\"][language] = {}\n", - " \n", - " questionnaire[\"BFI\"][\"questions\"][language][\"system_prompt\"] = system_prompt\n", - " questionnaire[\"BFI\"][\"questions\"][language][\"scales\"] = scales\n", - " \n", - " with open('questionnaires.json', 'w', encoding='utf-8') as f:\n", - " json.dump(questionnaire, f, ensure_ascii=False, indent=4)\n", - " \n", - "\n", - "def add_questions(language, version, questions):\n", - " with open('questionnaires.json', 'r') as f:\n", - " questionnaire = json.load(f)\n", - " \n", - " statements = {}\n", - " for i, q in enumerate(questions.split('\\n')):\n", - " statements[str(i+1)] = q\n", - " \n", - " if language not in questionnaire[\"BFI\"][\"questions\"]:\n", - " questionnaire[\"BFI\"][\"questions\"][language] = {}\n", - " if version not in questionnaire[\"BFI\"][\"questions\"][language]:\n", - " questionnaire[\"BFI\"][\"questions\"][language][version] = {}\n", - "\n", - " questionnaire[\"BFI\"][\"questions\"][language][version][\"statements\"] = statements\n", - "\n", - " with open('questionnaires.json', 'w', encoding='utf-8') as f:\n", - " json.dump(questionnaire, f, ensure_ascii=False, indent=4)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# add_scales('en', en_system_prompt, en_scales)\n", - "# add_questions('en', version, en_questions)\n", - "\n", - "add_scales('zh', zh_system_prompt, zh_scales)\n", - "add_questions('zh', version, zh_questions)\n", - "\n", - "add_scales('ko', ko_system_prompt, ko_scales)\n", - "add_questions('ko', version, ko_questions)\n", - "\n", - "add_scales('es', es_system_prompt, es_scales)\n", - "add_questions('es', version, es_questions)\n", - "\n", - "add_scales('fr', fr_system_prompt, fr_scales)\n", - "add_questions('fr', version, fr_questions)\n", - "\n", - "add_scales('de', de_system_prompt, de_scales)\n", - "add_questions('de', version, de_questions)\n", - "\n", - "add_scales('it', it_system_prompt, it_scales)\n", - "add_questions('it', version, it_questions)\n", - "\n", - "add_scales('ar', ar_system_prompt, ar_scales)\n", - "add_questions('ar', version, ar_questions)\n", - "\n", - "add_scales('ru', ru_system_prompt, ru_scales)\n", - "add_questions('ru', version, ru_questions)\n", - "\n", - "add_scales('ja', ja_system_prompt, ja_scales)\n", - "add_questions('ja', version, ja_questions)\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "env_python", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.6" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/dataset/add_scales.ipynb b/dataset/add_scales.ipynb new file mode 100644 index 0000000..ad4726e --- /dev/null +++ b/dataset/add_scales.ipynb @@ -0,0 +1,219 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# En English\n", + "en_system_prompt = 'You are a helpful assistant.'\n", + "en_scales = '''strongly disagree\n", + "a little disagree\n", + "neither agree nor disagree\n", + "little agree\n", + "strongly agree'''" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Zh Chinese\n", + "zh_system_prompt = '你是一个有用的助手。'\n", + "zh_scales = '''强烈反对\n", + "有点不同意\n", + "既不同意也不反对\n", + "很少同意\n", + "非常同意'''" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Ko Korean\n", + "ko_system_prompt = '당신은 도움이 되는 조수입니다.'\n", + "ko_scales = '''강하게 동의\n", + "약간 동의하지 않는다\n", + "동의하지도 반대하지도 않는다\n", + "거의 동의하지 않는다\n", + "강력히 동의한다'''" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Es Spanish\n", + "es_system_prompt = 'Eres un asistente útil.'\n", + "es_scales = '''muy en desacuerdo\n", + "un poco en desacuerdo\n", + "ni de acuerdo ni en desacuerdo\n", + "poco de acuerdo\n", + "Totalmente de acuerdo'''" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Fr French\n", + "fr_system_prompt = 'Vous êtes un assistant utile.'\n", + "fr_scales = '''fortement en désaccord\n", + "un peu en désaccord\n", + "Ni d'accord ni en désaccord\n", + "peu d'accord\n", + "tout à fait d'accord'''" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# De Deutsch\n", + "de_system_prompt = '''Sie sind ein hilfreicher Assistent.'''\n", + "de_scales = '''entschieden widersprechen\n", + "bin ein wenig anderer Meinung\n", + "weder zustimmen noch abstreiten\n", + "stimme kaum zu\n", + "stimme voll und ganz zu'''" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# It Italian\n", + "it_system_prompt = '''Sei un assistente utile.'''\n", + "it_scales = '''fortemente in disaccordo\n", + "un po' in disaccordo\n", + "Nè d'accordo né in disaccordo\n", + "poco d'accordo\n", + "fortemente d'accordo'''" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Ar Arabic\n", + "ar_system_prompt = '''أنت مساعد مفيد.'''\n", + "ar_scales = '''لا أوافق بشدة\n", + "لا أتفق قليلا\n", + "لا أوافق ولا أرفض\n", + "أتفق قليلا\n", + "موافق بشدة'''" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Ru Russian\n", + "ru_system_prompt = '''Вы полезный помощник.'''\n", + "ru_scales = '''категорически не согласен\n", + "немного не согласен\n", + "Ни согласен, ни несогласен\n", + "немного согласен\n", + "полностью согласен'''" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Ja Japanese\n", + "ja_system_prompt = '''あなたは役に立つアシスタントです。'''\n", + "ja_scales = '''強く同意しない\n", + "少し同意しません\n", + "同意も反対もしない\n", + "少し同意\n", + "強く同意します'''" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "\n", + "def add_scales(language, system_prompt, scales_str):\n", + " with open('questionnaires.json', 'r') as f:\n", + " questionnaire = json.load(f)\n", + " \n", + " scales = {}\n", + " for i, q in enumerate(scales_str.split('\\n')):\n", + " scales[str(i+1)] = q\n", + " \n", + " if language not in questionnaire[\"BFI\"][\"questions\"]:\n", + " questionnaire[\"BFI\"][\"questions\"][language] = {}\n", + " \n", + " questionnaire[\"BFI\"][\"questions\"][language][\"system_prompt\"] = system_prompt\n", + " questionnaire[\"BFI\"][\"questions\"][language][\"scales\"] = scales\n", + " \n", + " with open('questionnaires.json', 'w', encoding='utf-8') as f:\n", + " json.dump(questionnaire, f, ensure_ascii=False, indent=4)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "add_scales('en', en_system_prompt, en_scales)\n", + "add_scales('zh', zh_system_prompt, zh_scales)\n", + "add_scales('ko', ko_system_prompt, ko_scales)\n", + "add_scales('es', es_system_prompt, es_scales)\n", + "add_scales('fr', fr_system_prompt, fr_scales)\n", + "add_scales('de', de_system_prompt, de_scales)\n", + "add_scales('it', it_system_prompt, it_scales)\n", + "add_scales('ar', ar_system_prompt, ar_scales)\n", + "add_scales('ru', ru_system_prompt, ru_scales)\n", + "add_scales('ja', ja_system_prompt, ja_scales)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "env_python", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/prompt_template/ar/rephrase_ar.txt b/dataset/rephrase_prompt/rephrase_Ar.txt similarity index 97% rename from prompt_template/ar/rephrase_ar.txt rename to dataset/rephrase_prompt/rephrase_Ar.txt index 5d3eb11..796fb5b 100644 --- a/prompt_template/ar/rephrase_ar.txt +++ b/dataset/rephrase_prompt/rephrase_Ar.txt @@ -1,4 +1,4 @@ -rephrase_ar.txt +rephrase_Ar.txt !! -- prompt !! -- existed statements diff --git a/prompt_template/de/rephrase_de.txt b/dataset/rephrase_prompt/rephrase_De.txt similarity index 96% rename from prompt_template/de/rephrase_de.txt rename to dataset/rephrase_prompt/rephrase_De.txt index 85ef4e0..a632b82 100644 --- a/prompt_template/de/rephrase_de.txt +++ b/dataset/rephrase_prompt/rephrase_De.txt @@ -1,4 +1,4 @@ -rephrase_de.txt +rephrase_De.txt !! -- prompt !! -- existed statements diff --git a/prompt_template/en/rephrase_en.txt b/dataset/rephrase_prompt/rephrase_En.txt similarity index 96% rename from prompt_template/en/rephrase_en.txt rename to dataset/rephrase_prompt/rephrase_En.txt index 8e23389..d01b71d 100644 --- a/prompt_template/en/rephrase_en.txt +++ b/dataset/rephrase_prompt/rephrase_En.txt @@ -1,4 +1,4 @@ -rephrase_en.txt +rephrase_En.txt !! -- prompt !! -- existed statements diff --git a/prompt_template/es/rephrase_es.txt b/dataset/rephrase_prompt/rephrase_Es.txt similarity index 96% rename from prompt_template/es/rephrase_es.txt rename to dataset/rephrase_prompt/rephrase_Es.txt index c55adc8..31977df 100644 --- a/prompt_template/es/rephrase_es.txt +++ b/dataset/rephrase_prompt/rephrase_Es.txt @@ -1,4 +1,4 @@ -rephrase_es.txt +rephrase_Es.txt !! -- prompt !! -- existed statements diff --git a/prompt_template/fr/rephrase_fr.txt b/dataset/rephrase_prompt/rephrase_Fr.txt similarity index 96% rename from prompt_template/fr/rephrase_fr.txt rename to dataset/rephrase_prompt/rephrase_Fr.txt index 1ef7b42..ac31516 100644 --- a/prompt_template/fr/rephrase_fr.txt +++ b/dataset/rephrase_prompt/rephrase_Fr.txt @@ -1,4 +1,4 @@ -rephrase_fr.txt +rephrase_Fr.txt !! -- prompt !! -- existed statements diff --git a/prompt_template/it/rephrase_it.txt b/dataset/rephrase_prompt/rephrase_It.txt similarity index 96% rename from prompt_template/it/rephrase_it.txt rename to dataset/rephrase_prompt/rephrase_It.txt index 2a46b1f..3ead2f9 100644 --- a/prompt_template/it/rephrase_it.txt +++ b/dataset/rephrase_prompt/rephrase_It.txt @@ -1,4 +1,4 @@ -rephrase_it.txt +rephrase_It.txt !! -- prompt !! -- existed statements diff --git a/prompt_template/ja/rephrase_ja.txt b/dataset/rephrase_prompt/rephrase_Ja.txt similarity index 96% rename from prompt_template/ja/rephrase_ja.txt rename to dataset/rephrase_prompt/rephrase_Ja.txt index 150208d..3b16933 100644 --- a/prompt_template/ja/rephrase_ja.txt +++ b/dataset/rephrase_prompt/rephrase_Ja.txt @@ -1,4 +1,4 @@ -rephrase_jp.txt +rephrase_Ja.txt !! -- prompt !! -- existed statements diff --git a/prompt_template/ko/rephrase_ko.txt b/dataset/rephrase_prompt/rephrase_Ko.txt similarity index 96% rename from prompt_template/ko/rephrase_ko.txt rename to dataset/rephrase_prompt/rephrase_Ko.txt index 761d437..e5a125b 100644 --- a/prompt_template/ko/rephrase_ko.txt +++ b/dataset/rephrase_prompt/rephrase_Ko.txt @@ -1,4 +1,4 @@ -rephrase_ko.txt +rephrase_Ko.txt !! -- prompt !! -- existed statements diff --git a/prompt_template/ru/rephrase_ru.txt b/dataset/rephrase_prompt/rephrase_Ru.txt similarity index 97% rename from prompt_template/ru/rephrase_ru.txt rename to dataset/rephrase_prompt/rephrase_Ru.txt index e8c86c2..6f0095d 100644 --- a/prompt_template/ru/rephrase_ru.txt +++ b/dataset/rephrase_prompt/rephrase_Ru.txt @@ -1,4 +1,4 @@ -rephrase_ru.txt +rephrase_Ru.txt !! -- prompt !! -- existed statements diff --git a/prompt_template/zh/rephrase_zh.txt b/dataset/rephrase_prompt/rephrase_Zh.txt similarity index 95% rename from prompt_template/zh/rephrase_zh.txt rename to dataset/rephrase_prompt/rephrase_Zh.txt index d179723..4ff66dd 100644 --- a/prompt_template/zh/rephrase_zh.txt +++ b/dataset/rephrase_prompt/rephrase_Zh.txt @@ -1,4 +1,4 @@ -rephrase_zh.txt +rephrase_Zh.txt !! -- prompt !! -- existed statements diff --git a/global_functions.py b/global_functions.py index 8fe0fc1..a4fb855 100644 --- a/global_functions.py +++ b/global_functions.py @@ -2,9 +2,6 @@ Author: LAM Man Ho (mhlam@link.cuhk.edu.hk) """ import json -import numpy as np -import scipy.stats as stats -from statistics import mean # Convert number to specified label def convert_number(label, number): @@ -96,43 +93,29 @@ def get_questionnaire(name): except FileNotFoundError: raise FileNotFoundError("The 'questionnaires.json' file does not exist.") -# Hypothesis Testing -def hypothesis_testing(sample1, sample2, significant_level=0.001): - mean1, std1, n1 = np.mean(sample1), np.std(sample1), len(sample1) - mean2, std2, n2 = np.mean(sample2), np.std(sample2), len(sample2) +# Add statements to dataset +def add_statement(qname, language, statements): + with open('dataset/questionnaires.json', 'r') as f: + data = json.load(f) - # Add an epsilon to prevent the zero standard deviarion - epsilon = 1e-8 - std1 += epsilon - std2 += epsilon - - # Perform F-test - if std1 > std2: - f_value = std1 ** 2 / std2 ** 2 - df1, df2 = n1 - 1, n2 - 1 + # If language not exist, create it + if language not in data[qname]["questions"]: + data[qname]["questions"][language] = {} + + # Get the latest version + version_list = list(data[qname]['questions'][language].keys()) + version_list = [item for item in version_list if item.startswith('v') and item[1:].isdigit()] + if version_list: + new_version = f'v{max([int(item[1:]) for item in version_list]) + 1}' else: - f_value = std2 ** 2 / std1 ** 2 - df1, df2 = n2 - 1, n1 - 1 + new_version = 'v1' - p_value = (1 - stats.f.cdf(f_value, df1, df2)) * 2 - equal_var = True if p_value > significant_level else False + if new_version not in data[qname]["questions"][language]: + data[qname]["questions"][language][new_version] = dict() + data[qname]["questions"][language][new_version]["statements"] = dict() - - # Performing T-test - df = n1 + n2 - 2 if equal_var else ((std1**2 / n1 + std2**2 / n2)**2) / ((std1**2 / n1)**2 / (n1 - 1) + (std2**2 / n2)**2 / (n2 - 1)) - t_value, p_value = stats.ttest_ind_from_stats(mean1, std1, n1, mean2, std2, n2, equal_var=equal_var) + for i, s in enumerate(statements): + data[qname]["questions"][language][new_version]["statements"][str(i+1)] = s - if p_value > significant_level: - # return f'- ({(mean1 - mean2):.2f})' - return f'$= {(mean1 - mean2):.2f}_{{{p_value:.2f}}}$' - # return f'= ({(mean1 - mean2):.2f})' - - else: - if t_value > 0: - # return '>' - # return f'> ({(mean1 - mean2):.2f})' - return f'$> {(mean1 - mean2):.2f}_{{{p_value:.2f}}}$' - else: - # return '<' - # return f'< ({(mean1 - mean2):.2f})' - return f'$< {(mean1 - mean2):.2f}_{{{p_value:.2f}}}$' + with open('dataset/questionnaires.json', 'w', encoding='utf-8') as f: + json.dump(data, f, ensure_ascii=False, indent=4) \ No newline at end of file diff --git a/main.py b/main.py index 8656082..894bece 100644 --- a/main.py +++ b/main.py @@ -31,9 +31,12 @@ ''' order = ['r', 'f'] +'=================================================================================================' +# rephrase('BFI', 'En') + '=================================================================================================' questionnaire_name = 'BFI' -name_exp = 'ko' +name_exp = 'save' # Start a server and generate pre-testing cases bfi_test = Server(questionnaire_name, template, version, language, label, order, name_exp=name_exp) @@ -43,4 +46,5 @@ # Run the pre-testing cases bfi_test.run() + '=================================================================================================' \ No newline at end of file diff --git a/rephrase.ipynb b/rephrase.ipynb deleted file mode 100644 index b47c3fa..0000000 --- a/rephrase.ipynb +++ /dev/null @@ -1,77 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from tqdm import tqdm\n", - "from gpt_setting import *\n", - "import json\n", - "import utils\n", - "\n", - "utils.temperature = 1\n", - "utils.delay_time = 2\n", - "language = \"en\"\n", - "\n", - "def rephrase():\n", - " with open('dataset/questionnaires.json', 'r') as dataset:\n", - " data = json.load(dataset)\n", - " statements = data[\"BFI\"][\"questions\"][language][\"v1\"][\"statements\"].items()\n", - " existed_statements = [statement[1] for statement in data[\"BFI\"][\"questions\"][language].items() if statement[0].startswith('v')]\n", - " \n", - " rephrased = []\n", - " for count, statement in tqdm(enumerate(statements)):\n", - " existed_rephrased_statements = [s[\"statements\"][str(count+1)] for s in existed_statements]\n", - " existed_rephrased_str = '\"' + '\", \"'.join(existed_rephrased_statements) + '\"'\n", - " while True:\n", - " with open(f'prompt_template/{language}/rephrase_{language}.txt', 'r') as file:\n", - " _, prompt = file.read().strip().split(\"###\")\n", - " prompt = prompt.replace('!!', statement[1])\n", - " prompt = prompt.replace('!!', existed_rephrased_str)\n", - " inputs = [\n", - " {\"role\": \"system\", \"content\": data[\"BFI\"][\"questions\"][language][\"system_prompt\"]},\n", - " {\"role\": \"user\", \"content\": prompt}\n", - " ]\n", - " print(inputs)\n", - " try:\n", - " response = chat('gpt-4', inputs).strip()\n", - " print(response)\n", - " parsered_responses = json.loads(response)\n", - " parsered_responses = parsered_responses[\"sentence\"]\n", - " break\n", - " except:\n", - " pass\n", - " rephrased.append(parsered_responses)\n", - " with open('rephrased_q3.txt', 'a') as f:\n", - " f.write(f'{parsered_responses}\\n')\n", - " \n", - " return\n", - "\n", - "rephrase()\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "env_python", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.6" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/server.py b/server.py index 2fefab5..f21cfc0 100644 --- a/server.py +++ b/server.py @@ -4,7 +4,6 @@ import random import json import shutil -import numpy as np from tqdm import tqdm from statistics import mean @@ -36,6 +35,45 @@ def load(file_path, name_exp=None): return Server(**loaded_data["meta"], data=loaded_data["data"]) +''' +rephrase(): Call GPT to rephrase the original statements. +''' +def rephrase(questionnaire_name, language, savename=None): + if savename is None: + savename = f'rephrased_{language}' + + with open('dataset/questionnaires.json', 'r') as dataset: + data = json.load(dataset) + questionnaire = data[questionnaire_name] + + statements = questionnaire["questions"][language]["v1"]["statements"].items() + existed_statements = [statement[1] for statement in questionnaire["questions"][language].items() if statement[0].startswith('v')] + + rephrased = [] + for count, statement in tqdm(enumerate(statements)): + existed_rephrased_statements = [s["statements"][str(count+1)] for s in existed_statements] + existed_rephrased_str = '"' + '", "'.join(existed_rephrased_statements) + '"' + while True: + with open(f'dataset/rephrase_prompt/rephrase_{language}.txt', 'r') as file: + _, prompt = file.read().strip().split("###") + prompt = prompt.replace('!!', statement[1]) + prompt = prompt.replace('!!', existed_rephrased_str) + inputs = [ + {"role": "system", "content": questionnaire["questions"][language]["system_prompt"]}, + {"role": "user", "content": prompt} + ] + try: + response = chat('gpt-4', inputs).strip() + parsered_responses = json.loads(response) + parsered_responses = parsered_responses["sentence"] + break + except: + pass + rephrased.append(parsered_responses) + + add_statement(questionnaire_name, language, rephrased) + + class Server: def __init__(self, questionnaire_name, template, version, language, label, order, name_exp='save', basis=None, pending_tests=None, data=[]): self.name_exp = name_exp