-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
ca89a48
commit 80ef914
Showing
71 changed files
with
4,644 additions
and
0 deletions.
There are no files selected for viewing
Binary file not shown.
Binary file not shown.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,138 @@ | ||
""" | ||
Author: LAM Man Ho ([email protected]) | ||
""" | ||
import json | ||
import numpy as np | ||
import scipy.stats as stats | ||
from statistics import mean | ||
|
||
# Convert number to specified label | ||
def convert_number(label, number):
    """Render ``number`` in the numbering scheme selected by ``label``.

    The first character of ``label`` picks the scheme: ``n`` returns the
    number unchanged, ``a`` converts it to Latin letters and ``r`` to a
    Roman numeral. A label ending in ``l`` requests lower-case output for
    the letter-based schemes.

    Raises:
        ValueError: If ``label`` starts with none of ``n``, ``a`` or ``r``.
    """
    wants_lower = label.endswith('l')

    if label.startswith('n'):
        return number
    if label.startswith('a'):
        return num_to_alphabet(number, wants_lower)
    if label.startswith('r'):
        return num_to_roman(number, wants_lower)

    raise ValueError("Label wrong")
|
||
# Convert label back to number | ||
def convert_symbol(label, symbol):
    """Convert a labelled ``symbol`` back to its integer value.

    The first character of ``label`` picks the scheme: ``n`` parses the
    symbol as a plain integer, ``a`` treats it as Latin letters and ``r``
    as a Roman numeral. Upper/lower case of the symbol does not matter for
    the letter-based schemes because the helpers normalise case themselves.

    Raises:
        ValueError: If ``label`` starts with none of ``n``, ``a`` or ``r``
            (mirrors ``convert_number`` instead of silently returning None).
    """
    if label.startswith('n'):
        return int(symbol)
    if label.startswith('a'):
        return int(alphabet_to_num(symbol))
    if label.startswith('r'):
        return int(roman_to_num(symbol))
    raise ValueError("Label wrong")
|
||
# Convert number to Roman number | ||
def num_to_roman(num, lower_case=False):
    """Return the Roman-numeral spelling of the integer ``num``.

    Values that are zero or negative produce an empty string. Pass
    ``lower_case=True`` to get lower-case numerals.
    """
    numerals = (
        (1000, "M"), (900, "CM"), (500, "D"), (400, "CD"),
        (100, "C"), (90, "XC"), (50, "L"), (40, "XL"),
        (10, "X"), (9, "IX"), (5, "V"), (4, "IV"), (1, "I"),
    )

    pieces = []
    for value, symbol in numerals:
        if num <= 0:
            break
        # Greedily take as many copies of the largest remaining symbol as fit.
        repeats = num // value
        pieces.append(symbol * repeats)
        num -= value * repeats

    text = ''.join(pieces)
    if lower_case:
        return text.lower()
    return text
|
||
# Convert number to alphabet(s) | ||
def num_to_alphabet(num, lower_case=False):
    """Return the spreadsheet-style letter label for ``num`` (1 -> A, 27 -> AA).

    Values that are zero or negative produce an empty string. Pass
    ``lower_case=True`` to get lower-case letters.
    """
    letters = []
    while num > 0:
        # Bijective base-26: shift by one so that 1..26 map onto A..Z.
        num, offset = divmod(num - 1, 26)
        letters.append(chr(ord('A') + offset))

    # Digits were produced least-significant first.
    label = ''.join(reversed(letters))
    if lower_case:
        return label.lower()
    return label
|
||
# Convert Roman number to number | ||
def roman_to_num(roman):
    """Convert a Roman numeral string (any case) to its integer value.

    Parsing is greedy and lenient: at each position the first matching
    symbol from the table below is consumed and its value added, so
    non-canonical spellings such as ``"IIII"`` are accepted. An empty
    string yields 0.

    Raises:
        ValueError: If the string contains a character that is not part of
            any Roman symbol. Without this check the scan position would
            never advance and the loop would not terminate.
    """
    roman = roman.upper()
    val = [1000, 900, 500, 400, 100, 90, 50, 40, 10, 9, 5, 4, 1]
    syb = ["M", "CM", "D", "CD", "C", "XC", "L", "XL", "X", "IX", "V", "IV", "I"]
    result = 0
    i = 0
    while i < len(roman):
        for v, s in zip(val, syb):
            if roman.startswith(s, i):
                result += v
                i += len(s)
                break
        else:
            # No symbol matched at position i.
            raise ValueError(
                f"Invalid Roman numeral character {roman[i]!r} in {roman!r}"
            )
    return result
|
||
# Convert alphabet(s) to number | ||
def alphabet_to_num(alphabet):
    """Convert a spreadsheet-style letter label (any case) to its number.

    ``"A"`` maps to 1, ``"Z"`` to 26, ``"AA"`` to 27; an empty string maps
    to 0.
    """
    base_offset = ord('A') - 1
    total = 0
    for letter in alphabet.upper():
        digit = ord(letter) - base_offset
        total = 26 * total + digit
    return total
|
||
# Get Prompt Template | ||
def get_prompt(filename, inputs):
    """Load a prompt template and substitute its ``!<INPUT k>!`` placeholders.

    The template file is split on the comment-block marker; the text after
    the first marker (and before a second one, if any) is the prompt body.
    Each placeholder ``!<INPUT k>!`` is replaced with ``str(inputs[k])``.

    The file is read as UTF-8 explicitly because the templates contain
    non-ASCII text and the platform default encoding is not reliable.

    Args:
        filename: Path of the template file.
        inputs: Sequence of values; position ``k`` fills ``!<INPUT k>!``.

    Returns:
        The stripped prompt body with all placeholders substituted.

    Raises:
        IndexError: If the file does not contain the marker.
    """
    with open(filename, 'r', encoding='utf-8') as file:
        generated_prompt = file.read().split("<commentblockmarker>###</commentblockmarker>")[1].strip()
    for index, item in enumerate(inputs):
        key = f"!<INPUT {index}>!"
        generated_prompt = generated_prompt.replace(key, str(item))
    return generated_prompt
|
||
# Get questionnaire | ||
def get_questionnaire(name):
    """Return the entry called ``name`` from ``dataset/questionnaires.json``.

    The path is relative to the current working directory and the file is
    read as UTF-8.

    Raises:
        FileNotFoundError: If the JSON file does not exist.
        ValueError: If the file has no entry for ``name``. (A missing key
            raises ``KeyError``, so that is what must be caught here for
            the "not found" message to be reachable.)
    """
    try:
        with open('dataset/questionnaires.json', encoding='utf-8') as dataset:
            data = json.load(dataset)
    except FileNotFoundError as err:
        raise FileNotFoundError("The 'questionnaires.json' file does not exist.") from err

    try:
        return data[name]
    except KeyError as err:
        raise ValueError("Questionnaire not found.") from err
|
||
# Hypothesis Testing | ||
def hypothesis_testing(sample1, sample2, significant_level=0.001):
    """Compare the means of two samples and format the outcome as LaTeX.

    An F-test on the two variances decides whether the subsequent two-sample
    t-test assumes equal variances (Student) or not (Welch). The result is a
    math-mode string ``$<rel> <mean1 - mean2>_{<p>}$`` where ``<rel>`` is
    ``=`` when the t-test p-value exceeds ``significant_level``, otherwise
    ``>`` or ``<`` according to the sign of the t statistic.

    Args:
        sample1: First sequence of numeric observations.
        sample2: Second sequence of numeric observations.
        significant_level: Threshold applied to both the F-test and the
            t-test p-values.

    Returns:
        The formatted comparison string, with the mean difference and the
        p-value rounded to two decimals.

    Note:
        Each sample needs at least two observations; with fewer, the sample
        standard deviation is undefined and the statistics come out as NaN.
    """
    # ddof=1: both the variance-ratio F-test and ttest_ind_from_stats are
    # defined in terms of the corrected (sample) standard deviation.
    mean1, std1, n1 = np.mean(sample1), np.std(sample1, ddof=1), len(sample1)
    mean2, std2, n2 = np.mean(sample2), np.std(sample2, ddof=1), len(sample2)

    # Add an epsilon to prevent a zero standard deviation (constant samples),
    # which would otherwise cause divisions by zero below.
    epsilon = 1e-8
    std1 += epsilon
    std2 += epsilon

    # Perform F-test with the larger variance in the numerator.
    if std1 > std2:
        f_value = std1 ** 2 / std2 ** 2
        df1, df2 = n1 - 1, n2 - 1
    else:
        f_value = std2 ** 2 / std1 ** 2
        df1, df2 = n2 - 1, n1 - 1

    # Two-sided p-value of the F-test.
    f_p_value = (1 - stats.f.cdf(f_value, df1, df2)) * 2
    equal_var = bool(f_p_value > significant_level)

    # Perform T-test (SciPy derives the degrees of freedom itself).
    t_value, p_value = stats.ttest_ind_from_stats(
        mean1, std1, n1, mean2, std2, n2, equal_var=equal_var
    )

    if p_value > significant_level:
        relation = '='
    elif t_value > 0:
        relation = '>'
    else:
        relation = '<'

    return f'${relation} {(mean1 - mean2):.2f}_{{{p_value:.2f}}}$'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
from tenacity import ( | ||
retry, | ||
stop_after_attempt, | ||
wait_random_exponential, | ||
) | ||
import openai | ||
import time | ||
import os | ||
import random | ||
|
||
from utils import * | ||
|
||
openai.api_key = api_key | ||
|
||
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def chat(
    model,                      # gpt-4, gpt-4-0314, gpt-4-32k, gpt-4-32k-0314, gpt-3.5-turbo, gpt-3.5-turbo-0301
    messages,                   # [{"role": "system"/"user"/"assistant", "content": "Hello!", "name": "example"}]
    temperature=temperature,    # [0, 2]: Lower values -> more focused and deterministic; Higher values -> more random.
    n=1,                        # Chat completion choices to generate for each input message.
    max_tokens=1024,            # The maximum number of tokens to generate in the chat completion.
    delay=delay_time            # Seconds to sleep before the request is sent.
):
    """Send a chat-completion request and return the generated text.

    The call sleeps for ``delay`` seconds first, then issues the request.
    The decorator retries a failing call with randomized exponential
    back-off (1-60 s) and gives up after six attempts.

    Returns:
        The message content of the single choice when ``n == 1``;
        otherwise a list with the message content of every choice.
    """
    time.sleep(delay)

    request_args = {
        'model': model,
        'messages': messages,
        'temperature': temperature,
        'n': n,
        'max_tokens': max_tokens,
    }
    response = openai.ChatCompletion.create(**request_args)

    choices = response['choices']
    if n != 1:
        return [choice['message']['content'] for choice in choices]
    return choices[0]['message']['content']
|
||
|
||
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def completion(
    model,                      # text-davinci-003, text-davinci-002, text-curie-001, text-babbage-001, text-ada-001
    prompt,                     # The prompt(s) to generate completions for, encoded as a string, array of strings, array of tokens, or array of token arrays.
    temperature=temperature,    # [0, 2]: Lower values -> more focused and deterministic; Higher values -> more random.
    n=1,                        # Completions to generate for each prompt.
    max_tokens=1024,            # The maximum number of tokens to generate in the completion.
    delay=delay_time            # Seconds to sleep before the request is sent.
):
    """Send a text-completion request and return the generated text.

    The call sleeps for ``delay`` seconds first, then issues the request.
    The decorator retries a failing call with randomized exponential
    back-off (1-60 s) and gives up after six attempts.

    Returns:
        The text of the single choice when ``n == 1``; otherwise a list of
        the texts of all choices, ordered by each choice's ``index``.
    """
    time.sleep(delay)

    response = openai.Completion.create(
        model=model,
        prompt=prompt,
        temperature=temperature,
        n=n,
        max_tokens=max_tokens
    )

    if n == 1:
        return response['choices'][0]['text']

    # Sort a copy of the choices list; indexing the list itself with
    # ['choices'] (as the code previously did) raises TypeError.
    choices = sorted(response['choices'], key=lambda choice: choice['index'])
    return [choice['text'] for choice in choices]
|
||
def print_prompt(inputs, response):
    """Append one request/response pair to ``records/records.txt``.

    The ``records`` directory is created under the current working
    directory if needed. The entry is written as the inputs, a ``----``
    separator line, the response and a ``====`` terminator line. The file
    is written as UTF-8 so that non-ASCII prompts can always be recorded.
    """
    os.makedirs("records", exist_ok=True)
    with open("records/records.txt", 'a', encoding='utf-8') as f:
        f.write(f"{inputs}\n----\n")
        f.write(f"{response}\n====\n")
|
||
def gpt_request(model, inputs):
    """Query ``model`` with ``inputs``, log the exchange and return the reply.

    ``text-davinci-003`` is served through ``completion``; ``gpt-3.5-turbo``
    and ``gpt-4`` through ``chat``. The stripped reply is recorded with
    ``print_prompt`` before being returned.

    Raises:
        ValueError: If ``model`` is not one of the supported names.
    """
    if model == 'text-davinci-003':
        send = completion
    elif model in ('gpt-3.5-turbo', 'gpt-4'):
        send = chat
    else:
        raise ValueError("The model is not supported or does not exist.")

    reply = send(model, inputs).strip()
    print_prompt(inputs, reply)
    return reply
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
from server import * | ||
|
||
# Prompt templates
template = ['t1', 't2', 't3', 't4', 't5']

# Question versions
version = ['v1', 'v2', 'v3', 'v4', 'v5']

# Languages:
#   En: English, Zh: Simplified Chinese, Ko: Korean, Es: Spanish, Fr: French,
#   De: German, It: Italian, Ar: Arabic, Ru: Russian, Ja: Japanese
language = ['Zh', 'Ko', 'Es', 'Fr', 'De', 'It', 'Ar', 'Ru', 'Ja']

# Labels:
#   n: Arabic numeral
#   al: lowercase Latin, au: uppercase Latin
#   rl: lowercase Roman, ru: uppercase Roman
label = ['n', 'al', 'au', 'rl', 'ru']

# Order:
#   f: ascending, r: descending
order = ['r', 'f']

# -----------------------------------------------------------------------------
questionnaire_name = 'BFI'
name_exp = 'ko'

# Start a server and generate pre-testing cases
bfi_test = Server(
    questionnaire_name,
    template,
    version,
    language,
    label,
    order,
    name_exp=name_exp,
)

# Load and continue a test
# bfi_test = load('<filename>', '<new-filename>')

# Run the pre-testing cases
bfi_test.run()
# -----------------------------------------------------------------------------
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
bfi_ar_t1.txt | ||
|
||
!<INPUT 0>! -- scale range min | ||
!<INPUT 1>! -- scale range max | ||
!<INPUT 2>! -- level description | ||
!<INPUT 3>! -- statements | ||
|
||
<commentblockmarker>###</commentblockmarker> | ||
|
||
يمكنك الرد من !<INPUT 0>! إلى !<INPUT 1>! فقط في العبارات التالية. فيما يلي عدد من الخصائص التي قد تنطبق عليك أو لا تنطبق عليك. يرجى الإشارة إلى مدى موافقتك أو عدم موافقتك على هذا البيان. !<INPUT 2>! . فيما يلي العبارات، يرجى تسجيلها واحدة تلو الأخرى: | ||
!<INPUT 3>! | ||
يرجى إخراج درجاتك بتنسيق JSON التالي: '{"<مؤشر البيان>": "<نتيجة>"}'. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
bfi_ar_t2.txt | ||
|
||
!<INPUT 0>! -- scale range min | ||
!<INPUT 1>! -- scale range max | ||
!<INPUT 2>! -- level description | ||
!<INPUT 3>! -- statements | ||
|
||
<commentblockmarker>###</commentblockmarker> | ||
|
||
الآن سأصف بإيجاز بعض الأشخاص. من فضلك اقرأ كل وصف وأخبرني عن مدى إعجاب كل شخص بك. اكتب إجابتك باستخدام المقياس التالي: !<INPUT 2>!. | ||
يرجى الإجابة على البيان، حتى لو لم تكن متأكدا تماما من إجابتك. | ||
!<INPUT 3>! | ||
يرجى إخراج درجاتك بتنسيق JSON التالي: '{"<مؤشر البيان>": "<نتيجة>"}'. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
bfi_ar_t3.txt | ||
|
||
!<INPUT 0>! -- scale range min | ||
!<INPUT 1>! -- scale range max | ||
!<INPUT 2>! -- level description | ||
!<INPUT 3>! -- statements | ||
|
||
<commentblockmarker>###</commentblockmarker> | ||
|
||
نظرا لتصريحاتك التالية: | ||
!<INPUT 3>! | ||
يرجى الاختيار من بين الخيارات التالية لتحديد مدى دقة هذا البيان في وصفك. !<INPUT 2>!. | ||
يرجى إخراج درجاتك بتنسيق JSON التالي: '{"<مؤشر البيان>": "<نتيجة>"}'. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
bfi_ar_t4.txt | ||
|
||
!<INPUT 0>! -- scale range min | ||
!<INPUT 1>! -- scale range max | ||
!<INPUT 2>! -- level description | ||
!<INPUT 3>! -- statements | ||
|
||
<commentblockmarker>###</commentblockmarker> | ||
|
||
فيما يلي عدد من الخصائص التي قد تنطبق عليك أو لا تنطبق عليك. يرجى تقييم مستوى موافقتك على مقياس من !<INPUT 0>! إلى !<INPUT 1>!. !<INPUT 2>!. وإليكم العبارات، سجلوها واحدة تلو الأخرى: | ||
!<INPUT 3>! | ||
يرجى إخراج درجاتك بتنسيق JSON التالي: '{"<مؤشر البيان>": "<نتيجة>"}'. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
bfi_ar_t5.txt | ||
|
||
!<INPUT 0>! -- scale range min | ||
!<INPUT 1>! -- scale range max | ||
!<INPUT 2>! -- level description | ||
!<INPUT 3>! -- statements | ||
|
||
<commentblockmarker>###</commentblockmarker> | ||
|
||
فيما يلي عدد من الخصائص التي قد تنطبق عليك أو لا تنطبق عليك. يرجى تقييم مدى موافقتك على مقياس من !<INPUT 0>! إلى !<INPUT 1>!. !<INPUT 2>!. وإليكم العبارات، سجلوها واحدة تلو الأخرى: | ||
!<INPUT 3>! | ||
يرجى إخراج درجاتك بتنسيق JSON التالي: '{"<مؤشر البيان>": "<نتيجة>"}'. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
rephrase_ar.txt | ||
|
||
!<INPUT 0>! -- prompt | ||
!<INPUT 1>! -- existing statements | ||
|
||
<commentblockmarker>###</commentblockmarker> | ||
|
||
الجملة التالية عبارة عن بيان من Big Five Inventory للمشاركين في المقابلات لتقييم أنفسهم. الرجاء إعادة صياغة الجمل التالية: "!<INPUT 0>!". | ||
قم بإخراج الجمل المعاد صياغتها بتنسيق JSON التالي: {"sentence": "<الجملة المعاد صياغتها>"}. | ||
لا تقم بإعادة صياغة الجملة (الجمل) التالية: !<INPUT 1>!. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
bfi_de_t1.txt | ||
|
||
!<INPUT 0>! -- scale range min | ||
!<INPUT 1>! -- scale range max | ||
!<INPUT 2>! -- level description | ||
!<INPUT 3>! -- statements | ||
|
||
<commentblockmarker>###</commentblockmarker> | ||
|
||
In den folgenden Aussagen können Sie nur eine Antwort von !<INPUT 0>! bis !<INPUT 1>! geben. Hier sind eine Reihe von Merkmalen aufgeführt, die möglicherweise auf Sie zutreffen oder auch nicht. Bitte geben Sie an, inwieweit Sie dieser Aussage zustimmen oder nicht. !<INPUT 2>!. Hier sind die Aussagen, bitte bewerten Sie sie einzeln: | ||
!<INPUT 3>! | ||
Bitte geben Sie Ihre Punktzahl im folgenden JSON-Format aus: '{"<Anweisungsindex>": "<Punktzahl>"}'. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
bfi_de_t2.txt | ||
|
||
!<INPUT 0>! -- scale range min | ||
!<INPUT 1>! -- scale range max | ||
!<INPUT 2>! -- level description | ||
!<INPUT 3>! -- statements | ||
|
||
<commentblockmarker>###</commentblockmarker> | ||
|
||
Jetzt werde ich einige Leute kurz beschreiben. Bitte lesen Sie jede Beschreibung und sagen Sie mir, wie sehr jede Person Ihnen ähnelt. Schreiben Sie Ihre Antwort mit der folgenden Skala: !<INPUT 2>!. | ||
Bitte beantworten Sie die Aussage, auch wenn Sie sich Ihrer Antwort nicht ganz sicher sind. | ||
!<INPUT 3>! | ||
Bitte geben Sie Ihre Punktzahl im folgenden JSON-Format aus: '{"<Anweisungsindex>": "<Punktzahl>"}'. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
bfi_de_t3.txt | ||
|
||
!<INPUT 0>! -- scale range min | ||
!<INPUT 1>! -- scale range max | ||
!<INPUT 2>! -- level description | ||
!<INPUT 3>! -- statements | ||
|
||
<commentblockmarker>###</commentblockmarker> | ||
|
||
Angesichts der folgenden Aussagen von Ihnen: | ||
!<INPUT 3>! | ||
Bitte wählen Sie eine der folgenden Optionen aus, um herauszufinden, wie genau diese Aussage Sie beschreibt. !<INPUT 2>!. | ||
Bitte geben Sie Ihre Punktzahl im folgenden JSON-Format aus: '{"<Anweisungsindex>": "<Punktzahl>"}'. |
Oops, something went wrong.