init
DonaldLamNL committed Dec 28, 2023
1 parent ca89a48 commit 80ef914
Showing 71 changed files with 4,644 additions and 0 deletions.
Binary file added .DS_Store
Binary file removed GPTPersonality.xlsx
Binary file added dataset/.DS_Store
722 changes: 722 additions & 0 deletions dataset/add.ipynb

2,613 changes: 2,613 additions & 0 deletions dataset/questionnaires.json

138 changes: 138 additions & 0 deletions global_functions.py
@@ -0,0 +1,138 @@
"""
Author: LAM Man Ho ([email protected])
"""
import json
import numpy as np
import scipy.stats as stats
from statistics import mean

# Convert a number to the symbol style named by the label
def convert_number(label, number):
    if label.startswith('n'):
        return number
    elif label.startswith('a'):
        lower_case = label.endswith('l')
        return num_to_alphabet(number, lower_case)
    elif label.startswith('r'):
        lower_case = label.endswith('l')
        return num_to_roman(number, lower_case)
    else:
        raise ValueError(f"Unknown label: {label}")

# Convert a symbol back to its number
def convert_symbol(label, symbol):
    if label.startswith('n'):
        return int(symbol)
    elif label.startswith('a'):
        return alphabet_to_num(symbol)
    elif label.startswith('r'):
        return roman_to_num(symbol)
    else:
        raise ValueError(f"Unknown label: {label}")

# Convert number to Roman number
def num_to_roman(num, lower_case=False):
    val = [1000, 900, 500, 400, 100, 90, 50, 40, 10, 9, 5, 4, 1]
    syb = ["M", "CM", "D", "CD", "C", "XC", "L", "XL", "X", "IX", "V", "IV", "I"]
    roman_num = ''
    i = 0
    while num > 0:
        for _ in range(num // val[i]):
            roman_num += syb[i]
            num -= val[i]
        i += 1
    return roman_num.lower() if lower_case else roman_num

# Convert number to alphabet(s)
def num_to_alphabet(num, lower_case=False):
    alphabet = ''
    while num > 0:
        remainder = (num - 1) % 26
        alphabet = chr(65 + remainder) + alphabet
        num = (num - 1) // 26
    return alphabet.lower() if lower_case else alphabet

# Convert Roman number to number
def roman_to_num(roman):
    roman = roman.upper()
    val = [1000, 900, 500, 400, 100, 90, 50, 40, 10, 9, 5, 4, 1]
    syb = ["M", "CM", "D", "CD", "C", "XC", "L", "XL", "X", "IX", "V", "IV", "I"]
    result = 0
    i = 0
    while i < len(roman):
        for v, s in zip(val, syb):
            if roman.startswith(s, i):
                result += v
                i += len(s)
                break
    return result

# Convert alphabet(s) to number
def alphabet_to_num(alphabet):
    result = 0
    for char in alphabet.upper():
        result = result * 26 + ord(char) - ord('A') + 1
    return result
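
# Expected round trips, as a quick reference (values follow directly from the
# converters above): the label picks the symbol style, and convert_symbol
# inverts convert_number.
#   convert_number('au', 3)  -> 'C'     convert_symbol('au', 'C')  -> 3
#   convert_number('al', 28) -> 'ab'    convert_symbol('al', 'ab') -> 28
#   convert_number('rl', 9)  -> 'ix'    convert_symbol('rl', 'ix') -> 9
#   convert_number('n', 7)   -> 7       convert_symbol('n', '7')   -> 7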

# Get Prompt Template
def get_prompt(filename, inputs):
    with open(filename, 'r') as file:
        generated_prompt = file.read().split("<commentblockmarker>###</commentblockmarker>")[1].strip()
    for index, item in enumerate(inputs):
        key = f"!<INPUT {index}>!"
        generated_prompt = generated_prompt.replace(key, str(item))
    return generated_prompt
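
# Sketch of the template layout get_prompt expects (matching the files under
# prompt_template/): everything after the <commentblockmarker>###</commentblockmarker>
# line is the prompt body, and each "!<INPUT i>!" placeholder is replaced by
# str(inputs[i]).
#   e.g. a body of "Rate from !<INPUT 0>! to !<INPUT 1>!." with inputs [1, 5]
#   yields "Rate from 1 to 5."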

# Get questionnaire
def get_questionnaire(name):
    try:
        with open('dataset/questionnaires.json') as dataset:
            data = json.load(dataset)
        try:
            questionnaire = data[name]
            return questionnaire
        except KeyError:
            raise ValueError("Questionnaire not found.")
    except FileNotFoundError:
        raise FileNotFoundError("The 'questionnaires.json' file does not exist.")
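
# Illustrative call (assumes dataset/questionnaires.json contains a 'BFI' entry,
# as main.py below expects):
#   bfi = get_questionnaire('BFI')   # returns the dict stored under the 'BFI' key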

# Hypothesis Testing
def hypothesis_testing(sample1, sample2, significant_level=0.001):
    # Use the corrected sample standard deviation (ddof=1), which is what
    # stats.ttest_ind_from_stats expects
    mean1, std1, n1 = np.mean(sample1), np.std(sample1, ddof=1), len(sample1)
    mean2, std2, n2 = np.mean(sample2), np.std(sample2, ddof=1), len(sample2)

    # Add an epsilon to prevent a zero standard deviation
    epsilon = 1e-8
    std1 += epsilon
    std2 += epsilon

    # Perform an F-test to decide whether the variances can be treated as equal
    if std1 > std2:
        f_value = std1 ** 2 / std2 ** 2
        df1, df2 = n1 - 1, n2 - 1
    else:
        f_value = std2 ** 2 / std1 ** 2
        df1, df2 = n2 - 1, n1 - 1

    p_value = (1 - stats.f.cdf(f_value, df1, df2)) * 2
    equal_var = p_value > significant_level

    # Perform the t-test (Student's if equal_var, otherwise Welch's; scipy
    # computes the degrees of freedom internally)
    t_value, p_value = stats.ttest_ind_from_stats(mean1, std1, n1, mean2, std2, n2, equal_var=equal_var)

    if p_value > significant_level:
        # return f'- ({(mean1 - mean2):.2f})'
        return f'$= {(mean1 - mean2):.2f}_{{{p_value:.2f}}}$'
        # return f'<font color="F54747">= ({(mean1 - mean2):.2f})</font>'
    else:
        if t_value > 0:
            # return '>'
            # return f'> ({(mean1 - mean2):.2f})'
            return f'$> {(mean1 - mean2):.2f}_{{{p_value:.2f}}}$'
        else:
            # return '<'
            # return f'< ({(mean1 - mean2):.2f})'
            return f'$< {(mean1 - mean2):.2f}_{{{p_value:.2f}}}$'
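
if __name__ == "__main__":
    # Minimal smoke test (illustrative only; the sample lists below are made up)
    assert convert_number('au', 3) == 'C' and convert_symbol('au', 'C') == 3
    assert convert_number('rl', 9) == 'ix' and convert_symbol('rl', 'ix') == 9
    # hypothesis_testing returns a LaTeX-style comparison of the two sample
    # means, subscripted with the t-test p-value
    print(hypothesis_testing([3, 4, 5, 4, 3, 5], [2, 3, 2, 3, 2, 3]))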
83 changes: 83 additions & 0 deletions gpt_setting.py
@@ -0,0 +1,83 @@
from tenacity import (
retry,
stop_after_attempt,
wait_random_exponential,
)
import openai
import time
import os
import random

from utils import *

openai.api_key = api_key

@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def chat(
    model, # gpt-4, gpt-4-0314, gpt-4-32k, gpt-4-32k-0314, gpt-3.5-turbo, gpt-3.5-turbo-0301
    messages, # [{"role": "system"/"user"/"assistant", "content": "Hello!", "name": "example"}]
    temperature=temperature, # [0, 2]: Lower values -> more focused and deterministic; Higher values -> more random.
    n=1, # Chat completion choices to generate for each input message.
    max_tokens=1024, # The maximum number of tokens to generate in the chat completion.
    delay=delay_time # Seconds to sleep after each request.
):
    time.sleep(delay)

    response = openai.ChatCompletion.create(
        model=model,
        messages=messages,
        temperature=temperature,
        n=n,
        max_tokens=max_tokens
    )

    if n == 1:
        return response['choices'][0]['message']['content']
    else:
        return [i['message']['content'] for i in response['choices']]


@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def completion(
    model, # text-davinci-003, text-davinci-002, text-curie-001, text-babbage-001, text-ada-001
    prompt, # The prompt(s) to generate completions for, encoded as a string, array of strings, array of tokens, or array of token arrays.
    temperature=temperature, # [0, 2]: Lower values -> more focused and deterministic; Higher values -> more random.
    n=1, # Completions to generate for each prompt.
    max_tokens=1024, # The maximum number of tokens to generate in the completion.
    delay=delay_time # Seconds to sleep after each request.
):
    time.sleep(delay)

    response = openai.Completion.create(
        model=model,
        prompt=prompt,
        temperature=temperature,
        n=n,
        max_tokens=max_tokens
    )

    if n == 1:
        return response['choices'][0]['text']
    else:
        choices = sorted(response['choices'], key=lambda x: x['index'])
        return [i['text'] for i in choices]

def print_prompt(inputs, response):
    os.makedirs("records", exist_ok=True)
    with open("records/records.txt", 'a') as f:
        f.write(f"{inputs}\n----\n")
        f.write(f"{response}\n====\n")
    return

def gpt_request(model, inputs):
    if model == 'text-davinci-003':
        response = completion(model, inputs).strip()
        print_prompt(inputs, response)
        return response
    elif model in ['gpt-3.5-turbo', 'gpt-4']:
        response = chat(model, inputs).strip()
        print_prompt(inputs, response)
        return response
    else:
        raise ValueError("The model is not supported or does not exist.")
46 changes: 46 additions & 0 deletions main.py
@@ -0,0 +1,46 @@
from server import *

'''
Prompt Template
'''
template = ['t1','t2','t3','t4','t5']

'''
Question Version
'''
version = ['v1','v2','v3','v4','v5']

'''
Language:
En: English, Zh: Simplified Chinese, Ko: Korean, Es: Spanish, Fr: French,
De: German, It: Italian, Ar: Arabic, Ru: Russian, Ja: Japanese
'''
language = ['Zh', 'Ko', 'Es', 'Fr', 'De', 'It', 'Ar', 'Ru', 'Ja']

'''
Label:
n: Arabic Numeral
al: Lowercase Latin, au: Uppercase Latin
rl: Lowercase Roman, ru: Uppercase Roman
'''
label = ['n', 'al', 'au', 'rl', 'ru']

'''
Order:
f: Ascending, r: Descending
'''
order = ['r', 'f']

# =================================================================================================
questionnaire_name = 'BFI'
name_exp = 'ko'

# Start a server and generate pre-testing cases
bfi_test = Server(questionnaire_name, template, version, language, label, order, name_exp=name_exp)

# Load and continue a test
# bfi_test = load('<filename>', '<new-filename>')

# Run the pre-testing cases
bfi_test.run()
# =================================================================================================
Binary file added prompt_template/.DS_Store
12 changes: 12 additions & 0 deletions prompt_template/ar/bfi_ar_t1.txt
@@ -0,0 +1,12 @@
bfi_ar_t1.txt

!<INPUT 0>! -- scale range min
!<INPUT 1>! -- scale range max
!<INPUT 2>! -- level description
!<INPUT 3>! -- statements

<commentblockmarker>###</commentblockmarker>

يمكنك الرد من !<INPUT 0>! إلى !<INPUT 1>! فقط في العبارات التالية. فيما يلي عدد من الخصائص التي قد تنطبق عليك أو لا تنطبق عليك. يرجى الإشارة إلى مدى موافقتك أو عدم موافقتك على هذا البيان. !<INPUT 2>! . فيما يلي العبارات، يرجى تسجيلها واحدة تلو الأخرى:
!<INPUT 3>!
يرجى إخراج درجاتك بتنسيق JSON التالي: '{"<مؤشر البيان>": "<نتيجة>"}'.
13 changes: 13 additions & 0 deletions prompt_template/ar/bfi_ar_t2.txt
@@ -0,0 +1,13 @@
bfi_ar_t2.txt

!<INPUT 0>! -- scale range min
!<INPUT 1>! -- scale range max
!<INPUT 2>! -- level description
!<INPUT 3>! -- statements

<commentblockmarker>###</commentblockmarker>

الآن سأصف بإيجاز بعض الأشخاص. من فضلك اقرأ كل وصف وأخبرني عن مدى إعجاب كل شخص بك. اكتب إجابتك باستخدام المقياس التالي: !<INPUT 2>!.
يرجى الإجابة على البيان، حتى لو لم تكن متأكدا تماما من إجابتك.
!<INPUT 3>!
يرجى إخراج درجاتك بتنسيق JSON التالي: '{"<مؤشر البيان>": "<نتيجة>"}'.
13 changes: 13 additions & 0 deletions prompt_template/ar/bfi_ar_t3.txt
@@ -0,0 +1,13 @@
bfi_ar_t3.txt

!<INPUT 0>! -- scale range min
!<INPUT 1>! -- scale range max
!<INPUT 2>! -- level description
!<INPUT 3>! -- statements

<commentblockmarker>###</commentblockmarker>

نظرا لتصريحاتك التالية:
!<INPUT 3>!
يرجى الاختيار من بين الخيارات التالية لتحديد مدى دقة هذا البيان في وصفك. !<INPUT 2>!.
يرجى إخراج درجاتك بتنسيق JSON التالي: '{"<مؤشر البيان>": "<نتيجة>"}'.
12 changes: 12 additions & 0 deletions prompt_template/ar/bfi_ar_t4.txt
@@ -0,0 +1,12 @@
bfi_ar_t4.txt

!<INPUT 0>! -- scale range min
!<INPUT 1>! -- scale range max
!<INPUT 2>! -- level description
!<INPUT 3>! -- statements

<commentblockmarker>###</commentblockmarker>

فيما يلي عدد من الخصائص التي قد تنطبق عليك أو لا تنطبق عليك. يرجى تقييم مستوى موافقتك على مقياس من !<INPUT 0>! إلى !<INPUT 1>!. !<INPUT 2>!. وإليكم العبارات، سجلوها واحدة تلو الأخرى:
!<INPUT 3>!
يرجى إخراج درجاتك بتنسيق JSON التالي: '{"<مؤشر البيان>": "<نتيجة>"}'.
12 changes: 12 additions & 0 deletions prompt_template/ar/bfi_ar_t5.txt
@@ -0,0 +1,12 @@
bfi_ar_t5.txt

!<INPUT 0>! -- scale range min
!<INPUT 1>! -- scale range max
!<INPUT 2>! -- level description
!<INPUT 3>! -- statements

<commentblockmarker>###</commentblockmarker>

فيما يلي عدد من الخصائص التي قد تنطبق عليك أو لا تنطبق عليك. يرجى تقييم مدى موافقتك على مقياس من !<INPUT 0>! إلى !<INPUT 1>!. !<INPUT 2>!. وإليكم العبارات، سجلوها واحدة تلو الأخرى:
!<INPUT 3>!
يرجى إخراج درجاتك بتنسيق JSON التالي: '{"<مؤشر البيان>": "<نتيجة>"}'.
10 changes: 10 additions & 0 deletions prompt_template/ar/rephrase_ar.txt
@@ -0,0 +1,10 @@
rephrase_ar.txt

!<INPUT 0>! -- prompt
!<INPUT 1>! -- existed statements

<commentblockmarker>###</commentblockmarker>

الجملة التالية عبارة عن بيان من Big Five Inventory للمشاركين في المقابلات لتقييم أنفسهم. الرجاء إعادة صياغة الجمل التالية: "!<INPUT 0>!".
قم بإخراج الجمل المعاد صياغتها بتنسيق JSON التالي: {"sentence": "<الجملة المعاد صياغتها>"}.
لا تقم بإعادة صياغة الجملة (الجمل) التالية: !<INPUT 1>!.
12 changes: 12 additions & 0 deletions prompt_template/de/bfi_de_t1.txt
@@ -0,0 +1,12 @@
bfi_de_t1.txt

!<INPUT 0>! -- scale range min
!<INPUT 1>! -- scale range max
!<INPUT 2>! -- level description
!<INPUT 3>! -- statements

<commentblockmarker>###</commentblockmarker>

In den folgenden Aussagen können Sie nur eine Antwort von !<INPUT 0>! bis !<INPUT 1>! geben. Hier sind eine Reihe von Merkmalen aufgeführt, die möglicherweise auf Sie zutreffen oder auch nicht. Bitte geben Sie an, inwieweit Sie dieser Aussage zustimmen oder nicht. !<INPUT 2>!. Hier sind die Aussagen, bitte bewerten Sie sie einzeln:
!<INPUT 3>!
Bitte geben Sie Ihre Punktzahl im folgenden JSON-Format aus: '{"<Anweisungsindex>": "<Punktzahl>"}'.
13 changes: 13 additions & 0 deletions prompt_template/de/bfi_de_t2.txt
@@ -0,0 +1,13 @@
bfi_de_t2.txt

!<INPUT 0>! -- scale range min
!<INPUT 1>! -- scale range max
!<INPUT 2>! -- level description
!<INPUT 3>! -- statements

<commentblockmarker>###</commentblockmarker>

Jetzt werde ich einige Leute kurz beschreiben. Bitte lesen Sie jede Beschreibung und sagen Sie mir, wie sehr jede Person Ihnen ähnelt. Schreiben Sie Ihre Antwort mit der folgenden Skala: !<INPUT 2>!.
Bitte beantworten Sie die Aussage, auch wenn Sie sich Ihrer Antwort nicht ganz sicher sind.
!<INPUT 3>!
Bitte geben Sie Ihre Punktzahl im folgenden JSON-Format aus: '{"<Anweisungsindex>": "<Punktzahl>"}'.
13 changes: 13 additions & 0 deletions prompt_template/de/bfi_de_t3.txt
@@ -0,0 +1,13 @@
bfi_de_t3.txt

!<INPUT 0>! -- scale range min
!<INPUT 1>! -- scale range max
!<INPUT 2>! -- level description
!<INPUT 3>! -- statements

<commentblockmarker>###</commentblockmarker>

Angesichts der folgenden Aussagen von Ihnen:
!<INPUT 3>!
Bitte wählen Sie eine der folgenden Optionen aus, um herauszufinden, wie genau diese Aussage Sie beschreibt. !<INPUT 2>!.
Bitte geben Sie Ihre Punktzahl im folgenden JSON-Format aus: '{"<Anweisungsindex>": "<Punktzahl>"}'.