main.py
import json
from json import JSONDecodeError

import openai
import requests
import tldextract
from fastapi import FastAPI, HTTPException, Request
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates

from crawler import crawl, TextToCSVClass, scrape_html_tags
from customValidators import validator
from openai_api import client

app = FastAPI()
app.mount("/static", StaticFiles(directory="static"), name="static")
templates = Jinja2Templates(directory="templates")
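
# Assumed project layout (inferred from this file): a `static/` directory for
# assets, a `templates/` directory containing `index.html`, and a
# `personas/personas.json` file with the persona definitions.

# Reference shape of the JSON the model is asked to produce. It is not
# referenced elsewhere in this file and serves only as documentation.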
model_response_example = {
    "improvements": [
        {
            "count": 10,
            "description": "What should be improved",
            "explanation": "How to improve it"
        }
    ],
    "semantics": [
        {
            "error": "Describe semantic error",
            "suggestion": "Provide suggestion for error"
        }
    ],
    "persona": [
        {
            "error": "Describe found error",
            "suggestion": "Provide suggestion which should be implemented"
        }
    ]
}

@app.get("/", response_class=HTMLResponse)
async def root(request: Request):
    context_data = {"request": request, "heading": "Karol - Usability Evaluator"}
    return templates.TemplateResponse("index.html", context_data)

@app.post("/process_form")
async def process_form(request: Request):
    """Run the usability evaluation pipeline for the submitted URL and persona."""
    form_data = await request.form()
    url = form_data.get("url_field", "No value provided")
    option = form_data.get("user-selection-field", "middle-aged-male-generic")

    # Look up the selected persona definition by its persona_id.
    persona = {}
    with open("personas/personas.json", 'r') as json_file:
        data = json.load(json_file)
        for dictionary in data:
            if dictionary.get('persona_id') == option:
                persona = dictionary
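
    # A hypothetical example of one entry in personas/personas.json (the field
    # values are assumptions; the keys mirror those referenced in the persona
    # prompt below):
    # {
    #     "persona_id": "middle-aged-male-generic",
    #     "age_range": "40-55",
    #     "gender": "male",
    #     "occupations": ["office worker"],
    #     "education_level": "secondary",
    #     "technical_skill": "intermediate",
    #     "familiar_with_product": false
    # }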

    if not validator.is_url(url):
        raise HTTPException(400, "Invalid format for the provided URL")
    scraped_html = scrape_html_tags(url)
    if not scraped_html:
        raise HTTPException(500, "Unable to scrape and parse HTML")
    descriptions = validator.validate(url)
    domain = parse_domain(url)

    css_improvements = {domain: []}
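    # A Node/Express helper service is assumed to be running locally on port 3000;
    # its /checktags endpoint is expected to return a JSON object of CSS tag
    # improvements keyed by domain (matching the default value above).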
    try:
        crawl(url, domain=domain)
        TextToCSVClass(domain).to_csv()
        # Make a GET request to the Express server
        response = requests.get(f'http://localhost:3000/checktags?domain={domain}')
        # Check if the request was successful (status code 200)
        if response.status_code == 200:
            css_improvements = response.json()
        else:
            raise HTTPException(500)
    except requests.RequestException as e:
        print(e)
        raise HTTPException(500)

    try:
        completion = client.chat.completions.create(
            model="gpt-4",
            messages=[
                {"role": "system",
                 "content": "You are skilled in explaining website usability and you will receive data resembling a Python list, where each element is a tuple containing count, description and explanation values. Read the description value and generate the corresponding explanation value, which will be the resulting explanation. Return the provided data in JSON (with keys `improvements` and `semantics`) as an array under the key `improvements`, where each element is an object with count, description and explanation"},
                {"role": "user", "content": f"data list: {str(descriptions)}"},
                {"role": "assistant",
                 "content": "Now you will receive a prettified HTML structure and you will analyse the structure from the semantic point of view. Found errors and suggestions will be added to the previous JSON result under the key `semantics` in the form of objects with the keys `error` and `suggestion`"},
                {"role": "user", "content": f"html structure:\n\n {str(scraped_html)}"},
                {"role": "assistant",
                 "content": "Now you will receive a persona specification as a Python dictionary with the characteristics `age_range` (age range of the persona), `gender` (gender of the persona), `occupations` (possible occupations of the persona), `education_level` (possible achieved education), `technical_skill` (persona skill level when dealing with web content) and `familiar_with_product` (whether the persona used the product previously). Use the values of these characteristics and the analysis so far to determine whether the webpage is suitable for this type of person. Add this analysis under the `persona` key in the JSON result from the previous steps and specify found errors as objects with the keys `error` and `suggestion`. Return the result as JSON."},
                {"role": "user", "content": f"persona:\n\n {str(persona)}"},
                {"role": "assistant",
                 "content": "Now you will compute the resulting overall score for the whole analysis on a scale from 1 to 10 (0.2 step) based on the number and type of found errors, where persona errors have the biggest weight, semantic errors the second biggest weight and code improvement errors the smallest weight. Make the scoring optimistic: if the overall number of errors is less than 50, then the score must be bigger than 8. Specify this overall score as an additional value under the `score` key in the resulting JSON. Return the response as JSON."},
                {"role": "user", "content": "Generate the final response as JSON"},
            ],
            temperature=0,
            top_p=0.7
        )
    except openai.BadRequestError:
        raise HTTPException(500, "Too many tokens")
    if completion.choices[0].finish_reason != 'stop':
        raise HTTPException(500, "Too long input for GPT")

    try:
        message_content = completion.choices[0].message.content
        # Strip a possible Markdown code fence around the returned JSON.
        if message_content.startswith("```json"):
            message_content = message_content[len('```json'):]
        if message_content.endswith("```"):
            message_content = message_content.rstrip("```")
        print(message_content)
        resp = json.loads(message_content)
        print(resp)
    except JSONDecodeError:
        raise HTTPException(500, "Unable to parse GPT response")

    # Validate that the model returned every expected key.
    for dictionary in resp['improvements']:
        for key in ['count', 'description', 'explanation']:
            if key not in dictionary:
                raise HTTPException(500, "GPT response was returned in an inappropriate format")
    for dictionary in resp['semantics']:
        for key in ['error', 'suggestion']:
            if key not in dictionary:
                raise HTTPException(500, "GPT response was returned in an inappropriate format")
    for dictionary in resp['persona']:
        for key in ['error', 'suggestion']:
            if key not in dictionary:
                raise HTTPException(500, "GPT response was returned in an inappropriate format")
    if 'score' not in resp:
        raise HTTPException(500, "GPT response was returned in an inappropriate format")

    code_improvements = [(x['count'], x['description'], x['explanation']) for x in resp['improvements']]
    semantic_suggestions = [(x['error'], x['suggestion']) for x in resp['semantics']]
    persona_suggestions = [(x['error'], x['suggestion']) for x in resp['persona']]
    print(resp['semantics'])
    print(resp['score'])
    return {"code-improvements": code_improvements,
            "domain": domain,
            "css-tags-improvements": css_improvements,
            "semantic-suggestions": semantic_suggestions,
            "persona_suggestions": persona_suggestions,
            "score": resp['score']}


# UTILITIES
def parse_domain(url):
    """Return the bare registered domain (e.g. `example.com`) for the given URL."""
    extracted = tldextract.extract(url)
    domain = extracted.domain
    suffix = extracted.suffix
    to_return = f"{domain}.{suffix}".rstrip('/')
    # Defensive cleanup in case a scheme or www prefix slips through.
    if to_return.startswith('https://'):
        to_return = to_return[len('https://'):]
    elif to_return.startswith('http://'):
        to_return = to_return[len('http://'):]
    if to_return.startswith('www.'):
        to_return = to_return[len('www.'):]
    return to_return
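

# A minimal way to run the app locally, as a sketch: it assumes uvicorn is
# installed and that this module is named `main`; host and port are arbitrary.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run("main:app", host="127.0.0.1", port=8000)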