-
Notifications
You must be signed in to change notification settings - Fork 1
/
data-prep.html
359 lines (304 loc) · 15 KB
/
data-prep.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Text Windows Layout with Interactive Features</title>
<style>
/* Page grid: three columns (side windows 1fr each, middle 2fr) and four equal rows. */
body {
  display: grid;
  height: 90vh;
  margin: 0;
  grid-template-columns: 1fr 2fr 1fr;
  grid-template-rows: 1fr 1fr 1fr 1fr;
  gap: 10px;
  padding: 10px;
  background-color: #969696;
}

/* Shared frame for all four text windows. */
.text-window {
  border: 2px solid #000;
  padding: 10px;
  overflow: auto;
}

/* Both side windows span every row; .right is moved to column 3 by the later .right rule. */
.left, .right {
  grid-column: 1;
  grid-row: 1 / span 4;
}

.upper-middle {
  grid-column: 2;
  grid-row: 1;
}

.lower-middle {
  grid-column: 2;
  grid-row: 2;
}

.right {
  grid-column: 3;
  grid-row: 1 / span 4;
}

/*
 * Control strips in the middle column. The original declaration read
 * "grid-row 3" (no colon), which is invalid CSS and was discarded by the
 * parser. The elements also set grid placement through inline style
 * attributes, which take precedence over this rule.
 */
.counter, .buttons {
  position: relative;
  top: 15%;
  grid-column: 2;
  grid-row: 3;
  display: flex;
  height: 25%;
  justify-content: center;
  align-items: center;
}

textarea {
  resize: none;
  background-color: #d4d4d4;
  font-size: 18px;
}

.info-box {
  display: flex;
  flex-direction: column;
  justify-content: center; /* Center the content vertically */
  align-items: center; /* Center the content horizontally */
  padding: 10px; /* Add some padding */
  background-color: #f0f0f0; /* Light background for the info box */
  border-radius: 10px; /* Optional: adds rounded corners */
  box-shadow: 0 2px 4px rgba(0,0,0,0.1); /* Optional: adds a slight shadow for depth */
}
</style>
</head>
<body>
<!-- Hidden file picker: the Load button clicks it, and loadFile() runs when a file is chosen. -->
<input type="file" id="fileInput" style="display: none;" accept=".txt" onchange="loadFile()" aria-label="Text file to load">
<!-- The four text windows. Each gets an accessible name because none has a visible label. -->
<textarea class="text-window left" aria-label="Raw source text"></textarea>
<textarea class="text-window upper-middle" aria-label="Entry being edited">Single newlines are converted to \n. Double newlines, or "----" separate your input and output.</textarea>
<textarea class="text-window lower-middle" aria-label="Formatted JSON entry"></textarea>
<textarea class="text-window right" aria-label="Approved entries"></textarea>
<!--
  Conversation and message counters. The +/- buttons go through updateCounter().
  Typing directly into the conversation field calls onConversationNumberChange(),
  so the message number resets to 1 as the program info describes; typing into
  the message field only re-runs formatText().
-->
<div class="counter" style="grid-column: 2; grid-row: 3; text-align: center;">
  <button type="button" onclick="updateCounter('conversation', -1)" aria-label="Decrease conversation number">-</button>
  <input type="number" id="conversationNumber" value="1" oninput="onConversationNumberChange()">
  <button type="button" onclick="updateCounter('conversation', +1)" aria-label="Increase conversation number">+</button>
  <label for="conversationNumber">Conversation Number</label>
  <button type="button" onclick="updateCounter('message', -1)" aria-label="Decrease message number">-</button>
  <input type="number" id="messageNumber" value="1" oninput="formatText()">
  <button type="button" onclick="updateCounter('message', +1)" aria-label="Increase message number">+</button>
  <label for="messageNumber">Message Number</label>
</div>
<!--
  Action buttons. Handlers chained in one onclick run left to right.
  Both buttons that approve an entry also call saveState(), so progress is
  written to localStorage whichever of the two is used.
-->
<div class="buttons" style="grid-column: 2; grid-row: 3 / span 2;">
  <button type="button" id="loadButton" onclick="document.getElementById('fileInput').click();" style="height:50px">Load</button>
  <button type="button" id="selectTextButton" onclick="selectTextUpToDoubleNewline()" style="height:50px">Select Text</button>
  <button type="button" id="transferButton" onclick="transferText()" style="height:50px">Transfer</button>
  <button type="button" id="SelTra" onclick="selectTextUpToDoubleNewline(), transferText()" style="height:50px">Select and Transfer</button>
  <button type="button" id="approveEntryButton" onclick="approveEntry(), saveState()" style="height:50px">Approve Entry</button>
  <button type="button" id="SelTraApp" onclick="selectTextUpToDoubleNewline(), transferText(), approveEntry(), saveState()" style="height:50px">Select, Transfer, Approve</button>
  <button type="button" onclick="resumeState()" style="height:50px">Resume State</button>
  <button type="button" id="saveButton" onclick="saveText(), saveState()" style="height:50px">Save</button>
</div>
<div class="info-box" style="grid-column: 2; grid-row: 4; text-align: center;">
<p style="margin-bottom: 5px; margin-top: 0px;"><b>Program Info:</b></p>
<p style="margin-bottom: 5px; margin-top: 5px;">This program is a simple conversational dataset prep tool. You can load a text file, the contents of which will appear in the left window. You can manually highlight the entries you want to edit, or use the "Select Text" button to select up to the second double newline. Pressing "Transfer" will cut the selected text and paste it into the upper middle window. The program automatically splits the text in the upper middle window at the first double newline or the string "----", and puts it into a conversational JSON format, displayed in the lower middle text box. You can then use the "Approve Entry" button to send the JSON string to the right-hand window. It will append each approved entry to the list that is already there and add 1 to the message number. Changing the conversation number will reset the message number to 1. It will try to replace the " with \" automatically to make it JSON compatible.</p>
<p style="margin-bottom: 5px; margin-top: 5px;">This is the JSON template used:</p>
<p style="margin-bottom: 5px; margin-top: 5px;">{
"conversation,message,input,output": "Conversation #: %conversation%\nExchange #: %message%\nUSER: %input%\nASSISTANT: %output%"
}</p>
</div>
<script>
// Read the file chosen in the hidden #fileInput picker as text and put its
// contents into the left-hand textarea. Does nothing when no file is selected.
function loadFile() {
  const fileInput = document.getElementById('fileInput');
  const file = fileInput.files[0];
  if (!file) {
    return;
  }
  const reader = new FileReader();
  reader.onload = function (e) {
    // A <textarea> takes plain text, so the content is assigned to .value as-is.
    document.querySelector('.left').value = e.target.result;
  };
  reader.onerror = function () {
    // Report a failed read instead of leaving the window silently unchanged.
    console.error('Error reading file:', reader.error);
  };
  reader.readAsText(file);
  // Clear the picker so that choosing the same file again still fires "change".
  // The File object captured above remains readable after the input is cleared.
  fileInput.value = '';
}
// Cut the current selection out of the left textarea and place it in the upper
// middle textarea, replacing whatever was there. Trailing newlines are dropped
// from the moved text. Afterwards the JSON preview is rebuilt and leading
// newlines are stripped from what remains on the left.
function transferText() {
  const source = document.querySelector('.left');
  const target = document.querySelector('.upper-middle');

  const from = source.selectionStart;
  const to = source.selectionEnd;
  const original = source.value;

  // The moved text overwrites the upper middle window (it is not appended).
  target.value = original.substring(from, to).replace(/\n+$/, '');

  // Stitch the left text back together without the selected range.
  source.value = original.substring(0, from) + original.substring(to);

  formatText();
  cleanLeftTextArea();
}
// Add `delta` to the "conversation" or "message" counter field and refresh the
// JSON preview.
//   type  - 'conversation' or 'message'; selects the input with id `${type}Number`
//   delta - amount to add (the buttons pass -1 or +1)
// The result never drops below 1. Changing the conversation counter also resets
// the message counter to 1.
function updateCounter(type, delta) {
  const counter = document.getElementById(`${type}Number`);
  const current = parseInt(counter.value, 10);
  // An empty or non-numeric field parses to NaN; treat it as 0 so the result
  // clamps to 1 instead of writing NaN back into the input.
  const base = Number.isNaN(current) ? 0 : current;
  counter.value = Math.max(base + delta, 1); // Prevent counter from going below 1
  if (type === 'conversation') {
    document.getElementById('messageNumber').value = 1; // Reset message number to 1
  }
  formatText(); // Update the output based on the new counter values
}
// Keep the JSON preview in sync with the upper middle window.
// The MutationObserver reports changes to the textarea's child nodes only.
// Text typed, pasted, cut or dropped by the user changes the control's value
// without touching its child nodes, so an "input" listener is attached as well.
let observer = new MutationObserver(mutations => {
  formatText();
});
observer.observe(document.querySelector('.upper-middle'), { childList: true, subtree: true });
document.querySelector('.upper-middle').addEventListener('input', () => {
  formatText();
});
// Split the upper middle textarea into an input part and an output part and
// show them as one JSON entry in the lower middle textarea.
//
// The text is split on a blank line (double newline) or on "----". The first
// piece becomes "input", the second becomes "output" (empty when there is no
// separator); any further pieces are ignored. The conversation and message
// numbers are taken from their input fields as strings.
function formatText() {
  const upperMiddleTextarea = document.querySelector('.upper-middle');
  const parts = upperMiddleTextarea.value.split(/\n\n|----/);

  const entry = {
    conversation: document.getElementById('conversationNumber').value,
    message: document.getElementById('messageNumber').value,
    input: parts[0],
    output: parts.length > 1 ? parts[1] : '',
  };

  // JSON.stringify does the escaping. Newlines come out as \n exactly as
  // before, and double quotes, backslashes and control characters are escaped
  // too. The previous replace(/"/g, '\"') was a no-op because '\"' is just '"',
  // so any quote in the text produced invalid JSON.
  // The result is stored in textContent, which approveEntry reads back.
  document.querySelector('.lower-middle').textContent = JSON.stringify(entry);
}
// Rebuild the JSON preview after every key release anywhere in the document.
document.addEventListener('keyup', () => {
  formatText();
});
// Download the contents of the right-hand textarea as a plain-text file.
// The user is prompted for a filename; cancelling the prompt (or entering an
// empty name) aborts without creating anything.
function saveText() {
  // Ask the user for a filename
  const filename = prompt("Please enter a filename for the saved text:", "savedText.txt");
  if (!filename) {
    return;
  }

  const text = document.querySelector('.right').value;
  const blob = new Blob([text], { type: 'text/plain' });
  const url = window.URL.createObjectURL(blob);

  // A temporary, hidden <a download> element triggers the browser download.
  const anchor = document.createElement('a');
  anchor.download = filename; // Use the user-provided filename
  anchor.href = url;
  anchor.style.display = "none";
  document.body.appendChild(anchor);
  anchor.click();
  document.body.removeChild(anchor);

  // Release the object URL so the Blob can be garbage-collected. Deferred to
  // the next task so the download started by click() is not cut short.
  setTimeout(() => window.URL.revokeObjectURL(url), 0);
}
// Move the JSON entry shown in the lower middle textarea into the right-hand
// textarea. The entry is parsed first; it is accepted only when both "input"
// and "output" are non-blank. On success the entry is appended (separated from
// earlier entries by ",\n"), both middle textareas are cleared and the message
// number goes up by one. Rejections and parse errors are written to the console.
function approveEntry() {
  const formattedBox = document.querySelector('.lower-middle');
  const draftBox = document.querySelector('.upper-middle');
  const approvedBox = document.querySelector('.right');

  try {
    const entry = JSON.parse(formattedBox.textContent);

    // Reject entries where either side is missing or whitespace-only.
    if (!entry.input || !entry.input.trim() || !entry.output || !entry.output.trim()) {
      console.log("Both 'input' and 'output' must have non-empty values.");
      return;
    }

    // Entries after the first are joined with a comma and a newline.
    const existing = approvedBox.value;
    const separator = existing ? ',\n' : '';
    approvedBox.value = existing + separator + JSON.stringify(entry);

    // Reset both middle windows for the next entry.
    draftBox.value = '';
    formattedBox.textContent = '';

    // Advance the message counter.
    const messageCounter = document.getElementById('messageNumber');
    messageCounter.value = Number.parseInt(messageCounter.value) + 1;
  } catch (error) {
    console.error("Error parsing JSON from lower middle text area:", error);
  }
}
// Strip leading newline characters from the left textarea.
// All leading newlines are removed in one pass and the textarea is written at
// most once, and only when something actually changed.
function cleanLeftTextArea() {
  const leftTextArea = document.querySelector('.left');
  const current = leftTextArea.value;
  const trimmed = current.replace(/^\n+/, '');
  if (trimmed !== current) {
    leftTextArea.value = trimmed;
  }
}
// Select the text in the left textarea from its start up to the second blank
// line (double newline). The selection ends one character into that second
// separator. When there is no second separator the whole text is selected.
// The textarea is focused afterwards so the selection is visible.
function selectTextUpToDoubleNewline() {
  const SEPARATOR = '\n\n';
  const source = document.querySelector('.left');
  const contents = source.value;

  // Look for the second separator, starting just past the first one.
  const firstBreak = contents.indexOf(SEPARATOR);
  const secondBreak = contents.indexOf(SEPARATOR, firstBreak + 2);

  const selectionEnd = secondBreak === -1 ? contents.length : secondBreak + 1;
  source.setSelectionRange(0, selectionEnd);

  source.focus();
}
// Handler for a user-set conversation number: puts the message number back to 1
// and then rebuilds the JSON preview.
function onConversationNumberChange() {
  const messageField = document.getElementById('messageNumber');
  messageField.value = 1;
  formatText();
}
// Save the user's current progress to localStorage: the left textarea under
// "rawData" and the right textarea under "currentProgress". No confirmation is
// shown to the user.
function saveState() {
  // Read both windows first, then write both keys.
  const snapshot = [
    ['rawData', document.querySelector('.text-window.left').value],
    ['currentProgress', document.querySelector('.text-window.right').value],
  ];
  snapshot.forEach(([key, text]) => {
    localStorage.setItem(key, text);
  });
}
// Restore the session written by saveState().
//
// Reads "rawData" and "currentProgress" from localStorage, puts the raw data
// back in the left textarea and the approved entries back in the right
// textarea, then sets the counters to continue after the last approved entry:
// the highest conversation number found, and one past the highest message
// number within that conversation.
//
// The saved progress is wrapped into a JSON array for parsing only. The text
// placed in the right textarea keeps the leading "[" but has no closing "]",
// so further entries can still be appended.
function resumeState() {
  const rawData = localStorage.getItem('rawData');
  const savedProgress = localStorage.getItem('currentProgress');

  // getItem() returns null for a key that was never stored. An empty string is
  // a legitimate saved value (nothing approved yet, or all raw text consumed),
  // so only "never saved" or "both empty" counts as having nothing to resume.
  const nothingSaved = rawData === null || savedProgress === null ||
    (rawData === '' && savedProgress.trim() === '');
  if (nothingSaved) {
    alert('No saved state found.');
    return;
  }

  let currentProgress = savedProgress.trim(); // Trim whitespace for accurate checks
  document.querySelector('.text-window.left').value = rawData;

  // Nothing was approved before saving: restore an empty list and start at 1/1.
  if (currentProgress === '') {
    document.querySelector('.text-window.right').value = '';
    document.getElementById('conversationNumber').value = 1;
    document.getElementById('messageNumber').value = 1;
    alert('State resumed!');
    return;
  }

  // Ensure the string starts with [
  if (!currentProgress.startsWith('[')) {
    currentProgress = '[' + currentProgress;
  }
  // Ensure the string ends properly for parsing
  if (currentProgress.endsWith(',')) {
    currentProgress = currentProgress.substring(0, currentProgress.length - 1) + ']'; // Replace final comma with ]
  } else if (currentProgress.endsWith('}')) {
    currentProgress += ']'; // Append ] if ending with }
  }

  // Remove the trailing ] for editing purposes
  let editableProgress = currentProgress;
  if (editableProgress.endsWith(']')) {
    editableProgress = editableProgress.substring(0, editableProgress.length - 1);
  }
  document.querySelector('.text-window.right').value = editableProgress;

  try {
    const entries = JSON.parse(currentProgress); // Parse the array-wrapped string
    let highestConversation = 0;
    let highestMessageInConversation = 0;
    entries.forEach(entry => {
      const conversation = parseInt(entry.conversation, 10);
      const message = parseInt(entry.message, 10);
      if (conversation > highestConversation) {
        // A newer conversation restarts the message tracking.
        highestConversation = conversation;
        highestMessageInConversation = message;
      } else if (conversation === highestConversation && message > highestMessageInConversation) {
        highestMessageInConversation = message;
      }
    });
    // Counters start at 1; never restore a conversation number below that
    // (possible when no entry carried a usable conversation number).
    document.getElementById('conversationNumber').value = Math.max(highestConversation, 1);
    document.getElementById('messageNumber').value = highestMessageInConversation + 1;
    alert('State resumed!');
  } catch (error) {
    console.error('Error parsing JSON from current progress:', error);
    alert('Error resuming state. Please check the format of your saved data.');
  }
}
</script>
</body>
</html>