Skip to content

Commit

Permalink
Replace low-level string manipulation for Unicode.
Browse files Browse the repository at this point in the history
JavaScript strings are not "strings": they are immutable arrays of 16-bit numbers (UTF-16 code units). Much as developers manipulating "strings" in a language like C need to use a high-level, multibyte-aware string function such as mbslen instead of the low-level character array function strlen to handle UTF-8 sequences, developers manipulating "strings" in JavaScript need to use functions that understand UTF-16 sequences instead of dropping down to the low-level JavaScript string functions.

Note: this patch is not "complete": only the critical editor-level functionality has been fixed, and I have not yet spent the time to fix all of the various plugins or even surrounding features such as search. However, this proof of concept handles the core issue I described in the upstream issue ajaxorg#1153. It fixes the problem not just at the level of cursor movements and character updates; it also correctly returns multi-unit characters from events and via the ACE API.
  • Loading branch information
saurik committed Dec 24, 2012
1 parent 117d29a commit 2353eef
Show file tree
Hide file tree
Showing 8 changed files with 189 additions and 88 deletions.
2 changes: 1 addition & 1 deletion lib/ace/background_tokenizer.js
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ var BackgroundTokenizer = function(tokenizer, editor) {
var state = this.states[row - 1];

if (line.length > MAX_LINE_LENGTH) {
var overflow = {value: line.substr(MAX_LINE_LENGTH), type: "text"};
var overflow = {value: line.substring(MAX_LINE_LENGTH), type: "text"};
line = line.slice(0, MAX_LINE_LENGTH);
}
var data = this.tokenizer.getLineTokens(line, state);
Expand Down
30 changes: 15 additions & 15 deletions lib/ace/document.js
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ var oop = require("./lib/oop");
var EventEmitter = require("./lib/event_emitter").EventEmitter;
var Range = require("./range").Range;
var Anchor = require("./anchor").Anchor;
var unicode = require("./unicode");

/**
* Contains the text of the document. Document can be attached to several [[EditSession `EditSession`]]s.
Expand Down Expand Up @@ -76,7 +77,7 @@ var Document = function(text) {
**/
this.setValue = function(text) {
var len = this.getLength();
this.remove(new Range(0, 0, len, this.getLine(len-1).length));
this.remove(new Range(0, 0, len, unicode.length(this.getLine(len-1))));
this.insert({row: 0, column:0}, text);
};

Expand Down Expand Up @@ -231,13 +232,12 @@ var Document = function(text) {
**/
this.getTextRange = function(range) {
if (range.start.row == range.end.row) {
return this.$lines[range.start.row].substring(range.start.column,
range.end.column);
return unicode.substring(this.$lines[range.start.row], range.start.column, range.end.column);
}
else {
var lines = this.getLines(range.start.row+1, range.end.row-1);
lines.unshift((this.$lines[range.start.row] || "").substring(range.start.column));
lines.push((this.$lines[range.end.row] || "").substring(0, range.end.column));
lines.unshift(unicode.substring(this.$lines[range.start.row] || "", range.start.column));
lines.push(unicode.substring(this.$lines[range.end.row] || "", 0, range.end.column));
return lines.join(this.getNewLineCharacter());
}
};
Expand All @@ -246,7 +246,7 @@ var Document = function(text) {
var length = this.getLength();
if (position.row >= length) {
position.row = Math.max(0, length - 1);
position.column = this.getLine(length-1).length;
position.column = unicode.length(this.getLine(length-1));
}
return position;
};
Expand Down Expand Up @@ -359,8 +359,8 @@ var Document = function(text) {
position = this.$clipPosition(position);
var line = this.$lines[position.row] || "";

this.$lines[position.row] = line.substring(0, position.column);
this.$lines.splice(position.row + 1, 0, line.substring(position.column, line.length));
this.$lines[position.row] = unicode.substring(line, 0, position.column);
this.$lines.splice(position.row + 1, 0, unicode.substring(line, position.column));

var end = {
row : position.row + 1,
Expand Down Expand Up @@ -393,12 +393,12 @@ var Document = function(text) {

var line = this.$lines[position.row] || "";

this.$lines[position.row] = line.substring(0, position.column) + text
+ line.substring(position.column);
this.$lines[position.row] = unicode.substring(line, 0, position.column) + text
+ unicode.substring(line, position.column);

var end = {
row : position.row,
column : position.column + text.length
column : position.column + unicode.length(text)
};

var delta = {
Expand Down Expand Up @@ -440,7 +440,7 @@ var Document = function(text) {
this.removeLines(firstFullRow, lastFullRow);

if (firstFullRow != firstRow) {
this.removeInLine(firstRow, range.start.column, this.getLine(firstRow).length);
this.removeInLine(firstRow, range.start.column, unicode.length(this.getLine(firstRow)));
this.removeNewLine(range.start.row);
}
}
Expand All @@ -465,8 +465,8 @@ var Document = function(text) {

var range = new Range(row, startColumn, row, endColumn);
var line = this.getLine(row);
var removed = line.substring(startColumn, endColumn);
var newLine = line.substring(0, startColumn) + line.substring(endColumn, line.length);
var removed = unicode.substring(line, startColumn, endColumn);
var newLine = unicode.substring(line, 0, startColumn) + unicode.substring(line, endColumn);
this.$lines.splice(row, 1, newLine);

var delta = {
Expand Down Expand Up @@ -509,7 +509,7 @@ var Document = function(text) {
var firstLine = this.getLine(row);
var secondLine = this.getLine(row+1);

var range = new Range(row, firstLine.length, row+1, 0);
var range = new Range(row, unicode.length(firstLine), row+1, 0);
var line = firstLine + secondLine;

this.$lines.splice(row, 2, line);
Expand Down
83 changes: 56 additions & 27 deletions lib/ace/edit_session.js
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ var Range = require("./range").Range;
var Document = require("./document").Document;
var BackgroundTokenizer = require("./background_tokenizer").BackgroundTokenizer;
var SearchHighlight = require("./search_highlight").SearchHighlight;
var unicode = require("./unicode");

/**
*
Expand Down Expand Up @@ -360,10 +361,10 @@ var EditSession = function(text, mode) {
var token, c = 0;
if (column == null) {
i = tokens.length - 1;
c = this.getLine(row).length;
c = unicode.length(this.getLine(row));
} else {
for (var i = 0; i < tokens.length; i++) {
c += tokens[i].value.length;
c += unicode.length(tokens[i].value);
if (c >= column)
break;
}
Expand All @@ -372,7 +373,7 @@ var EditSession = function(text, mode) {
if (!token)
return null;
token.index = i;
token.start = c - token.value.length;
token.start = c - unicode.length(token.value);
return token;
};

Expand Down Expand Up @@ -796,14 +797,14 @@ var EditSession = function(text, mode) {

var inToken = false;
if (column > 0)
inToken = !!line.charAt(column - 1).match(this.tokenRe);
inToken = !!unicode.charAt(line, column - 1).match(this.tokenRe);

if (!inToken)
inToken = !!line.charAt(column).match(this.tokenRe);
inToken = !!unicode.charAt(line, column).match(this.tokenRe);

if (inToken)
var re = this.tokenRe;
else if (/^\s+$/.test(line.slice(column-1, column+1)))
else if (/^\s+$/.test(unicode.substring(line, column-1, column+1)))
var re = /\s/;
else
var re = this.nonTokenRe;
Expand All @@ -813,12 +814,12 @@ var EditSession = function(text, mode) {
do {
start--;
}
while (start >= 0 && line.charAt(start).match(re));
while (start >= 0 && unicode.charAt(line, start).match(re));
start++;
}

var end = column;
while (end < line.length && line.charAt(end).match(re)) {
while (end < unicode.length(line) && unicode.charAt(line, end).match(re)) {
end++;
}

Expand All @@ -836,7 +837,7 @@ var EditSession = function(text, mode) {
var wordRange = this.getWordRange(row, column);
var line = this.getLine(wordRange.end.row);

while (line.charAt(wordRange.end.column).match(/[ \t]/)) {
while (unicode.charAt(line, wordRange.end.column).match(/[ \t]/)) {
wordRange.end.column += 1;
}
return wordRange;
Expand Down Expand Up @@ -1380,7 +1381,7 @@ var EditSession = function(text, mode) {
// front of the chosen point of insertion.
if (!fromRange.isMultiLine() && fromRange.start.row == toRow &&
fromRange.end.column < toColumn)
toColumn -= text.length;
toColumn -= unicode.length(text);

if (fromRange.isMultiLine() && fromRange.end.row < toRow) {
var lines = this.doc.$split(text);
Expand Down Expand Up @@ -1432,9 +1433,9 @@ var EditSession = function(text, mode) {
deleteRange.start.row = i;
deleteRange.end.row = i;
for (var j = 0; j < size; ++j)
if (line.charAt(j) != ' ')
if (unicode.charAt(line, j) != ' ')
break;
if (j < size && line.charAt(j) == '\t') {
if (j < size && unicode.charAt(line, j) == '\t') {
deleteRange.start.column = j;
deleteRange.end.column = j + 1;
} else {
Expand Down Expand Up @@ -1507,7 +1508,7 @@ var EditSession = function(text, mode) {
this.$clipColumnToRow = function(row, column) {
if (column < 0)
return 0;
return Math.min(this.doc.getLine(row).length, column);
return Math.min(unicode.length(this.doc.getLine(row)), column);
};


Expand All @@ -1521,9 +1522,9 @@ var EditSession = function(text, mode) {
var len = this.doc.getLength();
if (row >= len) {
row = len - 1;
column = this.doc.getLine(len-1).length;
column = unicode.length(this.doc.getLine(len-1));
} else {
column = Math.min(this.doc.getLine(row).length, column);
column = Math.min(unicode.length(this.doc.getLine(row)), column);
}
}

Expand All @@ -1547,7 +1548,7 @@ var EditSession = function(text, mode) {
var len = this.doc.getLength() - 1;
if (range.end.row > len) {
range.end.row = len;
range.end.column = this.doc.getLine(len).length;
range.end.column = unicode.length(this.doc.getLine(len));
} else {
range.end.column = this.$clipColumnToRow(
range.end.row,
Expand Down Expand Up @@ -1834,13 +1835,13 @@ var EditSession = function(text, mode) {
}
} else {
walkTokens = this.$getDisplayTokens(
lines[row].substring(lastColumn, column),
unicode.substring(lines[row], lastColumn, column),
tokens.length);
}
tokens = tokens.concat(walkTokens);
}.bind(this),
foldLine.end.row,
lines[foldLine.end.row].length + 1
unicode.length(lines[foldLine.end.row]) + 1
);
// Remove spaces/tabs from the back of the token array.
while (tokens.length != 0 && tokens[tokens.length - 1] >= SPACE)
Expand Down Expand Up @@ -1994,6 +1995,20 @@ var EditSession = function(text, mode) {

for (var i = 0; i < str.length; i++) {
var c = str.charCodeAt(i);

var part = c & 0xfc00;
if (part == 0xdc00)
return null;
else if (part != 0xd800);
else if (++i == str.length)
return null;
else {
var next = str.charCodeAt(i);
if ((next & 0xfc00) != 0xdc00)
return null;
c = 0x10000 | (c & 0x03ff) << 10 | next & 0x03ff;
}

// Tab
if (c == 9) {
tabSize = this.getScreenTabSize(arr.length + offset);
Expand Down Expand Up @@ -2038,9 +2053,23 @@ var EditSession = function(text, mode) {
maxScreenColumn = Infinity;
screenColumn = screenColumn || 0;

var c, column;
for (column = 0; column < str.length; column++) {
c = str.charCodeAt(column);
var c, column = 0;
for (var i = 0; i < str.length; i++, column++) {
c = str.charCodeAt(i);

var part = c & 0xfc00;
if (part == 0xdc00)
return null;
else if (part != 0xd800);
else if (++i == str.length)
return null;
else {
var next = str.charCodeAt(i);
if ((next & 0xfc00) != 0xdc00)
return null;
c = 0x10000 | (c & 0x03ff) << 10 | next & 0x03ff;
}

// tab
if (c == 9) {
screenColumn += this.getScreenTabSize(screenColumn);
Expand Down Expand Up @@ -2204,7 +2233,7 @@ var EditSession = function(text, mode) {
// clip at the end of the document
return {
row: maxRow,
column: this.getLine(maxRow).length
column: unicode.length(this.getLine(maxRow))
}
} else {
line = this.getLine(docRow);
Expand All @@ -2217,7 +2246,7 @@ var EditSession = function(text, mode) {
column = splits[screenRow - row];
if(screenRow > row && splits.length) {
docColumn = splits[screenRow - row - 1] || splits[splits.length - 1];
line = line.substring(docColumn);
line = unicode.substring(line, docColumn);
}
}
}
Expand Down Expand Up @@ -2311,19 +2340,19 @@ var EditSession = function(text, mode) {
textLine = this.getFoldDisplayLine(foldLine, docRow, docColumn);
foldStartRow = foldLine.start.row;
} else {
textLine = this.getLine(docRow).substring(0, docColumn);
textLine = unicode.substring(this.getLine(docRow), 0, docColumn);
foldStartRow = docRow;
}
// Clamp textLine if in wrapMode.
if (this.$useWrapMode) {
var wrapRow = this.$wrapData[foldStartRow];
var screenRowOffset = 0;
while (textLine.length >= wrapRow[screenRowOffset]) {
while (unicode.length(textLine) >= wrapRow[screenRowOffset]) {
screenRow ++;
screenRowOffset++;
}
textLine = textLine.substring(
wrapRow[screenRowOffset - 1] || 0, textLine.length
textLine = unicode.substring(textLine,
wrapRow[screenRowOffset - 1] || 0, unicode.length(textLine)
);
}

Expand Down
Loading

0 comments on commit 2353eef

Please sign in to comment.