Skip to content
This repository has been archived by the owner on Feb 2, 2021. It is now read-only.

Commit

Permalink
Merge pull request #664 from facebook/nekto/utf8_handling
Browse files Browse the repository at this point in the history
Better handling of invalid UTF8 strings.
  • Loading branch information
ExtremeMan committed Feb 9, 2016
2 parents 27caa7d + c9d7dc2 commit 9cdb3bc
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 8 deletions.
1 change: 1 addition & 0 deletions Common/TaskUtil.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ typedef void (^FdOutputLineFeedBlock)(int fd, NSString *);
typedef void (^BlockToRunWhileReading)(void);

NSString *StripAnsi(NSString *inputString);
NSString *StringFromDispatchDataWithBrokenUTF8Encoding(const char *dataPtr, size_t dataSz);

/**
* Returns an array of NSStrings with contents read from fildes.
Expand Down
34 changes: 26 additions & 8 deletions Common/TaskUtil.m
Original file line number Diff line number Diff line change
Expand Up @@ -68,23 +68,41 @@
return outputString;
}

/**
 * Converts a byte buffer that is expected to hold UTF-8 text into an NSString,
 * tolerating malformed input instead of failing.
 *
 * The bytes are run through iconv ("UTF-8" -> "UTF-8") with
 * ICONV_SET_DISCARD_ILSEQ enabled. Every run of bytes iconv manages to convert
 * is appended to the result. Whenever iconv stops on a sequence it cannot
 * decode (EINVAL or EILSEQ), the offending byte and all continuation bytes
 * (10xxxxxx) that directly follow it are skipped and one U+FFFD is appended in
 * their place, after which conversion resumes.
 *
 * @param dataPtr Pointer to the raw bytes.
 * @param dataSz  Number of bytes available at dataPtr.
 * @return The decoded string (empty when there is nothing to decode), or nil
 *         when the iconv descriptor or the scratch buffer cannot be created.
 */
NSString *StringFromDispatchDataWithBrokenUTF8Encoding(const char *dataPtr, size_t dataSz)
{
  NSMutableString *outputString = [NSMutableString string];
  if (dataPtr == NULL || dataSz == 0) {
    return outputString;
  }

  iconv_t cd = iconv_open("UTF-8", "UTF-8");
  if (cd == (iconv_t)-1) {
    return nil;
  }
  int one = 1;
  iconvctl(cd, ICONV_SET_DISCARD_ILSEQ, &one);

  // UTF-8 -> UTF-8 never grows the data, so one input-sized scratch buffer is
  // enough; it is reused on every pass of the loop.
  char *outbuf = malloc(dataSz);
  if (outbuf == NULL) {
    iconv_close(cd);
    return nil;
  }

  char *inbuf = (char *)dataPtr;
  size_t bytesToProcess = dataSz;
  while (bytesToProcess > 0) {
    size_t inbytesleft = bytesToProcess;
    size_t outbytesleft = bytesToProcess;
    char *outptr = outbuf;
    // iconv() itself advances inbuf past every byte it consumed.
    size_t iconvResult = iconv(cd, &inbuf, &inbytesleft, &outptr, &outbytesleft);
    // Capture errno right away: the message sends below may overwrite it.
    int iconvErrno = (iconvResult == (size_t)-1) ? errno : 0;

    size_t outbytesLength = bytesToProcess - outbytesleft;
    if (outbytesLength > 0) {
      // No copy is needed: appendString: copies the characters before outbuf
      // is overwritten by the next pass.
      NSString *chunk = [[NSString alloc] initWithBytesNoCopy:outbuf
                                                       length:outbytesLength
                                                     encoding:NSUTF8StringEncoding
                                                 freeWhenDone:NO];
      if (chunk != nil) {
        [outputString appendString:chunk];
      }
    }

    if (iconvResult == (size_t)-1) {
      if ((iconvErrno == EINVAL || iconvErrno == EILSEQ) && inbytesleft > 0) {
        // Skip the first byte and then all following 10xxxxxx bytes (see the
        // UTF-8 description for more details). The remaining count is checked
        // before each dereference so the buffer is never over-read.
        do {
          inbuf++;
          inbytesleft--;
        } while (inbytesleft > 0 && (((unsigned char)*inbuf) & 0xC0) == 0x80);
        [outputString appendString:@"\uFFFD"];
      } else if (inbytesleft == bytesToProcess) {
        // Unexpected failure that consumed nothing: stop instead of spinning.
        break;
      }
    }
    bytesToProcess = inbytesleft;
  }

  free(outbuf);
  iconv_close(cd);
  return outputString;
}

static NSArray *LinesFromDispatchData(dispatch_data_t data, BOOL omitNewlineCharacters, BOOL forceUntilTheEnd, size_t *convertedSize)
Expand Down
12 changes: 12 additions & 0 deletions xctool/xctool-tests/TaskUtilTests.m
Original file line number Diff line number Diff line change
Expand Up @@ -80,4 +80,16 @@ - (void)testLaunchTaskAndFeedOutputLinesToBlockMultibyteUtf8
}
}

/**
 * Exercises StringFromDispatchDataWithBrokenUTF8Encoding() twice: once with the
 * bytes of the BrokenUTF8EncodingInFile.txt fixture, which must decode to the
 * contents of the matching "-FIXED" fixture, and once with a plain string,
 * which must come back unchanged.
 */
- (void)testConversionToUT8OfBrokenUTF8SequenceOfBytes
{
  // Fixture bytes -> decoded string, compared against the expected fixture.
  NSData *brokenBytes = [NSData dataWithContentsOfFile:TEST_DATA @"BrokenUTF8EncodingInFile.txt"];
  NSString *decoded = StringFromDispatchDataWithBrokenUTF8Encoding([brokenBytes bytes], [brokenBytes length]);
  NSString *expected = [NSString stringWithContentsOfFile:TEST_DATA @"BrokenUTF8EncodingInFile-FIXED.txt"
                                                 encoding:NSUTF8StringEncoding
                                                    error:nil];
  XCTAssertEqualObjects(decoded, expected);

  // A string round-tripped through its own UTF-8 encoding must be returned as is.
  NSString *plainText = @"qwertyuiopasdfghjk';123^&*()_<>?";
  NSData *plainBytes = [plainText dataUsingEncoding:NSUTF8StringEncoding];
  NSString *roundTripped = StringFromDispatchDataWithBrokenUTF8Encoding([plainBytes bytes], [plainBytes length]);
  XCTAssertEqualObjects(roundTripped, plainText);
}

@end
Binary file not shown.
Binary file not shown.

0 comments on commit 9cdb3bc

Please sign in to comment.