Skip to content

Commit

Permalink
Tentative fix for multibyte character handling bug
Browse files: browse the repository at this point in the history
  • Loading branch information
kevinboone committed Apr 3, 2017
1 parent 96b82fb commit 589e5aa
Showing 1 changed file with 22 additions and 2 deletions.
24 changes: 22 additions & 2 deletions epub2txt.c
Original file line number Diff line number Diff line change
Expand Up @@ -452,7 +452,7 @@ void epub2txt_flush_para (const klib_String *para, int width, BOOL notrim)
output_para++;
if (start_para != 0 && output_para < start_para) return;

// While it is be quicker just to dump the para to stdout in
// While it is quicker just to dump the para to stdout in
// unlimited-line-length mode, doing this doesn't get us the
// benefit of trimming whitespace, etc
if (width == 0 && notrim)
Expand Down Expand Up @@ -486,7 +486,7 @@ void epub2txt_flush_para (const klib_String *para, int width, BOOL notrim)
{
char c = s[i];

if (mode == MODE_START && (c == ' '
/*if (mode == MODE_START && (c == ' '
|| (unsigned char) c == (unsigned char)0xC2))
{
if (i < l - 1)
Expand All @@ -498,11 +498,30 @@ void epub2txt_flush_para (const klib_String *para, int width, BOOL notrim)
}
// Absorb leading spaces
}
*/
if ((mode == MODE_START && (c == ' '
|| (unsigned char) c == (unsigned char)0xC2))
&&
(i < l - 1)
&&
((unsigned char)s[i + 1] == (unsigned char)0xA0))
{
i++;
}
// Absorb leading spaces
else if (mode == MODE_START)
{
klib_string_append_byte (word, c);
mode = MODE_WORD;
}
else if ((mode == MODE_SPACE && (c == ' '
|| (unsigned char) c == (unsigned char)0xC2))
&&
((unsigned char)s[i + 1] == (unsigned char)0xA0))
{
i++;
}
/*
else if (mode == MODE_SPACE && (c == ' '
|| (unsigned char) c == (unsigned char)0xC2))
{
Expand All @@ -511,6 +530,7 @@ void epub2txt_flush_para (const klib_String *para, int width, BOOL notrim)
i++;
}
}
*/
else if (mode == MODE_SPACE)
{
mode = MODE_WORD;
Expand Down

0 comments on commit 589e5aa

Please sign in to comment.