diff --git a/regression_testing/cases/github-cases/case-1008.conf b/regression_testing/cases/github-cases/case-1008.conf
new file mode 100755
index 00000000..9e59ba03
--- /dev/null
+++ b/regression_testing/cases/github-cases/case-1008.conf
@@ -0,0 +1 @@
+force-output: yes
diff --git a/regression_testing/cases/github-cases/case-1008@1.html b/regression_testing/cases/github-cases/case-1008@1.html
new file mode 100644
index 00000000..5c38c196
--- /dev/null
+++ b/regression_testing/cases/github-cases/case-1008@1.html
@@ -0,0 +1 @@
+
+
+
+
+
+
+
+
diff --git a/regression_testing/cases/github-expects/case-1008.txt b/regression_testing/cases/github-expects/case-1008.txt
new file mode 100644
index 00000000..0d7ddfd9
--- /dev/null
+++ b/regression_testing/cases/github-expects/case-1008.txt
@@ -0,0 +1,26 @@
+line 1 column 1 - Warning: 
 missing '>' for end of tag
+line 1 column 1 - Warning: missing <!DOCTYPE> declaration
+line 1 column 1 - Warning: inserting implicit <body>
+line 1 column 6 - Warning: missing 
+line 1 column 1 - Warning: missing 
+line 1 column 1 - Warning: inserting missing 'title' element
+line 1 column 6 - Warning: trimming empty
+line 1 column 1 - Warning: trimming empty
+Info: Document content looks like HTML5
+Tidy found 8 warnings and 0 errors!
+
+One or more empty elements were present in the source document but
+dropped on output. If these elements are necessary or you don't want
+this behavior, then consider setting the option "drop-empty-elements"
+to no.
+
+About HTML Tidy: https://github.com/htacg/tidy-html5
+Bug reports and comments: https://github.com/htacg/tidy-html5/issues
+Official mailing list: https://lists.w3.org/Archives/Public/public-htacg/
+Latest HTML specification: https://html.spec.whatwg.org/multipage/
+Validate your HTML documents: https://validator.w3.org/nu/
+Lobby your company to join the W3C: https://www.w3.org/Consortium
+
+Do you speak a language other than English, or a different variant of 
+English? Consider helping us to localize HTML Tidy. For details please see 
+https://github.com/htacg/tidy-html5/blob/master/README/LOCALIZE.md
diff --git a/src/clean.c b/src/clean.c
index e0cd3baa..48e3a527 100644
--- a/src/clean.c
+++ b/src/clean.c
@@ -1824,13 +1824,23 @@ void TY_(NormalizeSpaces)(Lexer *lexer, Node *node)
                 c = (byte) lexer->lexbuf[i];
 
                 /* look for UTF-8 multibyte character */
+                int bytes = 0;
                 if ( c > 0x7F )
-                    i += TY_(GetUTF8)( lexer->lexbuf + i, &c );
+                    bytes = TY_(GetUTF8)( lexer->lexbuf + i, &c );
 
                 if ( c == 160 )
                     c = ' ';
 
-                p = TY_(PutUTF8)(p, c);
+                /* don't copy replacement char on invalid UTF-8, as it might */
+                /* be larger than original char and overflow the buffer */
+                if(bytes > 0) {
+                    p = TY_(PutUTF8)(p, c);
+                } else {
+                    *p = lexer->lexbuf[i];
+                    p++;
+                }
+
+                i += bytes;
             }
             node->end = p - lexer->lexbuf;
         }
diff --git a/src/mappedio.c b/src/mappedio.c
index c2c403d9..c29d354a 100644
--- a/src/mappedio.c
+++ b/src/mappedio.c
@@ -156,7 +156,7 @@ static int TIDY_CALL mapped_getByte( void *sourceData )
 static Bool TIDY_CALL mapped_eof( void *sourceData )
 {
     MappedFileSource *data = sourceData;
-    return ( data->pos >= data->size );
+    return ( data->pos + (data->iter - data->view) >= data->size );
 }
 
 static void TIDY_CALL mapped_ungetByte( void *sourceData, byte ARG_UNUSED(bt) )
diff --git a/src/parser.c b/src/parser.c
index fd066113..3b77753d 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -4458,9 +4458,10 @@ Node* TY_(ParsePre)( TidyDocImpl* doc, Node *pre, GetTokenMode ARG_UNUSED(mode)
             DEBUG_LOG_EXIT;
             return NULL;
         }
+
+        TY_(InlineDup)( doc, NULL ); /* tell lexer to insert inlines if needed */
     }
 
-    TY_(InlineDup)( doc, NULL ); /* tell lexer to insert inlines if needed */
 
     while ( state != STATE_COMPLETE )
     {
diff --git a/src/tmbstr.c b/src/tmbstr.c
index e3526987..447c6dd2 100644
--- a/src/tmbstr.c
+++ b/src/tmbstr.c
@@ -255,7 +255,7 @@ void TY_(strrep)(tmbstr buffer, ctmbstr str, ctmbstr rep)
         if(p)
         {
             char buf[1024];
-            memset(buf,'\0',strlen(buf));
+            memset(buf,'\0',sizeof(buf));
 
             if(buffer == p)
             {