diff --git a/SdlXliff Compare/Sdl.Community.XliffCompare.Core/Comparer/TextComparer/TextComparer.cs b/SdlXliff Compare/Sdl.Community.XliffCompare.Core/Comparer/TextComparer/TextComparer.cs index 766505cbef..942525c2c7 100644 --- a/SdlXliff Compare/Sdl.Community.XliffCompare.Core/Comparer/TextComparer/TextComparer.cs +++ b/SdlXliff Compare/Sdl.Community.XliffCompare.Core/Comparer/TextComparer/TextComparer.cs @@ -329,50 +329,65 @@ internal static WordsCollection Parse(List xSegmentSect else { - while (curPos < xSegmentSection.Content.Length) - { - var prevPos = curPos; - while (curPos < xSegmentSection.Content.Length && - (char.IsControl(xSegmentSection.Content[curPos]) - || char.IsWhiteSpace(xSegmentSection.Content[curPos]))) - { - curPos++; - } - prefix += xSegmentSection.Content.Substring(prevPos, curPos - prevPos); - - if (curPos == xSegmentSection.Content.Length) - { - - if (prefix != string.Empty) - { - words.Add(new Word(string.Empty, prefix, string.Empty)); - } - break; - } - - prevPos = curPos; - while (curPos < xSegmentSection.Content.Length && - !char.IsControl(xSegmentSection.Content[curPos]) && - !char.IsWhiteSpace(xSegmentSection.Content[curPos])) - { - curPos++; - } - var word = xSegmentSection.Content.Substring(prevPos, curPos - prevPos); - - - prevPos = curPos; - while (curPos < xSegmentSection.Content.Length && - (char.IsControl(xSegmentSection.Content[curPos]) || - char.IsWhiteSpace(xSegmentSection.Content[curPos]))) - { - curPos++; - } - suffix = xSegmentSection.Content.Substring(prevPos, curPos - prevPos); - ProcessWord(words, prefix, word, suffix); - prefix = string.Empty; - } - } - } + while (curPos < xSegmentSection.Content.Length) + { + var prevPos = curPos; + while (curPos < xSegmentSection.Content.Length && + xSegmentSection.Content[curPos] != '\xa0' && // stop at nbsp + (char.IsControl(xSegmentSection.Content[curPos]) + || char.IsWhiteSpace(xSegmentSection.Content[curPos]))) + { + curPos++; + } + prefix += 
xSegmentSection.Content.Substring(prevPos, curPos - prevPos); + + if (curPos == xSegmentSection.Content.Length) + { + + if (prefix != string.Empty) + { + words.Add(new Word(string.Empty, prefix, string.Empty)); + } + break; + } + + prevPos = curPos; + + if (xSegmentSection.Content[curPos] == '\xa0') // make nbsp a word + { + // a word may be a string of non-breaking spaces ... + while (curPos < xSegmentSection.Content.Length && xSegmentSection.Content[curPos] == '\xa0') // bounds check: content may end with nbsp + { + curPos++; + } + } + else + { + // ... or a string of non-whitespace/non-control + while (curPos < xSegmentSection.Content.Length && + !char.IsControl(xSegmentSection.Content[curPos]) && + !char.IsWhiteSpace(xSegmentSection.Content[curPos])) + { + curPos++; + } + } + var word = xSegmentSection.Content.Substring(prevPos, curPos - prevPos); + + + prevPos = curPos; + while (curPos < xSegmentSection.Content.Length && + xSegmentSection.Content[curPos] != '\xa0' && // stop at nbsp + (char.IsControl(xSegmentSection.Content[curPos]) || + char.IsWhiteSpace(xSegmentSection.Content[curPos]))) + { + curPos++; + } + suffix = xSegmentSection.Content.Substring(prevPos, curPos - prevPos); + ProcessWord(words, prefix, word, suffix); + prefix = string.Empty; + } + } + } return words; } diff --git a/StudioViews/StudioViews/Services/ProjectFileService.cs b/StudioViews/StudioViews/Services/ProjectFileService.cs index 839db32ab5..0caf9f1d13 100644 --- a/StudioViews/StudioViews/Services/ProjectFileService.cs +++ b/StudioViews/StudioViews/Services/ProjectFileService.cs @@ -242,7 +242,16 @@ public List GetProjectFiles(string filePath) try { - var xml = XDocument.Load(filePath, LoadOptions.PreserveWhitespace); + //var xml = XDocument.Load(filePath, LoadOptions.PreserveWhitespace); + XDocument xml = null; + // avoid System.Xml.XmlException Message: '?', hexadecimal value 0x1C, 0x1E, ... is an invalid character. 
+ XmlReaderSettings xmlReaderSettings = new XmlReaderSettings { CheckCharacters = false }; + using (XmlReader xmlReader = XmlReader.Create(filePath, xmlReaderSettings)) + { + xmlReader.MoveToContent(); + xml = XDocument.Load(xmlReader, LoadOptions.PreserveWhitespace); + } + var xliff = xml.Root; if (xliff != null && string.Compare(xliff.Name.LocalName, "xliff", StringComparison.InvariantCultureIgnoreCase) == 0) diff --git a/StudioViews/StudioViews/ViewModel/StudioViewsEditorViewModel.cs b/StudioViews/StudioViews/ViewModel/StudioViewsEditorViewModel.cs index b70f70ed1f..3b3e73c426 100644 --- a/StudioViews/StudioViews/ViewModel/StudioViewsEditorViewModel.cs +++ b/StudioViews/StudioViews/ViewModel/StudioViewsEditorViewModel.cs @@ -835,8 +835,9 @@ private ExportResult ExportFiles(IReadOnlyCollection projectFiles, InputFiles = new List(projectFiles.Select(a => a.LocalFilePath)) }; - var sourceLanguage = projectFiles.FirstOrDefault()?.SourceFile.Language.CultureInfo; - var targetLanguage = projectFiles.FirstOrDefault()?.Language.CultureInfo; + // UNNECESSARY AND POSSIBLY A SOURCE FOR ERROR: SourceFile might be null in a WorldServer project + //var sourceLanguage = projectFiles.FirstOrDefault()?.SourceFile.Language.CultureInfo; + //var targetLanguage = projectFiles.FirstOrDefault()?.Language.CultureInfo; foreach (var documentFile in projectFiles) {