From ec738a350981549b1d8145a5bdb622716b84360c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Sousa?= Date: Tue, 13 Feb 2018 22:07:59 -0300 Subject: [PATCH 1/3] Enhance OFX Parser Fix #34 --- lib/OfxParser/Parser.php | 52 +++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 27 deletions(-) diff --git a/lib/OfxParser/Parser.php b/lib/OfxParser/Parser.php index 081c82a..ed0dc89 100644 --- a/lib/OfxParser/Parser.php +++ b/lib/OfxParser/Parser.php @@ -39,10 +39,8 @@ public function loadFromFile($ofxFile) public function loadFromString($ofxContent) { $ofxContent = utf8_encode($ofxContent); - $ofxContent = $this->conditionallyAddNewlines($ofxContent); - $sgmlStart = stripos($ofxContent, ''); - $ofxSgml = trim(substr($ofxContent, $sgmlStart)); + $ofxSgml = trim($this->fixNewlines(substr($ofxContent, $sgmlStart))); $ofxXml = $this->convertSgmlToXml($ofxSgml); @@ -52,18 +50,17 @@ public function loadFromString($ofxContent) } /** - * Detect if the OFX file is on one line. If it is, add newlines automatically. + * Prepare OFX file contents. * * @param string $ofxContent * @return string */ - private function conditionallyAddNewlines($ofxContent) + private function fixNewlines($ofxContent) { - if (preg_match('/.*<\/OFX>/', $ofxContent) === 1) { - return str_replace('<', "\n<", $ofxContent); // add line breaks to allow XML to parse - } - - return $ofxContent; + // clear all new line characters first + $ofxContent = str_replace(["\r", "\n"], '', $ofxContent); + // add line breaks before opening tags only, to allow XML to parse + return preg_replace('/<[^\/!]/', "\n" . '$0', $ofxContent); } /** @@ -94,17 +91,22 @@ private function xmlLoadString($xmlString) */ private function closeUnclosedXmlTags($line) { - // Matches: blah - // Does not match: - // Does not match: blah - if (preg_match( - "/<([A-Za-z0-9.]+)>([\wà-úÀ-Ú0-9\.\-\_\+\, ;:\[\]\'\&\/\\\*\(\)\+\{\|\}\!\£\$\?=@€£#%±§~`]+)$/", - trim($line), - $matches - )) { - return "<{$matches[1]}>{$matches[2]}"; + $line = trim($line); + $tag = ltrim(substr($line, 1, strpos($line, '>') - 1), '/'); + + // Line is "" or "" + if ($line == '<' . $tag . '>' || $line == '') { + return $line; } - return $line; + + // Tag is properly closed + if (strpos($line, '') !== false) { + return $line; + } + + $lines = explode("\n", str_replace(''; + return implode('', $lines); } /** @@ -115,15 +117,11 @@ private function closeUnclosedXmlTags($line) */ private function convertSgmlToXml($sgml) { - $sgml = str_replace(["\r\n", "\r"], "\n", $sgml); - - $lines = explode("\n", $sgml); - $xml = ''; - foreach ($lines as $line) { - $xml .= trim($this->closeUnclosedXmlTags($line)) . "\n"; + foreach (explode("\n", $sgml) as $line) { + $xml .= $this->closeUnclosedXmlTags($line) . "\n"; } - return trim($xml); + return rtrim($xml); } } From f422ad7a6ba2ca3b4583e2a80d689e359c880c1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Sousa?= Date: Wed, 14 Feb 2018 11:50:16 -0300 Subject: [PATCH 2/3] Added test fixture --- tests/OfxParser/ParserTest.php | 1 + tests/fixtures/ofxdata-oneline-with-closing-tags.ofx | 10 ++++++++++ 2 files changed, 11 insertions(+) create mode 100644 tests/fixtures/ofxdata-oneline-with-closing-tags.ofx diff --git a/tests/OfxParser/ParserTest.php b/tests/OfxParser/ParserTest.php index 6e5c773..b027dd1 100644 --- a/tests/OfxParser/ParserTest.php +++ b/tests/OfxParser/ParserTest.php @@ -161,6 +161,7 @@ public function loadFromStringProvider() return [ 'ofxdata.ofx' => [dirname(__DIR__).'/fixtures/ofxdata.ofx'], 'ofxdata-oneline.ofx' => [dirname(__DIR__).'/fixtures/ofxdata-oneline.ofx'], + 'ofxdata-oneline-with-closing-tags.ofx' => [dirname(__DIR__).'/fixtures/ofxdata-oneline-with-closing-tags.ofx'], 'ofxdata-cmfr.ofx' => [dirname(__DIR__).'/fixtures/ofxdata-cmfr.ofx'], 'ofxdata-bb.ofx' => [dirname(__DIR__).'/fixtures/ofxdata-bb.ofx'], 'ofxdata-bb-two-stmtrs.ofx' => [dirname(__DIR__).'/fixtures/ofxdata-bb-two-stmtrs.ofx'], diff --git a/tests/fixtures/ofxdata-oneline-with-closing-tags.ofx b/tests/fixtures/ofxdata-oneline-with-closing-tags.ofx new file mode 100644 index 0000000..380e40c --- /dev/null +++ b/tests/fixtures/ofxdata-oneline-with-closing-tags.ofx @@ -0,0 +1,10 @@ +OFXHEADER:100 +DATA:OFXSGML +VERSION:102 +SECURITY:NONE +ENCODING:USASCII +CHARSET:1252 +COMPRESSION:NONE +OLDFILEUID:NONE +NEWFILEUID:NONE +0INFO20160720101818FRE14690026985560INFO1469002698556EURXXXXXXXXXXXXXXXXXXXXXCHECKING552016020820160720DEBIT20160719-1.55PU3YMMLLQ9FRAIS TRIMESTRIELSPAYMENT20160503-383.00PU3Y_VLC48PRELEVEMENTDEBIT20160418-1.55PU3YIVLLQ9FRAISDIRECTDEP20160411200.00PU3Y5ALLCKVIREMENT DE MR XXXXDIRECTDEP20160329-80.00PU3YBALLL2VIREMENT POURDIRECTDEP20160321-600.00PU3YWALLC4VIREMENTDIRECTDEP20160216-1000.00PU3Y36LCPFVIREMENTPAYMENT20160216-111.00PU3Y36LC4IPRELEVEMENT100.0020160719100.0020160719 \ No newline at end of file From 87528bbde14be99e36d6da0022f41e0bc1fa489b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Sousa?= Date: Wed, 14 Feb 2018 13:13:08 -0300 Subject: [PATCH 3/3] Apply requested changes --- lib/OfxParser/Parser.php | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/OfxParser/Parser.php b/lib/OfxParser/Parser.php index ed0dc89..611f4ae 100644 --- a/lib/OfxParser/Parser.php +++ b/lib/OfxParser/Parser.php @@ -40,7 +40,7 @@ public function loadFromString($ofxContent) { $ofxContent = utf8_encode($ofxContent); $sgmlStart = stripos($ofxContent, ''); - $ofxSgml = trim($this->fixNewlines(substr($ofxContent, $sgmlStart))); + $ofxSgml = trim($this->normalizeNewlines(substr($ofxContent, $sgmlStart))); $ofxXml = $this->convertSgmlToXml($ofxSgml); @@ -50,12 +50,12 @@ public function loadFromString($ofxContent) } /** - * Prepare OFX file contents. + * Normalize newlines by removing and adding newlines only before opening tags * * @param string $ofxContent * @return string */ - private function fixNewlines($ofxContent) + private function normalizeNewlines($ofxContent) { // clear all new line characters first $ofxContent = str_replace(["\r", "\n"], '', $ofxContent); @@ -95,7 +95,7 @@ private function closeUnclosedXmlTags($line) $tag = ltrim(substr($line, 1, strpos($line, '>') - 1), '/'); // Line is "" or "" - if ($line == '<' . $tag . '>' || $line == '') { + if ($line === '<' . $tag . '>' || $line === '') { return $line; }