Skip to content

Commit

Permalink
Encoding support in CSV formats
Browse files Browse the repository at this point in the history
  • Loading branch information
ImanuelBertrand committed Nov 7, 2023
1 parent 02273c2 commit 071a3e1
Show file tree
Hide file tree
Showing 6 changed files with 57 additions and 8 deletions.
7 changes: 7 additions & 0 deletions Controller/Adminhtml/CsvFormat/Save.php
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,12 @@ public function execute()
{
$id = $this->getRequest()->getParam('entity_id');

$encoding = $this->getRequest()->getParam('encoding');
if (!in_array($encoding, mb_list_encodings())) {
$this->messageManager->addErrorMessage(__('Invalid encoding. Valid encodings: %1', implode(', ', mb_list_encodings())));
return $this->resultRedirectFactory->create()->setPath('*/*/edit', ['id' => $id]);
}

try {
$format = !empty($id)
? $this->csvFormatRepository->getById($id)
Expand All @@ -51,6 +57,7 @@ public function execute()
$format->setThousandsSeparator($this->getRequest()->getParam('thousands_separator'));
$format->setDecimalSeparator($this->getRequest()->getParam('decimal_separator'));
$format->setDateFormat($this->getRequest()->getParam('date_format'));
$format->setEncoding($this->getRequest()->getParam('encoding'));

foreach ($format::COLUMNS as $column) {
$format->setData($column . '_column', $this->getRequest()->getParam($column . '_column'));
Expand Down
30 changes: 25 additions & 5 deletions Lib/Csv.php
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ class Csv extends CoreCsv
protected int $ignoreLeadingLines = 0;
protected int $ignoreTailingLines = 0;
protected bool $ignoreInvalidLines = false;
protected string $encoding = 'UTF-8';

public function __construct(File $file, Logger $logger)
{
Expand Down Expand Up @@ -62,6 +63,16 @@ public function setIgnoreInvalidLines(bool $value): static
return $this;
}

/**
* Set the character encoding the CSV file is expected to be in.
*
* The value is stored as-is; nothing is validated here. getData() later checks
* it against mb_list_encodings() (throwing an Exception when it is not listed)
* and, when it is anything other than 'UTF-8', converts the file contents from
* this encoding to UTF-8 before handing them to the parent CSV reader.
*
* @param string $value Encoding name; the property defaults to 'UTF-8' when this setter is never called
* @return $this
*/
public function setEncoding(string $value): static
{
$this->encoding = $value;
return $this;
}

/**
* Get data from CSV file and return data as array
*
Expand All @@ -72,18 +83,27 @@ public function setIgnoreInvalidLines(bool $value): static
*/
public function getData($file)
{
$tempFilename = tempnam(sys_get_temp_dir(), 'csv');
$contents = $this->file->fileGetContents($file);

// Remove UTF8 BOM if present
if (substr($contents, 0, 3) == pack('CCC', 0xef, 0xbb, 0xbf)) {
if (!in_array($this->encoding, mb_list_encodings())) {
throw new Exception('Invalid CSV file encoding: ' . $this->encoding);
} elseif ($this->encoding !== 'UTF-8') {
$contents = mb_convert_encoding($contents, 'UTF-8', $this->encoding);

if ($contents === false) {
throw new Exception('Encoding the file to UTF-8 failed');
}
}

// Remove the UTF-8 BOM if present (contents are expected to be UTF-8 at this point)
if (substr($contents, 0, 3) === pack('CCC', 0xef, 0xbb, 0xbf)) {
$contents = substr($contents, 3);
}

// Write to temp file, read it with the base CSV class and delete the temp file
$tempFilename = tempnam(sys_get_temp_dir(), 'csv');
$this->file->filePutContents($tempFilename, $contents);

$data = parent::getData($tempFilename);

$this->file->deleteFile($tempFilename);

if ($this->ignoreLeadingLines > 0) {
Expand Down
17 changes: 14 additions & 3 deletions Model/CsvFormat.php
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
*
* @method string getName()
* @method setName(string $value)
* @method string getEncoding()
* @method setEncoding(string $value)
* @method bool getHasHeader()
* @method setHasHeader(bool $value)
* @method string getDelimiter()
Expand Down Expand Up @@ -129,7 +131,7 @@ protected function parseCsvContent(array $rows): array
$csvNames = explode($this->getDelimiter(), $csvName);
$values = [];
foreach ($csvNames as $_csvName) {
$values[] = $this->getValue($row, trim($_csvName), $this->getData($name . '_regex'));
$values[] = $this-> getValue($row, trim($_csvName), $this->getData($name . '_regex'));
}
$rowValues[$name] = trim(implode(' / ', $values), ' /');
}
Expand All @@ -149,6 +151,7 @@ protected function parseCsvContent(array $rows): array
* @param string $column
* @param string $regexPattern
* @return string
* @throws Exception
*/
public function getValue(array $row, string $column, string $regexPattern): string
{
Expand All @@ -157,7 +160,14 @@ public function getValue(array $row, string $column, string $regexPattern): stri
return '';
}

$columnContent = trim($row[$column]) ?? '';
try {
$columnContent = trim($row[$column]) ?? '';
} catch (Exception $e) {
$this->logger->error($e->getMessage());
$this->logger->error('Column: ' . $column);
$this->logger->error('Row: ' . json_encode($row));
throw $e;
}

if (empty($regexPattern)) {
return $columnContent;
Expand Down Expand Up @@ -246,7 +256,8 @@ protected function getCsvProcessor(): Csv
->setEnclosure($this->getEnclosure())
->setIgnoreLeadingLines($this->getIgnoreLeadingLines())
->setIgnoreTailingLines($this->getIgnoreTailingLines())
->setIgnoreInvalidLines($this->getIgnoreInvalidLines());
->setIgnoreInvalidLines($this->getIgnoreInvalidLines())
->setEncoding($this->getEncoding() ?? 'UTF-8');
}

/**
Expand Down
1 change: 1 addition & 0 deletions etc/db_schema.xml
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@
<table name="banksync_csv_format">
<column xsi:type="int" name="entity_id" unsigned="true" nullable="false" identity="true" comment="Entity ID"/>
<column xsi:type="varchar" name="name" nullable="false" comment="Name"/>
<column xsi:type="varchar" name="encoding" nullable="false" comment="File encoding"/>
<column xsi:type="boolean" name="has_header" nullable="false" default="1" comment="Has Header"/>
<column xsi:type="varchar" name="delimiter" nullable="false" comment="Delimiter"/>
<column xsi:type="varchar" name="enclosure" nullable="false" comment="Enclosure"/>
Expand Down
1 change: 1 addition & 0 deletions etc/db_schema_whitelist.json
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@
"banksync_csv_format": {
"column": {
"name": true,
"encoding": true,
"has_header": true,
"delimiter": true,
"enclosure": true,
Expand Down
9 changes: 9 additions & 0 deletions view/adminhtml/ui_component/csvformat_edit_form.xml
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,15 @@
</settings>
</input>

<!--
Free-text field for the CSV file encoding, bound to the "encoding" data scope.
The value is validated with an exact, case-sensitive match against
mb_list_encodings(), so aliases such as "latin1" or "utf-8" are rejected;
the notice below tells the user to enter the canonical name.
-->
<input name="encoding">
<settings>
<label>File encoding</label>
<dataScope>encoding</dataScope>
<required>true</required>
<notice>Supported values are the exact names returned by mb_list_encodings(), e.g. UTF-8, ISO-8859-1 or Windows-1252 (case-sensitive)</notice>
</settings>
</input>

<checkbox name="has_header">
<settings>
<dataScope>has_header</dataScope>
Expand Down

0 comments on commit 071a3e1

Please sign in to comment.