From 4cd77fb1aa0780b966f624c169663da28a45b2c1 Mon Sep 17 00:00:00 2001 From: Mantas Date: Thu, 4 Apr 2024 11:06:35 +0300 Subject: [PATCH] Fix line endings --- src/Code/Converters/CsvConverter.php | 211 ++++++++++++++++++++++++++- 1 file changed, 210 insertions(+), 1 deletion(-) diff --git a/src/Code/Converters/CsvConverter.php b/src/Code/Converters/CsvConverter.php index 9e11bec..ff5d3de 100644 --- a/src/Code/Converters/CsvConverter.php +++ b/src/Code/Converters/CsvConverter.php @@ -1 +1,210 @@ - $column) { if (preg_match(self::timeRegex(), $column)) { $start_time_column = $k; $checked_column = $k; break; } } if ($start_time_column !== null) { for ($i = $checked_column + 1; $i < $column_count; $i++) { $column = $last_row[$i]; if (TxtConverter::hasText($column)) { break; } if (preg_match(self::timeRegex(), $column)) { $end_time_column = $i; $checked_column = $i; break; } } } for ($i = $checked_column + 1; $i < $column_count; $i++) { $column = $last_row[$i]; if (TxtConverter::hasText($column)) { $text_column = $i; break; } } if ($text_column === null) { throw new UserException('No text (CsvConverter)'); } $data_string = ''; $found_data = false; foreach ($data as $row) { if (!$found_data && $start_time_column !== null) { $is_start_time = preg_match(self::timeRegex(), $row[$start_time_column]); if (!$is_start_time) { continue; // skip few first rows if label or empty } } if (!$found_data && !TxtConverter::hasText($row[$text_column])) { continue; } $found_data = true; if ($start_time_column !== null) { $start_time = $row[$start_time_column]; if (is_numeric($start_time)) { $start_time = number_format($start_time, 3, '.', ''); } $data_string .= "\n" . $start_time; } if ($end_time_column !== null) { $end_time = $row[$end_time_column]; if (is_numeric($end_time)) { $end_time = number_format($end_time, 3, '.', ''); } $data_string .= ' ' . $end_time; } $data_string .= "\n" . $row[$text_column]; } return (new TxtConverter)->fileContentToInternalFormat($data_string, ''); } /** * Convert library's "internal format" (array) to file's content * * @param array $internal_format Internal format * @return string Converted file content */ public function internalFormatToFileContent(array $internal_format , array $options) { $data = [['Start', 'End', 'Text']]; foreach ($internal_format as $k => $block) { $start = $block['start']; $end = $block['end']; $text = implode(" ", $block['lines']); $data[] = [$start, $end, $text]; } ob_start(); $fp = fopen('php://output', 'w'); foreach ($data as $fields) { fputcsv($fp, $fields); } $file_content = ob_get_clean(); fclose($fp); return $file_content; } private static function csvToArray($content) { $fp = fopen("php://temp", 'r+'); fputs($fp, $content); rewind($fp); $separator = self::detectSeparator($content); $csv = []; while ( ($data = fgetcsv($fp, 0, $separator) ) !== false ) { $csv[] = $data; } fclose($fp); $csv2 = []; foreach ($csv as $row) { if (!isset($row[0]) || !isset($row[1])) { continue; } if (trim($row[0]) === '' && trim($row[1]) === '') { continue; } $csv2[] = $row; } return $csv2; } private static function detectSeparator($file_content) { $lines = explode("\n", $file_content); $results = []; foreach ($lines as $line) { foreach (self::$allowedSeparators as $delimiter) { $count = count(explode($delimiter, $line)); if ($count < 2) continue; // delimiter not found in line, minimum 2 cols (timestamp + text) if (empty($results[$delimiter])) { $results[$delimiter] = []; } $results[$delimiter][] = $count; } } foreach ($results as $delimiter => $value) { $flipped = array_flip($value); $results[$delimiter] = max($flipped); } arsort($results, SORT_NUMERIC); return !empty($results) ? key($results) : self::$allowedSeparators[0]; } } \ No newline at end of file + $column) { + if (preg_match(self::timeRegex(), $column)) { + $start_time_column = $k; + $checked_column = $k; + break; + } + } + if ($start_time_column !== null) { + for ($i = $checked_column + 1; $i < $column_count; $i++) { + $column = $last_row[$i]; + if (TxtConverter::hasText($column)) { + break; + } + if (preg_match(self::timeRegex(), $column)) { + $end_time_column = $i; + $checked_column = $i; + break; + } + } + } + for ($i = $checked_column + 1; $i < $column_count; $i++) { + $column = $last_row[$i]; + if (TxtConverter::hasText($column)) { + $text_column = $i; + break; + } + } + + if ($text_column === null) { + throw new UserException('No text (CsvConverter)'); + } + + $data_string = ''; + $found_data = false; + foreach ($data as $row) { + if (!$found_data && $start_time_column !== null) { + $is_start_time = preg_match(self::timeRegex(), $row[$start_time_column]); + if (!$is_start_time) { + continue; // skip few first rows if label or empty + } + } + if (!$found_data && !TxtConverter::hasText($row[$text_column])) { + continue; + } + $found_data = true; + + if ($start_time_column !== null) { + $start_time = $row[$start_time_column]; + if (is_numeric($start_time)) { + $start_time = number_format($start_time, 3, '.', ''); + } + $data_string .= "\n" . $start_time; + } + if ($end_time_column !== null) { + $end_time = $row[$end_time_column]; + if (is_numeric($end_time)) { + $end_time = number_format($end_time, 3, '.', ''); + } + $data_string .= ' ' . $end_time; + } + $data_string .= "\n" . $row[$text_column]; + } + + return (new TxtConverter)->fileContentToInternalFormat($data_string, ''); + } + + /** + * Convert library's "internal format" (array) to file's content + * + * @param array $internal_format Internal format + * @return string Converted file content + */ + public function internalFormatToFileContent(array $internal_format , array $options) + { + $data = [['Start', 'End', 'Text']]; + foreach ($internal_format as $k => $block) { + $start = $block['start']; + $end = $block['end']; + $text = implode(" ", $block['lines']); + + $data[] = [$start, $end, $text]; + } + + ob_start(); + $fp = fopen('php://output', 'w'); + foreach ($data as $fields) { + fputcsv($fp, $fields); + } + $file_content = ob_get_clean(); + fclose($fp); + + return $file_content; + } + + private static function csvToArray($content) + { + $fp = fopen("php://temp", 'r+'); + fputs($fp, $content); + rewind($fp); + + $separator = self::detectSeparator($content); + $csv = []; + while ( ($data = fgetcsv($fp, 0, $separator) ) !== false ) { + $csv[] = $data; + } + fclose($fp); + + $csv2 = []; + foreach ($csv as $row) { + if (!isset($row[0]) || !isset($row[1])) { + continue; + } + if (trim($row[0]) === '' && trim($row[1]) === '') { + continue; + } + $csv2[] = $row; + } + + return $csv2; + } + + private static function detectSeparator($file_content) + { + $lines = explode("\n", $file_content); + $results = []; + foreach ($lines as $line) { + foreach (self::$allowedSeparators as $delimiter) { + $count = count(explode($delimiter, $line)); + if ($count < 2) continue; // delimiter not found in line, minimum 2 cols (timestamp + text) + + if (empty($results[$delimiter])) { + $results[$delimiter] = []; + } + $results[$delimiter][] = $count; + } + } + + foreach ($results as $delimiter => $value) { + $flipped = array_flip($value); + $results[$delimiter] = max($flipped); + } + + arsort($results, SORT_NUMERIC); + + return !empty($results) ? key($results) : self::$allowedSeparators[0]; + } +}