diff --git a/.gitignore b/.gitignore index a2c0ad8..41ac92c 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,8 @@ Gemfile.lock ProgrammerNotes.md *.gem releases/ +test/benchmark/*.xlsx +!test/benchmark/xlsx_200000_rows.xlsx +.yardoc/ +coverage/ +doc/ \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 21c0040..d8e5fb1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,9 +1,31 @@ # Changelog All notable changes to this project will be documented in this file. -The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), -and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) featuring Added, Changed, Deprecated, +Removed, Fixed, Security, and others; and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [Unreleased] + +## [0.3.0] - 2021-01-12 +### Added +- A benchmark comparison against other xlsx reading libraries. +- Several new assertions. +- Although the OS will claim memory gathered from malloc at program termination, it's a good practice to free this +allocated memory manually. This is why a new hook method will be called at program -in terms- termination: +XLSXDrone::Workbook.close_workbooks(). + +### Changed +- The error reporting system from the C's library was updated, so had to be the Ruby one. Those changes were reflected. +- The XLSXDrone::Workbook#load_sheet() method is safer now. Will raise an exception if ANY problem arises, instead of +returning nil. This means, it will never return nil. It will return a valid XLSXDrone::Sheet object or will raise an +exception. +- If the user tries to use a workbook already closed, an exception will get raised explaining the situation, no more +segfaults. +### Fixed +- The native binding was improved to work well with x64. +- UTF-8 strings now are successfully read! 
+- Fixed several problems with date, time & date time Excel values. ## [0.2.0] - 2019-04-05 ### Added diff --git a/Gemfile b/Gemfile index c73ad2e..2e0a953 100644 --- a/Gemfile +++ b/Gemfile @@ -7,6 +7,14 @@ group :development do gem 'bundler' gem 'yard' gem 'test-unit' + gem 'simplecov' +end + +group :test do + gem 'roo' + gem 'creek' + gem 'rubyXL' + gem 'simple_xlsx_reader' end group :production do diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..934359d --- /dev/null +++ b/LICENSE @@ -0,0 +1,14 @@ +Copyright 2021 Damián M. G. + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the Software without restriction, including without limitation the +rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit +persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the +Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
\ No newline at end of file diff --git a/LICENSE.md b/LICENSE.md deleted file mode 100644 index e69de29..0000000 diff --git a/README.md b/README.md index c8321fc..87da3aa 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,37 @@ -# PORCUPINE_RUBY +# xlsx_drone + +[![](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/damian-m-g/xlsx_drone_rb/master/data/shields/simplecov.json)](#xlsx_drone) +[![](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/damian-m-g/xlsx_drone_rb/master/data/shields/test_suite.json)](#xlsx_drone) +[![](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/damian-m-g/xlsx_drone_rb/master/data/shields/assertions.json)](#xlsx_drone) + +Fast _Microsoft Excel's_ **\*.xlsx** reader. Binding of [C's xlsx_drone](https://github.com/damian-m-g/xlsx_drone) lib. + +## Table of contents + +* [Summary](#summary) +* [Installation](#installation) +* [Usage](#usage) +* [API](#api) +* [TODO](#todo) +* [License](#license) + +## Summary + +The xlsx_drone gem stands out specifically for its runtime speed. This is because almost the entire process of gathering information happens in native C code built specifically for speed. + +[You can find a benchmark inside the repository](https://github.com/damian-m-g/xlsx_drone_rb/blob/master/test/benchmark/speed.rb) that measures the **reading speed** of the best-known (and most used) Ruby libraries for *.xlsx reading/writing. The results gathered on my old notebook, reading 200000 rows × 3 columns (number, string and date), are as follows: + +![](data/README.md_images/bm_result.png) + +**x2** times faster than the fastest one. + +You can run this test on your own computer with the `rake bm` task. 
+ +## Installation + +Use the _gem_ command that comes with your Ruby installation: + +`gem install xlsx_drone` ## Usage @@ -10,17 +43,30 @@ wb = XLSXDrone.open(path_to_xlsx) #: XLSXDrone::Workbook sheets_amount = wb.sheets_amount #: Integer # you can pass its index (starts with 1) or its name as argument -ws = wb.load_sheet(1) #: XLSXDrone::Sheet +sheet = wb.load_sheet(1) #: XLSXDrone::Sheet +puts "Sheet #1 name: #{sheet.name}" -1.upto(ws.last_row) do |row| - p ws.read_cell(row, 'A') - p ws.read_cell(row, 'B') +1.upto(sheet.last_row) do |row| + p sheet.read_cell(row, 'A') + p sheet.read_cell(row, 'B') end - -# remember to close the wb once done -wb.close() ``` -## Known problems +## API + +You can fully produce the documentation with the `rake yard` task, although ~90% of the API (and most useful part) is shown above. + +## TODO + +All ideas about new implementations are carefully weighed to keep the essence of the library, which is to be fast and simple. Hence, the following TODOs could be taken into account or dismissed based on that. + +Also, consider that this TODO list is tied to the [C's xlsx_drone](https://github.com/damian-m-g/xlsx_drone#todo) TODO list. Changes implemented there will be _immediately_ mirrored here. + +- C's xlsx_drone has in its plans to provide **writing support** for xlsx files. As soon as this is implemented there, I'll perform the necessary binding. +- Consider making `XLSXDrone::Workbook#load_sheet()` keep a reference to the loaded sheet as an accessible instance variable (i.e.: @loaded_sheets). + +**Feel free to [make (or upvote)](https://github.com/damian-m-g/xlsx_drone_rb/issues) any feature request.** + +## License -So far, it doesn't work on Ruby x64 versions. 
+#### [MIT](https://github.com/damian-m-g/xlsx_drone_rb/blob/master/LICENSE) diff --git a/Rakefile b/Rakefile index b210bd2..041141d 100644 --- a/Rakefile +++ b/Rakefile @@ -23,3 +23,16 @@ task :build do end puts('Gem(s) moved.') end + +desc 'run benchmark' +task :bm do + load('test/benchmark/speed.rb') +end + +# you will execute this before every new version release +desc 'perform measures & produce badges metadata' +task :badges do + # TODO: Should parse coverage/index.html and produce coverage data only with lib/**/*.rb files, hardcoding value for now + # TODO: Should parse all assertions and produce sum of all of them, hardcoding value for now + # TODO: Should produce test suite pass badge only if all tests pass, hardcoding value for now +end diff --git a/data/README.md_images/bm_result.png b/data/README.md_images/bm_result.png new file mode 100644 index 0000000..e8d0d9d Binary files /dev/null and b/data/README.md_images/bm_result.png differ diff --git a/data/shields/assertions.json b/data/shields/assertions.json new file mode 100644 index 0000000..8b5e23f --- /dev/null +++ b/data/shields/assertions.json @@ -0,0 +1 @@ +{"schemaVersion":1,"label":"test assertions","message":"112","color":"informational"} \ No newline at end of file diff --git a/data/shields/simplecov.json b/data/shields/simplecov.json new file mode 100644 index 0000000..dd1d221 --- /dev/null +++ b/data/shields/simplecov.json @@ -0,0 +1 @@ +{"schemaVersion":1,"label":"coverage","message":"91.79%","color":"green"} \ No newline at end of file diff --git a/data/shields/test_suite.json b/data/shields/test_suite.json new file mode 100644 index 0000000..c4c798d --- /dev/null +++ b/data/shields/test_suite.json @@ -0,0 +1 @@ +{"schemaVersion":1,"label":"test suite","message":"pass","color":"brightgreen"} \ No newline at end of file diff --git a/ext/libporcupine_x32.dll b/ext/libporcupine_x32.dll deleted file mode 100644 index a5c4389..0000000 Binary files a/ext/libporcupine_x32.dll and /dev/null differ 
diff --git a/ext/libporcupine_x64.dll b/ext/libporcupine_x64.dll deleted file mode 100644 index 447dff6..0000000 Binary files a/ext/libporcupine_x64.dll and /dev/null differ diff --git a/ext/xlsx_drone_x64.dll b/ext/xlsx_drone_x64.dll new file mode 100644 index 0000000..4902302 Binary files /dev/null and b/ext/xlsx_drone_x64.dll differ diff --git a/ext/xlsx_drone_x86.dll b/ext/xlsx_drone_x86.dll new file mode 100644 index 0000000..bcefb4f Binary files /dev/null and b/ext/xlsx_drone_x86.dll differ diff --git a/lib/xlsx_drone.rb b/lib/xlsx_drone.rb index 3c885dd..ec42c48 100644 --- a/lib/xlsx_drone.rb +++ b/lib/xlsx_drone.rb @@ -10,3 +10,8 @@ # turn off err printing from the native library XLSXDrone::NativeBinding.xlsx_set_print_err_messages(0) + +# ensure that all opened workbooks get closed (if can't OS will claim it anyways, just filling a duty here) +at_exit do + XLSXDrone::Workbook.close_workbooks() +end \ No newline at end of file diff --git a/lib/xlsx_drone/exceptions.rb b/lib/xlsx_drone/exceptions.rb index 651a231..305d4f5 100644 --- a/lib/xlsx_drone/exceptions.rb +++ b/lib/xlsx_drone/exceptions.rb @@ -9,6 +9,9 @@ class IndexOutOfBounds < RuntimeError; end # May happen on xlsx_load_sheet(). class NonExistent < RuntimeError; end + + # May happen when try to interact with a workbook already closed. + class WorkbookClosed < RuntimeError; end end # Errors caused by the system itself. diff --git a/lib/xlsx_drone/native_binding.rb b/lib/xlsx_drone/native_binding.rb index f95576e..de1669a 100644 --- a/lib/xlsx_drone/native_binding.rb +++ b/lib/xlsx_drone/native_binding.rb @@ -1,80 +1,72 @@ # Namespace (protector) of the library. module XLSXDrone - + # All things related to the binding with the native C library. module NativeBinding - + PLATFORM_X64 = RUBY_PLATFORM.match(/64/) ? true : false EXT_PATH = "#{File.dirname(File.dirname(File.dirname(__FILE__)))}/ext" - DLL_PATH = PLATFORM_X64 ? 
"#{EXT_PATH}/libporcupine_x64.dll" : "#{EXT_PATH}/libporcupine_x32.dll" - + DLL_PATH = PLATFORM_X64 ? "#{EXT_PATH}/xlsx_drone_x64.dll" : "#{EXT_PATH}/xlsx_drone_x86.dll" + class XLSXWorkbookT < FFI::Struct - - byte_index = 0 - + layout \ - :deployment_path, :pointer, byte_index, - :shared_strings_xml, :pointer, byte_index += FFI.type_size(FFI::Type::POINTER), - :n_styles, :int, byte_index += FFI.type_size(FFI::Type::POINTER), - :styles, :pointer, byte_index += FFI.type_size(FFI::Type::INT), - :n_sheets, :int, byte_index += FFI.type_size(FFI::Type::POINTER), - :sheets, :pointer, byte_index += FFI.type_size(FFI::Type::INT) + :deployment_path, :pointer, + :shared_strings_xml, :pointer, + :n_styles, :int, + :styles, :pointer, + :n_sheets, :int, + :sheets, :pointer end - + class XLSXStyleT < FFI::Struct - - byte_index = 0 - + layout \ - :style_id, :int, byte_index, - :related_type, :int, byte_index += FFI.type_size(FFI::Type::INT), - :format_code, :pointer, byte_index += FFI.type_size(FFI::Type::INT) + :style_id, :int, + :related_category, :int, + :format_code, :pointer end - + class XLSXReferenceToRowT < FFI::Struct - - byte_index = 0 - + layout \ - :row_n, :int, byte_index, - :sheetdata_child_i, :int, byte_index += FFI.type_size(FFI::Type::INT) + :row_n, :int, + :sheetdata_child_i, :int end - + class XLSXSheetT < FFI::Struct - - byte_index = 0 - + layout \ - :xlsx, :pointer, byte_index, - :name, :pointer, byte_index += FFI.type_size(FFI::Type::POINTER), - :sheet_xml, :pointer, byte_index += FFI.type_size(FFI::Type::POINTER), - :sheetdata, :pointer, byte_index += FFI.type_size(FFI::Type::POINTER), - :last_row, :int, byte_index += FFI.type_size(FFI::Type::POINTER), - :last_row_looked, XLSXReferenceToRowT, byte_index += FFI.type_size(FFI::Type::INT) + :xlsx, :pointer, + :name, :pointer, + :sheet_xml, :pointer, + :sheetdata, :pointer, + :last_row, :int, + :last_row_looked, XLSXReferenceToRowT end - + class XLSXCellValue < FFI::Union + layout \ :pointer_to_char_value, 
:pointer, :int_value, :int, :long_long_value, :long_long, :double_value, :double end - + class XLSXCellT < FFI::Struct - - byte_index = 0 - + layout \ - :style, :pointer, byte_index, - :value_type, :int, byte_index += FFI.type_size(FFI::Type::POINTER), - :value, XLSXCellValue, byte_index += FFI.type_size(FFI::Type::INT) + :style, :pointer, + :value_type, :int, + :value, XLSXCellValue end - + extend FFI::Library ffi_lib DLL_PATH - + # function attachings + attach_function :xlsx_get_xlsx_errno, [], :int attach_function :xlsx_set_print_err_messages, [:int], :void attach_function :xlsx_open, [:string, :pointer], :int attach_function :xlsx_load_sheet, [:pointer, :int, :string], :pointer diff --git a/lib/xlsx_drone/sheet.rb b/lib/xlsx_drone/sheet.rb index 2385e7e..1a28914 100644 --- a/lib/xlsx_drone/sheet.rb +++ b/lib/xlsx_drone/sheet.rb @@ -18,10 +18,11 @@ def last_row # @return [String] def name - @native_sheet[:name].read_string + @native_sheet[:name].get_string(0).force_encoding(Encoding::UTF_8) end - - # @param row [Integer], @param column [String] + + # @param row [Integer] + # @param column [String] # @return [Integer, Float, String, Time, NilClass] def read_cell(row, column) XLSXDrone::NativeBinding.xlsx_read_cell(@native_sheet, row, column, @native_cell) @@ -29,7 +30,7 @@ def read_cell(row, column) if(@native_cell[:style].null?) 
case @native_cell[:value_type] when 0 - @native_cell[:value][:pointer_to_char_value].read_string + @native_cell[:value][:pointer_to_char_value].get_string(0).force_encoding(Encoding::UTF_8) when 1 @native_cell[:value][:int_value] when 2 @@ -41,24 +42,32 @@ def read_cell(row, column) end else address = @native_cell[:style].address + # speeding purpose if(!(@styles.has_key?(address))) style_obj = XLSXDrone::NativeBinding::XLSXStyleT.new(@native_cell[:style]) - @styles[address] = style_obj[:related_type] + @styles[address] = style_obj[:related_category] end case @styles[address] when 2 - # XLSX_DATE - Time.new(1900) + ((@native_cell[:value][:int_value] - 2) * 86400) + # XLSX_DATE, it could be represented also as plain string + if(@native_cell[:value_type] == 0) + @native_cell[:value][:pointer_to_char_value].get_string(0).force_encoding(Encoding::UTF_8) + else + Time.new(1900) + ((@native_cell[:value][:int_value] - 2) * 86400) + end when 4 # XLSX_DATE_TIME, there are specific cases in which it's a DATE_TIME, but the internal representation appears as an int, so basically # the "time" part of the data comes fixed at mid-day or at the start of the day, that's what you actually see on Excel - if(@native_cell[:value_type] == 1) - Time.new(1900) + ((@native_cell[:value][:int_value] - 2) * 86400) - else - match = @native_cell[:value][:double_value].to_s.match(/(\d+)\.(\d+)/) - integral_part = match[1].to_i - floating_part = "0.#{match[2]}".to_f - Time.new(1900) + ((integral_part - 2) * 86400) + (floating_part * 86400) + case(@native_cell[:value_type]) + when 0 + @native_cell[:value][:pointer_to_char_value].get_string(0).force_encoding(Encoding::UTF_8) + when 1 + Time.new(1900) + ((@native_cell[:value][:int_value] - 2) * 86400) + else + match = @native_cell[:value][:double_value].to_s.match(/(\d+)\.(\d+)/) + integral_part = match[1].to_i + floating_part = "0.#{match[2]}".to_f + Time.new(1900) + ((integral_part - 2) * 86400) + (floating_part * 86400) end when 0 # XLSX_NUMBER 
@@ -74,7 +83,7 @@ def read_cell(row, column) end when 1 # XLSX_TEXT - @native_cell[:value][:pointer_to_char_value].read_string + @native_cell[:value][:pointer_to_char_value].get_string(0).force_encoding(Encoding::UTF_8) when 3 # XLSX_TIME (Time.new(1900) + (@native_cell[:value][:double_value] * 86400)).strftime("%H:%M:%S") @@ -82,7 +91,7 @@ def read_cell(row, column) # XLSX_UNKNOWN case @native_cell[:value_type] when 0 - @native_cell[:value][:pointer_to_char_value].read_string + @native_cell[:value][:pointer_to_char_value].get_string(0).force_encoding(Encoding::UTF_8) when 1 @native_cell[:value][:int_value] when 2 diff --git a/lib/xlsx_drone/workbook.rb b/lib/xlsx_drone/workbook.rb index 9b1606a..bd359a8 100644 --- a/lib/xlsx_drone/workbook.rb +++ b/lib/xlsx_drone/workbook.rb @@ -3,54 +3,74 @@ module XLSXDrone # XLSX Workbook. class Workbook + + @@opened_workbooks = [] + # You could use this method to close all opened workbooks at the same time. + def self.close_workbooks + @@opened_workbooks.each do |wb| + wb.close + end + end + # @param xlsx_workbook_mpointer [FFI::MemoryPointer] # @return [Workbook] def initialize(xlsx_workbook_mpointer) @native_workbook = XLSXDrone::NativeBinding::XLSXWorkbookT.new(xlsx_workbook_mpointer) + @@opened_workbooks << self end - # Sheets aren't loaded by default. You have to load them one by one, once you need them. You can *reference* a sheet passing its name or its index (first one is 1). Returns nil if didn't match. + # Sheets aren't loaded by default. You have to load them one by one, once you need them. You can *reference* a sheet passing its name or its index (first one is 1). Raises an exception if it can't for some reason. 
# @param reference [String, Integer] - # @return [XLSXDrone::Sheet, NilClass] + # @return [XLSXDrone::Sheet] def load_sheet(reference) - loaded_sheet = \ - case reference - when String - XLSXDrone::NativeBinding.xlsx_load_sheet(@native_workbook, 0, reference) - when Integer - XLSXDrone::NativeBinding.xlsx_load_sheet(@native_workbook, reference, nil) - else - return nil + if(@native_workbook) + loaded_sheet = \ + case reference + when String + XLSXDrone::NativeBinding.xlsx_load_sheet(@native_workbook, 0, reference) + when Integer + XLSXDrone::NativeBinding.xlsx_load_sheet(@native_workbook, reference, nil) + else + raise XLSXDrone::LogicError::ClientError::MalformedParams, "Pass a valid index as an #Integer (> 0 && <= #sheets_amount()), or a valid sheet name as a #String." + end + if(!loaded_sheet.null?) + XLSXDrone::Sheet.new(loaded_sheet) + else + # no sheet was loaded + case XLSXDrone::NativeBinding.xlsx_get_xlsx_errno() + when -11 + raise XLSXDrone::LogicError::ClientError::MalformedParams, "Pass a valid index (> 0 && <= #sheets_amount()), or a valid sheet name." + when -12 + raise NoMemoryError + when -13 + raise XLSXDrone::UserError::IndexOutOfBounds, "If you pass an integer as parameter, note that can't surpass #sheets_amount()." + when -14 + raise XLSXDrone::LogicError::InternalError::XMLParsingError, "The XLSX may be corrupted or it belongs to a version unsupported by this library." + when -15 + raise XLSXDrone::UserError::NonExistent, "There's not such sheet with that name." + end end - if(!loaded_sheet.null?) - XLSXDrone::Sheet.new(loaded_sheet) else - # no sheet was loaded - case FFI::LastError.error - when -1 - raise XLSXDrone::LogicError::ClientError::MalformedParams, "Pass a valid index (> 0 && <= #sheets_amount()), or a valid sheet name." - when -2 - raise NoMemoryError - when -3 - raise XLSXDrone::UserError::IndexOutOfBounds, "If you pass an integer as parameter, note that can't surpass #sheets_amount()." 
- when -4 - raise XLSXDrone::LogicError::InternalError::XMLParsingError, "The XLSX may be corrupted or it belongs to a version unsupported by this library." - when -5 - raise XLSXDrone::UserError::NonExistent, "There's not such sheet with that name." - end + raise XLSXDrone::UserError::WorkbookClosed, "The workbook you're trying to access was already closed." end end - + # @return [Integer] the amount of sheets contained on this workbook def sheets_amount @native_workbook[:n_sheets] end - # Must-call method, once you finished working with the workbook. + # Should-call method, once you finish working with the workbook. # @return [TrueClass, FalseClass] depending on if the close was successful or not def close - XLSXDrone::NativeBinding.xlsx_close(@native_workbook) == 1 ? true : false + if(XLSXDrone::NativeBinding.xlsx_close(@native_workbook) == 1) + @@opened_workbooks.delete(self) + @native_workbook = nil + true + else + false + end end end end diff --git a/lib/xlsx_drone/xlsx_drone.rb b/lib/xlsx_drone/xlsx_drone.rb index 2181989..e563e9e 100644 --- a/lib/xlsx_drone/xlsx_drone.rb +++ b/lib/xlsx_drone/xlsx_drone.rb @@ -1,16 +1,12 @@ # Namespace (protector) and #open() functionallity for the library. module XLSXDrone - - # Opens an XLSX file, must be closed after working with him. - # Can raise several exceptions: - # * NoMemoryError - # * XLSXDrone::InternalError::CantDeployFile: will get raised if the file doesn't exist, or other reassons. - # * XLSXDrone::InternalError::XMLParsingError: will get raised if the XLSX is corrupted, or if it's somehow unrecognizable by the library. + # @param path [String] - # @return [XLSXDrone::Workbook, NilClass] + # @return [XLSXDrone::Workbook] + # Opens an XLSX file, should be closed after working with him. Can raise several exceptions. def self.open(path) # check that the *path* is always a #String - if(!path.is_a?(String)) then return end + raise XLSXDrone::LogicError::ClientError::MalformedParams, "A #String is expected." 
if(!path.is_a?(String)) # reserve memory for an xlsx_workbook struct xlsx_workbook_mpointer = FFI::MemoryPointer.new(1, XLSXDrone::NativeBinding::XLSXWorkbookT.size, false) if(XLSXDrone::NativeBinding.xlsx_open(File.absolute_path(path), xlsx_workbook_mpointer) == 1) @@ -18,11 +14,11 @@ def self.open(path) XLSXDrone::Workbook.new(xlsx_workbook_mpointer) else # something went wrong - case FFI::LastError.error + case XLSXDrone::NativeBinding.xlsx_get_xlsx_errno() when -2 raise NoMemoryError when -3 - raise XLSXDrone::LogicError::InternalError::CantDeployFile, "Can't deploy #{path}." + raise XLSXDrone::LogicError::InternalError::CantDeployFile, "Can't deploy #{path}. Check that the file isn't already opened." when -4 raise XLSXDrone::LogicError::InternalError::XMLParsingError, "The XLSX may be corrupted or it belongs to a version unsupported by this library." end diff --git a/test/benchmark/speed.rb b/test/benchmark/speed.rb new file mode 100644 index 0000000..bba8f0d --- /dev/null +++ b/test/benchmark/speed.rb @@ -0,0 +1,101 @@ +=begin +The idea of this file is to be able to benchmark the reading speed among the most used xlsx reading libraries. +As xlsx_drone is focused on speed, we expect to get #1 place. +Testing is performed between: + * roo + * creek + * rubyXL + * simple_xlsx_reader + * xlsx_drone +=end + +require 'benchmark' + +Benchmark.bm(20) do |bm| + + TEST_SUBJECT = 'test/benchmark/xlsx_200000_rows.xlsx'.freeze + COLUMNS = ['A', 'B', 'C'] + ROWS = 1..200_000 + + # roo + puts 'Measuring roo...' + bm.report('roo') do + require 'roo' + xlsx = Roo::Excelx.new(TEST_SUBJECT) + cell_value = nil + ROWS.each do |r| + COLUMNS.each do |c| + cell_value = xlsx.cell(r, c) + end + end + end + + # clean + GC.start + + # creek + puts 'Measuring creek...' 
+ bm.report('creek') do + require 'creek' + creek = Creek::Book.new(TEST_SUBJECT) + sheet = creek.sheets[0] + cell_value = nil + sheet.rows.each do |r| + r.values.each do |v| + cell_value = v + end + end + end + + # clean + GC.start + + # rubyXL + puts 'Measuring rubyXL...' + bm.report('rubyXL') do + require 'rubyXL' + workbook = RubyXL::Parser.parse(TEST_SUBJECT) + worksheet = workbook[0] + cell_value = nil + worksheet.each do |r| + r.cells.each do |cell| + cell_value = cell.value + end + end + end + + # clean + GC.start + + # simple_xlsx_reader + # ATTENTION: Doesn't have the ability to differentiate between number and string + puts 'Measuring simple_xlsx_reader...' + bm.report('simple_xlsx_reader') do + require 'simple_xlsx_reader' + doc = SimpleXlsxReader.open(TEST_SUBJECT) + sheet = doc.sheets[0] + cell_value = nil + sheet.rows.each do |r| + r.each do |cell| + cell_value = cell + end + end + end + + # clean + GC.start + + # xlsx_drone + puts 'Measuring xlsx_drone...' + bm.report('xlsx_drone') do + require_relative '../../lib/xlsx_drone' + wb = XLSXDrone.open(TEST_SUBJECT) + sheet = wb.load_sheet(1) + cell_value = nil + ROWS.each do |r| + COLUMNS.each do |c| + cell_value = sheet.read_cell(r, c) + end + end + end +end diff --git a/test/benchmark/xlsx_200000_rows.xlsx b/test/benchmark/xlsx_200000_rows.xlsx new file mode 100644 index 0000000..f5181de Binary files /dev/null and b/test/benchmark/xlsx_200000_rows.xlsx differ diff --git a/test/helper/foo.xlsx b/test/helper/foo.xlsx index 69f4dea..1040e1a 100644 Binary files a/test/helper/foo.xlsx and b/test/helper/foo.xlsx differ diff --git a/test/tc_sheet.rb b/test/tc_sheet.rb index 7877fd0..b4c02cd 100644 --- a/test/tc_sheet.rb +++ b/test/tc_sheet.rb @@ -126,6 +126,9 @@ def test_read_cell # column "J", Text assert_equal("1875", @sheet.read_cell(2, "J")) assert_equal("Just text", @sheet.read_cell(3, "J")) + # unicode text + assert_equal("foo你bar好qaz", @sheet.read_cell(5, "J")) + assert_equal("𐐀34", 
@sheet.read_cell(4, "J")) # column "K", Special assert_equal(2000, @sheet.read_cell(2, "K")) @@ -136,6 +139,10 @@ def test_read_cell # column "L", Custom assert_equal(12, @sheet.read_cell(2, "L")) assert_equal(Time.new(2012, 2, 16), @sheet.read_cell(3, "L")) + + # empty cells + assert_equal(nil, @sheet.read_cell(13, "D")) + assert_equal(nil, @sheet.read_cell(50, "A")) end def teardown diff --git a/test/tc_workbook.rb b/test/tc_workbook.rb index aeabf75..874b3bc 100644 --- a/test/tc_workbook.rb +++ b/test/tc_workbook.rb @@ -7,8 +7,8 @@ def setup end def test_load_sheet - assert_nil(@workbook.load_sheet(2.5), "Should do nothing if you don't pass a #String or an #Integer.") - assert_nil(@workbook.load_sheet(nil), "Should do nothing if you don't pass a #String or an #Integer.") + assert_raise(XLSXDrone::LogicError::ClientError::MalformedParams) {@workbook.load_sheet(2.5)} + assert_raise(XLSXDrone::LogicError::ClientError::MalformedParams) {@workbook.load_sheet(nil)} # you can load a sheet by its index, that starts with 1, or by its name assert_instance_of(XLSXDrone::Sheet, @workbook.load_sheet(1)) assert_instance_of(XLSXDrone::Sheet, @workbook.load_sheet("Sheet1")) diff --git a/test/tc_xlsx_drone.rb b/test/tc_xlsx_drone.rb index 7e16787..d248452 100644 --- a/test/tc_xlsx_drone.rb +++ b/test/tc_xlsx_drone.rb @@ -3,7 +3,7 @@ class TCXLSXDrone < Test::Unit::TestCase XLSX_PATH = "#{File.dirname(__FILE__)}/helper/foo.xlsx" def test_open - assert_nil(XLSXDrone.open(123), "Should do nothing if the arg isn't a #String") + assert_raise(XLSXDrone::LogicError::ClientError::MalformedParams) {XLSXDrone.open(123)} assert_raise(XLSXDrone::LogicError::InternalError::CantDeployFile) do XLSXDrone.open("non existent file path") end diff --git a/xlsx_drone.gemspec b/xlsx_drone.gemspec index ea2eaf5..b9870c5 100644 --- a/xlsx_drone.gemspec +++ b/xlsx_drone.gemspec @@ -1,12 +1,12 @@ Gem::Specification.new do |s| s.name = "xlsx_drone" - s.version = "0.2.0" - s.summary = "XLSX reader/writer. 
So far, functionality for reading is provided." + s.version = "0.3.0" + s.summary = "Fast Microsoft Excel's XLSX reader. Binding of C's xlsx_drone lib." s.author = "Damián M. González" - s.homepage = "http://www.jorobuslab.net" - s.license = "Nonstandard" + s.homepage = "https://github.com/damian-m-g/xlsx_drone_rb" + s.license = "MIT" s.files = Dir["bin/*.rb"] + Dir["ext/*.dll"] + Dir["lib/**/*.rb"] - s.required_ruby_version = ['~> 2', '< 2.6'] + s.required_ruby_version = '> 2' s.add_runtime_dependency 'ffi', '~>1.0' end \ No newline at end of file