diff --git a/README.md b/README.md index 9d68811..9656641 100644 --- a/README.md +++ b/README.md @@ -1,24 +1,23 @@ # xlsx-to-json -Using Python, convert an Excel document to JSON +Using Python, convert an Excel (xlsx) document to JSON ## dependencies -* xlrd -* xlwt +* pylightxl -you can run `pip install xlrd` in your terminal/command window if you don't have these dependencies installed +you can run `pip3 install pylightxl` in your terminal/command window if you don't have this dependency installed ## Usage -* Run the command `python xslx-to-json.py` in your terminal/command window -* Do what the prompt says +* Run the command `python3 xlstojson.py` in your terminal/command window +* Do what the prompt says (that is, enter the path to the file) A .json file matching the name of your xlsx document will be generated. ## Caveats and Whatnots -I'm not a Python Programmer. The code is written about as well as a non-Python programmer programming Python could write it. +I wrote this six years ago; it was the first thing I'd ever written in Python. This code was written about as well as a non-Python programmer programming Python could write it. 
-Forgive me, I don't know what I'm doing +I have made attempts to improve it, but I may still not know what I'm doing diff --git a/requirements.txt b/requirements.txt index 27c3114..de484c8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1 @@ -xlwt==1.3.0 -xlrd==1.1.0 \ No newline at end of file +pylightxl==1.6.0 \ No newline at end of file diff --git a/xlstojson.py b/xlstojson.py index acd68c2..0750400 100644 --- a/xlstojson.py +++ b/xlstojson.py @@ -1,70 +1,25 @@ -#!/usr/bin/python2.7 -import xlrd -import xlwt -import json -import os.path -import datetime - -def getColNames(sheet): - rowSize = sheet.row_len(0) - colValues = sheet.row_values(0, 0, rowSize ) - columnNames = [] - - for value in colValues: - columnNames.append(value) - - return columnNames - -def getRowData(row, columnNames): - rowData = {} - counter = 0 - - for cell in row: - # check if it is of date type print in iso format - if cell.ctype==xlrd.XL_CELL_DATE: - rowData[columnNames[counter].lower().replace(' ', '_')] = datetime.datetime(*xlrd.xldate_as_tuple(cell.value,0)).isoformat() - else: - rowData[columnNames[counter].lower().replace(' ', '_')] = cell.value - counter +=1 - - return rowData - -def getSheetData(sheet, columnNames): - nRows = sheet.nrows - sheetData = [] - counter = 1 - - for idx in range(1, nRows): - row = sheet.row(idx) - rowData = getRowData(row, columnNames) - sheetData.append(rowData) - - return sheetData - -def getWorkBookData(workbook): - nsheets = workbook.nsheets - counter = 0 - workbookdata = {} - - for idx in range(0, nsheets): - worksheet = workbook.sheet_by_index(idx) - columnNames = getColNames(worksheet) - sheetdata = getSheetData(worksheet, columnNames) - workbookdata[worksheet.name.lower().replace(' ', '_')] = sheetdata - - return workbookdata +#!/usr/bin/python3.10 +import json, os, sys +from xlsx_reader import get_workbook, get_workbook_data def main(): - filename = input("Enter the path to the filename -> ") - if os.path.isfile(filename): - 
workbook = xlrd.open_workbook(filename) - workbookdata = getWorkBookData(workbook) - output = \ - open((filename.replace("xlsx", "json")).replace("xls", "json"), "w+") - output.write(json.dumps(workbookdata, sort_keys=True, indent=2, separators=(',', ": "))) - output.close() - print ("%s was created" %output.name) - else: - print ("Sorry, that was not a valid filename") + '''Prompt for an xlsx path, convert the workbook and write <name>.json in the current directory; exits with status 2 on error.''' + source_file = input("Enter the path to the filename -> ") + if os.path.isfile(source_file): + pathname = os.path.splitext(source_file) + file_name = os.path.basename(pathname[0]) + try: + output_file_name = file_name + '.json' + workbook = get_workbook(source_file) + workbook_data = get_workbook_data(workbook) + with open(output_file_name, 'w+', encoding="utf-8") as output_file: + output_file.write(json.dumps(workbook_data, sort_keys=True, indent=2, separators=(",", ": "))) + print (f"{output_file.name} was created") + except Exception as error: + print("some error occured") + print(error) + sys.exit(2) + else: + print ("Sorry, that was not a valid filename") main() diff --git a/xlsx_reader.py b/xlsx_reader.py new file mode 100644 index 0000000..5a153bf --- /dev/null +++ b/xlsx_reader.py @@ -0,0 +1,56 @@ +'''Module that reads an xlsx spreadsheet and can produce json data from it''' +import pylightxl as xl + +def get_column_names(sheet): + '''Takes a single worksheet, returns a list holding the first (top-row) value of each column''' + column_lists = sheet.cols + column_names = [] + + for column_list in column_lists: + column_names.append(column_list[0]) + + return column_names + +def get_row_data(row, column_names): + '''takes a single row of a worksheet and an array of column names, + returns a dict mapping column_name to the row's value in that column + ''' + row_data = {} + counter = 0 + + for cell in row: + column_name = column_names[counter] + #TODO: this doesn't format any for a cell. 
Consider formatting date/numbers + row_data[column_name] = cell + counter = counter + 1 + + return row_data + +def get_sheet_data(sheet, column_names): + '''Takes a single worksheet, returns a list with one dict per data row (row 1 is the header; rows are 1-indexed, so the last row is included)''' + max_rows = sheet.size[0] + sheet_data = [] + + for idx in range(2, max_rows + 1): + row = sheet.row(idx) + row_data = get_row_data(row, column_names) + sheet_data.append(row_data) + + return sheet_data + +def get_workbook_data(workbook): + '''Takes a workbook, returns a dict mapping each sheet name (lower-cased, spaces replaced by underscores) to that sheet's row data''' + workbook_sheet_names = workbook.ws_names + workbook_data = {} + + for sheet_name in workbook_sheet_names: + worksheet = workbook.ws(ws=sheet_name) + column_names = get_column_names(worksheet) + sheet_data = get_sheet_data(worksheet, column_names) + workbook_data[sheet_name.lower().replace(' ', '_')] = sheet_data + + return workbook_data + +def get_workbook(filename): + '''opens the workbook at filename for reading via pylightxl's readxl and returns it''' + return xl.readxl(filename)