diff --git a/DOCX2Text.go b/DOCX2Text.go index c801655..1d01a67 100644 --- a/DOCX2Text.go +++ b/DOCX2Text.go @@ -4,6 +4,8 @@ Copyright: 2018 Kleissner Investments s.r.o. Author: Peter Kleissner This code is forked from https://github.com/guylaor/goword and extracts text from DOCX files. +Project from: https://github.com/IntelligenceX/fileconversion +Modify by: Vytek (9/3/2023) */ package opencrucible @@ -14,7 +16,6 @@ import ( "encoding/xml" "fmt" "io" - "io/ioutil" "strings" ) @@ -115,7 +116,7 @@ func openWordFile(file io.ReaderAt, size int64) (string, error) { } defer rc.Close() if f.Name == "word/document.xml" { - doc, err := ioutil.ReadAll(rc) + doc, err := io.ReadAll(rc) if err != nil { return "", err } diff --git a/opencrucible.go b/opencrucible.go index ff14149..5fa4018 100644 --- a/opencrucible.go +++ b/opencrucible.go @@ -58,11 +58,13 @@ func DOCXFileParseToString(FileToParse string) (string, error) { // extract text from an XLSX file file, err := os.Open(FileToParse) if err != nil { - return "", errors.New(fmt.Sprintf("Error opening file: %s\n", err)) + return "", fmt.Errorf("error opening file: %s", err) } defer file.Close() stat, _ := file.Stat() + docx, err := DOCX2Text(file, stat.Size()) + return docx, err } func ODTParseToString(StreamToParse []byte) (string, error) { diff --git a/opencrucible_test.go b/opencrucible_test.go index 542a3e2..895a771 100644 --- a/opencrucible_test.go +++ b/opencrucible_test.go @@ -51,4 +51,28 @@ func TestRTFarse(t *testing.T) { if got != want { t.Errorf("got %q, wanted %q", got, want) } +} + +func TestDOCXParse(t *testing.T) { + got, err := DOCXFileParseToString("test_file/test_file_docx.docx") + if err != nil { + panic(err) + } + want := "This is a test file to test library\n" + t.Logf("Parsed: %s", got) + if got != want { + t.Errorf("got %q, wanted %q", got, want) + } +} + +func TestDOCXMSParse(t *testing.T) { + got, err := DOCXFileParseToString("test_file/test_file_docx_ms.docx") + if err != nil { + panic(err) + } + want := "This is a test file to test library\n" + t.Logf("Parsed: %s", got) + if got != want { + t.Errorf("got %q, wanted %q", got, want) + } } \ No newline at end of file