Skip to content

Commit

Permalink
Add DOCXFileParserToString
Browse files Browse the repository at this point in the history
  • Loading branch information
Vytek committed Sep 4, 2023
1 parent 40e884c commit 31f997d
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 3 deletions.
5 changes: 3 additions & 2 deletions DOCX2Text.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ Copyright: 2018 Kleissner Investments s.r.o.
Author: Peter Kleissner
This code is forked from https://github.com/guylaor/goword and extracts text from DOCX files.
Project from: https://github.com/IntelligenceX/fileconversion
Modified by: Vytek (9/3/2023)
*/

package opencrucible
Expand All @@ -14,7 +16,6 @@ import (
"encoding/xml"
"fmt"
"io"
"io/ioutil"
"strings"
)

Expand Down Expand Up @@ -115,7 +116,7 @@ func openWordFile(file io.ReaderAt, size int64) (string, error) {
}
defer rc.Close()
if f.Name == "word/document.xml" {
doc, err := ioutil.ReadAll(rc)
doc, err := io.ReadAll(rc)
if err != nil {
return "", err
}
Expand Down
4 changes: 3 additions & 1 deletion opencrucible.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,13 @@ func DOCXFileParseToString(FileToParse string) (string, error) {
// extract text from a DOCX file
file, err := os.Open(FileToParse)
if err != nil {
return "", errors.New(fmt.Sprintf("Error opening file: %s\n", err))
return "", fmt.Errorf("error opening file: %s", err)
}

defer file.Close()
stat, _ := file.Stat()
docx, err := DOCX2Text(file, stat.Size())
return docx, err
}

func ODTParseToString(StreamToParse []byte) (string, error) {
Expand Down
24 changes: 24 additions & 0 deletions opencrucible_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,4 +51,28 @@ func TestRTFarse(t *testing.T) {
if got != want {
t.Errorf("got %q, wanted %q", got, want)
}
}

// TestDOCXParse checks that DOCXFileParseToString extracts the expected text
// from the test_file/test_file_docx.docx fixture.
func TestDOCXParse(t *testing.T) {
	got, err := DOCXFileParseToString("test_file/test_file_docx.docx")
	if err != nil {
		// Fail through the testing framework instead of panicking: a panic
		// aborts the whole test binary, so the remaining tests would not run.
		t.Fatalf("DOCXFileParseToString returned an error: %v", err)
	}
	want := "This is a test file to test library\n"
	t.Logf("Parsed: %s", got)
	if got != want {
		t.Errorf("got %q, wanted %q", got, want)
	}
}

// TestDOCXMSParse checks that DOCXFileParseToString extracts the expected text
// from the test_file/test_file_docx_ms.docx fixture.
func TestDOCXMSParse(t *testing.T) {
	got, err := DOCXFileParseToString("test_file/test_file_docx_ms.docx")
	if err != nil {
		// Fail through the testing framework instead of panicking: a panic
		// aborts the whole test binary, so the remaining tests would not run.
		t.Fatalf("DOCXFileParseToString returned an error: %v", err)
	}
	want := "This is a test file to test library\n"
	t.Logf("Parsed: %s", got)
	if got != want {
		t.Errorf("got %q, wanted %q", got, want)
	}
}

0 comments on commit 31f997d

Please sign in to comment.