Skip to content

Commit

Permalink
Attributes deduplicated more efficiently
Browse files Browse the repository at this point in the history
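When reading an XML document with PreserveDuplicateAttrs disabled, this
package now uses a map keyed by attribute name, instead of a linear scan
of the element's existing attributes, to detect and remove attributes
with duplicated names (within each element). A deduplicated attribute
keeps the position of its first occurrence and the value of its last.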
  • Loading branch information
beevik committed Jul 7, 2024
1 parent 97f4565 commit 562ef05
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 26 deletions.
38 changes: 25 additions & 13 deletions etree.go
Original file line number Diff line number Diff line change
Expand Up @@ -889,6 +889,7 @@ func (e *Element) readFrom(ri io.Reader, settings ReadSettings) (n int64, err er
r = newXmlSimpleReader(ri)
}

attrCheck := make(map[xml.Name]int)
dec := newDecoder(r, settings)

var stack stack[*Element]
Expand Down Expand Up @@ -921,8 +922,20 @@ func (e *Element) readFrom(ri io.Reader, settings ReadSettings) (n int64, err er
switch t := t.(type) {
case xml.StartElement:
e := newElement(t.Name.Space, t.Name.Local, top)
for _, a := range t.Attr {
e.createAttr(a.Name.Space, a.Name.Local, a.Value, e, settings.PreserveDuplicateAttrs)
if settings.PreserveDuplicateAttrs {
for _, a := range t.Attr {
e.addAttr(a.Name.Space, a.Name.Local, a.Value)
}
} else {
for _, a := range t.Attr {
if i, contains := attrCheck[a.Name]; contains {
e.Attr[i].Value = a.Value
} else {
attrCheck[a.Name] = len(e.Attr)
e.addAttr(a.Name.Space, a.Name.Local, a.Value)
}
}
clear(attrCheck)
}
stack.push(e)
case xml.EndElement:
Expand Down Expand Up @@ -1365,25 +1378,24 @@ func (e *Element) addChild(t Token) {
// prefix followed by a colon.
func (e *Element) CreateAttr(key, value string) *Attr {
space, skey := spaceDecompose(key)
return e.createAttr(space, skey, value, e, false)
}

// createAttr is a helper function that creates attributes.
func (e *Element) createAttr(space, key, value string, parent *Element, preserveDups bool) *Attr {
if !preserveDups {
for i, a := range e.Attr {
if space == a.Space && key == a.Key {
e.Attr[i].Value = value
return &e.Attr[i]
}
for i, a := range e.Attr {
if space == a.Space && skey == a.Key {
e.Attr[i].Value = value
return &e.Attr[i]
}
}

return e.addAttr(space, skey, value)
}

// addAttr is a helper function that adds an attribute to an element.
func (e *Element) addAttr(space, key, value string) *Attr {
a := Attr{
Space: space,
Key: key,
Value: value,
element: parent,
element: e,
}
e.Attr = append(e.Attr, a)
return &e.Attr[len(e.Attr)-1]
Expand Down
23 changes: 10 additions & 13 deletions etree_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1467,7 +1467,7 @@ func TestReindexChildren(t *testing.T) {
}

func TestPreserveDuplicateAttrs(t *testing.T) {
s := `<element attr="test" attr="test2"/>`
s := `<element x="value1" y="value2" x="value3" x="value4" y="value5"/>`

checkAttrCount := func(e *Element, n int) {
if len(e.Attr) != n {
Expand All @@ -1490,23 +1490,20 @@ func TestPreserveDuplicateAttrs(t *testing.T) {
t.Run("enabled", func(t *testing.T) {
doc := newDocumentFromString2(t, s, ReadSettings{PreserveDuplicateAttrs: true})
e := doc.FindElement("element")
checkAttrCount(e, 2)
checkAttr(e, 0, "attr", "test")
checkAttr(e, 1, "attr", "test2")
checkAttrCount(e, 5)
checkAttr(e, 0, "x", "value1")
checkAttr(e, 1, "y", "value2")
checkAttr(e, 2, "x", "value3")
checkAttr(e, 3, "x", "value4")
checkAttr(e, 4, "y", "value5")
})

t.Run("disabled", func(t *testing.T) {
doc := newDocumentFromString2(t, s, ReadSettings{PreserveDuplicateAttrs: false})
e := doc.FindElement("element")
checkAttrCount(e, 1)
checkAttr(e, 0, "attr", "test2")
})

t.Run("default", func(t *testing.T) {
doc := newDocumentFromString(t, s)
e := doc.FindElement("element")
checkAttrCount(e, 1)
checkAttr(e, 0, "attr", "test2")
checkAttrCount(e, 2)
checkAttr(e, 0, "x", "value4")
checkAttr(e, 1, "y", "value5")
})
}

Expand Down

0 comments on commit 562ef05

Please sign in to comment.