Skip to content

Commit

Permalink
Fix O(n!) tag name processing. Fixes #27. Also simplify constructor.
Browse files Browse the repository at this point in the history
  • Loading branch information
tfmorris committed Apr 3, 2016
1 parent f8450d3 commit a9d101d
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 46 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
public class Paragraph
extends LinkedList<Node>
{
private static final long serialVersionUID = 1L;

// private ArrayList<String> textNodes;
int charsCountInLinks = 0;
Expand All @@ -40,32 +41,21 @@ public class Paragraph
private String tagName = "";
private String rawText = "";

public Paragraph(Node firstNode)
public Paragraph(Node firstNode, String path)
{
add(firstNode);
initRawInfo();
}

private void initRawInfo()
{
StringBuilder sb = new StringBuilder();
// This is only called at initialization time, so will never have more than a single node
for (Node n : this) {
// NodeHelper.cleanEmptyElements(n);
if (n instanceof TextNode) {
this.setTagName(getPath(n));
String nodeRawText = ((TextNode) n).text();
sb.append(Utils.normalizeBreaks(nodeRawText).trim());
this.tagName = path;
if (firstNode instanceof TextNode) {
String nodeRawText = ((TextNode) firstNode).text();
this.rawText = Utils.normalizeBreaks(nodeRawText).trim();

if (NodeHelper.isLink(n)) {
charsCountInLinks += nodeRawText.length();
}
if (NodeHelper.isLink(firstNode)) {
charsCountInLinks += nodeRawText.length();
}
}

rawText = sb.toString();
}


public int getLinksLength()
{
return this.charsCountInLinks;
Expand Down Expand Up @@ -96,30 +86,6 @@ public String getTagName()
return this.tagName;
}

/**
 * Builds the dot-separated chain of node names leading from the top of the tree down to
 * the node that encloses {@code n}. Every name is followed by a {@code "."}, so a
 * non-empty result always ends with a dot.
 *
 * <p>The walk goes upwards through {@code parent()}: a {@link TextNode} itself and a node
 * for which {@code NodeHelper.isInnerText} returns true are skipped, every other node
 * contributes its {@code nodeName()}. The walk stops after a node named {@code html}
 * (case-insensitive) has been added, or when there is no further parent.
 *
 * @param n the node whose path is wanted; may be {@code null}
 * @return the path with a trailing dot, or an empty string when no named ancestor exists
 */
public String getPath(Node n)
{
    StringBuilder nodePath = new StringBuilder();
    // FIXME: This is pathologically recomputing things that we know because we're doing a depth-first traverse
    while (n != null) {
        if (n instanceof TextNode) {
            n = n.parent();
        }
        // parent() may have returned null for a detached node; guard before using n again
        if (n != null && NodeHelper.isInnerText(n)) {
            n = n.parent();
        }
        if (n == null) {
            // Ran off the top of the tree while skipping text nodes: nothing more to add
            break;
        }

        String parentNodeName = n.nodeName();
        // Ancestors are visited bottom-up, so each name goes in front of what we have so far
        nodePath.insert(0, parentNodeName + ".");

        if (parentNodeName.equalsIgnoreCase("html")) {
            break;
        }
        n = n.parent();
    }

    return nodePath.toString();
}

public void setTagName(String name)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ public class ParagraphsExplorer

private final LinkedList<Paragraph> paragraphs;
private final LinkedList<Node> nodes;
private final LinkedList<String> tags;

public enum AncestorState
{
Expand All @@ -51,12 +52,14 @@ public enum AncestorState
/**
 * Creates an explorer whose paragraph, node and tag-name lists all start out empty.
 */
public ParagraphsExplorer()
{
this.paragraphs = new LinkedList<>();
// NOTE(review): nodes is assigned on each of the next two lines; this listing looks like a
// rendered diff showing both the old and the new form of the assignment — confirm that
// only one of them is present in the actual file.
nodes = new LinkedList<>();
this.nodes = new LinkedList<>();
this.tags = new LinkedList<String>();
}

@Override
public void head(Node node, int depth)
{
tags.add(node.nodeName());
if (node.childNodeSize() == 0) {
if (node instanceof TextNode && StringUtil.isBlank(node.outerHtml())) {
return;
Expand All @@ -69,7 +72,7 @@ public void head(Node node, int depth)
/**
 * Called when the traversal leaves {@code node}; removes the most recently added entry
 * from {@code tags}.
 *
 * @param node the node being left (not read here)
 * @param depth the depth of the node (not read here)
 */
@Override
public void tail(Node node, int depth)
{
// head() appends node.nodeName() to tags for every node it visits; drop that entry again
// here so tags only holds the names of nodes that are still open
tags.removeLast();
}

/**
Expand Down Expand Up @@ -147,7 +150,7 @@ public static AncestorState getAncestorState(Node lastNode, Node currentNode)

private void insertAsNewParagraph(Node node)
{
Paragraph p = new Paragraph(node);
Paragraph p = new Paragraph(node, String.join(".", tags));
// if (!p.getRawText().isEmpty()) {
paragraphs.add(p);
// }
Expand Down

0 comments on commit a9d101d

Please sign in to comment.