Skip to content

Commit 16723d2

Browse files
committed
Store cached child els in Attributes, not field
We keep a lazily initialized, cached copy of an Element's child Element list, to save recreating it every time it is requested. That was stored in a nullable field. This change moves it into the userdata section of the attributes instead. Also, rather than tracking changes to the backing node list via a ChangeNotifyingArrayList, I'm now using the ArrayList's modCount directly, so we can deprecate the ChangeNotifyingArrayList. Ultimately this trims the shallow size of an Element from 40 to 32 bytes, and the NodeList from 32 to 24 (20% and 25% decreases respectively).
1 parent 47491d8 commit 16723d2

File tree

3 files changed

+44
-43
lines changed

3 files changed

+44
-43
lines changed

src/main/java/org/jsoup/helper/ChangeNotifyingArrayList.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,9 @@
55

66
/**
77
* Implementation of ArrayList that watches out for changes to the contents.
8+
@deprecated this class is no longer used and will be removed in jsoup 1.21.1.
89
*/
9-
public abstract class ChangeNotifyingArrayList<E> extends ArrayList<E> {
10+
@Deprecated public abstract class ChangeNotifyingArrayList<E> extends ArrayList<E> {
1011
public ChangeNotifyingArrayList(int initialCapacity) {
1112
super(initialCapacity);
1213
}

src/main/java/org/jsoup/nodes/Element.java

+42-38
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
package org.jsoup.nodes;
22

3-
import org.jsoup.helper.ChangeNotifyingArrayList;
43
import org.jsoup.helper.Validate;
54
import org.jsoup.internal.Normalizer;
65
import org.jsoup.internal.StringUtil;
@@ -37,7 +36,6 @@
3736
import java.util.stream.Stream;
3837

3938
import static org.jsoup.internal.Normalizer.normalize;
40-
import static org.jsoup.nodes.Document.OutputSettings.Syntax.html;
4139
import static org.jsoup.nodes.Document.OutputSettings.Syntax.xml;
4240
import static org.jsoup.nodes.TextNode.lastCharIsWhitespace;
4341
import static org.jsoup.parser.Parser.NamespaceHtml;
@@ -50,11 +48,11 @@ An HTML Element consists of a tag name, attributes, and child nodes (including t
5048
*/
5149
public class Element extends Node implements Iterable<Element> {
5250
private static final List<Element> EmptyChildren = Collections.emptyList();
51+
private static final NodeList EmptyNodeList = new NodeList(0);
5352
private static final Pattern ClassSplit = Pattern.compile("\\s+");
5453
private static final String BaseUriKey = Attributes.internalKey("baseUri");
5554
Tag tag;
56-
private @Nullable WeakReference<List<Element>> shadowChildrenRef; // points to child elements shadowed from node children
57-
List<Node> childNodes;
55+
NodeList childNodes;
5856
@Nullable Attributes attributes; // field is nullable but all methods for attributes are non-null
5957

6058
/**
@@ -86,7 +84,7 @@ public Element(String tag) {
8684
*/
8785
public Element(Tag tag, @Nullable String baseUri, @Nullable Attributes attributes) {
8886
Validate.notNull(tag);
89-
childNodes = EmptyNodes;
87+
childNodes = EmptyNodeList;
9088
this.attributes = attributes;
9189
this.tag = tag;
9290
if (baseUri != null)
@@ -108,12 +106,12 @@ public Element(Tag tag, @Nullable String baseUri) {
108106
Internal test to check if a nodelist object has been created.
109107
*/
110108
protected boolean hasChildNodes() {
111-
return childNodes != EmptyNodes;
109+
return childNodes != EmptyNodeList;
112110
}
113111

114112
@Override protected List<Node> ensureChildNodes() {
115-
if (childNodes == EmptyNodes) {
116-
childNodes = new NodeList(this, 4);
113+
if (childNodes == EmptyNodeList) {
114+
childNodes = new NodeList(4);
117115
}
118116
return childNodes;
119117
}
@@ -393,31 +391,40 @@ public Elements children() {
393391
* @return a list of child elements
394392
*/
395393
List<Element> childElementsList() {
396-
if (childNodeSize() == 0)
397-
return EmptyChildren; // short circuit creating empty
398-
399-
List<Element> children;
400-
if (shadowChildrenRef == null || (children = shadowChildrenRef.get()) == null) {
401-
final int size = childNodes.size();
402-
children = new ArrayList<>(size);
403-
//noinspection ForLoopReplaceableByForEach (beacause it allocates an Iterator which is wasteful here)
404-
for (int i = 0; i < size; i++) {
405-
final Node node = childNodes.get(i);
406-
if (node instanceof Element)
407-
children.add((Element) node);
408-
}
409-
shadowChildrenRef = new WeakReference<>(children);
394+
if (childNodeSize() == 0) return EmptyChildren; // short circuit creating empty
395+
List<Element> children = cachedChildren();
396+
if (children == null) {
397+
children = filterNodes(Element.class);
398+
stashChildren(children);
410399
}
411400
return children;
412401
}
413402

414-
/**
415-
* Clears the cached shadow child elements.
416-
*/
417-
@Override
418-
void nodelistChanged() {
419-
super.nodelistChanged();
420-
shadowChildrenRef = null;
403+
private static final String childElsKey = "jsoup.childEls";
404+
private static final String childElsMod = "jsoup.childElsMod";
405+
406+
/** returns the cached child els, if they exist, and the modcount of our childnodes matches the stashed modcount */
407+
private @Nullable List<Element> cachedChildren() {
408+
Map<String, Object> userData = attributes().userData();
409+
//noinspection unchecked
410+
WeakReference<List<Element>> ref = (WeakReference<List<Element>>) userData.get(childElsKey);
411+
if (ref != null) {
412+
List<Element> els = ref.get();
413+
if (els != null) {
414+
Integer modCount = (Integer) userData.get(childElsMod);
415+
if (modCount != null && modCount == childNodes.modCount())
416+
return els;
417+
}
418+
}
419+
return null;
420+
}
421+
422+
/** caches the child els into the Attribute user data. */
423+
private void stashChildren(List<Element> els) {
424+
Map<String, Object> userData = attributes().userData();
425+
WeakReference<List<Element>> ref = new WeakReference<>(els);
426+
userData.put(childElsKey, ref);
427+
userData.put(childElsMod, childNodes.modCount());
421428
}
422429

423430
/**
@@ -1898,7 +1905,7 @@ public Element shallowClone() {
18981905
protected Element doClone(@Nullable Node parent) {
18991906
Element clone = (Element) super.doClone(parent);
19001907
clone.attributes = attributes != null ? attributes.clone() : null;
1901-
clone.childNodes = new NodeList(clone, childNodes.size());
1908+
clone.childNodes = new NodeList(childNodes.size());
19021909
clone.childNodes.addAll(childNodes); // the children then get iterated and cloned in Node.clone
19031910

19041911
return clone;
@@ -1961,16 +1968,13 @@ public Element filter(NodeFilter nodeFilter) {
19611968
return (Element) super.filter(nodeFilter);
19621969
}
19631970

1964-
private static final class NodeList extends ChangeNotifyingArrayList<Node> {
1965-
private final Element owner;
1966-
1967-
NodeList(Element owner, int initialCapacity) {
1968-
super(initialCapacity);
1969-
this.owner = owner;
1971+
static final class NodeList extends ArrayList<Node> {
1972+
public NodeList(int size) {
1973+
super(size);
19701974
}
19711975

1972-
@Override public void onContentsChanged() {
1973-
owner.nodelistChanged();
1976+
int modCount() {
1977+
return this.modCount;
19741978
}
19751979
}
19761980
}

src/main/java/org/jsoup/nodes/Node.java

-4
Original file line numberDiff line numberDiff line change
@@ -501,10 +501,6 @@ private static Element getDeepChild(Element el) {
501501
return el;
502502
}
503503

504-
void nodelistChanged() {
505-
// Element overrides this to clear its shadow children elements
506-
}
507-
508504
/**
509505
* Replace this node in the DOM with the supplied node.
510506
* @param in the node that will replace the existing node.

0 commit comments

Comments
 (0)