Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add select methods returning element streams #2092

Merged
merged 11 commits into from
Dec 14, 2024
10 changes: 8 additions & 2 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,14 @@
### Changes

* Updated the minimum Android API Level validation from 10 to **21**. As with previous jsoup versions, Android
developers need to enable core library desugaring. The minimum Java version remains Java 8.
[2173](https://github.com/jhy/jsoup/pull/2173)

### Improvements

* Added `Element#selectStream(String query)` and `Element#selectStream(Evaluator evaluator)` methods that return a
`Stream` of matching elements. Elements are evaluated and returned as they are found, and the stream can be
terminated early. [2092](https://github.com/jhy/jsoup/pull/2092)

### Bug Fixes

Expand Down
40 changes: 34 additions & 6 deletions src/main/java/org/jsoup/nodes/Element.java
Original file line number Diff line number Diff line change
Expand Up @@ -486,6 +486,39 @@ public Elements select(Evaluator evaluator) {
return Selector.select(evaluator, this);
}

/**
Lazily selects the elements that match the given {@link Selector} CSS query, using this element as the starting
context. A match may be this element itself or any element beneath it.
<p>
{@link #select(String query)} gathers every match into a complete list before returning. This method instead hands
back a {@link Stream} that evaluates elements only as the stream is consumed, so a pipeline using
{@code filter}, {@code map}, or {@code findFirst} can stop early without testing the remaining elements.
</p>

@param cssQuery a {@link Selector} CSS-like query
@return a {@link Stream} of the elements that match the query (empty if none match)
@throws Selector.SelectorParseException (unchecked) on an invalid CSS query.
@see Selector selector query syntax
@see QueryParser#parse(String)
@since 1.19.1
*/
public Stream<Element> selectStream(String cssQuery) {
    // Selector parses the query and validates it; this element is the root of the search
    final Stream<Element> matches = Selector.selectStream(cssQuery, this);
    return matches;
}

/**
Lazily selects the elements that match the supplied Evaluator, using this element as the starting context.

@param evaluator an element Evaluator
@return a {@link Stream} of the elements that match the evaluator (empty if none match)
@since 1.19.1
*/
public Stream<Element> selectStream(Evaluator evaluator) {
    // this element is the root of the search
    final Stream<Element> matches = Selector.selectStream(evaluator, this);
    return matches;
}

/**
* Find the first Element that matches the {@link Selector} CSS query, with this element as the starting context.
* <p>This is effectively the same as calling {@code element.select(query).first()}, but is more efficient as query
Expand Down Expand Up @@ -1125,12 +1158,7 @@ public Elements getElementsByTag(String tagName) {
*/
public @Nullable Element getElementById(String id) {
    Validate.notEmpty(id);
    // Collector.findFirst yields the first match or null, so there is no need to collect every
    // matching element into an Elements list just to read index 0.
    return Collector.findFirst(new Evaluator.Id(id), this);
}

/**
Expand Down
28 changes: 17 additions & 11 deletions src/main/java/org/jsoup/select/Collector.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
import org.jsoup.nodes.Element;
import org.jspecify.annotations.Nullable;

import java.util.Optional;
import java.util.stream.Collectors;
import java.util.stream.Stream;

/**
* Collects a list of elements that match the supplied criteria.
Expand All @@ -16,17 +16,26 @@ public class Collector {
private Collector() {}

/**
Build a list of elements, by visiting root and every descendant of root, and testing it against the evaluator.
Build a list of elements, by visiting the root and every descendant of root, and testing it against the Evaluator.
@param eval Evaluator to test elements against
@param root root of tree to descend
@return list of matches; empty if none
*/
public static Elements collect (Evaluator eval, Element root) {
eval.reset();
public static Elements collect(Evaluator eval, Element root) {
return stream(eval, root).collect(Collectors.toCollection(Elements::new));
}

/**
Obtain a Stream of elements by visiting the root and every descendant of root and testing it against the evaluator.
The evaluator is reset before the stream is built; elements are tested as the returned stream is consumed.

@param evaluator Evaluator to test elements against
@param root root of tree to descend
@return A {@link Stream} of matches
@since 1.19.1
*/
public static Stream<Element> stream(Evaluator evaluator, Element root) {
    evaluator.reset();
    final Stream<Element> candidates = root.stream();
    return candidates.filter(evaluator.asPredicate(root));
}

/**
Expand All @@ -37,9 +46,6 @@ public static Elements collect (Evaluator eval, Element root) {
@return the first match; {@code null} if none
*/
public static @Nullable Element findFirst(Evaluator eval, Element root) {
    // stream() resets the evaluator and filters lazily; findFirst() short-circuits on the first match,
    // and an empty result is mapped to null for callers.
    return stream(eval, root).findFirst().orElse(null);
}
}
70 changes: 50 additions & 20 deletions src/main/java/org/jsoup/select/Selector.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import java.util.Collection;
import java.util.IdentityHashMap;
import java.util.stream.Stream;

/**
* CSS-like element selector, that finds elements matching a query.
Expand Down Expand Up @@ -90,24 +91,24 @@ public class Selector {
private Selector() {}

/**
Find Elements matching the CSS query. The query is checked to be non-empty, parsed into an Evaluator, and then
evaluated against the root.

@param query CSS selector
@param root root element to descend into
@return matching elements, empty if none
@throws Selector.SelectorParseException (unchecked) on an invalid CSS query.
*/
public static Elements select(String query, Element root) {
    Validate.notEmpty(query);
    final Evaluator parsed = QueryParser.parse(query);
    return select(parsed, root);
}

/**
Find Elements matching the Evaluator.

@param evaluator CSS Evaluator
@param root root (context) element to start from
@return matching elements, empty if none
*/
public static Elements select(Evaluator evaluator, Element root) {
Validate.notNull(evaluator);
Expand All @@ -116,11 +117,39 @@ public static Elements select(Evaluator evaluator, Element root) {
}

/**
Finds a Stream of elements matching the CSS query. The query is checked to be non-empty, parsed into an Evaluator,
and then evaluated against the root as the stream is consumed.

@param query CSS selector
@param root root element to descend into
@return a Stream of matching elements, empty if none
@throws Selector.SelectorParseException (unchecked) on an invalid CSS query.
@since 1.19.1
*/
public static Stream<Element> selectStream(String query, Element root) {
    Validate.notEmpty(query);
    final Evaluator parsed = QueryParser.parse(query);
    return selectStream(parsed, root);
}

/**
Finds a Stream of elements matching the Evaluator. Both arguments are checked to be non-null before the stream is
created.

@param evaluator CSS Evaluator
@param root root (context) element to start from
@return a Stream of matching elements, empty if none
@since 1.19.1
*/
public static Stream<Element> selectStream(Evaluator evaluator, Element root) {
    Validate.notNull(evaluator);
    Validate.notNull(root);
    final Stream<Element> matches = Collector.stream(evaluator, root);
    return matches;
}

/**
Find elements matching the query.

@param query CSS selector
@param roots root elements to descend into
@return matching elements, empty if none
*/
public static Elements select(String query, Iterable<Element> roots) {
Validate.notEmpty(query);
Expand Down Expand Up @@ -159,10 +188,11 @@ static Elements filterOut(Collection<Element> elements, Collection<Element> outs
}

/**
Find the first Element that matches the query.

@param cssQuery CSS selector
@param root root element to descend into
@return the matching element, or <b>null</b> if none.
*/
public static @Nullable Element selectFirst(String cssQuery, Element root) {
Validate.notEmpty(cssQuery);
Expand Down
11 changes: 11 additions & 0 deletions src/test/java/org/jsoup/nodes/ElementTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -2981,4 +2981,15 @@ void prettySerializationRoundTrips(Document.OutputSettings settings) {
assertEquals("<p CLASS=\"YES\">One</p>", p.outerHtml());
assertEquals("CLASS=\"YES\"", attr.html());
}

// Checks that selectStream finds the same element as the eager select() path.
@Test void testSelectStream() {
    Document doc = Jsoup.parse("<div>Hello world</div>");

    // baseline: eager select, then stream over the resulting list
    Element fromSelect = doc.select("div").stream().findFirst().orElse(null);
    assertEquals("Hello world", fromSelect.text());

    // lazy path: the stream comes straight from the selector
    Element fromStream = doc.selectStream("div").findFirst().orElse(null);
    assertEquals("Hello world", fromStream.text());
}
}
15 changes: 15 additions & 0 deletions src/test/java/org/jsoup/select/SelectorTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@
import org.junit.jupiter.api.Test;

import java.util.IdentityHashMap;
import java.util.List;
import java.util.Locale;
import java.util.stream.Collectors;

import static org.junit.jupiter.api.Assertions.*;

Expand Down Expand Up @@ -402,6 +404,19 @@ public void testByAttributeStarting(Locale locale) {
assertEquals("span", divChilds.get(2).tagName());
}

// Checks that "div > *" via selectStream yields the direct children of each div, in document order.
@Test public void streamParentChildStar() {
    String html = "<div id=1><p>Hello<p><b>there</b></p></div><div id=2><span>Hi</span></div>";
    Document doc = Jsoup.parse(html);

    List<Element> divChilds = doc.selectStream("div > *").collect(Collectors.toList());

    assertEquals(3, divChilds.size());
    String[] expectedTags = {"p", "p", "span"};
    for (int i = 0; i < expectedTags.length; i++) {
        assertEquals(expectedTags[i], divChilds.get(i).tagName());
    }
}

@Test public void multiChildDescent() {
String h = "<div id=foo><h1 class=bar><a href=http://example.com/>One</a></h1></div>";
Document doc = Jsoup.parse(h);
Expand Down
Loading