Skip to content

Commit 001b4dd

Browse files
committed
Consume escaped subqueries correctly
Escaped sequences can now contain spaces (for example, `\34 a`), so consumeSubQuery needs to handle them correctly. It now consumes a complete CSS identifier and re-escapes it, instead of bouncing over a single escaped character. The unescape -> escape round trip is a bit redundant. If this becomes performance critical, we could add a method that consumes escape sequences without actually unescaping them. Alternatively, we could refactor consumeSubQuery to parse into Evaluators directly. But the rest is already pretty string-based, and users can keep parsed Evaluators for reuse, so I am deeming this OK for now. Fixes the issue noted in #2305.
1 parent 44460bc commit 001b4dd

File tree

2 files changed

+26
-2
lines changed

2 files changed

+26
-2
lines changed

src/main/java/org/jsoup/select/QueryParser.java

+1-2
Original file line numberDiff line numberDiff line change
@@ -160,8 +160,7 @@ private String consumeSubQuery() {
160160
else if (tq.matches("["))
161161
sq.append("[").append(tq.chompBalanced('[', ']')).append("]");
162162
else if (tq.matches("\\")) { // bounce over escapes
163-
sq.append(tq.consume());
164-
if (!tq.isEmpty()) sq.append(tq.consume());
163+
sq.append(TokenQueue.escapeCssIdentifier(tq.consumeCssIdentifier()));
165164
} else
166165
sq.append(tq.consume());
167166
}

src/test/java/org/jsoup/select/QueryParserTest.java

+25
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import org.jsoup.Jsoup;
44
import org.jsoup.nodes.Document;
5+
import org.jsoup.nodes.Element;
56
import org.junit.jupiter.api.Test;
67

78
import static org.jsoup.select.EvaluatorDebug.asElement;
@@ -182,4 +183,28 @@ public void exceptOnUnhandledEvaluator() {
182183
assertEquals("(Or (And (Id '#el')(AttributeWithValueContaining '[class*=child]'))(Class '.some-other-selector'))", sexpr("#el[class*=child], .some-other-selector"));
183184
assertEquals("(Or (And (AttributeWithValueContaining '[class*=child]')(Ancestor (Id '#parent')))(And (Class '.nested')(Ancestor (Class '.some-other-selector'))))", sexpr("#parent [class*=child], .some-other-selector .nested"));
184185
}
186+
187+
@Test void parsesEscapedSubqueries() {
188+
String html = "<div class='-4a'>One</div> <div id='-4a'>Two</div>";
189+
Document doc = Jsoup.parse(html);
190+
191+
String classQ = "div.-\\34 a";
192+
Element div1 = doc.expectFirst(classQ);
193+
assertEquals("One", div1.wholeText());
194+
195+
String idQ = "#-\\34 a";
196+
Element div2 = doc.expectFirst(idQ);
197+
assertEquals("Two", div2.wholeText());
198+
199+
String genClassQ = "html > body > div.-\\34 a";
200+
assertEquals(genClassQ, div1.cssSelector());
201+
assertSame(div1, doc.expectFirst(genClassQ));
202+
203+
String deepIdQ = "html > body > #-\\34 a";
204+
assertEquals(idQ, div2.cssSelector());
205+
assertSame(div2, doc.expectFirst(deepIdQ));
206+
207+
assertEquals("(ImmediateParentRun (Tag 'html')(Tag 'body')(And (Tag 'div')(Class '.-4a')))", sexpr(genClassQ));
208+
assertEquals("(ImmediateParentRun (Tag 'html')(Tag 'body')(Id '#-4a'))", sexpr(deepIdQ));
209+
}
185210
}

0 commit comments

Comments
 (0)