From 15ecfd9cc2007f4770839f450db5526882d2078b Mon Sep 17 00:00:00 2001 From: NAITOH Jun Date: Wed, 6 May 2026 01:19:22 +0900 Subject: [PATCH 1/2] Optimization of REXML::XPathParser#step ## Benchmark ``` $ benchmark-driver benchmark/xpath.yaml before after before(YJIT) after(YJIT) REXML::XPath.match(REXML::Document.new(xml), 'a//a') 4.170k 4.300k 4.091k 4.087k i/s - 100.000 times in 0.023979s 0.023257s 0.024444s 0.024470s REXML::XPath.match(REXML::Document.new(xml), '//a//a') 196.250 1.378k 293.984 1.701k i/s - 100.000 times in 0.509554s 0.072574s 0.340154s 0.058796s Comparison: REXML::XPath.match(REXML::Document.new(xml), 'a//a') after: 4299.8 i/s before: 4170.3 i/s - 1.03x slower before(YJIT): 4091.0 i/s - 1.05x slower after(YJIT): 4086.6 i/s - 1.05x slower REXML::XPath.match(REXML::Document.new(xml), '//a//a') after(YJIT): 1700.8 i/s after: 1377.9 i/s - 1.23x slower before(YJIT): 294.0 i/s - 5.79x slower before: 196.3 i/s - 8.67x slower ``` - YJIT=ON : 0.99x - 5.79x faster - YJIT=OFF : 1.03x - 7.01x faster --- benchmark/xpath.yaml | 3 ++- lib/rexml/xpath_parser.rb | 11 +++++------ test/xpath/test_base.rb | 30 ++++++++++++++++++++++++++++++ 3 files changed, 37 insertions(+), 7 deletions(-) diff --git a/benchmark/xpath.yaml b/benchmark/xpath.yaml index d6e970eb..072e3160 100644 --- a/benchmark/xpath.yaml +++ b/benchmark/xpath.yaml @@ -24,9 +24,10 @@ contexts: prelude: | require 'rexml/document' - DEPTH = 100 + DEPTH = 30 xml = '' * DEPTH + '' * DEPTH doc = REXML::Document.new(xml) benchmark: "REXML::XPath.match(REXML::Document.new(xml), 'a//a')" : REXML::XPath.match(doc, "a//a") + "REXML::XPath.match(REXML::Document.new(xml), '//a//a')" : REXML::XPath.match(doc, "//a//a") diff --git a/lib/rexml/xpath_parser.rb b/lib/rexml/xpath_parser.rb index 64c8846a..c5d420ce 100644 --- a/lib/rexml/xpath_parser.rb +++ b/lib/rexml/xpath_parser.rb @@ -462,21 +462,20 @@ def step(path_stack, any_type: :element, order: :forward) if nodesets.size == 1 ordered_nodeset = nodesets[0] else + seen = {}.compare_by_identity raw_nodes = [] nodesets.each do |nodeset| nodeset.each do |node| - if node.respond_to?(:raw_node) - raw_nodes << node.raw_node - else - raw_nodes << node - end + raw_node = node.respond_to?(:raw_node) ? node.raw_node : node + next if seen.key?(raw_node) + seen[raw_node] = true + raw_nodes << raw_node end end ordered_nodeset = sort(raw_nodes, order) end new_nodeset = [] ordered_nodeset.each do |node| - # TODO: Remove duplicated new_nodeset << XPathNode.new(node, position: new_nodeset.size + 1) end new_nodeset diff --git a/test/xpath/test_base.rb b/test/xpath/test_base.rb index 764171ab..5670389f 100644 --- a/test/xpath/test_base.rb +++ b/test/xpath/test_base.rb @@ -1276,5 +1276,35 @@ def test_match_with_deprecated_usage ensure $VERBOSE = verbose end + + def test_descendant_no_duplicates + doc = <<-EOT + + + + + + + EOT + + xmldoc = Document.new(doc) + result = XPath.match(xmldoc, "//a//a") + assert_equal(["1", "2", "3"], result.map { |e| e.attributes["id"] }) + end + + def test_descendant_document_order + doc = <<-EOT + + + + + + + + EOT + xmldoc = Document.new(doc) + result = XPath.match(xmldoc, "//a//a") + assert_equal(["1", "2", "3"], result.map { |e| e.attributes["id"] }) + end end end From 6456800ef511354dcd591c8bfe8e20bdd0f86bd2 Mon Sep 17 00:00:00 2001 From: NAITOH Jun Date: Wed, 25 Mar 2026 08:25:47 +0900 Subject: [PATCH 2/2] Add test --- test/xpath/test_base.rb | 74 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/test/xpath/test_base.rb b/test/xpath/test_base.rb index 5670389f..1015014f 100644 --- a/test/xpath/test_base.rb +++ b/test/xpath/test_base.rb @@ -1306,5 +1306,79 @@ def test_descendant_document_order result = XPath.match(xmldoc, "//a//a") assert_equal(["1", "2", "3"], result.map { |e| e.attributes["id"] }) end + + def test_descendant_ancestor_descendant_overlap + doc = <<-EOT + + + + + + + + EOT + + xmldoc = Document.new(doc) + result = XPath.match(xmldoc, "//a//b") + assert_equal(["b1"], result.map { |e| e.attributes["id"] }) + end + + def test_descendant_sibling_branches + doc = <<-EOT + + + + + + + + + EOT + xmldoc = Document.new(doc) + result = XPath.match(xmldoc, "//a//b") + assert_equal(["b1", "b2"], result.map { |e| e.attributes["id"] }) + end + + def test_descendant_document_order_with_shared_subtree + doc = <<-EOT + + + + + + + + + EOT + + xmldoc = Document.new(doc) + result = XPath.match(xmldoc, "//a//b") + assert_equal(["b1", "b2"], result.map { |e| e.attributes["id"] }) + end + + def test_descendant_axis_on_leaf_element + doc = Document.new("") + result = XPath.match(doc, "//b/descendant::node()") + assert_equal([], result) + end + + def test_descendant_axis_position_predicate_per_context_node + doc = <<-EOT + + + + + + + + + + + EOT + + xmldoc = Document.new(doc) + result = XPath.match(xmldoc, "//a/descendant::b[1]") + assert_equal(["b1", "b2"], result.map { |e| e.attributes["id"] }) + end end end