From 81e8c62b5f5a6a45dce850f872a2cef196a7f1ff Mon Sep 17 00:00:00 2001
From: Eric Saxby <77073+sax@users.noreply.github.com>
Date: Tue, 28 Apr 2026 11:09:12 -0700
Subject: [PATCH 1/2] Replace Stream.flat_map + Stream.transform with a single
 Stream.resource

---
Review notes (text in this section is not part of the commit message):

- The resource accumulator is {stack, printer_state}. Stack entries are
  {:enum, enumerable} (not started yet) or {:cont, continuation} (suspended
  after yielding one item).
- When the stack is exhausted the stream halts with an EMPTY stack, so the
  cleanup function only ever halts continuations that are still suspended
  and never re-invokes one that has already run to completion.

 lib/xml_stream.ex | 45 ++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 38 insertions(+), 7 deletions(-)

diff --git a/lib/xml_stream.ex b/lib/xml_stream.ex
index 6eee993..3e1ba0d 100644
--- a/lib/xml_stream.ex
+++ b/lib/xml_stream.ex
@@ -101,14 +101,45 @@ defmodule XmlStream do
     options = Keyword.merge(@default_options, options)
     printer = options[:printer]
 
-    flatten(nodes)
-    |> Stream.transform(printer.init(options), &printer.print/2)
+    Stream.resource(
+      fn -> {[{:enum, nodes}], printer.init(options)} end,
+      fn {stack, state} ->
+        case step(stack, state, printer) do
+          :done -> {:halt, {[], state}}
+          {output, new_stack, new_state} -> {[output], {new_stack, new_state}}
+        end
+      end,
+      fn {stack, _state} ->
+        Enum.each(stack, fn
+          {:cont, cont} -> cont.({:halt, nil})
+          {:enum, _} -> :ok
+        end)
+      end
+    )
+  end
+
+  defp step([], _state, _printer), do: :done
+
+  defp step([{:enum, enum} | rest], state, printer) do
+    case Enumerable.reduce(enum, {:cont, nil}, fn item, _ -> {:suspend, item} end) do
+      {:suspended, item, cont} -> handle(item, [{:cont, cont} | rest], state, printer)
+      _ -> step(rest, state, printer)
+    end
+  end
+
+  defp step([{:cont, cont} | rest], state, printer) do
+    case cont.({:cont, nil}) do
+      {:suspended, item, cont2} -> handle(item, [{:cont, cont2} | rest], state, printer)
+      _ -> step(rest, state, printer)
+    end
+  end
+
+  defp handle(item, stack, state, printer) when is_tuple(item) do
+    {output, new_state} = printer.print(item, state)
+    {output, stack, new_state}
   end
 
-  defp flatten(node) do
-    Stream.flat_map(node, fn
-      operation when is_tuple(operation) -> [operation]
-      operation -> flatten(operation)
-    end)
+  defp handle(item, stack, state, printer) do
+    step([{:enum, item} | stack], state, printer)
   end
 end

From 99963b812d1e38d6cbe6caf12140f468df0d678a Mon Sep 17 00:00:00 2001
From: Eric Saxby <77073+sax@users.noreply.github.com>
Date: Tue, 28 Apr 2026 18:02:57 -0700
Subject: [PATCH 2/2] Speed up XmlStream.Printer

Rewrite escaping, encoding, and name validation to skip work on the
common case: ASCII fast paths, zero-allocation pass-through for
already-safe input, writable-binary accumulators in place of
per-codepoint iolist construction, and direct dispatch in place of
String.Chars protocol.
---
Review notes (text in this section is not part of the commit message):

- escape_binary/1 scans first with escape_binary/2, which returns the input
  binary itself when nothing needs escaping; on the first special character
  it switches to escape_binary/5, which tracks a pending safe run as
  (skip, len) offsets into the original and appends it to the accumulator
  only when the next escape (or the end of input) is reached.
- encode_name/1: nil is excluded from the atom fast path. Atom.to_string(nil)
  is "nil", whereas to_string(nil) is "", so without the exclusion a nil name
  would be accepted as the tag name "nil" instead of raising EncodeError as
  it did before this change.

 lib/xml_stream/printer.ex | 192 +++++++++++++++++++++++++++++++++-----
 1 file changed, 168 insertions(+), 24 deletions(-)

diff --git a/lib/xml_stream/printer.ex b/lib/xml_stream/printer.ex
index 5ae40cf..3cede66 100644
--- a/lib/xml_stream/printer.ex
+++ b/lib/xml_stream/printer.ex
@@ -5,25 +5,157 @@ defmodule XmlStream.Printer do
   @callback init(term) :: term
 
   @doc false
+  def attrs_to_string([]), do: []
+
   def attrs_to_string(attrs) do
-    Enum.map(attrs, fn {key, value} ->
-      [" ", encode_name(key), ~s(="), escape_binary(to_string(value)), ~s(")]
+    Enum.reduce(attrs, <<>>, fn {key, value}, acc ->
+      acc <> " " <> encode_name(key) <> ~s(=") <> escape_binary(to_string(value)) <> ~s(")
     end)
   end
 
   @doc false
-  def escape_binary(""), do: []
-  def escape_binary("&" <> rest), do: ["&amp;" | escape_binary(rest)]
-  def escape_binary("\"" <> rest), do: ["&quot;" | escape_binary(rest)]
-  def escape_binary("'" <> rest), do: ["&apos;" | escape_binary(rest)]
-  def escape_binary("<" <> rest), do: ["&lt;" | escape_binary(rest)]
-  def escape_binary(">" <> rest), do: ["&gt;" | escape_binary(rest)]
-  def escape_binary(<<char::utf8>> <> rest), do: [<<char::utf8>> | escape_binary(rest)]
+  def escape_binary(binary) when is_binary(binary), do: escape_binary(binary, binary)
+
+  defp escape_binary(<<>>, original), do: original
+
+  defp escape_binary(<<?&, rest::bits>>, original) do
+    pos = byte_size(original) - byte_size(rest) - 1
+    escape_binary(rest, original, pos + 1, 0, binary_part(original, 0, pos) <> "&amp;")
+  end
+
+  defp escape_binary(<<?", rest::bits>>, original) do
+    pos = byte_size(original) - byte_size(rest) - 1
+    escape_binary(rest, original, pos + 1, 0, binary_part(original, 0, pos) <> "&quot;")
+  end
+
+  defp escape_binary(<<?', rest::bits>>, original) do
+    pos = byte_size(original) - byte_size(rest) - 1
+    escape_binary(rest, original, pos + 1, 0, binary_part(original, 0, pos) <> "&apos;")
+  end
+
+  defp escape_binary(<<?<, rest::bits>>, original) do
+    pos = byte_size(original) - byte_size(rest) - 1
+    escape_binary(rest, original, pos + 1, 0, binary_part(original, 0, pos) <> "&lt;")
+  end
+
+  defp escape_binary(<<?>, rest::bits>>, original) do
+    pos = byte_size(original) - byte_size(rest) - 1
+    escape_binary(rest, original, pos + 1, 0, binary_part(original, 0, pos) <> "&gt;")
+  end
+
+  defp escape_binary(<<char, rest::bits>>, original) when char < 0x80,
+    do: escape_binary(rest, original)
+
+  defp escape_binary(<<_::utf8, rest::bits>>, original), do: escape_binary(rest, original)
+
+  defp escape_binary(<<>>, _original, _skip, 0, acc), do: acc
+  defp escape_binary(<<>>, original, skip, len, acc), do: acc <> binary_part(original, skip, len)
+
+  defp escape_binary(<<?&, rest::bits>>, original, skip, 0, acc),
+    do: escape_binary(rest, original, skip + 1, 0, acc <> "&amp;")
+
+  defp escape_binary(<<?&, rest::bits>>, original, skip, len, acc),
+    do:
+      escape_binary(
+        rest,
+        original,
+        skip + len + 1,
+        0,
+        acc <> binary_part(original, skip, len) <> "&amp;"
+      )
+
+  defp escape_binary(<<?", rest::bits>>, original, skip, 0, acc),
+    do: escape_binary(rest, original, skip + 1, 0, acc <> "&quot;")
+
+  defp escape_binary(<<?", rest::bits>>, original, skip, len, acc),
+    do:
+      escape_binary(
+        rest,
+        original,
+        skip + len + 1,
+        0,
+        acc <> binary_part(original, skip, len) <> "&quot;"
+      )
+
+  defp escape_binary(<<?', rest::bits>>, original, skip, 0, acc),
+    do: escape_binary(rest, original, skip + 1, 0, acc <> "&apos;")
+
+  defp escape_binary(<<?', rest::bits>>, original, skip, len, acc),
+    do:
+      escape_binary(
+        rest,
+        original,
+        skip + len + 1,
+        0,
+        acc <> binary_part(original, skip, len) <> "&apos;"
+      )
+
+  defp escape_binary(<<?<, rest::bits>>, original, skip, 0, acc),
+    do: escape_binary(rest, original, skip + 1, 0, acc <> "&lt;")
+
+  defp escape_binary(<<?<, rest::bits>>, original, skip, len, acc),
+    do:
+      escape_binary(
+        rest,
+        original,
+        skip + len + 1,
+        0,
+        acc <> binary_part(original, skip, len) <> "&lt;"
+      )
+
+  defp escape_binary(<<?>, rest::bits>>, original, skip, 0, acc),
+    do: escape_binary(rest, original, skip + 1, 0, acc <> "&gt;")
+
+  defp escape_binary(<<?>, rest::bits>>, original, skip, len, acc),
+    do:
+      escape_binary(
+        rest,
+        original,
+        skip + len + 1,
+        0,
+        acc <> binary_part(original, skip, len) <> "&gt;"
+      )
+
+  defp escape_binary(<<char, rest::bits>>, original, skip, len, acc) when char < 0x80,
+    do: escape_binary(rest, original, skip, len + 1, acc)
+
+  defp escape_binary(<<cp::utf8, rest::bits>>, original, skip, len, acc) when cp < 0x800,
+    do: escape_binary(rest, original, skip, len + 2, acc)
+
+  defp escape_binary(<<cp::utf8, rest::bits>>, original, skip, len, acc) when cp < 0x10000,
+    do: escape_binary(rest, original, skip, len + 3, acc)
+
+  defp escape_binary(<<_::utf8, rest::bits>>, original, skip, len, acc),
+    do: escape_binary(rest, original, skip, len + 4, acc)
 
   @doc false
-  def escape_cdata(""), do: []
-  def escape_cdata("]]>" <> rest), do: ["]]]]><![CDATA[>" | escape_cdata(rest)]
-  def escape_cdata(<<char::utf8>> <> rest), do: [<<char::utf8>> | escape_cdata(rest)]
+  def escape_cdata(binary) when is_binary(binary) do
+    escape_cdata(binary, binary, 0, 0)
+  end
+
+  defp escape_cdata(<<>>, original, 0, _len), do: original
+  defp escape_cdata(<<>>, original, skip, len), do: [binary_part(original, skip, len)]
+
+  defp escape_cdata(<<"]]>", rest::bits>>, original, skip, 0),
+    do: ["]]]]><![CDATA[>" | escape_cdata(rest, original, skip + 3, 0)]
+
+  defp escape_cdata(<<"]]>", rest::bits>>, original, skip, len),
+    do: [
+      binary_part(original, skip, len),
+      "]]]]><![CDATA[>" | escape_cdata(rest, original, skip + len + 3, 0)
+    ]
+
+  defp escape_cdata(<<char, rest::bits>>, original, skip, len) when char < 0x80,
+    do: escape_cdata(rest, original, skip, len + 1)
+
+  defp escape_cdata(<<cp::utf8, rest::bits>>, original, skip, len) when cp < 0x800,
+    do: escape_cdata(rest, original, skip, len + 2)
+
+  defp escape_cdata(<<cp::utf8, rest::bits>>, original, skip, len) when cp < 0x10000,
+    do: escape_cdata(rest, original, skip, len + 3)
+
+  defp escape_cdata(<<_::utf8, rest::bits>>, original, skip, len),
+    do: escape_cdata(rest, original, skip, len + 4)
 
   @doc false
   def encode_comment(text) do
@@ -40,23 +172,32 @@ defmodule XmlStream.Printer do
   defp validate_comment!(<<_char::utf8>> <> rest), do: validate_comment!(rest)
 
   @doc false
-  def encode_name(name) do
-    name = to_string(name)
+  def encode_name(name) when is_binary(name) do
     validate_name!(name)
     name
   end
 
+  def encode_name(name) when is_atom(name) and not is_nil(name) do
+    encode_name(Atom.to_string(name))
+  end
+
+  def encode_name(name), do: encode_name(to_string(name))
+
   @doc false
-  def pi_target_name(name) do
-    if String.downcase(name) == "xml" do
-      raise EncodeError, message: "'xml' is a reserved name"
-    else
-      encode_name(name)
-    end
+  def pi_target_name(<<x, m, l>>)
+      when (x === ?x or x === ?X) and (m === ?m or m === ?M) and (l === ?l or l === ?L) do
+    raise EncodeError, message: "'xml' is a reserved name"
   end
 
+  def pi_target_name(name), do: encode_name(name)
+
   defp validate_name!(""), do: raise(EncodeError, message: "Invalid tag name")
 
+  defp validate_name!(<<char, rest::bits>>)
+       when char in ?A..?Z or char in ?a..?z or char === ?_ or char === ?: do
+    validate_name_rest!(rest)
+  end
+
   defp validate_name!(<<char::utf8>> <> rest) do
     validate_name_start!(char)
     validate_name_rest!(rest)
@@ -88,11 +229,14 @@ defmodule XmlStream.Printer do
 
   defp validate_name_rest!(""), do: :ok
 
+  defp validate_name_rest!(<<char, rest::bits>>)
+       when char in ?A..?Z or char in ?a..?z or char in ?0..?9 or
+              char === ?_ or char === ?: or char === ?- or char === ?. do
+    validate_name_rest!(rest)
+  end
+
   defp validate_name_rest!(<<char::utf8>> <> rest)
-       when char in [?:, ?_, ?-, ?., 0xB7] or
-              char in ?0..?9 or
-              char in ?A..?Z or
-              char in ?a..?z or
+       when char === 0xB7 or
               char in 0xC0..0xD6 or
               char in 0xD8..0xF6 or
               char in 0xF8..0x37D or