Skip to content
Open
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
// The .NET Foundation licenses this file to you under the MIT license.

using System.Data;
using System.Data.SqlTypes;
using System.Text;
using System.Xml;
using Microsoft.Data.SqlClient;
Comment thread
AndriySvyryd marked this conversation as resolved.
using Microsoft.EntityFrameworkCore.Storage.Json;

Expand All @@ -19,6 +21,9 @@ public class SqlServerStringTypeMapping : StringTypeMapping
// Size caps (4000 for Unicode, 8000 for ANSI, going by the constant names); AnsiMax is assigned to _maxSize
// in the constructor. NOTE(review): the UnicodeMax assignment is outside this excerpt - confirm it is symmetric.
private const int UnicodeMax = 4000;
private const int AnsiMax = 8000;

// The one UTF-8 prolog spelling that is recognized on xml values. It is matched with an ordinal StartsWith, so
// any other valid spelling (single quotes, "UTF-8" casing, extra whitespace, a standalone attribute,
// version 1.1) is not recognized and is left untouched.
private const string Utf8XmlDeclaration = "<?xml version=\"1.0\" encoding=\"utf-8\"?>";

@cincuranet cincuranet Jun 22, 2026

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think matching on this is not even good enough. The grammar is:

prolog	   ::=   	XMLDecl? Misc* (doctypedecl Misc*)?
XMLDecl	   ::=   	'<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
VersionInfo	   ::=   	S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')
Eq	   ::=   	S? '=' S?
VersionNum	   ::=   	'1.' [0-9]+
Misc	   ::=   	Comment | PI | S 

So even without going into esoteric combinations, ...

  • S isn't just a space - it's space, tab, CR, or LF.
  • Single or double quotes are allowed, independently per attribute: version='1.0' encoding="utf-8" is fine.
  • "UTF-8", "utf-8", and "Utf-8" are all valid.
  • standalone may be present.

Without going into full System.Xml mode, maybe a decent regex would cover the majority of cases?

// Replacement prolog: where Utf8XmlDeclaration is matched at the start of an xml value, this text is emitted
// in its place (both for parameter values and for generated SQL literals).
private const string Utf16XmlDeclaration = "<?xml version=\"1.0\" encoding=\"utf-16\"?>";

// Single shared comparer instance. NOTE(review): its usage is outside this excerpt.
private static readonly CaseInsensitiveValueComparer CaseInsensitiveValueComparer = new();

// Computed once in the constructor from the SqlDbType, parameters.Unicode and whether the store type name
// starts with "n".
private readonly bool _isUtf16;
Expand Down Expand Up @@ -110,8 +115,9 @@ protected SqlServerStringTypeMapping(RelationalTypeMappingParameters parameters,
_maxSize = AnsiMax;
}

_isUtf16 = parameters.Unicode && parameters.StoreType.StartsWith("n", StringComparison.OrdinalIgnoreCase);
_sqlDbType = sqlDbType;
_isUtf16 = _sqlDbType == SqlDbType.Xml
|| (parameters.Unicode && parameters.StoreType.StartsWith("n", StringComparison.OrdinalIgnoreCase));
}

/// <summary>
Expand Down Expand Up @@ -147,6 +153,14 @@ protected override RelationalTypeMapping Clone(RelationalTypeMappingParameters p
protected override void ConfigureParameter(DbParameter parameter)
{
var value = parameter.Value;

if (_sqlDbType == SqlDbType.Xml
&& value is string stringValue
&& stringValue.StartsWith(Utf8XmlDeclaration, StringComparison.Ordinal))

@Charlieface Charlieface Jun 20, 2026

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not a valid way to check a prolog. It may cover a lot of cases, but the version number can now be 1.1 as well as 1.0, there are many encodings other than UTF-8 and UTF-16, the standalone parameter is optional, and whitespace is optional as well. See https://www.w3.org/TR/2006/REC-xml11-20060816/#sec-prolog-dtd for more info.

If you really want to go down the road of validating the declaration, then you will definitely need an XmlReader, but maybe read it manually and dump it as soon as the declaration is confirmed correct. It might be difficult or impossible to rewind it when the encoding needs to change, so you will probably need a new XmlReader in that case.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, sounds like just passing the XmlReader in SqlXml would be the more robust solution.
And perhaps you can open an issue for SqlClient to stream it instead of converting to a string.

{
value = parameter.Value = string.Concat(Utf16XmlDeclaration, stringValue.AsSpan(Utf8XmlDeclaration.Length));
}
Comment thread
AndriySvyryd marked this conversation as resolved.

var length = (value as string)?.Length;

if (_sqlDbType.HasValue
Expand Down Expand Up @@ -211,7 +225,17 @@ protected override string GenerateNonNullSqlLiteral(object value)
var concatCount = 1;
var concatStartList = new List<int>();
var insideConcat = false;
for (i = 0; i < stringValue.Length; i++)

if (_sqlDbType == SqlDbType.Xml
&& stringValue.StartsWith(Utf8XmlDeclaration, StringComparison.Ordinal))
{
// The value is sent to the server as 'xml', so a UTF-8 prolog is rewritten to UTF-16.
builder.Append('N').Append('\'').Append(Utf16XmlDeclaration);
openApostrophe = true;
start = Utf8XmlDeclaration.Length;
}

for (i = start; i < stringValue.Length; i++)
{
var lineFeed = stringValue[i] == '\n';
var carriageReturn = stringValue[i] == '\r';
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ private static readonly GuidTypeMapping Uniqueidentifier
= new("uniqueidentifier");

private static readonly SqlServerStringTypeMapping Xml
= new("xml", unicode: true, storeTypePostfix: StoreTypePostfix.None);
= new("xml", unicode: true, sqlDbType: SqlDbType.Xml, storeTypePostfix: StoreTypePostfix.None);

private static readonly Dictionary<Type, RelationalTypeMapping> _clrTypeMappings;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3816,6 +3816,86 @@ FROM INFORMATION_SCHEMA.COLUMNS
return actual;
}

// The grinning-face emoji is outside the BMP (a UTF-16 surrogate pair, four UTF-8 bytes) and the euro sign
// is a single UTF-16 code unit but three UTF-8 bytes; both are represented differently in UTF-16 than in
// UTF-8 and are lost when an xml value is sent to the server as a non-Unicode string, which makes them good
// probes for the SqlXml/SqlDbType.Xml parameter path.

// U+1F600, the grinning-face emoji described above.
private const string XmlEmoji = "\U0001F600";

// U+20AC, the euro sign described above.
private const string XmlEuro = "\u20AC";

[Theory]
[InlineData("<root>" + XmlEmoji + XmlEuro + "</root>", "<root>" + XmlEmoji + XmlEuro + "</root>")]
// Values carrying an explicit prolog: the value travels as 'xml' rather than 'nvarchar(max)', so a
// non-UTF-16 declaration is accepted too.
[InlineData("<?xml version=\"1.0\" encoding=\"utf-8\"?><root>" + XmlEmoji + "</root>", "<root>" + XmlEmoji + "</root>")]
[InlineData("<?xml version=\"1.0\" encoding=\"utf-16\"?><root>" + XmlEuro + "</root>", "<root>" + XmlEuro + "</root>")]
// Shapes other than a single well-formed document that the 'xml' store type still accepts.
[InlineData("", "")]
[InlineData("text fragment", "text fragment")]
[InlineData("<a/><b/>", "<a /><b />")]
public async Task Xml_value_round_trips(string value, string expected)
{
    const string utf8Prolog = "<?xml version=\"1.0\" encoding=\"utf-8\"?>";
    const string utf16Prolog = "<?xml version=\"1.0\" encoding=\"utf-16\"?>";

    await using var context = CreateContext();

    // Insert the value through an 'xml' parameter.
    var entity = new XmlTestDocument { Content = value };
    context.Add(entity);
    await context.SaveChangesAsync();

    // NOTE: 'id' and 'value' are captured by the query below and their names become the SQL parameter
    // names asserted further down, so they must not be renamed.
    var id = entity.Id;
    context.ChangeTracker.Clear();

    // An xml column cannot be compared in a WHERE clause, hence the lookup by key. The COALESCE against
    // the original value forces that value to be sent as an 'xml' parameter from a query as well.
    var query = context.Set<XmlTestDocument>()
        .Where(d => d.Id == id)
        .Select(d => d.Content ?? value);

    // What actually reaches the server: a leading UTF-8 prolog is swapped for the UTF-16 one.
    var sentXml = value;
    if (value.StartsWith(utf8Prolog, StringComparison.Ordinal))
    {
        sentXml = utf16Prolog + value.Substring(utf8Prolog.Length);
    }

    var expectedQueryString =
        $"""
        DECLARE @value xml = N'{sentXml}';
        DECLARE @id int = {id};

        SELECT COALESCE([x].[Content], @value)
        FROM [XmlTestDocument] AS [x]
        WHERE [x].[Id] = @id
        """;
    Assert.Equal(expectedQueryString, query.ToQueryString());

    var actual = await query.SingleAsync();
    Assert.Equal(expected, actual);

    var expectedInsert =
        $"""
        @p0='{sentXml}' (Size = 4000) (DbType = Xml)

        SET IMPLICIT_TRANSACTIONS OFF;
        SET NOCOUNT ON;
        INSERT INTO [XmlTestDocument] ([Content])
        OUTPUT INSERTED.[Id]
        VALUES (@p0);
        """;

    var expectedSelect =
        $"""
        @value='{sentXml}' (Size = 4000) (DbType = Xml)
        @id='{id}'

        SELECT TOP(2) COALESCE([x].[Content], @value)
        FROM [XmlTestDocument] AS [x]
        WHERE [x].[Id] = @id
        """;

    AssertSql(expectedInsert, expectedSelect);
}

// Minimal entity used by Xml_value_round_trips; Content is the property mapped to the 'xml' column type.
private class XmlTestDocument
Comment thread
AndriySvyryd marked this conversation as resolved.
{
public int Id { get; set; }
public string Content { get; set; }
}

// Forwards the expected SQL baselines to the fixture's test SQL logger factory for comparison.
private void AssertSql(params string[] expected)
{
    var sqlLoggerFactory = Fixture.TestSqlLoggerFactory;
    sqlLoggerFactory.AssertBaseline(expected);
}

Expand Down Expand Up @@ -3897,6 +3977,8 @@ protected override void OnModelCreating(ModelBuilder modelBuilder, DbContext con
b.Property(e => e.DecimalAsDec52).HasPrecision(7, 3);
});

modelBuilder.Entity<XmlTestDocument>().Property(e => e.Content).HasColumnType("xml");

MakeRequired<MappedDataTypes>(modelBuilder);
MakeRequired<MappedSquareDataTypes>(modelBuilder);
MakeRequired<MappedDataTypesWithIdentity>(modelBuilder);
Expand Down
16 changes: 16 additions & 0 deletions test/EFCore.SqlServer.Tests/Storage/SqlServerTypeMappingTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -378,6 +378,22 @@ public virtual void Char_Utf8()
Assert.Equal(DbType.String, parameter.DbType);
}

// A null value created through the 'xml' mapping must still be typed as SqlDbType.Xml and carry DBNull.
[Fact]
public virtual void Xml_null_parameter_is_sent_as_SqlDbType_Xml()
{
    var xmlMapping = GetMapping("xml");

    using var testCommand = CreateTestCommand();
    var created = xmlMapping.CreateParameter(testCommand, "foo", null, nullable: true);
    var sqlParameter = (SqlParameter)created;

    Assert.Equal(SqlDbType.Xml, sqlParameter.SqlDbType);
    Assert.Equal(DBNull.Value, sqlParameter.Value);
}

// The SQL literal produced by the 'xml' mapping must carry the N prefix so a non-BMP character survives.
[Fact]
public virtual void Xml_literal_is_generated_as_unicode()
{
    var xmlMapping = GetMapping("xml");

    Test_GenerateSqlLiteral_helper(xmlMapping, "<r>\U0001F62D</r>", "N'<r>\U0001F62D</r>'");
}

[Fact]
public virtual void DateOnly_code_literal_generated_correctly()
{
Expand Down
Loading