diff --git a/README-V2.md b/README-V2.md
index 5127a44c..575e78d4 100644
--- a/README-V2.md
+++ b/README-V2.md
@@ -1757,6 +1757,16 @@ var importer = MiniExcel.Importers.GetOpenXmlImporter();
var dim = importer.GetSheetDimensions(path);
```
+#### 8. Retrieve Table Data
+
+It is possible to query arbitrary tables from any worksheet.
+You can either keep the rows dynamic or map them to strongly typed objects, just like regular queries:
+
+```csharp
+var importer = MiniExcel.Importers.GetOpenXmlImporter();
+var rows = importer.QueryTable(stream, "Sheet1", "YourTable").ToList();
+```
+
### FAQ
#### Q: Excel header title is not equal to my DTO class property name, how do I map it?
diff --git a/src/MiniExcel.Core/Reflection/MiniExcelMapper.cs b/src/MiniExcel.Core/Reflection/MiniExcelMapper.cs
index fb82fc7d..3997ef01 100644
--- a/src/MiniExcel.Core/Reflection/MiniExcelMapper.cs
+++ b/src/MiniExcel.Core/Reflection/MiniExcelMapper.cs
@@ -56,11 +56,11 @@ public static partial class MiniExcelMapper
//Q: Why need to check every time? A: it needs to check everytime, because it's dictionary
object? itemValue = null;
- if (map.ExcelIndexName is not null && (keys?.Contains(map.ExcelIndexName) is true))
+ if (map.ExcelIndexName is not null && keys?.Contains(map.ExcelIndexName) is true)
{
item.TryGetValue(map.ExcelIndexName, out itemValue);
}
- else if (map.ExcelColumnName is not null && (headersDic?.TryGetValue(map.ExcelColumnName, out var columnId) is true))
+ else if (map.ExcelColumnName is not null && headersDic?.TryGetValue(map.ExcelColumnName, out var columnId) is true)
{
var columnName = keys[columnId];
item.TryGetValue(columnName, out itemValue);
diff --git a/src/MiniExcel.OpenXml/Api/OpenXmlImporter.cs b/src/MiniExcel.OpenXml/Api/OpenXmlImporter.cs
index 113abdc8..45e678ac 100644
--- a/src/MiniExcel.OpenXml/Api/OpenXmlImporter.cs
+++ b/src/MiniExcel.OpenXml/Api/OpenXmlImporter.cs
@@ -1,3 +1,5 @@
+using MiniExcelLib.OpenXml.Reader;
+
// ReSharper disable once CheckNamespace
namespace MiniExcelLib.OpenXml;
@@ -326,7 +328,7 @@ public async Task> GetSheetNamesAsync(Stream stream, bool leaveOpen
await using var disposableArchive = archive.ConfigureAwait(false);
using var reader = await OpenXmlReader.CreateAsync(stream, null, leaveOpen, cancellationToken).ConfigureAwait(false);
- var rels = await reader.GetWorkbookRelsAsync(archive.EntryCollection, cancellationToken).ConfigureAwait(false);
+ var rels = await OpenXmlReader.GetWorkbookRelsAsync(archive.EntryCollection, cancellationToken).ConfigureAwait(false);
return rels?.Select(s => s.Name).ToList() ?? [];
}
@@ -366,7 +368,7 @@ public async Task> GetSheetInformationsAsync(Stream stream, bool
await using var disposableArchve = archive.ConfigureAwait(false);
using var reader = await OpenXmlReader.CreateAsync(stream, null, leaveOpen, cancellationToken).ConfigureAwait(false);
- var rels = await reader.GetWorkbookRelsAsync(archive.EntryCollection, cancellationToken).ConfigureAwait(false);
+ var rels = await OpenXmlReader.GetWorkbookRelsAsync(archive.EntryCollection, cancellationToken).ConfigureAwait(false);
return rels?.Select((s, i) => s.ToSheetInfo((uint)i)).ToList() ?? [];
}
@@ -433,7 +435,6 @@ public async Task> GetColumnNamesAsync(string path, bool has
return await GetColumnNamesAsync(stream, hasHeaderRow, sheetName, startCell, false, cancellationToken).ConfigureAwait(false);
}
-
///
/// Retrieves the column names from the first row (header row) of an Excel sheet.
///
@@ -497,6 +498,94 @@ public async Task RetrieveCommentsAsync(Stream stream, string?
return await reader.ReadCommentsAsync(sheetName, cancellationToken).ConfigureAwait(false);
}
+ /// <summary>
+ /// Queries a named table in an Excel worksheet and returns dynamic objects representing each row.
+ /// </summary>
+ /// <param name="path">The path to the Excel document.</param>
+ /// <param name="sheetName">The name of the worksheet containing the table. Default is "Sheet1".</param>
+ /// <param name="tableName">The name of the table to query. Default is "Table1".</param>
+ /// <param name="cancellationToken">A token to cancel the asynchronous operation.</param>
+ /// <remarks>
+ /// Named tables in Excel are structured data ranges with defined column headers and a unique name.
+ /// This method reads the specified table from the document at <paramref name="path"/> and yields rows as dynamic objects with properties based on the table's column names.
+ /// </remarks>
+ [CreateSyncVersion]
+ public async IAsyncEnumerable QueryTableAsync(string path, string sheetName = "Sheet1", string tableName = "Table1", [EnumeratorCancellation] CancellationToken cancellationToken = default)
+ {
+ var stream = FileHelper.OpenSharedRead(path);
+ await using var disposableStream = stream.ConfigureAwait(false);
+
+ // The stream is created and disposed by this method, so the reader is created with leaveOpen: false.
+ using var reader = await OpenXmlReader.CreateAsync(stream, null, false, cancellationToken).ConfigureAwait(false);
+ // prependHeaders: false, so the reader does not yield a leading header row.
+ await foreach (var table in reader.QueryTableAsync(sheetName, tableName, false, cancellationToken).ConfigureAwait(false))
+ yield return table;
+ }
+
+ /// <summary>
+ /// Queries a named table in an Excel worksheet and returns dynamic objects representing each row.
+ /// </summary>
+ /// <param name="stream">The stream containing the Excel file data. The stream position is not reset after reading.</param>
+ /// <param name="sheetName">The name of the worksheet containing the table. Default is "Sheet1".</param>
+ /// <param name="tableName">The name of the table to query. Default is "Table1".</param>
+ /// <param name="leaveOpen">True to leave the stream open after the query is completed, otherwise false.</param>
+ /// <param name="cancellationToken">A token to cancel the asynchronous operation.</param>
+ /// <remarks>
+ /// Named tables in Excel are structured data ranges with defined column headers and a unique name.
+ /// This method reads from the specified table within a stream and yields rows as dynamic objects with properties based on the table's column names.
+ /// </remarks>
+ [CreateSyncVersion]
+ public async IAsyncEnumerable QueryTableAsync(Stream stream, string sheetName = "Sheet1", string tableName = "Table1", bool leaveOpen = false, [EnumeratorCancellation] CancellationToken cancellationToken = default)
+ {
+ using var reader = await OpenXmlReader.CreateAsync(stream, null, leaveOpen, cancellationToken).ConfigureAwait(false);
+ // prependHeaders: false, so the reader does not yield a leading header row.
+ await foreach (var table in reader.QueryTableAsync(sheetName, tableName, false, cancellationToken).ConfigureAwait(false))
+ yield return table;
+ }
+
+ /// <summary>
+ /// Queries a named table in an Excel worksheet and returns strongly-typed objects representing each row.
+ /// </summary>
+ /// <typeparam name="T">The class type to map each row to. Must have a parameterless constructor. Property names should match the table's column names.</typeparam>
+ /// <param name="path">The path to the Excel document.</param>
+ /// <param name="sheetName">The name of the worksheet containing the table. Default is "Sheet1".</param>
+ /// <param name="tableName">The name of the table to query. Default is "Table1".</param>
+ /// <param name="cancellationToken">A token to cancel the asynchronous operation.</param>
+ /// <remarks>
+ /// Named tables in Excel are structured data ranges with defined column headers and a unique name.
+ /// This method reads the specified table from the document at <paramref name="path"/> and maps each row to an instance of the provided type. The mapping is based on property/field names matching column headers.
+ /// </remarks>
+ [CreateSyncVersion]
+ public async IAsyncEnumerable QueryTableAsync(string path, string sheetName = "Sheet1", string tableName = "Table1", [EnumeratorCancellation] CancellationToken cancellationToken = default)
+ where T : class, new()
+ {
+ var stream = FileHelper.OpenSharedRead(path);
+ await using var disposableStream = stream.ConfigureAwait(false);
+
+ // The stream is created and disposed by this method, so the reader is created with leaveOpen: false.
+ using var reader = await OpenXmlReader.CreateAsync(stream, null, false, cancellationToken).ConfigureAwait(false);
+ await foreach (var table in reader.QueryTableAsync(sheetName, tableName, cancellationToken).ConfigureAwait(false))
+ yield return table;
+ }
+
+ /// <summary>
+ /// Queries a named table in an Excel worksheet and returns strongly-typed objects representing each row.
+ /// </summary>
+ /// <typeparam name="T">The class type to map each row to. Must have a parameterless constructor. Property names should match the table's column names.</typeparam>
+ /// <param name="stream">The stream containing the Excel file data. The stream position is not reset after reading.</param>
+ /// <param name="sheetName">The name of the worksheet containing the table. Default is "Sheet1".</param>
+ /// <param name="tableName">The name of the table to query. Default is "Table1".</param>
+ /// <param name="leaveOpen">True to leave the stream open after the query is completed, otherwise false.</param>
+ /// <param name="cancellationToken">A token to cancel the asynchronous operation.</param>
+ /// <remarks>
+ /// Named tables in Excel are structured data ranges with defined column headers and a unique name.
+ /// This method reads from the specified table within a stream and maps each row to an instance of the provided type. The mapping is based on property/field names matching column headers.
+ /// </remarks>
+ [CreateSyncVersion]
+ public async IAsyncEnumerable QueryTableAsync(Stream stream, string sheetName = "Sheet1", string tableName = "Table1", bool leaveOpen = false, [EnumeratorCancellation] CancellationToken cancellationToken = default)
+ where T : class, new()
+ {
+ using var reader = await OpenXmlReader.CreateAsync(stream, null, leaveOpen, cancellationToken).ConfigureAwait(false);
+ await foreach (var table in reader.QueryTableAsync(sheetName, tableName, cancellationToken).ConfigureAwait(false))
+ yield return table;
+ }
+
#endregion
#region DataReader
diff --git a/src/MiniExcel.OpenXml/FluentMapping/Configuration/CollectionMappingBuilder.cs b/src/MiniExcel.OpenXml/FluentMapping/Configuration/CollectionMappingBuilder.cs
index 13fa931c..f2251049 100644
--- a/src/MiniExcel.OpenXml/FluentMapping/Configuration/CollectionMappingBuilder.cs
+++ b/src/MiniExcel.OpenXml/FluentMapping/Configuration/CollectionMappingBuilder.cs
@@ -1,5 +1,3 @@
-using System.Text.RegularExpressions;
-
namespace MiniExcelLib.OpenXml.FluentMapping.Configuration;
internal partial class CollectionMappingBuilder : ICollectionMappingBuilder where TCollection : IEnumerable
diff --git a/src/MiniExcel.OpenXml/FluentMapping/Configuration/PropertyMappingBuilder.cs b/src/MiniExcel.OpenXml/FluentMapping/Configuration/PropertyMappingBuilder.cs
index 03e0eb9b..8dc01a67 100644
--- a/src/MiniExcel.OpenXml/FluentMapping/Configuration/PropertyMappingBuilder.cs
+++ b/src/MiniExcel.OpenXml/FluentMapping/Configuration/PropertyMappingBuilder.cs
@@ -1,5 +1,3 @@
-using System.Text.RegularExpressions;
-
namespace MiniExcelLib.OpenXml.FluentMapping.Configuration;
internal partial class PropertyMappingBuilder : IPropertyMappingBuilder
diff --git a/src/MiniExcel.OpenXml/FluentMapping/Helpers/ConversionHelper.cs b/src/MiniExcel.OpenXml/FluentMapping/Helpers/ConversionHelper.cs
index 40089cae..a0838cd4 100644
--- a/src/MiniExcel.OpenXml/FluentMapping/Helpers/ConversionHelper.cs
+++ b/src/MiniExcel.OpenXml/FluentMapping/Helpers/ConversionHelper.cs
@@ -1,6 +1,4 @@
using System.Collections.Concurrent;
-using System.Globalization;
-using System.Reflection;
namespace MiniExcelLib.OpenXml.FluentMapping.Helpers;
diff --git a/src/MiniExcel.OpenXml/FluentMapping/Helpers/MappingMetadataExtractor.cs b/src/MiniExcel.OpenXml/FluentMapping/Helpers/MappingMetadataExtractor.cs
index 0a260557..a8336013 100644
--- a/src/MiniExcel.OpenXml/FluentMapping/Helpers/MappingMetadataExtractor.cs
+++ b/src/MiniExcel.OpenXml/FluentMapping/Helpers/MappingMetadataExtractor.cs
@@ -1,6 +1,3 @@
-using System.Reflection;
-using MiniExcelLib.Core.Helpers;
-using MiniExcelLib.Core.Reflection;
using MiniExcelLib.OpenXml.FluentMapping.Helpers;
namespace MiniExcelLib.OpenXml.FluentMapping;
diff --git a/src/MiniExcel.OpenXml/FluentMapping/MappingCellStream.cs b/src/MiniExcel.OpenXml/FluentMapping/MappingCellStream.cs
index 403cd52c..f7ecf324 100644
--- a/src/MiniExcel.OpenXml/FluentMapping/MappingCellStream.cs
+++ b/src/MiniExcel.OpenXml/FluentMapping/MappingCellStream.cs
@@ -1,5 +1,3 @@
-using MiniExcelLib.Core.Abstractions;
-
namespace MiniExcelLib.OpenXml.FluentMapping;
internal readonly struct MappingCellStream(IEnumerable items, CompiledMapping mapping, string[] columnLetters) : IMappingCellStream
diff --git a/src/MiniExcel.OpenXml/FluentMapping/MappingCellStreamAdapter.cs b/src/MiniExcel.OpenXml/FluentMapping/MappingCellStreamAdapter.cs
index 0596742e..2274d79a 100644
--- a/src/MiniExcel.OpenXml/FluentMapping/MappingCellStreamAdapter.cs
+++ b/src/MiniExcel.OpenXml/FluentMapping/MappingCellStreamAdapter.cs
@@ -1,6 +1,3 @@
-using MiniExcelLib.Core.Abstractions;
-using MiniExcelLib.Core.Reflection;
-
namespace MiniExcelLib.OpenXml.FluentMapping;
internal class MappingCellStreamAdapter(MappingCellStream cellStream, string[] columnLetters)
diff --git a/src/MiniExcel.OpenXml/FluentMapping/MappingCompiler.cs b/src/MiniExcel.OpenXml/FluentMapping/MappingCompiler.cs
index efc5b6c3..f6f36c15 100644
--- a/src/MiniExcel.OpenXml/FluentMapping/MappingCompiler.cs
+++ b/src/MiniExcel.OpenXml/FluentMapping/MappingCompiler.cs
@@ -1,9 +1,5 @@
-using System.Reflection;
-using MiniExcelLib.Core.Helpers;
-using MiniExcelLib.Core.Reflection;
using MiniExcelLib.OpenXml.FluentMapping.Configuration;
using MiniExcelLib.OpenXml.FluentMapping.Helpers;
-using MiniExcelLib.OpenXml.Utils;
namespace MiniExcelLib.OpenXml.FluentMapping;
diff --git a/src/MiniExcel.OpenXml/FluentMapping/MappingReader.cs b/src/MiniExcel.OpenXml/FluentMapping/MappingReader.cs
index 049fd3a5..1725adc1 100644
--- a/src/MiniExcel.OpenXml/FluentMapping/MappingReader.cs
+++ b/src/MiniExcel.OpenXml/FluentMapping/MappingReader.cs
@@ -1,3 +1,5 @@
+using MiniExcelLib.OpenXml.Reader;
+
namespace MiniExcelLib.OpenXml.FluentMapping;
internal static partial class MappingReader where T : class, new()
@@ -203,8 +205,7 @@ private static Dictionary InitializeCollections(CompiledMapping m
else
{
// This should never happen with properly optimized mappings
- throw new InvalidOperationException(
- "OptimizedCollectionHelpers is null. Ensure the mapping was properly compiled and optimized.");
+ throw new InvalidOperationException("OptimizedCollectionHelpers is null. Ensure the mapping was properly compiled and optimized.");
}
return collections;
@@ -469,4 +470,4 @@ private static bool HasAnyData(T item, CompiledMapping mapping)
bool b => !b,
_ => false
};
-}
\ No newline at end of file
+}
diff --git a/src/MiniExcel.OpenXml/FluentMapping/MappingTemplateProcessor.cs b/src/MiniExcel.OpenXml/FluentMapping/MappingTemplateProcessor.cs
index 10779bd6..e407356f 100644
--- a/src/MiniExcel.OpenXml/FluentMapping/MappingTemplateProcessor.cs
+++ b/src/MiniExcel.OpenXml/FluentMapping/MappingTemplateProcessor.cs
@@ -1,27 +1,11 @@
-using System.Text;
-using System.Xml;
-using MiniExcelLib.OpenXml.Helpers;
-using MiniExcelLib.OpenXml.Utils;
-using Zomp.SyncMethodGenerator;
-
namespace MiniExcelLib.OpenXml.FluentMapping;
internal partial struct MappingTemplateProcessor(CompiledMapping mapping) where T : class
{
[CreateSyncVersion]
- public async Task ProcessSheetAsync(
- Stream sourceStream,
- Stream targetStream,
- IEnumerator dataEnumerator,
- CancellationToken cancellationToken)
+ public async Task ProcessSheetAsync(Stream sourceStream, Stream targetStream, IEnumerator dataEnumerator, CancellationToken cancellationToken)
{
- var readerSettings = new XmlReaderSettings
- {
- Async = true,
- IgnoreWhitespace = false,
- IgnoreComments = false,
- CheckCharacters = false
- };
+ var readerSettings = XmlReaderHelper.GetXmlReaderSettings();
var writerSettings = new XmlWriterSettings
{
@@ -38,7 +22,6 @@ public async Task ProcessSheetAsync(
var currentItem = dataEnumerator.MoveNext() ? dataEnumerator.Current : null;
var currentItemIndex = currentItem is not null ? 0 : -1;
-
// Track which rows have been written from the template
var writtenRows = new HashSet();
diff --git a/src/MiniExcel.OpenXml/Models/ExcelRange.cs b/src/MiniExcel.OpenXml/Models/ExcelRange.cs
index bea1fd20..e8ce6531 100644
--- a/src/MiniExcel.OpenXml/Models/ExcelRange.cs
+++ b/src/MiniExcel.OpenXml/Models/ExcelRange.cs
@@ -19,8 +19,8 @@ internal ExcelRangeElement(int startIndex, int endIndex)
public class ExcelRange(int maxRow, int maxColumn)
{
- public string StartCell { get; internal set; }
- public string EndCell { get; internal set; }
+ public string? StartCell { get; internal set; }
+ public string? EndCell { get; internal set; }
public ExcelRangeElement Rows { get; } = new(1, maxRow);
public ExcelRangeElement Columns { get; } = new(1, maxColumn);
diff --git a/src/MiniExcel.OpenXml/Models/TableInfo.cs b/src/MiniExcel.OpenXml/Models/TableInfo.cs
new file mode 100644
index 00000000..aa75a349
--- /dev/null
+++ b/src/MiniExcel.OpenXml/Models/TableInfo.cs
@@ -0,0 +1,17 @@
+namespace MiniExcelLib.OpenXml.Models;
+
+/// <summary>
+/// Metadata describing a named table: its name, column names, cell reference and header visibility.
+/// All values are supplied through the constructor and are read-only for consumers.
+/// </summary>
+public class TableInfo
+{
+ // Internal constructor: instances can only be created from inside this assembly.
+ internal TableInfo(string name, IEnumerable columns, string? referenceCells, bool hiddenHeader)
+ {
+ Name = name;
+ // Copy the incoming sequence into a new array.
+ Columns = [..columns];
+ ReferenceCells = referenceCells;
+ HiddenHeader = hiddenHeader;
+ }
+
+ /// <summary>The name of the table.</summary>
+ public string Name { get; private set; }
+ /// <summary>The table's column names, in the order they were passed to the constructor.</summary>
+ public string[] Columns { get; private set; }
+ /// <summary>
+ /// The cell reference of the table as passed to the constructor; may be null.
+ /// NOTE(review): the table reader expects a "start:end" range here - confirm this always holds.
+ /// </summary>
+ public string? ReferenceCells { get; private set; }
+ /// <summary>
+ /// Whether the table's header row is hidden. When this is false the table reader skips the first row of the referenced range.
+ /// </summary>
+ public bool HiddenHeader { get; private set; }
+}
diff --git a/src/MiniExcel.OpenXml/Picture/OpenXmlPictureImplement.cs b/src/MiniExcel.OpenXml/Picture/OpenXmlPictureImplement.cs
index 9e3113e8..0e658898 100644
--- a/src/MiniExcel.OpenXml/Picture/OpenXmlPictureImplement.cs
+++ b/src/MiniExcel.OpenXml/Picture/OpenXmlPictureImplement.cs
@@ -1,4 +1,5 @@
using System.Drawing;
+using MiniExcelLib.OpenXml.Reader;
namespace MiniExcelLib.OpenXml.Picture;
@@ -27,7 +28,7 @@ public static async Task AddPictureAsync(Stream excelStream, CancellationToken c
#else
using var archive = new ZipArchive(excelStream, ZipArchiveMode.Update, true);
#endif
- var rels = await reader.GetWorkbookRelsAsync(excelArchive.EntryCollection, cancellationToken).ConfigureAwait(false);
+ var rels = await OpenXmlReader.GetWorkbookRelsAsync(excelArchive.EntryCollection, cancellationToken).ConfigureAwait(false);
var sheetEntries = rels?.ToList() ?? [];
// Group images by sheet
diff --git a/src/MiniExcel.OpenXml/Reader/OpenXmlReader.Comments.cs b/src/MiniExcel.OpenXml/Reader/OpenXmlReader.Comments.cs
new file mode 100644
index 00000000..dc0bfb44
--- /dev/null
+++ b/src/MiniExcel.OpenXml/Reader/OpenXmlReader.Comments.cs
@@ -0,0 +1,154 @@
+namespace MiniExcelLib.OpenXml.Reader;
+
+internal partial class OpenXmlReader
+{
+ private static readonly XNamespace NsRel = Schemas.OpenXmlPackageRelationships;
+ private static readonly XNamespace Ns18Tc = Schemas.SpreadsheetmlXmlX18Tc;
+ private static readonly XNamespace NsMain = Schemas.SpreadsheetmlXmlMain;
+ private static readonly XNamespace Ns14R = Schemas.SpreadsheetmlXmlX14R;
+
+ /// <summary>
+ /// Reads the threaded comments and the notes attached to the given worksheet.
+ /// </summary>
+ /// <param name="sheetName">Name of the worksheet to read; must not be null or empty.</param>
+ /// <param name="cancellationToken">Token forwarded to the asynchronous stream and XML loading calls.</param>
+ /// <returns>
+ /// A <c>CommentResultSet</c> built from the sheet name, the threaded comments and the notes.
+ /// Both collections are empty when the sheet has no relationships part.
+ /// </returns>
+ /// <exception cref="ArgumentException"><paramref name="sheetName"/> is null or empty.</exception>
+ /// <exception cref="InvalidDataException">No sheet record with a usable path matches <paramref name="sheetName"/>.</exception>
+ [CreateSyncVersion]
+ internal async Task ReadCommentsAsync(string? sheetName, CancellationToken cancellationToken = default)
+ {
+ if (string.IsNullOrEmpty(sheetName))
+ throw new ArgumentException("sheetName cannot be null or empty", nameof(sheetName));
+
+ // NOTE(review): presumably populates _sheetRecords from the workbook relationships - confirm.
+ SetWorkbookRels(Archive.EntryCollection);
+ // Case-insensitive, culture-sensitive match. SingleOrDefault throws when more than one record matches.
+ // NOTE(review): the table lookup uses OrdinalIgnoreCase - confirm whether the culture-sensitive comparison is intended here.
+ var sheetRecord = _sheetRecords?.SingleOrDefault(s => s.Name.Equals(sheetName, StringComparison.CurrentCultureIgnoreCase));
+ // Only the last path segment (the sheet's file name) is needed to locate its .rels part.
+ if (sheetRecord?.Path?.Split('/')[^1] is not { } sheetFile)
+ throw new InvalidDataException($"There is no sheet named {sheetName}");
+
+ // No relationships part for this sheet: nothing can reference comments or notes.
+ if (Archive.GetEntry($"xl/worksheets/_rels/{sheetFile}.rels") is not { } rel)
+ return new CommentResultSet(sheetName, [], []);
+
+ var stream = await rel.OpenAsync(cancellationToken).ConfigureAwait(false);
+ await using var disposableStream = stream.ConfigureAwait(false);
+
+ var relDoc = await XDocument.LoadAsync(stream, LoadOptions.None, cancellationToken).ConfigureAwait(false);
+ // Filled by GetThreadedCommentsAsync with the cells that carry a thread and read by GetNotesAsync
+ // to leave out notes on those cells, so the call order below matters.
+ HashSet refCells = [];
+
+ var people = await GetAuthorsAsync(cancellationToken).ConfigureAwait(false);
+ var commentThreads = await GetThreadedCommentsAsync(relDoc, refCells, people, cancellationToken).ConfigureAwait(false);
+ var notes = await GetNotesAsync(relDoc, refCells, cancellationToken).ConfigureAwait(false);
+
+ return new CommentResultSet(sheetName, commentThreads, notes);
+ }
+
+ /// <summary>
+ /// Loads the authors from the archive entry named by <c>ExcelFileNames.Person</c>.
+ /// </summary>
+ /// <param name="cancellationToken">Token forwarded to the asynchronous stream and XML loading calls.</param>
+ /// <returns>
+ /// One author per <c>person</c> element under the document root; an empty list when the entry
+ /// or the document root is missing.
+ /// </returns>
+ [CreateSyncVersion]
+ private async Task> GetAuthorsAsync(CancellationToken cancellationToken)
+ {
+ if (Archive.GetEntry(ExcelFileNames.Person) is not { } persons)
+ return [];
+
+ var personStream = await persons.OpenAsync(cancellationToken).ConfigureAwait(false);
+ await using var disposablePersonStream = personStream.ConfigureAwait(false);
+
+ var personDoc = await XDocument.LoadAsync(personStream, LoadOptions.None, cancellationToken).ConfigureAwait(false);
+ var personElements = personDoc.Root?.Elements(Ns18Tc + "person");
+
+ return personElements
+ ?.Select(p => new Author
+ {
+ // A missing "id" attribute or a value that is not a GUID makes this line throw.
+ Id = Guid.Parse(p.Attribute("id")!.Value),
+ // A missing or empty display name is replaced by the "???" placeholder.
+ DisplayName = p.Attribute("displayName")?.Value is { } name and not "" ? name : "???",
+ ProviderId = p.Attribute("providerId")?.Value,
+ })
+ .ToList() ?? [];
+ }
+
+ /// <summary>
+ /// Reads the notes of a sheet from the comments part referenced by its relationships document,
+ /// leaving out notes on cells that are listed in <paramref name="refCells"/>.
+ /// </summary>
+ /// <param name="relDoc">The sheet's relationships document.</param>
+ /// <param name="refCells">Cell references whose notes must be skipped.</param>
+ /// <param name="cancellationToken">Token forwarded to the asynchronous stream and XML loading calls.</param>
+ /// <returns>The notes found; an empty list when no matching archive entry or comment list exists.</returns>
+ [CreateSyncVersion]
+ private async Task> GetNotesAsync(XDocument relDoc, HashSet refCells, CancellationToken cancellationToken)
+ {
+ var noteRels = relDoc.Root?.Elements(NsRel + "Relationship");
+ var notesElement = noteRels?.FirstOrDefault(x => x.Attribute("Type")?.Value == Schemas.SpreadsheetmlXmlCommentsRelationship);
+ var notesTarget = notesElement?.Attribute("Target");
+ // Leading '.' and '/' characters are stripped from the target so it can be appended to "xl/".
+ var notesPath = notesTarget?.Value.TrimStart('.', '/');
+
+ // NOTE(review): when no comments relationship exists notesPath is null and the lookup is made for "xl/";
+ // this assumes the archive has no entry with that exact name - confirm.
+ if (Archive.GetEntry($"xl/{notesPath}") is not { } noteEntry)
+ return [];
+
+ var noteEntryStream = await noteEntry.OpenAsync(cancellationToken).ConfigureAwait(false);
+ await using var disposableNoteEntryStream = noteEntryStream.ConfigureAwait(false);
+
+ var doc = await XDocument.LoadAsync(noteEntryStream, LoadOptions.None, cancellationToken).ConfigureAwait(false);
+
+ var authorElements = doc.Root?.Element(NsMain + "authors")?.Elements(NsMain + "author");
+ var authors = authorElements?.Select(a => a.Value).ToArray();
+
+ var commentElements = doc.Root
+ ?.Element(NsMain + "commentList")
+ ?.Elements(NsMain + "comment");
+
+ return commentElements
+ // Skip comments whose cell reference is already present in refCells.
+ ?.Where(c => !refCells.Contains(c.Attribute("ref")?.Value))
+ .Select(c => new NoteComment
+ {
+ // Surrounding braces are trimmed before parsing; a missing or invalid uid yields Guid.Empty.
+ Id = Guid.TryParse(c.Attribute(Ns14R + "uid")?.Value.Trim('{', '}'), out var noteId) ? noteId : Guid.Empty,
+ // authorId is used as an index into the authors array; an unparsable authorId yields an empty string.
+ Author = int.TryParse(c.Attribute("authorId")?.Value, out var authorId) ? authors?.ElementAtOrDefault(authorId) : "",
+ ReferenceCell = c.Attribute("ref")?.Value,
+ // All text fragments returned for the comment are concatenated without a separator.
+ Text = string.Join("", GetTextFromComment(c))
+ })
+ .ToList() ?? [];
+ }
+
+ /// <summary>
+ /// Reads the threaded comments of a sheet from the part referenced by its relationships document,
+ /// attaches every reply to its parent thread and records the cell of each thread in <paramref name="refCells"/>.
+ /// </summary>
+ /// <param name="relDoc">The sheet's relationships document.</param>
+ /// <param name="refCells">Receives the reference cell of every top-level thread that was read.</param>
+ /// <param name="people">Known authors; a comment's <c>personId</c> is matched against their ids.</param>
+ /// <param name="cancellationToken">Token forwarded to the asynchronous stream and XML loading calls.</param>
+ /// <returns>The top-level threads; an empty list when no matching archive entry or document root exists.</returns>
+ [CreateSyncVersion]
+ private async Task> GetThreadedCommentsAsync(XDocument relDoc, HashSet refCells, ICollection people, CancellationToken cancellationToken)
+ {
+ var threadedCommentRels = relDoc.Root?.Elements(NsRel + "Relationship");
+ var threadedCommentsElement = threadedCommentRels?.FirstOrDefault(x => x.Attribute("Type")?.Value == Schemas.SpreadsheetmlXmlThreadedCommentRelationship);
+ var threadedCommentsTarget = threadedCommentsElement?.Attribute("Target");
+ // Leading '.' and '/' characters are stripped from the target so it can be appended to "xl/".
+ var threadedCommentsPath = threadedCommentsTarget?.Value.TrimStart('.', '/');
+
+ if (Archive.GetEntry($"xl/{threadedCommentsPath}") is not { } threadEntry)
+ return [];
+
+ var threadEntryStream = await threadEntry.OpenAsync(cancellationToken).ConfigureAwait(false);
+ await using var disposableThreadEntryStream = threadEntryStream.ConfigureAwait(false);
+
+ var doc = await XDocument.LoadAsync(threadEntryStream, LoadOptions.None, cancellationToken).ConfigureAwait(false);
+
+ // Elements without a parentId attribute start a thread.
+ var commentThreadElements = doc.Root?.Elements(Ns18Tc + "threadedComment");
+ var commentThreads = commentThreadElements
+ ?.Where(tc => tc.Attribute("parentId") is null)
+ .Select(tc => new ThreadedComment
+ {
+ Id = Guid.Parse(tc.Attribute("id")!.Value.Trim('{', '}')),
+ Author = people.FirstOrDefault(p => p.Id == (Guid.TryParse(tc.Attribute("personId")?.Value, out var person) ? person : Guid.Empty)),
+ CreatedAt = DateTime.Parse(tc.Attribute("dT")!.Value, CultureInfo.InvariantCulture),
+ ReferenceCell = tc.Attribute("ref")?.Value!,
+ Text = tc.Value,
+ // Any "done" value other than "0" marks the thread as resolved.
+ Resolved = tc.Attribute("done")?.Value is not (null or "0")
+ })
+ .ToList() ?? [];
+
+ // Elements with a parentId attribute are replies; they are grouped by the thread they belong to.
+ var replyElements = doc.Root?.Elements(Ns18Tc + "threadedComment");
+ var replies = replyElements
+ ?.Where(tc => tc.Attribute("parentId") is not null)
+ .Select(tc => new ThreadedCommentReply
+ {
+ Id = Guid.Parse(tc.Attribute("id")!.Value.Trim('{', '}')),
+ ParentId = Guid.Parse(tc.Attribute("parentId")!.Value),
+ // Same tolerant lookup as for top-level threads: a missing or malformed personId must not
+ // abort reading every comment of the sheet.
+ Author = people.FirstOrDefault(p => p.Id == (Guid.TryParse(tc.Attribute("personId")?.Value, out var person) ? person : Guid.Empty)),
+ CreatedAt = DateTime.Parse(tc.Attribute("dT")!.Value, CultureInfo.InvariantCulture),
+ Text = tc.Value
+ })
+ .ToLookup(x => x.ParentId);
+
+ foreach (var thread in commentThreads)
+ {
+ refCells.Add(thread.ReferenceCell);
+
+ if (replies is not null)
+ thread.ThreadedComments = replies[thread.Id].ToList();
+ }
+
+ return commentThreads;
+ }
+
+ /// <summary>
+ /// Returns the value of every <c>t</c> element found beneath the comment's <c>text</c> child element.
+ /// </summary>
+ /// <param name="comment">The comment element; may be null.</param>
+ /// <returns>The values in document order, or an empty sequence when the comment or its <c>text</c> child is missing.</returns>
+ private static IEnumerable GetTextFromComment(XElement? comment)
+ {
+ var textContainer = comment?.Element(NsMain + "text");
+ if (textContainer is null)
+ return [];
+
+ return textContainer.Descendants(NsMain + "t").Select(run => run.Value);
+ }
+}
diff --git a/src/MiniExcel.OpenXml/Reader/OpenXmlReader.Mapped.cs b/src/MiniExcel.OpenXml/Reader/OpenXmlReader.Mapped.cs
new file mode 100644
index 00000000..6ef18057
--- /dev/null
+++ b/src/MiniExcel.OpenXml/Reader/OpenXmlReader.Mapped.cs
@@ -0,0 +1,137 @@
+using MiniExcelLib.OpenXml.Styles;
+
+namespace MiniExcelLib.OpenXml.Reader;
+
+internal partial class OpenXmlReader
+{
+ /// <summary>
+ /// Direct mapped query that bypasses dictionary creation for better performance
+ /// </summary>
+ /// <param name="sheetName">Name passed to <c>GetSheetEntry</c> to locate the worksheet entry.</param>
+ /// <param name="cancellationToken">Token checked on entry and forwarded to the asynchronous reads.</param>
+ /// <returns>
+ /// The rows produced for each <c>row</c> element inside <c>sheetData</c>. Nothing is yielded when the
+ /// document does not start with a <c>worksheet</c> element or that element has no content.
+ /// </returns>
+ [CreateSyncVersion]
+ internal async IAsyncEnumerable QueryMappedAsync(string? sheetName, [EnumeratorCancellation] CancellationToken cancellationToken = default)
+ {
+ cancellationToken.ThrowIfCancellationRequested();
+
+ // Always false in this method; forwarded unchanged to the row reader.
+ const bool withoutCr = false;
+ var sheetEntry = GetSheetEntry(sheetName);
+
+ // Merge information is only loaded when the configuration asks for merged cells to be filled.
+ MergeCells? mergeCells = null;
+ if (_config.FillMergedCells)
+ {
+ var mergeCellsResult = await TryGetMergeCellsAsync(sheetEntry, cancellationToken).ConfigureAwait(false);
+ if (mergeCellsResult.Success)
+ mergeCells = mergeCellsResult.MergeCells;
+ }
+
+ // Direct XML reading without dictionary creation
+ var xmlSettings = XmlReaderHelper.GetXmlReaderSettings();
+
+ var sheetStream = await sheetEntry.OpenAsync(cancellationToken).ConfigureAwait(false);
+ await using var disposableSheetStream = sheetStream.ConfigureAwait(false);
+
+ using var reader = XmlReader.Create(sheetStream, xmlSettings);
+
+ if (!reader.IsStartElement("worksheet", Ns))
+ yield break;
+
+ if (!await reader.ReadFirstContentAsync(cancellationToken).ConfigureAwait(false))
+ yield break;
+
+ // Walk the children of <worksheet>; everything except <sheetData> is skipped.
+ while (!reader.EOF)
+ {
+ if (reader.IsStartElement("sheetData", Ns))
+ {
+ // NOTE(review): this relies on ReadFirstContentAsync having moved the reader when it returns false - confirm.
+ if (!await reader.ReadFirstContentAsync(cancellationToken).ConfigureAwait(false))
+ continue;
+
+ int rowIndex = -1;
+ while (!reader.EOF)
+ {
+ if (reader.IsStartElement("row", Ns))
+ {
+ // Rows without a parsable "r" attribute are numbered one after the previous row.
+ if (int.TryParse(reader.GetAttribute("r"), out int arValue))
+ rowIndex = arValue - 1; // The row attribute is 1-based
+ else
+ rowIndex++;
+
+ // Read row directly into mapped structure
+ await foreach (var mappedRow in ReadMappedRowAsync(reader, rowIndex, withoutCr, mergeCells, cancellationToken).ConfigureAwait(false))
+ {
+ yield return mappedRow;
+ }
+ }
+ else if (!await reader.SkipContentAsync(cancellationToken).ConfigureAwait(false))
+ {
+ break;
+ }
+ }
+ }
+ else if (!await reader.SkipContentAsync(cancellationToken).ConfigureAwait(false))
+ {
+ break;
+ }
+ }
+ }
+
+ /// <summary>
+ /// Reads the cells of the current <c>row</c> element into a single <c>MappedRow</c>.
+ /// </summary>
+ /// <param name="reader">XML reader positioned on the row element.</param>
+ /// <param name="rowIndex">Zero-based index stored in the produced row.</param>
+ /// <param name="withoutCr">Forwarded to the cell reader; also selects the initial column index (-1 when true, 0 otherwise).</param>
+ /// <param name="mergeCells">Merge lookup tables, or null when merged cells are not being filled.</param>
+ /// <param name="cancellationToken">Token forwarded to the asynchronous reads.</param>
+ /// <returns>Exactly one row: an empty one when the row element has no content, otherwise the populated row.</returns>
+ [CreateSyncVersion]
+ private async IAsyncEnumerable ReadMappedRowAsync(
+ XmlReader reader,
+ int rowIndex,
+ bool withoutCr,
+ MergeCells? mergeCells,
+ [EnumeratorCancellation] CancellationToken cancellationToken = default)
+ {
+ if (!await reader.ReadFirstContentAsync(cancellationToken).ConfigureAwait(false))
+ {
+ // Empty row
+ yield return new MappedRow(rowIndex);
+ yield break;
+ }
+
+ var row = new MappedRow(rowIndex);
+ var columnIndex = withoutCr ? -1 : 0;
+
+ while (!reader.EOF)
+ {
+ if (reader.IsStartElement("c", Ns))
+ {
+ // s = style index, r = cell reference, t = cell type; each may be absent (null).
+ var aS = reader.GetAttribute("s");
+ var aR = reader.GetAttribute("r");
+ var aT = reader.GetAttribute("t");
+
+ var cellAndColumn = await ReadCellAndSetColumnIndexAsync(reader, columnIndex, withoutCr, 0, aR, aT, cancellationToken).ConfigureAwait(false);
+ var cellValue = cellAndColumn.CellValue;
+ columnIndex = cellAndColumn.ColumnIndex;
+
+ // A cell without an "r" attribute cannot be matched against a merged range, and a null key
+ // would make the dictionary lookups below throw ArgumentNullException.
+ if (_config.FillMergedCells && mergeCells is not null && aR is not null)
+ {
+ if (mergeCells.MergesValues.ContainsKey(aR))
+ {
+ // First cell of a merged range: remember its value for the other cells of the range.
+ mergeCells.MergesValues[aR] = cellValue;
+ }
+ else if (mergeCells.MergesMap.TryGetValue(aR, out var mergeKey))
+ {
+ // Other cell of a merged range: take over the value stored for the range's first cell.
+ mergeCells.MergesValues.TryGetValue(mergeKey, out cellValue);
+ }
+ }
+
+ if (!string.IsNullOrEmpty(aS)) // Custom style
+ {
+ if (int.TryParse(aS, NumberStyles.Any, CultureInfo.InvariantCulture, out var styleIndex))
+ {
+ // Styles are loaded on first use and kept for later cells.
+ _style ??= await OpenXmlStyles.CreateAsync(Archive, cancellationToken).ConfigureAwait(false);
+ cellValue = _style.ConvertValueByStyleFormat(styleIndex, cellValue);
+ }
+ }
+
+ row.SetCell(columnIndex, cellValue);
+ }
+ else if (!await reader.SkipContentAsync(cancellationToken).ConfigureAwait(false))
+ {
+ break;
+ }
+ }
+
+ yield return row;
+ }
+}
diff --git a/src/MiniExcel.OpenXml/Reader/OpenXmlReader.MergeCells.cs b/src/MiniExcel.OpenXml/Reader/OpenXmlReader.MergeCells.cs
new file mode 100644
index 00000000..4c260002
--- /dev/null
+++ b/src/MiniExcel.OpenXml/Reader/OpenXmlReader.MergeCells.cs
@@ -0,0 +1,66 @@
+namespace MiniExcelLib.OpenXml.Reader;
+
+internal partial class OpenXmlReader
+{
+ /// <summary>
+ /// Scans a worksheet part for <c>mergeCells</c> sections and builds the merge lookup tables from them.
+ /// </summary>
+ /// <param name="sheetEntry">Archive entry of the worksheet XML.</param>
+ /// <param name="cancellationToken">Token checked on entry and forwarded to the asynchronous reads.</param>
+ /// <returns>
+ /// <c>(false, null)</c> when the document does not start with a <c>worksheet</c> element or a
+ /// <c>mergeCells</c> element has no content. Otherwise <c>(true, mergeCells)</c>, where <c>MergesValues</c>
+ /// has a key for the first cell of every merged range and <c>MergesMap</c> maps each other cell of a range
+ /// to that first cell.
+ /// </returns>
+ [CreateSyncVersion]
+ internal static async Task<(bool Success, MergeCells? MergeCells)> TryGetMergeCellsAsync(ZipArchiveEntry sheetEntry, CancellationToken cancellationToken = default)
+ {
+ cancellationToken.ThrowIfCancellationRequested();
+
+ var xmlSettings = XmlReaderHelper.GetXmlReaderSettings();
+ var mergeCells = new MergeCells();
+
+ var sheetStream = await sheetEntry.OpenAsync(cancellationToken).ConfigureAwait(false);
+ await using var disposableSheetStream = sheetStream.ConfigureAwait(false);
+
+ using var reader = XmlReader.Create(sheetStream, xmlSettings);
+
+ if (!reader.IsStartElement("worksheet", Ns))
+ return (false, null);
+
+ while (await reader.ReadAsync().ConfigureAwait(false))
+ {
+ if (!reader.IsStartElement("mergeCells", Ns))
+ continue;
+
+ if (!await reader.ReadFirstContentAsync(cancellationToken).ConfigureAwait(false))
+ return (false, null);
+
+ while (!reader.EOF)
+ {
+ if (reader.IsStartElement("mergeCell", Ns))
+ {
+ // Only a well-formed "start:end" reference describes a merged range. Anything else (missing
+ // attribute, single cell, unparsable reference) is ignored, but the reader is still advanced
+ // below: jumping back to the loop head without moving the reader would never terminate.
+ if (reader.GetAttribute("ref")?.Split(':') is [var firstCell, var lastCell] &&
+ CellReferenceConverter.TryParseCellReference(firstCell, out var x1, out var y1) &&
+ CellReferenceConverter.TryParseCellReference(lastCell, out var x2, out var y2))
+ {
+ mergeCells.MergesValues.Add(firstCell, null);
+
+ // Map every cell of the range except the first one visited (x1, y1) to the range's first cell.
+ var isFirst = true;
+ for (int x = x1; x <= x2; x++)
+ {
+ for (int y = y1; y <= y2; y++)
+ {
+ if (!isFirst)
+ mergeCells.MergesMap.Add(CellReferenceConverter.GetCellFromCoordinates(x, y), firstCell);
+ isFirst = false;
+ }
+ }
+ }
+
+ await reader.SkipContentAsync(cancellationToken).ConfigureAwait(false);
+ }
+ else if (!await reader.SkipContentAsync(cancellationToken).ConfigureAwait(false))
+ {
+ break;
+ }
+ }
+ }
+
+ return (true, mergeCells);
+ }
+}
diff --git a/src/MiniExcel.OpenXml/Reader/OpenXmlReader.Tables.cs b/src/MiniExcel.OpenXml/Reader/OpenXmlReader.Tables.cs
new file mode 100644
index 00000000..7ed37456
--- /dev/null
+++ b/src/MiniExcel.OpenXml/Reader/OpenXmlReader.Tables.cs
@@ -0,0 +1,181 @@
+namespace MiniExcelLib.OpenXml.Reader;
+
+internal partial class OpenXmlReader
+{
+    /// <summary>
+    /// Queries the rows of a named table and maps each of them to a new <typeparamref name="T"/> instance.
+    /// </summary>
+    /// <typeparam name="T">Type the table rows are mapped to.</typeparam>
+    /// <param name="sheetName">Name of the worksheet hosting the table; compared case-insensitively.</param>
+    /// <param name="tableName">Name of the table to read; compared case-insensitively.</param>
+    /// <param name="cancellationToken">Token used to cancel the enumeration.</param>
+    /// <returns>A lazily evaluated sequence of mapped rows.</returns>
+    [CreateSyncVersion]
+    internal IAsyncEnumerable<T> QueryTableAsync<T>(string sheetName, string tableName, CancellationToken cancellationToken = default)
+        where T : class, new()
+    {
+        // Request the header row as the first item; presumably the mapper matches it against the members of T.
+        var query = QueryTableAsync(sheetName, tableName, true, cancellationToken);
+        return MiniExcelMapper.MapQueryAsync<T>(query, 0, false, _config.TrimColumnNames, _config, XmlHelper.DecodeString, cancellationToken);
+    }
+
+    /// <summary>
+    /// Queries the data rows of a named table as dictionaries.
+    /// </summary>
+    /// <param name="sheetName">Name of the worksheet hosting the table; compared case-insensitively.</param>
+    /// <param name="tableName">Name of the table to read; compared case-insensitively.</param>
+    /// <param name="prependHeaders">
+    /// When <see langword="true"/>, a first item that maps column letters to the table's column names is emitted
+    /// and the data rows stay keyed by column letter; when <see langword="false"/>, no such item is emitted and
+    /// the data rows are re-keyed by the table's column names.
+    /// </param>
+    /// <param name="cancellationToken">Token used to cancel the enumeration.</param>
+    /// <returns>A lazily evaluated sequence of rows restricted to the table's cell range.</returns>
+    /// <exception cref="InvalidDataException">
+    /// The worksheet or the table was not found, or the table's cell range could not be parsed.
+    /// </exception>
+    [CreateSyncVersion]
+    internal async IAsyncEnumerable<IDictionary<string, object?>> QueryTableAsync(string sheetName, string tableName, bool prependHeaders, [EnumeratorCancellation] CancellationToken cancellationToken = default)
+    {
+        TableInfo? table = null;
+        await foreach (var item in GetTableInfosAsync(sheetName, cancellationToken).ConfigureAwait(false))
+        {
+            if (item.Name.Equals(tableName, StringComparison.OrdinalIgnoreCase))
+            {
+                table = item;
+                break;
+            }
+        }
+
+        if (table is null)
+            throw new InvalidDataException($"The table {tableName} was not found.");
+
+        if (table.ReferenceCells?.Split(':') is not [var start, var end] ||
+            !CellReferenceConverter.TryParseCellReference(start, out var startCol, out var startRow) ||
+            !CellReferenceConverter.TryParseCellReference(end, out var endCol, out var endRow))
+        {
+            throw new InvalidDataException("A valid cell range could not be extracted from the table metadata.");
+        }
+
+        // When the table has a visible header row, skip it: the data starts one row below the range start.
+        if (!table.HiddenHeader)
+            startRow++;
+
+        if (prependHeaders)
+        {
+            var headers = ExpandoHelper.CreateEmptyByIndices(endCol - 1, startCol - 1);
+            var columnCount = Math.Min(headers.Count, table.Columns.Length);
+
+            for (int i = 0; i < columnCount; i++)
+            {
+                var index = CellReferenceConverter.GetAlphabeticalIndex(startCol + i - 1);
+                headers[index] = table.Columns[i];
+            }
+
+            yield return headers;
+        }
+
+        await foreach (var row in QueryRangeAsync(false, sheetName, startRow, startCol, endRow, endCol, cancellationToken).ConfigureAwait(false))
+        {
+            if (!prependHeaders)
+            {
+                // Re-key in two passes: detach every value first, then store it under its column name. Renaming in
+                // place would lose data whenever a table column is named like the letter key of a column that has
+                // not been processed yet (e.g. a table column called "B" located in worksheet column A).
+                var cells = new List<(string Header, object? Value)>(table.Columns.Length);
+                for (var i = 0; i < table.Columns.Length; i++)
+                {
+                    var letterKey = CellReferenceConverter.GetAlphabeticalIndex(i + startCol - 1);
+                    if (row.TryGetValue(letterKey, out var cellValue))
+                    {
+                        cells.Add((table.Columns[i], cellValue));
+                        row.Remove(letterKey);
+                    }
+                }
+
+                foreach (var (header, value) in cells)
+                    row[header] = value;
+            }
+
+            yield return row;
+        }
+    }
+
+    /// <summary>
+    /// Enumerates the metadata of the tables referenced by the relationship part of the given worksheet.
+    /// </summary>
+    /// <param name="sheetName">Name of the worksheet to inspect; compared case-insensitively.</param>
+    /// <param name="cancellationToken">Token used to cancel the enumeration.</param>
+    /// <returns>One <see cref="TableInfo"/> per table part found; parts lacking a name, a range or columns are skipped.</returns>
+    /// <exception cref="InvalidDataException">No worksheet with the given name was found in the workbook.</exception>
+    [CreateSyncVersion]
+    private async IAsyncEnumerable<TableInfo> GetTableInfosAsync(string sheetName, [EnumeratorCancellation] CancellationToken cancellationToken = default)
+    {
+        var rels = await GetWorkbookRelsAsync(Archive.EntryCollection, cancellationToken).ConfigureAwait(false);
+        if (rels?.Find(x => x.Name.Equals(sheetName, StringComparison.OrdinalIgnoreCase)) is not { Path: { } path })
+            throw new InvalidDataException($"Worksheet {sheetName} was not found.");
+
+        List<string> tables = [];
+        var sheetFilename = path.Split('/')[^1];
+
+        // First pass: collect the file names of the table parts listed in the worksheet's relationship part.
+        if (Archive.GetEntry($"xl/worksheets/_rels/{sheetFilename}.rels") is { } entry)
+        {
+            var entryStream = await entry.OpenAsync(cancellationToken).ConfigureAwait(false);
+            await using var disposableEntryStream = entryStream.ConfigureAwait(false);
+
+            var readerSettings = XmlReaderHelper.GetXmlReaderSettings();
+            using var reader = XmlReader.Create(entryStream, readerSettings);
+
+            if (!reader.ReadToFollowing("Relationship"))
+                yield break;
+
+            do
+            {
+                if (reader.GetAttribute("Type") == Schemas.SpreadsheetmlXmlTableRelationship)
+                {
+                    // Only the last path segment is kept; the part is looked up under xl/tables/ below.
+                    if (reader.GetAttribute("Target") is { } target &&
+                        target.Split('/').LastOrDefault() is { } table)
+                    {
+                        tables.Add(table);
+                    }
+                }
+            }
+            while (reader.ReadToNextSibling("Relationship"));
+        }
+
+        foreach (var table in tables)
+        {
+            if (Archive.GetEntry($"xl/tables/{table}") is not { } tableEntry)
+                continue;
+
+            var entryStream = await tableEntry.OpenAsync(cancellationToken).ConfigureAwait(false);
+            await using var disposableEntryStream = entryStream.ConfigureAwait(false);
+            using var reader = XmlReader.Create(entryStream, XmlReaderHelper.GetXmlReaderSettings());
+
+            if (!reader.ReadToFollowing("table"))
+                continue;
+
+            if (reader.GetAttribute("name") is not { } tableName ||
+                reader.GetAttribute("ref") is not { } @ref)
+            {
+                continue;
+            }
+
+            var headerIsHidden = reader.GetAttribute("headerRowCount") == "0";
+            if (!reader.ReadToDescendant("tableColumn"))
+                continue;
+
+            List<string> columns = [];
+            do
+            {
+                // Columns without a name attribute get one built from their zero-based position.
+                columns.Add(reader.GetAttribute("name") ?? $"Column{columns.Count}");
+            }
+            while (reader.ReadToNextSibling("tableColumn"));
+
+            yield return new TableInfo(tableName, [..columns], @ref, headerIsHidden);
+        }
+    }
+}
diff --git a/src/MiniExcel.OpenXml/OpenXmlReader.cs b/src/MiniExcel.OpenXml/Reader/OpenXmlReader.cs
similarity index 66%
rename from src/MiniExcel.OpenXml/OpenXmlReader.cs
rename to src/MiniExcel.OpenXml/Reader/OpenXmlReader.cs
index be3b2236..f8061cfa 100644
--- a/src/MiniExcel.OpenXml/OpenXmlReader.cs
+++ b/src/MiniExcel.OpenXml/Reader/OpenXmlReader.cs
@@ -3,9 +3,9 @@
using MiniExcelMapper = MiniExcelLib.Core.Reflection.MiniExcelMapper;
using XmlReaderHelper = MiniExcelLib.OpenXml.Utils.XmlReaderHelper;
-namespace MiniExcelLib.OpenXml;
+namespace MiniExcelLib.OpenXml.Reader;
-internal partial class OpenXmlReader : IMiniExcelReader
+internal sealed partial class OpenXmlReader : IMiniExcelReader
{
private static readonly string[] Ns = [Schemas.SpreadsheetmlXmlMain, Schemas.SpreadsheetmlXmlStrictNs];
private static readonly string[] RelationshiopNs = [Schemas.SpreadsheetmlXmlRelationships, Schemas.SpreadsheetmlXmlStrictRelationships];
@@ -47,13 +47,12 @@ internal static async Task CreateAsync(Stream stream, IMiniExcelC
{
sheetName ??= MiniExcelPropertyHelper.GetExcelSheetInfo(typeof(T), _config)?.ExcelSheetName;
var query = QueryAsync(false, sheetName, startCell, cancellationToken);
+
if (!CellReferenceConverter.TryParseCellReference(startCell, out _, out var rowOffset))
- {
throw new InvalidDataException($"Value {startCell} is not a valid cell reference.");
- }
//Todo: Find a way if possible to remove the 'hasHeader' parameter to check whether or not to include
- // the first row in the result set in favor of modifying the already present 'useHeaderRow' to do the same job
+ // the first row in the result set in favor of modifying the already present 'hasHeaderRow' to do the same job
return MiniExcelMapper.MapQueryAsync(query, rowOffset, mapHeaderAsData, _config.TrimColumnNames, _config, XmlHelper.DecodeString, cancellationToken);
}
@@ -141,26 +140,29 @@ internal static async Task CreateAsync(Stream stream, IMiniExcelC
}
[CreateSyncVersion]
- private async IAsyncEnumerable> InternalQueryRangeAsync(bool useHeaderRow, string? sheetName, int startRowIndex, int startColumnIndex, int? endRowIndex, int? endColumnIndex, [EnumeratorCancellation] CancellationToken cancellationToken = default)
+ private async IAsyncEnumerable> InternalQueryRangeAsync(bool hasHeaderRow, string? sheetName, int startRowIndex, int startColumnIndex, int? endRowIndex, int? endColumnIndex, [EnumeratorCancellation] CancellationToken cancellationToken = default)
{
cancellationToken.ThrowIfCancellationRequested();
- var xmlSettings = XmlReaderHelper.GetXmlReaderSettings(
-#if SYNC_ONLY
- false
-#else
- true
-#endif
- );
-
+ var xmlSettings = XmlReaderHelper.GetXmlReaderSettings();
var sheetEntry = GetSheetEntry(sheetName);
// TODO: need to optimize performance
// Q. why need 3 times openstream merge one open read? A. no, zipstream can't use position = 0
- var mergeCellsContext = new MergeCellsContext();
- if (_config.FillMergedCells && !await TryGetMergeCellsAsync(sheetEntry, mergeCellsContext, cancellationToken).ConfigureAwait(false))
- yield break;
+ MergeCells? mergeCells = null;
+ if (_config.FillMergedCells)
+ {
+ var mergeCellsResult = await TryGetMergeCellsAsync(sheetEntry, cancellationToken).ConfigureAwait(false);
+ if (mergeCellsResult.Success)
+ {
+ mergeCells = mergeCellsResult.MergeCells;
+ }
+ else
+ {
+ yield break;
+ }
+ }
var maxRowColumnIndexResult = await TryGetMaxRowColumnIndexAsync(sheetEntry, cancellationToken).ConfigureAwait(false);
if (!maxRowColumnIndexResult.IsSuccess)
@@ -219,15 +221,16 @@ internal static async Task CreateAsync(Stream stream, IMiniExcelC
break;
}
- await foreach (var row in QueryRowAsync(reader, isFirstRow, startRowIndex, nextRowIndex,
- rowIndex, startColumnIndex, endColumnIndex, maxColumnIndex,
- withoutCr, useHeaderRow, headRows, mergeCellsContext.MergeCells,
- cancellationToken).ConfigureAwait(false))
+ var query = QueryRowAsync(reader, isFirstRow, startRowIndex, nextRowIndex, rowIndex,
+ startColumnIndex, endColumnIndex, maxColumnIndex, withoutCr, hasHeaderRow, headRows,
+ mergeCells, cancellationToken);
+
+ await foreach (var row in query.ConfigureAwait(false))
{
if (isFirstRow)
{
isFirstRow = false; // for startcell logic
- if (useHeaderRow)
+ if (hasHeaderRow)
continue;
}
@@ -271,7 +274,7 @@ internal static async Task CreateAsync(Stream stream, IMiniExcelC
{
for (int i = expectedRowIndex; i < rowIndex; i++)
{
- yield return GetCell(hasHeaderRow, maxColumnIndex, headRows, startColumnIndex);
+ yield return GetHeaders(hasHeaderRow, maxColumnIndex, headRows, startColumnIndex);
}
}
}
@@ -280,11 +283,11 @@ internal static async Task CreateAsync(Stream stream, IMiniExcelC
if (!await reader.ReadFirstContentAsync(cancellationToken).ConfigureAwait(false) && !_config.IgnoreEmptyRows)
{
//Fill in case of self closed empty row tag eg.
- yield return GetCell(hasHeaderRow, maxColumnIndex, headRows, startColumnIndex);
+ yield return GetHeaders(hasHeaderRow, maxColumnIndex, headRows, startColumnIndex);
yield break;
}
- var cell = GetCell(hasHeaderRow, maxColumnIndex, headRows, startColumnIndex);
+ var cell = GetHeaders(hasHeaderRow, maxColumnIndex, headRows, startColumnIndex);
var columnIndex = withoutCr ? -1 : 0;
while (!reader.EOF)
{
@@ -320,7 +323,7 @@ internal static async Task CreateAsync(Stream stream, IMiniExcelC
xfIndex = styleIndex;
// only when have s attribute then load styles xml data
- _style ??= new OpenXmlStyles(Archive);
+ _style ??= await OpenXmlStyles.CreateAsync(Archive, cancellationToken).ConfigureAwait(false);
cellValue = _style.ConvertValueByStyleFormat(xfIndex, cellValue);
}
@@ -372,16 +375,16 @@ private ZipArchiveEntry GetSheetEntry(string? sheetName)
return sheetEntry;
}
- private static IDictionary GetCell(bool useHeaderRow, int maxColumnIndex, Dictionary headRows, int startColumnIndex)
+ private static IDictionary GetHeaders(bool hasHeaderRow, int maxColumnIndex, Dictionary headRows, int startColumnIndex)
{
- return useHeaderRow
+ return hasHeaderRow
? ExpandoHelper.CreateEmptyByHeaders(headRows)
: ExpandoHelper.CreateEmptyByIndices(maxColumnIndex, startColumnIndex);
}
- private static void SetCellsValueAndHeaders(object? cellValue, bool useHeaderRow, Dictionary headRows, bool isFirstRow, IDictionary cell, int columnIndex)
+ private static void SetCellsValueAndHeaders(object? cellValue, bool hasHeaderRow, Dictionary headRows, bool isFirstRow, IDictionary cell, int columnIndex)
{
- if (!useHeaderRow)
+ if (!hasHeaderRow)
{
//if not using First Head then using A,B,C as index
cell[CellReferenceConverter.GetAlphabeticalIndex(columnIndex)] = cellValue;
@@ -407,10 +410,10 @@ private async Task SetSharedStringsAsync(CancellationToken cancellationToken = d
if (SharedStrings is { Count: > 0 })
return;
-
+
if (Archive.GetEntry(ExcelFileNames.SharedStrings) is not { } sharedStringsEntry)
return;
-
+
var stream = await sharedStringsEntry.OpenAsync(cancellationToken).ConfigureAwait(false);
await using var disposableStream = stream.ConfigureAwait(false);
@@ -441,13 +444,7 @@ private void SetWorkbookRels(ReadOnlyCollection entries)
[CreateSyncVersion]
private static async IAsyncEnumerable ReadWorkbookAsync(ReadOnlyCollection entries, [EnumeratorCancellation] CancellationToken cancellationToken = default)
{
- var xmlSettings = XmlReaderHelper.GetXmlReaderSettings(
-#if SYNC_ONLY
- false
-#else
- true
-#endif
- );
+ var xmlSettings = XmlReaderHelper.GetXmlReaderSettings();
var entry = entries.Single(w => w.FullName == ExcelFileNames.Workbook);
var stream = await entry.OpenAsync(cancellationToken).ConfigureAwait(false);
@@ -528,35 +525,28 @@ await reader.SkipAsync()
}
[CreateSyncVersion]
- internal async Task?> GetWorkbookRelsAsync(ReadOnlyCollection entries, CancellationToken cancellationToken = default)
+ internal static async Task?> GetWorkbookRelsAsync(ReadOnlyCollection entries, CancellationToken cancellationToken = default)
{
- var xmlSettings = XmlReaderHelper.GetXmlReaderSettings(
-#if SYNC_ONLY
- false
-#else
- true
-#endif
- );
-
var sheetRecords = await ReadWorkbookAsync(entries, cancellationToken)
.CreateListAsync(cancellationToken)
.ConfigureAwait(false);
var entry = entries.Single(w => w.FullName == ExcelFileNames.WorkbookRels);
-
var stream = await entry.OpenAsync(cancellationToken).ConfigureAwait(false);
await using var disposableStream = stream.ConfigureAwait(false);
- using var reader = XmlReader.Create(stream, xmlSettings);
+ var readerSettings = XmlReaderHelper.GetXmlReaderSettings();
+ using var reader = XmlReader.Create(stream, readerSettings);
if (!XmlReaderHelper.IsStartElement(reader, "Relationships", Schemas.OpenXmlPackageRelationships))
return null;
+
if (!await reader.ReadFirstContentAsync(cancellationToken).ConfigureAwait(false))
return null;
while (!reader.EOF)
{
- if (XmlReaderHelper.IsStartElement(reader, "Relationship", Schemas.OpenXmlPackageRelationships))
+ if (reader.IsStartElement("Relationship", Schemas.OpenXmlPackageRelationships))
{
var rid = reader.GetAttribute("Id");
foreach (var sheet in sheetRecords.Where(sh => sh.Rid == rid))
@@ -720,14 +710,7 @@ internal async Task> GetDimensionsAsync(CancellationToken canc
{
cancellationToken.ThrowIfCancellationRequested();
- var xmlSettings = XmlReaderHelper.GetXmlReaderSettings(
-#if SYNC_ONLY
- false
-#else
- true
-#endif
- );
-
+ var xmlSettings = XmlReaderHelper.GetXmlReaderSettings();
var ranges = new List();
var sheets = Archive.EntryCollection.Where(e =>
@@ -877,13 +860,7 @@ internal static async Task TryGetMaxRowColumnIndexAs
{
cancellationToken.ThrowIfCancellationRequested();
- var xmlSettings = XmlReaderHelper.GetXmlReaderSettings(
-#if SYNC_ONLY
- false
-#else
- true
-#endif
- );
+ var xmlSettings = XmlReaderHelper.GetXmlReaderSettings();
bool withoutCr = false;
int maxRowIndex = -1;
@@ -997,380 +974,14 @@ internal static async Task TryGetMaxRowColumnIndexAs
return new GetMaxRowColumnIndexResult(true, withoutCr, maxRowIndex, maxColumnIndex);
}
- internal class MergeCellsContext
- {
- public MergeCells? MergeCells { get; set; }
- }
-
-
- [CreateSyncVersion]
- internal static async Task TryGetMergeCellsAsync(ZipArchiveEntry sheetEntry, MergeCellsContext mergeCellsContext, CancellationToken cancellationToken = default)
- {
- cancellationToken.ThrowIfCancellationRequested();
-
- var xmlSettings = XmlReaderHelper.GetXmlReaderSettings(
-#if SYNC_ONLY
- false
-#else
- true
-#endif
- );
- var mergeCells = new MergeCells();
-
- var sheetStream = await sheetEntry.OpenAsync(cancellationToken).ConfigureAwait(false);
- await using var disposableSheetStream = sheetStream.ConfigureAwait(false);
-
- using var reader = XmlReader.Create(sheetStream, xmlSettings);
-
- if (!reader.IsStartElement("worksheet", Ns))
- return false;
-
- while (await reader.ReadAsync().ConfigureAwait(false))
- {
- if (!reader.IsStartElement("mergeCells", Ns))
- continue;
-
- if (!await reader.ReadFirstContentAsync(cancellationToken).ConfigureAwait(false))
- return false;
-
- while (!reader.EOF)
- {
- if (reader.IsStartElement("mergeCell", Ns))
- {
- var refAttr = reader.GetAttribute("ref");
- var refs = refAttr.Split(':');
- if (refs.Length == 1)
- continue;
-
- CellReferenceConverter.TryParseCellReference(refs[0], out var x1, out var y1);
- CellReferenceConverter.TryParseCellReference(refs[1], out var x2, out var y2);
-
- mergeCells.MergesValues.Add(refs[0], null);
-
- // foreach range
- var isFirst = true;
- for (int x = x1; x <= x2; x++)
- {
- for (int y = y1; y <= y2; y++)
- {
- if (!isFirst)
- mergeCells.MergesMap.Add(CellReferenceConverter.GetCellFromCoordinates(x, y), refs[0]);
- isFirst = false;
- }
- }
-
- await reader.SkipContentAsync(cancellationToken).ConfigureAwait(false);
- }
- else if (!await reader.SkipContentAsync(cancellationToken).ConfigureAwait(false))
- {
- break;
- }
- }
- }
-
- mergeCellsContext.MergeCells = mergeCells;
- return true;
- }
-
- [CreateSyncVersion]
- internal async Task ReadCommentsAsync(string? sheetName, CancellationToken cancellationToken = default)
- {
- if (string.IsNullOrEmpty(sheetName))
- throw new ArgumentException("sheetName cannot be null or empty", nameof(sheetName));
-
- XNamespace nsRel = Schemas.OpenXmlPackageRelationships;
- XNamespace ns18Tc = Schemas.SpreadsheetmlXmlX18Tc;
- XNamespace nsMain = Schemas.SpreadsheetmlXmlMain;
- XNamespace ns14R = Schemas.SpreadsheetmlXmlX14R;
-
- SetWorkbookRels(Archive.EntryCollection);
- var sheetRecord = _sheetRecords?.SingleOrDefault(s => s.Name.Equals(sheetName, StringComparison.CurrentCultureIgnoreCase));
- if (sheetRecord?.Path?.Split('/')[^1] is not { } sheetFile)
- throw new InvalidDataException($"There is no sheet named {sheetName}");
-
- List people = [];
- if (Archive.GetEntry(ExcelFileNames.Person) is { } persons)
- {
- var personStream = await persons.OpenAsync(cancellationToken).ConfigureAwait(false);
- await using var disposablePersonStream = personStream.ConfigureAwait(false);
-
- var personDoc = await XDocument.LoadAsync(personStream, LoadOptions.None, cancellationToken).ConfigureAwait(false);
- var personElements = personDoc.Root?.Elements(ns18Tc + "person");
- people = personElements
- ?.Select(p => new Author
- {
- Id = Guid.Parse(p.Attribute("id")!.Value),
- DisplayName = p.Attribute("displayName")?.Value is { } name and not "" ? name : "???",
- ProviderId = p.Attribute("providerId")?.Value,
- })
- .ToList() ?? [];
- }
-
- if (Archive.GetEntry($"xl/worksheets/_rels/{sheetFile}.rels") is not { } rel)
- return new CommentResultSet(sheetName, [], []);
-
- var stream = await rel.OpenAsync(cancellationToken).ConfigureAwait(false);
- await using var disposableStream = stream.ConfigureAwait(false);
-
- var relDoc = await XDocument.LoadAsync(stream, LoadOptions.None, cancellationToken).ConfigureAwait(false);
-
- var threadedCommentRels = relDoc.Root?.Elements(nsRel + "Relationship");
- var threadedCommentsElement = threadedCommentRels?.FirstOrDefault(x => x.Attribute("Type")?.Value == Schemas.SpreadsheetmlXmlThreadedCommentRelationship);
- var threadedCommentsTarget = threadedCommentsElement?.Attribute("Target");
- var threadedCommentsPath = threadedCommentsTarget?.Value.TrimStart('.', '/');
-
- var noteRels = relDoc.Root?.Elements(nsRel + "Relationship");
- var notesElement = noteRels?.FirstOrDefault(x => x.Attribute("Type")?.Value == Schemas.SpreadsheetmlXmlCommentsRelationship);
- var notesTarget = notesElement?.Attribute("Target");
- var notesPath = notesTarget?.Value.TrimStart('.', '/');
-
- List commentThreads = [];
- List notes = [];
- HashSet refCells = [];
- if (Archive.GetEntry($"xl/{threadedCommentsPath}") is { } threadEntry)
- {
- var threadEntryStream = await threadEntry.OpenAsync(cancellationToken).ConfigureAwait(false);
- await using var disposableThreadEntryStream = threadEntryStream.ConfigureAwait(false);
-
- var doc = await XDocument.LoadAsync(threadEntryStream, LoadOptions.None, cancellationToken).ConfigureAwait(false);
-
- var commentThreadElements = doc.Root?.Elements(ns18Tc + "threadedComment");
- commentThreads = commentThreadElements
- ?.Where(tc => tc.Attribute("parentId") is null)
- .Select(tc => new ThreadedComment
- {
- Id = Guid.Parse(tc.Attribute("id")!.Value.Trim('{', '}')),
- Author = people.FirstOrDefault(p => p.Id == (Guid.TryParse(tc.Attribute("personId")?.Value, out var person) ? person : Guid.Empty)),
- CreatedAt = DateTime.Parse(tc.Attribute("dT")!.Value, CultureInfo.InvariantCulture),
- ReferenceCell = tc.Attribute("ref")?.Value!,
- Text = tc.Value,
- Resolved = tc.Attribute("done")?.Value is not (null or "0")
- })
- .ToList() ?? [];
-
- var replyElements = doc.Root?.Elements(ns18Tc + "threadedComment");
- var replies = replyElements
- ?.Where(tc => tc.Attribute("parentId") is not null)
- .Select(tc => new ThreadedCommentReply
- {
- Id = Guid.Parse(tc.Attribute("id")!.Value.Trim('{', '}')),
- ParentId = Guid.Parse(tc.Attribute("parentId")!.Value),
- Author = people.FirstOrDefault(p => p.Id == Guid.Parse(tc.Attribute("personId")!.Value)),
- CreatedAt = DateTime.Parse(tc.Attribute("dT")!.Value, CultureInfo.InvariantCulture),
- Text = tc.Value
- })
- .ToLookup(x => x.ParentId);
-
- if (replies is not null)
- {
- foreach (var thread in commentThreads)
- {
- thread.ThreadedComments = replies[thread.Id].ToList();
- }
- }
-
- refCells = [..commentThreads.Select(x => x.ReferenceCell)];
- }
-
- if (Archive.GetEntry($"xl/{notesPath}") is { } noteEntry)
- {
- var noteEntryStream = await noteEntry.OpenAsync(cancellationToken).ConfigureAwait(false);
- await using var disposableNoteEntryStream = noteEntryStream.ConfigureAwait(false);
-
- var doc = await XDocument.LoadAsync(noteEntryStream, LoadOptions.None, cancellationToken).ConfigureAwait(false);
-
- var authorElements = doc.Root?.Element(nsMain + "authors")?.Elements(nsMain + "author");
- var authors = authorElements?.Select(a => a.Value).ToArray();
-
- var commentElements = doc.Root
- ?.Element(nsMain + "commentList")
- ?.Elements(nsMain + "comment");
-
- notes = commentElements
- ?.Where(c => !refCells.Contains(c.Attribute("ref")?.Value))
- .Select(c => new NoteComment
- {
- Id = Guid.TryParse(c.Attribute(ns14R + "uid")?.Value.Trim('{', '}'), out var noteId) ? noteId : Guid.Empty,
- Author = int.TryParse(c.Attribute("authorId")?.Value, out var authorId) ? authors?.ElementAtOrDefault(authorId) : "",
- ReferenceCell = c.Attribute("ref")?.Value,
- Text = string.Join("", GetTextFromComment(c))
- })
- .ToList() ?? [];
- }
-
- return new CommentResultSet(sheetName, commentThreads, notes);
-
- IEnumerable GetTextFromComment(XElement? comment)
- {
- return comment?.Element(nsMain + "text") is { } textElement
- ? textElement.Descendants(nsMain + "t").Select(t => t.Value)
- : [];
- }
- }
-
- ///
- /// Direct mapped query that bypasses dictionary creation for better performance
- ///
- [CreateSyncVersion]
- internal async IAsyncEnumerable QueryMappedAsync(
- string? sheetName,
- [EnumeratorCancellation] CancellationToken cancellationToken = default)
- {
- cancellationToken.ThrowIfCancellationRequested();
-
- var sheetEntry = GetSheetEntry(sheetName);
- var withoutCr = false;
-
- var mergeCellsContext = new MergeCellsContext();
- if (_config.FillMergedCells)
- {
- await TryGetMergeCellsAsync(sheetEntry, mergeCellsContext, cancellationToken).ConfigureAwait(false);
- }
- var mergeCells = _config.FillMergedCells ? mergeCellsContext.MergeCells : null;
-
- // Direct XML reading without dictionary creation
- var xmlSettings = new XmlReaderSettings
- {
- CheckCharacters = false,
- IgnoreWhitespace = true,
- IgnoreComments = true,
- XmlResolver = null,
- Async = true
- };
-
- var sheetStream = await sheetEntry.OpenAsync(cancellationToken).ConfigureAwait(false);
- await using var disposableSheetStream = sheetStream.ConfigureAwait(false);
-
- using var reader = XmlReader.Create(sheetStream, xmlSettings);
-
- if (!reader.IsStartElement("worksheet", Ns))
- yield break;
-
- if (!await reader.ReadFirstContentAsync(cancellationToken).ConfigureAwait(false))
- yield break;
-
- while (!reader.EOF)
- {
- if (reader.IsStartElement("sheetData", Ns))
- {
- if (!await reader.ReadFirstContentAsync(cancellationToken).ConfigureAwait(false))
- continue;
-
- int rowIndex = -1;
- while (!reader.EOF)
- {
- if (reader.IsStartElement("row", Ns))
- {
- if (int.TryParse(reader.GetAttribute("r"), out int arValue))
- rowIndex = arValue - 1; // The row attribute is 1-based
- else
- rowIndex++;
-
- // Read row directly into mapped structure
- await foreach (var mappedRow in ReadMappedRowAsync(reader, rowIndex, withoutCr, mergeCells, cancellationToken).ConfigureAwait(false))
- {
- yield return mappedRow;
- }
- }
- else if (!await reader.SkipContentAsync(cancellationToken).ConfigureAwait(false))
- {
- break;
- }
- }
- }
- else if (!await reader.SkipContentAsync(cancellationToken).ConfigureAwait(false))
- {
- break;
- }
- }
- }
-
- [CreateSyncVersion]
- private async IAsyncEnumerable ReadMappedRowAsync(
- XmlReader reader,
- int rowIndex,
- bool withoutCr,
- MergeCells? mergeCells,
- [EnumeratorCancellation] CancellationToken cancellationToken = default)
- {
- if (!await reader.ReadFirstContentAsync(cancellationToken).ConfigureAwait(false))
- {
- // Empty row
- yield return new MappedRow(rowIndex);
- yield break;
- }
-
- var row = new MappedRow(rowIndex);
- var columnIndex = withoutCr ? -1 : 0;
-
- while (!reader.EOF)
- {
- if (reader.IsStartElement("c", Ns))
- {
- var aS = reader.GetAttribute("s");
- var aR = reader.GetAttribute("r");
- var aT = reader.GetAttribute("t");
-
- var cellAndColumn = await ReadCellAndSetColumnIndexAsync(reader, columnIndex, withoutCr, 0, aR, aT, cancellationToken).ConfigureAwait(false);
- var cellValue = cellAndColumn.CellValue;
- columnIndex = cellAndColumn.ColumnIndex;
-
- if (_config.FillMergedCells && mergeCells is not null)
- {
- if (mergeCells.MergesValues.ContainsKey(aR))
- {
- mergeCells.MergesValues[aR] = cellValue;
- }
- else if (mergeCells.MergesMap.TryGetValue(aR, out var mergeKey))
- {
- mergeCells.MergesValues.TryGetValue(mergeKey, out cellValue);
- }
- }
-
- if (!string.IsNullOrEmpty(aS)) // Custom style
- {
- if (int.TryParse(aS, NumberStyles.Any, CultureInfo.InvariantCulture, out var styleIndex))
- {
- _style ??= new OpenXmlStyles(Archive);
- cellValue = _style.ConvertValueByStyleFormat(styleIndex, cellValue);
- }
- }
-
- row.SetCell(columnIndex, cellValue);
- }
- else if (!await reader.SkipContentAsync(cancellationToken).ConfigureAwait(false))
- {
- break;
- }
- }
-
- yield return row;
- }
-
public void Dispose()
{
- Dispose(true);
- GC.SuppressFinalize(this);
- }
+ if (_disposed)
+ return;
- protected void Dispose(bool disposing)
- {
- if (!_disposed)
- {
- if (disposing)
- {
- if (SharedStrings is SharedStringsDiskCache cache)
- {
- cache.Dispose();
- }
- }
+ if (SharedStrings is SharedStringsDiskCache cache)
+ cache.Dispose();
- _disposed = true;
- }
- }
-
- ~OpenXmlReader()
- {
- Dispose(false);
+ _disposed = true;
}
}
diff --git a/src/MiniExcel.OpenXml/Styles/Builder/SheetStyleBuildContext.cs b/src/MiniExcel.OpenXml/Styles/Builder/SheetStyleBuildContext.cs
index 83d3ccbe..35cd1826 100644
--- a/src/MiniExcel.OpenXml/Styles/Builder/SheetStyleBuildContext.cs
+++ b/src/MiniExcel.OpenXml/Styles/Builder/SheetStyleBuildContext.cs
@@ -49,7 +49,7 @@ public async Task CreateAsync(SheetStyleElementInfos generatedElementInfos, Canc
{
var oldStyleXmlStream = await styleEntry.OpenAsync(cancellationToken).ConfigureAwait(false);
await using var disposableStream = oldStyleXmlStream.ConfigureAwait(false);
- using var reader = XmlReader.Create(oldStyleXmlStream, XmlReaderHelper.GetXmlReaderSettings(isAsync));
+ using var reader = XmlReader.Create(oldStyleXmlStream, XmlReaderHelper.GetXmlReaderSettings());
infos = await ReadSheetStyleElementInfosAsync(reader, cancellationToken).ConfigureAwait(false);
}
@@ -67,20 +67,13 @@ public async Task InitializeAsync(SheetStyleElementInfos generatedElementInfos,
if (_initialized)
throw new InvalidOperationException("The context has already been initialized.");
- const bool isAsync =
-#if SYNC_ONLY
- false;
-#else
- true;
-#endif
-
GeneratedElementInfos = generatedElementInfos;
_oldStyleXmlZipEntry = _archive.Mode == ZipArchiveMode.Update
? _archive.Entries.SingleOrDefault(s => s.FullName == ExcelFileNames.Styles)
: null;
- var xmlReaderSettings = XmlReaderHelper.GetXmlReaderSettings(isAsync);
+ var xmlReaderSettings = XmlReaderHelper.GetXmlReaderSettings();
if (_oldStyleXmlZipEntry is not null)
{
var oldStyleXmlStream = await _oldStyleXmlZipEntry.OpenAsync(cancellationToken).ConfigureAwait(false);
@@ -104,7 +97,16 @@ public async Task InitializeAsync(SheetStyleElementInfos generatedElementInfos,
}
_newXmlWriterStream = await _newStyleXmlZipEntry.OpenAsync(cancellationToken).ConfigureAwait(false);
- NewXmlWriter = XmlWriter.Create(_newXmlWriterStream, new XmlWriterSettings { Indent = true, Encoding = _encoding, Async = isAsync });
+ NewXmlWriter = XmlWriter.Create(
+ _newXmlWriterStream,
+ new XmlWriterSettings
+ {
+ Indent = true,
+ Encoding = _encoding,
+#if !SYNC_ONLY
+ Async = true
+#endif
+ });
_initialized = true;
}
diff --git a/src/MiniExcel.OpenXml/Styles/OpenXmlStyles.cs b/src/MiniExcel.OpenXml/Styles/OpenXmlStyles.cs
index 70937eb3..110d7885 100644
--- a/src/MiniExcel.OpenXml/Styles/OpenXmlStyles.cs
+++ b/src/MiniExcel.OpenXml/Styles/OpenXmlStyles.cs
@@ -2,7 +2,7 @@
namespace MiniExcelLib.OpenXml.Styles;
-internal class OpenXmlStyles
+internal partial class OpenXmlStyles
{
private static readonly string[] Ns = [Schemas.SpreadsheetmlXmlMain, Schemas.SpreadsheetmlXmlStrictNs];
@@ -10,22 +10,30 @@ internal class OpenXmlStyles
private readonly Dictionary _cellStyleXfs = new();
private readonly Dictionary _customFormats = new();
- public OpenXmlStyles(OpenXmlZip zip)
+ private OpenXmlStyles() { }
+
+ [CreateSyncVersion]
+ internal static async Task CreateAsync(OpenXmlZip zip, CancellationToken cancellationToken = default)
{
- using var reader = zip.GetXmlReader(ExcelFileNames.Styles);
- if (reader is null)
- throw new InvalidDataException("The OpenXml styles could not be found, the file might be malformed.");
-
- if (!reader.IsStartElement("styleSheet", Ns))
- return;
- if (!reader.ReadFirstContent())
- return;
+ if (zip.GetEntry(ExcelFileNames.Styles) is not { } entry)
+ throw new InvalidDataException("The OpenXml styles.xml file could not be found, the document might be malformed.");
+
+ var entryStream = await entry.OpenAsync(cancellationToken).ConfigureAwait(false);
+ await using var disposableEntryStream = entryStream.ConfigureAwait(false);
+ using var reader = XmlReader.Create(entryStream, XmlReaderHelper.GetXmlReaderSettings());
+ var openXmlStyles = new OpenXmlStyles();
+ if (!reader.IsStartElement("styleSheet", Ns) ||
+ !await reader.ReadFirstContentAsync(cancellationToken).ConfigureAwait(false))
+ {
+ return openXmlStyles;
+ }
+
while (!reader.EOF)
{
if (reader.IsStartElement("cellXfs", Ns))
{
- if (!XmlReaderHelper.ReadFirstContent(reader))
+ if (!await reader.ReadFirstContentAsync(cancellationToken).ConfigureAwait(false))
continue;
var index = 0;
@@ -35,17 +43,17 @@ public OpenXmlStyles(OpenXmlZip zip)
{
int.TryParse(reader.GetAttribute("xfId"), out var xfId);
int.TryParse(reader.GetAttribute("numFmtId"), out var numFmtId);
- _cellXfs.Add(index, new StyleRecord() { XfId = xfId, NumFmtId = numFmtId });
- reader.Skip();
+ openXmlStyles._cellXfs.Add(index, new StyleRecord { XfId = xfId, NumFmtId = numFmtId });
+ await reader.SkipAsync().ConfigureAwait(false);
index++;
}
- else if (!reader.SkipContent())
+ else if (!await reader.SkipContentAsync(cancellationToken).ConfigureAwait(false))
break;
}
}
else if (reader.IsStartElement("cellStyleXfs", Ns))
{
- if (!reader.ReadFirstContent())
+ if (!await reader.ReadFirstContentAsync(cancellationToken).ConfigureAwait(false))
continue;
var index = 0;
@@ -56,11 +64,11 @@ public OpenXmlStyles(OpenXmlZip zip)
int.TryParse(reader.GetAttribute("xfId"), out var xfId);
int.TryParse(reader.GetAttribute("numFmtId"), out var numFmtId);
- _cellStyleXfs.Add(index, new StyleRecord() { XfId = xfId, NumFmtId = numFmtId });
- reader.Skip();
+ openXmlStyles._cellStyleXfs.Add(index, new StyleRecord() { XfId = xfId, NumFmtId = numFmtId });
+ await reader.SkipAsync().ConfigureAwait(false);
index++;
}
- else if (!reader.SkipContent())
+ else if (!await reader.SkipContentAsync(cancellationToken).ConfigureAwait(false))
{
break;
}
@@ -68,7 +76,7 @@ public OpenXmlStyles(OpenXmlZip zip)
}
else if (reader.IsStartElement("numFmts", Ns))
{
- if (!reader.ReadFirstContent())
+ if (!await reader.ReadFirstContentAsync(cancellationToken).ConfigureAwait(false))
continue;
while (!reader.EOF)
@@ -85,20 +93,22 @@ public OpenXmlStyles(OpenXmlZip zip)
type = typeof(DateTime?);
}
- _customFormats.TryAdd(numFmtId, new NumberFormatString(formatCode, type));
- reader.Skip();
+ openXmlStyles._customFormats.TryAdd(numFmtId, new NumberFormatString(formatCode, type));
+ await reader.SkipAsync().ConfigureAwait(false);
}
- else if (!reader.SkipContent())
+ else if (!await reader.SkipContentAsync(cancellationToken).ConfigureAwait(false))
{
break;
}
}
}
- else if (!reader.SkipContent())
+ else if (!await reader.SkipContentAsync(cancellationToken).ConfigureAwait(false))
{
break;
}
}
+
+ return openXmlStyles;
}
internal NumberFormatString? GetStyleFormat(int index)
diff --git a/src/MiniExcel.OpenXml/Templates/OpenXmlTemplate.Impl.cs b/src/MiniExcel.OpenXml/Templates/OpenXmlTemplate.Impl.cs
index fa170553..367704b2 100644
--- a/src/MiniExcel.OpenXml/Templates/OpenXmlTemplate.Impl.cs
+++ b/src/MiniExcel.OpenXml/Templates/OpenXmlTemplate.Impl.cs
@@ -112,7 +112,7 @@ private void GetMergeCells(XElement worksheet)
}
}
- private static IEnumerable NewParseConditionalFormatRanges(XElement worksheet)
+ private static IEnumerable ParseConditionalFormatRanges(XElement worksheet)
{
var conditionalFormatting = worksheet.Element(SpreadsheetNs + "conditionalFormatting");
if (conditionalFormatting is null)
@@ -173,7 +173,7 @@ private async Task WriteSheetXmlAsync(XmlWriter writer, XElement worksheet, XEle
{
// TODO: Can we make this less complex?
- var conditionalFormatRanges = NewParseConditionalFormatRanges(worksheet).ToList();
+ var conditionalFormatRanges = ParseConditionalFormatRanges(worksheet).ToList();
var newConditionalFormatRanges = new List();
newConditionalFormatRanges.AddRange(conditionalFormatRanges);
diff --git a/src/MiniExcel.OpenXml/Templates/OpenXmlTemplate.MergeCells.cs b/src/MiniExcel.OpenXml/Templates/OpenXmlTemplate.MergeCells.cs
index 1ff7f05d..334db75e 100644
--- a/src/MiniExcel.OpenXml/Templates/OpenXmlTemplate.MergeCells.cs
+++ b/src/MiniExcel.OpenXml/Templates/OpenXmlTemplate.MergeCells.cs
@@ -1,3 +1,5 @@
+using MiniExcelLib.OpenXml.Reader;
+
namespace MiniExcelLib.OpenXml.Templates;
internal partial class OpenXmlTemplate
diff --git a/src/MiniExcel.OpenXml/Templates/OpenXmlTemplate.cs b/src/MiniExcel.OpenXml/Templates/OpenXmlTemplate.cs
index d14b36b5..e7d71c91 100644
--- a/src/MiniExcel.OpenXml/Templates/OpenXmlTemplate.cs
+++ b/src/MiniExcel.OpenXml/Templates/OpenXmlTemplate.cs
@@ -1,3 +1,4 @@
+using MiniExcelLib.OpenXml.Reader;
using CalcChainHelper = MiniExcelLib.OpenXml.Utils.CalcChainHelper;
namespace MiniExcelLib.OpenXml.Templates;
diff --git a/src/MiniExcel.OpenXml/Utils/OpenXmlZip.cs b/src/MiniExcel.OpenXml/Utils/OpenXmlZip.cs
index 28387d13..f32027d4 100644
--- a/src/MiniExcel.OpenXml/Utils/OpenXmlZip.cs
+++ b/src/MiniExcel.OpenXml/Utils/OpenXmlZip.cs
@@ -5,13 +5,6 @@ namespace MiniExcelLib.OpenXml.Utils;
/// Copied & modified from ExcelDataReader ZipWorker @MIT License
internal sealed partial class OpenXmlZip : IDisposable, IAsyncDisposable
{
- private static readonly XmlReaderSettings XmlSettings = new()
- {
- IgnoreComments = true,
- IgnoreWhitespace = true,
- XmlResolver = null,
- };
-
private bool _disposed;
internal ZipArchive ZipFile { get; }
@@ -51,10 +44,6 @@ internal static async Task CreateAsync(Stream fileStream, ZipArchive
public ZipArchiveEntry? GetEntry(string path) => Entries.GetValueOrDefault(path);
- public XmlReader? GetXmlReader(string path) => GetEntry(path) is { } entry
- ? XmlReader.Create(entry.Open(), XmlSettings)
- : null;
-
public void Dispose()
{
diff --git a/src/MiniExcel.OpenXml/Utils/XmlReaderHelper.cs b/src/MiniExcel.OpenXml/Utils/XmlReaderHelper.cs
index 2266bfb8..dacd9e0b 100644
--- a/src/MiniExcel.OpenXml/Utils/XmlReaderHelper.cs
+++ b/src/MiniExcel.OpenXml/Utils/XmlReaderHelper.cs
@@ -141,13 +141,7 @@ private static async Task ReadRichTextRunAsync(this XmlReader reader, Ca
[CreateSyncVersion]
public static async IAsyncEnumerable GetSharedStringsAsync(Stream stream, [EnumeratorCancellation]CancellationToken cancellationToken = default, params string[] nss)
{
- var xmlSettings = GetXmlReaderSettings(
-#if SYNC_ONLY
- false
-#else
- true
-#endif
- );
+ var xmlSettings = GetXmlReaderSettings();
using var reader = XmlReader.Create(stream, xmlSettings);
if (!reader.IsStartElement("sst", nss))
@@ -170,11 +164,14 @@ public static async IAsyncEnumerable GetSharedStringsAsync(Stream stream
}
}
- internal static XmlReaderSettings GetXmlReaderSettings(bool async) => new()
+ internal static XmlReaderSettings GetXmlReaderSettings(bool forceSynchronous = false) => new()
{
+ CheckCharacters = false,
IgnoreComments = true,
IgnoreWhitespace = true,
XmlResolver = null,
- Async = async
+#if !SYNC_ONLY
+ Async = !forceSynchronous
+#endif
};
}
diff --git a/src/MiniExcel.OpenXml/Writer/OpenXmlWriter.CopyInsert.cs b/src/MiniExcel.OpenXml/Writer/OpenXmlWriter.CopyInsert.cs
index 24b0f3aa..cef48071 100644
--- a/src/MiniExcel.OpenXml/Writer/OpenXmlWriter.CopyInsert.cs
+++ b/src/MiniExcel.OpenXml/Writer/OpenXmlWriter.CopyInsert.cs
@@ -1,3 +1,4 @@
+using MiniExcelLib.OpenXml.Reader;
using MiniExcelLib.OpenXml.Styles.Builder;
namespace MiniExcelLib.OpenXml.Writer;
@@ -48,7 +49,7 @@ public async Task CopyAndInsertAsync(bool overwriteSheet = false, IProgress
using var disposableNewArchive = _archive;
#endif
using var reader = await OpenXmlReader.CreateAsync(_oldStream!, _configuration, cancellationToken: cancellationToken).ConfigureAwait(false);
- var rels = await reader.GetWorkbookRelsAsync(_oldArchive!.Entries, cancellationToken).ConfigureAwait(false) ?? [];
+ var rels = await OpenXmlReader.GetWorkbookRelsAsync(_oldArchive!.Entries, cancellationToken).ConfigureAwait(false) ?? [];
_sheets.AddRange(rels
.OrderBy(sheet => sheet.Id)
diff --git a/src/MiniExcel.OpenXml/Writer/OpenXmlWriter.cs b/src/MiniExcel.OpenXml/Writer/OpenXmlWriter.cs
index e59694df..a0af25a3 100644
--- a/src/MiniExcel.OpenXml/Writer/OpenXmlWriter.cs
+++ b/src/MiniExcel.OpenXml/Writer/OpenXmlWriter.cs
@@ -1,9 +1,10 @@
using MiniExcelLib.Core.WriteAdapters;
+using MiniExcelLib.OpenXml.Reader;
using MiniExcelLib.OpenXml.Styles.Builder;
namespace MiniExcelLib.OpenXml.Writer;
-internal partial class OpenXmlWriter : IMiniExcelWriter
+internal sealed partial class OpenXmlWriter : IMiniExcelWriter
{
private static readonly UTF8Encoding Utf8WithBom = new(true);
@@ -105,7 +106,7 @@ public async Task InsertAsync(bool overwriteSheet = false, IProgress?
await using var sbc = _sheetStyleBuilderContext.ConfigureAwait(false);
using var reader = await OpenXmlReader.CreateAsync(_stream, _configuration, cancellationToken: cancellationToken).ConfigureAwait(false);
- var rels = await reader.GetWorkbookRelsAsync(_archive.Entries, cancellationToken).ConfigureAwait(false) ?? [];
+ var rels = await OpenXmlReader.GetWorkbookRelsAsync(_archive.Entries, cancellationToken).ConfigureAwait(false) ?? [];
_sheets.AddRange(rels
.OrderBy(sheet => sheet.Id)
@@ -674,7 +675,7 @@ private async Task CreateZipEntryAsync(string path, string? contentType, string
[CreateSyncVersion]
/* Todo: this method is not very efficient, but workbook.xml is generally a very small file so at the moment it's not worth over-optimizing it.
- Also, consider adding active sheet as one of the editable properties. */
+ Also, consider adding active sheet as one of the editable properties.*/
internal async Task AlterWorksheetAsync(string sheetName, string? newSheetName, int? newSheetIndex, SheetState? newSheetState, CancellationToken cancellationToken = default)
{
if (newSheetName is null && newSheetIndex is null && newSheetState is null)
diff --git a/tests/MiniExcel.OpenXml.Tests/MiniExcelIssueAsyncTests.cs b/tests/MiniExcel.OpenXml.Tests/MiniExcelIssueAsyncTests.cs
index 981feab2..be0bad0c 100644
--- a/tests/MiniExcel.OpenXml.Tests/MiniExcelIssueAsyncTests.cs
+++ b/tests/MiniExcel.OpenXml.Tests/MiniExcelIssueAsyncTests.cs
@@ -953,7 +953,7 @@ public async Task Issue193()
};
await _excelTemplater.FillTemplateAsync(path, templatePath, value);
- foreach (var sheetName in await _excelImporter.GetSheetNamesAsync(path))
+ foreach (var sheetName in await _excelImporter.GetSheetNamesAsync(path))
{
var rows = await _excelImporter.QueryAsync(path, sheetName: sheetName).ToListAsync();
Assert.Equal(9, rows.Count);
diff --git a/tests/MiniExcel.OpenXml.Tests/MiniExcelOpenXmlConfigurationTest.cs b/tests/MiniExcel.OpenXml.Tests/MiniExcelOpenXmlConfigurationTest.cs
index 251d7396..ca540042 100644
--- a/tests/MiniExcel.OpenXml.Tests/MiniExcelOpenXmlConfigurationTest.cs
+++ b/tests/MiniExcel.OpenXml.Tests/MiniExcelOpenXmlConfigurationTest.cs
@@ -23,7 +23,7 @@ public async Task DisableWriteFilePathTest()
await _excelExporter.ExportAsync(path, value, configuration: new OpenXmlConfiguration { EnableWriteFilePath = false }, overwriteFile: true);
Assert.True(File.Exists(path));
- var rows = await _excelImporter.QueryAsync(path).CreateListAsync();
+ var rows = await _excelImporter.QueryAsync(path).ToListAsync();
Assert.True(rows.All(x => x.Img is null or []));
}
diff --git a/tests/MiniExcel.OpenXml.Tests/Tables/MiniExcelOpenXmlTableAsyncTests.cs b/tests/MiniExcel.OpenXml.Tests/Tables/MiniExcelOpenXmlTableAsyncTests.cs
new file mode 100644
index 00000000..4dad49db
--- /dev/null
+++ b/tests/MiniExcel.OpenXml.Tests/Tables/MiniExcelOpenXmlTableAsyncTests.cs
@@ -0,0 +1,132 @@
+using MiniExcelLib.Tests.Common.Utils;
+
+namespace MiniExcelLib.OpenXml.Tests.Tables;
+
+/// <summary>Async tests for reading named worksheet tables through <c>QueryTableAsync</c>.</summary>
+public class MiniExcelOpenXmlTableAsyncTests
+{
+    private readonly OpenXmlImporter _excelImporter = MiniExcel.Importers.GetOpenXmlImporter();
+
+    /// <summary>
+    /// Tests querying a named table from a file path with dynamic results.
+    /// </summary>
+    [Fact]
+    public async Task QueryTableAsync_FromFilePath_ReturnsDynamicRows()
+    {
+        // Arrange
+        var path = PathHelper.GetFile("xlsx/TestQueryTable.xlsx");
+
+        // Act
+        var rows = await _excelImporter.QueryTableAsync(path, "Sheet1", "Table1").ToListAsync();
+
+        // Assert
+        Assert.Equal(3, rows.Count);
+        Assert.Equal("aaa", rows[0].Col1);
+        Assert.Equal(123D, rows[0].Col2);
+        Assert.Equal(new DateTime(2026, 5, 17), rows[0].Col3);
+    }
+
+    /// <summary>
+    /// Tests querying a named table from a stream with dynamic results.
+    /// </summary>
+    [Fact]
+    public async Task QueryTableAsync_FromStream_ReturnsDynamicRows()
+    {
+        // Arrange
+        var path = PathHelper.GetFile("xlsx/TestQueryTable.xlsx");
+        await using var stream = File.OpenRead(path);
+
+        // Act
+        var rows = await _excelImporter.QueryTableAsync(stream, "Sheet1", "Table1").ToListAsync();
+
+        // Assert
+        Assert.Equal(3, rows.Count);
+        Assert.Equal("bbb", rows[1].Col1);
+        Assert.Equal(456D, rows[1].Col2);
+        Assert.Equal(new DateTime(2026, 5, 18), rows[1].Col3);
+    }
+
+    /// <summary>
+    /// Tests querying a named table from a file path with strongly-typed results.
+    /// </summary>
+    [Fact]
+    public async Task QueryTableAsync_Generic_FromFilePath_ReturnsTypedRows()
+    {
+        // Arrange
+        var path = PathHelper.GetFile("xlsx/TestQueryTable.xlsx");
+
+        // Act: the explicit type argument selects the strongly-typed overload
+        var rows = await _excelImporter.QueryTableAsync<QueryTableTestModel>(path, "Sheet1", "Table1").ToListAsync();
+
+        // Assert
+        Assert.Equal(3, rows.Count);
+        Assert.Equal("aaa", rows[0].Col1);
+        Assert.Equal(123D, rows[0].Col2);
+        Assert.Equal(new DateTime(2026, 5, 17), rows[0].Col3);
+    }
+
+    /// <summary>
+    /// Tests querying a named table from a stream with strongly-typed results.
+    /// </summary>
+    [Fact]
+    public async Task QueryTableAsync_Generic_FromStream_ReturnsTypedRows()
+    {
+        // Arrange
+        var path = PathHelper.GetFile("xlsx/TestQueryTable.xlsx");
+        await using var stream = File.OpenRead(path);
+
+        // Act: typed overload, relying on the default sheet and table selection
+        var rows = await _excelImporter.QueryTableAsync<QueryTableTestModel>(stream).ToListAsync();
+
+        // Assert
+        Assert.Equal(3, rows.Count);
+        Assert.Equal("ccc", rows[2].Col1);
+        Assert.Equal(789D, rows[2].Col2);
+        Assert.Equal(new DateTime(2026, 5, 19), rows[2].Col3);
+    }
+
+    /// <summary>
+    /// Tests querying multiple tables from the same sheet.
+    /// </summary>
+    [Fact]
+    public async Task QueryTableAsync_MultipleTablesInSheet_ReturnsCorrectTableData()
+    {
+        // Arrange
+        var path = PathHelper.GetFile("xlsx/TestQueryTable.xlsx");
+
+        // Act
+        var table1 = await _excelImporter.QueryTableAsync(path).ToListAsync();
+        var table2 = await _excelImporter.QueryTableAsync(path, "Sheet1", "Table2").ToListAsync();
+
+        // Assert
+        Assert.NotEmpty(table1);
+        Assert.NotEmpty(table2);
+
+        Assert.Equal(3, table1.Count);
+        Assert.Equal("aaa", table1[0].Col1);
+        Assert.Equal(123D, table1[0].Col2);
+        Assert.Equal(new DateTime(2026, 5, 17), table1[0].Col3);
+
+        Assert.Equal(2, table2.Count);
+        Assert.Equal("test", table2[0].Prop1);
+        Assert.Equal(11D, table2[0].Prop2);
+        Assert.Equal("aaa", table2[0].Prop3);
+        Assert.Equal(new TimeSpan(10, 30, 0), table2[0].Prop4.TimeOfDay);
+    }
+
+    /// <summary>
+    /// Tests QueryTableAsync with custom sheet and table names.
+    /// </summary>
+    [Fact]
+    public async Task QueryTableAsync_WithCustomSheetAndTableNames_ReturnsCorrectData()
+    {
+        // Arrange
+        var path = PathHelper.GetFile("xlsx/TestQueryTable.xlsx");
+
+        // Act
+        var rows = await _excelImporter.QueryTableAsync(path, "CustomSheet", "CustomTable").ToListAsync();
+
+        // Assert
+        Assert.NotEmpty(rows);
+    }
+}
diff --git a/tests/MiniExcel.OpenXml.Tests/Tables/MiniExcelOpenXmlTableTests.cs b/tests/MiniExcel.OpenXml.Tests/Tables/MiniExcelOpenXmlTableTests.cs
new file mode 100644
index 00000000..f282521a
--- /dev/null
+++ b/tests/MiniExcel.OpenXml.Tests/Tables/MiniExcelOpenXmlTableTests.cs
@@ -0,0 +1,133 @@
+using MiniExcelLib.Tests.Common.Utils;
+
+namespace MiniExcelLib.OpenXml.Tests.Tables;
+
+/// <summary>Synchronous tests for reading named worksheet tables through <c>QueryTable</c>.</summary>
+public class MiniExcelOpenXmlTableTests
+{
+    private readonly OpenXmlImporter _excelImporter = MiniExcel.Importers.GetOpenXmlImporter();
+
+
+    /// <summary>
+    /// Tests querying a named table from a file path with dynamic results.
+    /// </summary>
+    [Fact]
+    public void QueryTable_FromFilePath_ReturnsDynamicRows()
+    {
+        // Arrange
+        var path = PathHelper.GetFile("xlsx/TestQueryTable.xlsx");
+
+        // Act
+        var rows = _excelImporter.QueryTable(path, "Sheet1", "Table1").ToList();
+
+        // Assert
+        Assert.Equal(3, rows.Count);
+        Assert.Equal("aaa", rows[0].Col1);
+        Assert.Equal(123D, rows[0].Col2);
+        Assert.Equal(new DateTime(2026, 5, 17), rows[0].Col3);
+    }
+
+    /// <summary>
+    /// Tests querying a named table from a stream with dynamic results.
+    /// </summary>
+    [Fact]
+    public void QueryTable_FromStream_ReturnsDynamicRows()
+    {
+        // Arrange
+        var path = PathHelper.GetFile("xlsx/TestQueryTable.xlsx");
+        using var stream = File.OpenRead(path);
+
+        // Act
+        var rows = _excelImporter.QueryTable(stream, "Sheet1", "Table1").ToList();
+
+        // Assert
+        Assert.Equal(3, rows.Count);
+        Assert.Equal("bbb", rows[1].Col1);
+        Assert.Equal(456D, rows[1].Col2);
+        Assert.Equal(new DateTime(2026, 5, 18), rows[1].Col3);
+    }
+
+    /// <summary>
+    /// Tests querying a named table from a file path with strongly-typed results.
+    /// </summary>
+    [Fact]
+    public void QueryTable_Generic_FromFilePath_ReturnsTypedRows()
+    {
+        // Arrange
+        var path = PathHelper.GetFile("xlsx/TestQueryTable.xlsx");
+
+        // Act: the explicit type argument selects the strongly-typed overload
+        var rows = _excelImporter.QueryTable<QueryTableTestModel>(path, "Sheet1", "Table1").ToList();
+
+        // Assert
+        Assert.Equal(3, rows.Count);
+        Assert.Equal("aaa", rows[0].Col1);
+        Assert.Equal(123D, rows[0].Col2);
+        Assert.Equal(new DateTime(2026, 5, 17), rows[0].Col3);
+    }
+
+    /// <summary>
+    /// Tests querying a named table from a stream with strongly-typed results.
+    /// </summary>
+    [Fact]
+    public void QueryTable_Generic_FromStream_ReturnsTypedRows()
+    {
+        // Arrange
+        var path = PathHelper.GetFile("xlsx/TestQueryTable.xlsx");
+        using var stream = File.OpenRead(path);
+
+        // Act: typed overload, relying on the default sheet and table selection
+        var rows = _excelImporter.QueryTable<QueryTableTestModel>(stream).ToList();
+
+        // Assert
+        Assert.Equal(3, rows.Count);
+        Assert.Equal("ccc", rows[2].Col1);
+        Assert.Equal(789D, rows[2].Col2);
+        Assert.Equal(new DateTime(2026, 5, 19), rows[2].Col3);
+    }
+
+    /// <summary>
+    /// Tests querying multiple tables from the same sheet.
+    /// </summary>
+    [Fact]
+    public void QueryTable_MultipleTablesInSheet_ReturnsCorrectTableData()
+    {
+        // Arrange
+        var path = PathHelper.GetFile("xlsx/TestQueryTable.xlsx");
+
+        // Act
+        var table1 = _excelImporter.QueryTable(path).ToList();
+        var table2 = _excelImporter.QueryTable(path, "Sheet1", "Table2").ToList();
+
+        // Assert
+        Assert.NotEmpty(table1);
+        Assert.NotEmpty(table2);
+
+        Assert.Equal(3, table1.Count);
+        Assert.Equal("aaa", table1[0].Col1);
+        Assert.Equal(123D, table1[0].Col2);
+        Assert.Equal(new DateTime(2026, 5, 17), table1[0].Col3);
+
+        Assert.Equal(2, table2.Count);
+        Assert.Equal("test", table2[0].Prop1);
+        Assert.Equal(11D, table2[0].Prop2);
+        Assert.Equal("aaa", table2[0].Prop3);
+        Assert.Equal(new TimeSpan(10, 30, 0), table2[0].Prop4.TimeOfDay);
+    }
+
+    /// <summary>
+    /// Tests QueryTable with custom sheet and table names.
+    /// </summary>
+    [Fact]
+    public void QueryTable_WithCustomSheetAndTableNames_ReturnsCorrectData()
+    {
+        // Arrange
+        var path = PathHelper.GetFile("xlsx/TestQueryTable.xlsx");
+
+        // Act
+        var rows = _excelImporter.QueryTable(path, "CustomSheet", "CustomTable").ToList();
+
+        // Assert
+        Assert.NotEmpty(rows);
+    }
+}
diff --git a/tests/MiniExcel.OpenXml.Tests/Tables/Models.cs b/tests/MiniExcel.OpenXml.Tests/Tables/Models.cs
new file mode 100644
index 00000000..674e6df6
--- /dev/null
+++ b/tests/MiniExcel.OpenXml.Tests/Tables/Models.cs
@@ -0,0 +1,8 @@
+namespace MiniExcelLib.OpenXml.Tests.Tables;
+
+internal class QueryTableTestModel // Row model with one property per table column; presumably the target type of the typed table-query tests - TODO confirm
+{
+ public string? Col1 { get; set; } // text column; nullable
+ public int Col2 { get; set; } // integer column
+ public DateTime Col3 { get; set; } // date/time column
+}
diff --git a/tests/data/xlsx/TestQueryTable.xlsx b/tests/data/xlsx/TestQueryTable.xlsx
new file mode 100644
index 00000000..c5039015
Binary files /dev/null and b/tests/data/xlsx/TestQueryTable.xlsx differ