diff --git a/README-V2.md b/README-V2.md index 5127a44c..575e78d4 100644 --- a/README-V2.md +++ b/README-V2.md @@ -1757,6 +1757,16 @@ var importer = MiniExcel.Importers.GetOpenXmlImporter(); var dim = importer.GetSheetDimensions(path); ``` +#### 8. Retrieve Table Data + +It is possible to query arbitrary tables from any worksheet. +You can either keep it dynamic or map it to a strongly typed object like regular queries: + +```csharp +var importer = MiniExcel.Importers.GetOpenXmlImporter(); +var rows = importer.QueryTable(stream, "Sheet1", "YourTable").ToList(); +``` + ### FAQ #### Q: Excel header title is not equal to my DTO class property name, how do I map it? diff --git a/src/MiniExcel.Core/Reflection/MiniExcelMapper.cs b/src/MiniExcel.Core/Reflection/MiniExcelMapper.cs index fb82fc7d..3997ef01 100644 --- a/src/MiniExcel.Core/Reflection/MiniExcelMapper.cs +++ b/src/MiniExcel.Core/Reflection/MiniExcelMapper.cs @@ -56,11 +56,11 @@ public static partial class MiniExcelMapper //Q: Why need to check every time? A: it needs to check everytime, because it's dictionary object? 
itemValue = null; - if (map.ExcelIndexName is not null && (keys?.Contains(map.ExcelIndexName) is true)) + if (map.ExcelIndexName is not null && keys?.Contains(map.ExcelIndexName) is true) { item.TryGetValue(map.ExcelIndexName, out itemValue); } - else if (map.ExcelColumnName is not null && (headersDic?.TryGetValue(map.ExcelColumnName, out var columnId) is true)) + else if (map.ExcelColumnName is not null && headersDic?.TryGetValue(map.ExcelColumnName, out var columnId) is true) { var columnName = keys[columnId]; item.TryGetValue(columnName, out itemValue); diff --git a/src/MiniExcel.OpenXml/Api/OpenXmlImporter.cs b/src/MiniExcel.OpenXml/Api/OpenXmlImporter.cs index 113abdc8..45e678ac 100644 --- a/src/MiniExcel.OpenXml/Api/OpenXmlImporter.cs +++ b/src/MiniExcel.OpenXml/Api/OpenXmlImporter.cs @@ -1,3 +1,5 @@ +using MiniExcelLib.OpenXml.Reader; + // ReSharper disable once CheckNamespace namespace MiniExcelLib.OpenXml; @@ -326,7 +328,7 @@ public async Task> GetSheetNamesAsync(Stream stream, bool leaveOpen await using var disposableArchive = archive.ConfigureAwait(false); using var reader = await OpenXmlReader.CreateAsync(stream, null, leaveOpen, cancellationToken).ConfigureAwait(false); - var rels = await reader.GetWorkbookRelsAsync(archive.EntryCollection, cancellationToken).ConfigureAwait(false); + var rels = await OpenXmlReader.GetWorkbookRelsAsync(archive.EntryCollection, cancellationToken).ConfigureAwait(false); return rels?.Select(s => s.Name).ToList() ?? 
[]; } @@ -366,7 +368,7 @@ public async Task> GetSheetInformationsAsync(Stream stream, bool await using var disposableArchve = archive.ConfigureAwait(false); using var reader = await OpenXmlReader.CreateAsync(stream, null, leaveOpen, cancellationToken).ConfigureAwait(false); - var rels = await reader.GetWorkbookRelsAsync(archive.EntryCollection, cancellationToken).ConfigureAwait(false); + var rels = await OpenXmlReader.GetWorkbookRelsAsync(archive.EntryCollection, cancellationToken).ConfigureAwait(false); return rels?.Select((s, i) => s.ToSheetInfo((uint)i)).ToList() ?? []; } @@ -433,7 +435,6 @@ public async Task> GetColumnNamesAsync(string path, bool has return await GetColumnNamesAsync(stream, hasHeaderRow, sheetName, startCell, false, cancellationToken).ConfigureAwait(false); } - /// /// Retrieves the column names from the first row (header row) of an Excel sheet. /// @@ -497,6 +498,94 @@ public async Task RetrieveCommentsAsync(Stream stream, string? return await reader.ReadCommentsAsync(sheetName, cancellationToken).ConfigureAwait(false); } + /// + /// Queries a named table in an Excel worksheet and returns dynamic objects representing each row. + /// + /// The path to the Excel document. + /// The name of the worksheet containing the table. Default is "Sheet1". + /// The name of the table to query. Default is "Table1". + /// A token to cancel the asynchronous operation. + /// + /// Named tables in Excel are structured data ranges with defined column headers and a unique name. + /// This method reads from the specified table within a stream and yields rows as dynamic objects with properties based on the table's column names. 
+ /// + [CreateSyncVersion] + public async IAsyncEnumerable QueryTableAsync(string path, string sheetName = "Sheet1", string tableName = "Table1", [EnumeratorCancellation] CancellationToken cancellationToken = default) + { + var stream = FileHelper.OpenSharedRead(path); + await using var disposableStream = stream.ConfigureAwait(false); + + using var reader = await OpenXmlReader.CreateAsync(stream, null, false, cancellationToken).ConfigureAwait(false); + await foreach (var table in reader.QueryTableAsync(sheetName, tableName, false, cancellationToken).ConfigureAwait(false)) + yield return table; + } + + /// + /// Queries a named table in an Excel worksheet and returns dynamic objects representing each row. + /// + /// The stream containing the Excel file data. The stream position is not reset after reading. + /// The name of the worksheet containing the table. Default is "Sheet1". + /// The name of the table to query. Default is "Table1". + /// True to leave the stream open after the query is completed, otherwise false. + /// A token to cancel the asynchronous operation. + /// + /// Named tables in Excel are structured data ranges with defined column headers and a unique name. + /// This method reads from the specified table within a stream and yields rows as dynamic objects with properties based on the table's column names. + /// + [CreateSyncVersion] + public async IAsyncEnumerable QueryTableAsync(Stream stream, string sheetName = "Sheet1", string tableName = "Table1", bool leaveOpen = false, [EnumeratorCancellation] CancellationToken cancellationToken = default) + { + using var reader = await OpenXmlReader.CreateAsync(stream, null, leaveOpen, cancellationToken).ConfigureAwait(false); + await foreach (var table in reader.QueryTableAsync(sheetName, tableName, false, cancellationToken).ConfigureAwait(false)) + yield return table; + } + + /// + /// Queries a named table in an Excel worksheet and returns strongly-typed objects representing each row. 
+ /// + /// The class type to map each row to. Must have a parameterless constructor. Property names should match the table's column names. + /// The path to the Excel document. + /// The name of the worksheet containing the table. Default is "Sheet1". + /// The name of the table to query. Default is "Table1". + /// A token to cancel the asynchronous operation. + /// + /// Named tables in Excel are structured data ranges with defined column headers and a unique name. + /// This method opens the document at the given path, reads from the specified table and maps each row to an instance of the provided type. The mapping is based on property/field names matching column headers. + /// + [CreateSyncVersion] + public async IAsyncEnumerable QueryTableAsync(string path, string sheetName = "Sheet1", string tableName = "Table1", [EnumeratorCancellation] CancellationToken cancellationToken = default) + where T : class, new() + { + var stream = FileHelper.OpenSharedRead(path); + await using var disposableStream = stream.ConfigureAwait(false); + + using var reader = await OpenXmlReader.CreateAsync(stream, null, false, cancellationToken).ConfigureAwait(false); + await foreach (var table in reader.QueryTableAsync(sheetName, tableName, cancellationToken).ConfigureAwait(false)) + yield return table; + } + + /// + /// Queries a named table in an Excel worksheet and returns strongly-typed objects representing each row. + /// + /// The class type to map each row to. Must have a parameterless constructor. Property names should match the table's column names. + /// The stream containing the Excel file data. The stream position is not reset after reading. + /// The name of the worksheet containing the table. Default is "Sheet1". + /// The name of the table to query. Default is "Table1". + /// True to leave the stream open after the query is completed, otherwise false. + /// A token to cancel the asynchronous operation. 
+ /// + /// Named tables in Excel are structured data ranges with defined column headers and a unique name. + /// This method reads from the specified table within a stream and maps each row to an instance of the provided type. The mapping is based on property/field names matching column headers. + /// + [CreateSyncVersion] + public async IAsyncEnumerable QueryTableAsync(Stream stream, string sheetName = "Sheet1", string tableName = "Table1", bool leaveOpen = false, [EnumeratorCancellation] CancellationToken cancellationToken = default) + where T : class, new() + { + using var reader = await OpenXmlReader.CreateAsync(stream, null, leaveOpen, cancellationToken).ConfigureAwait(false); + await foreach (var table in reader.QueryTableAsync(sheetName, tableName, cancellationToken).ConfigureAwait(false)) + yield return table; + } + #endregion #region DataReader diff --git a/src/MiniExcel.OpenXml/FluentMapping/Configuration/CollectionMappingBuilder.cs b/src/MiniExcel.OpenXml/FluentMapping/Configuration/CollectionMappingBuilder.cs index 13fa931c..f2251049 100644 --- a/src/MiniExcel.OpenXml/FluentMapping/Configuration/CollectionMappingBuilder.cs +++ b/src/MiniExcel.OpenXml/FluentMapping/Configuration/CollectionMappingBuilder.cs @@ -1,5 +1,3 @@ -using System.Text.RegularExpressions; - namespace MiniExcelLib.OpenXml.FluentMapping.Configuration; internal partial class CollectionMappingBuilder : ICollectionMappingBuilder where TCollection : IEnumerable diff --git a/src/MiniExcel.OpenXml/FluentMapping/Configuration/PropertyMappingBuilder.cs b/src/MiniExcel.OpenXml/FluentMapping/Configuration/PropertyMappingBuilder.cs index 03e0eb9b..8dc01a67 100644 --- a/src/MiniExcel.OpenXml/FluentMapping/Configuration/PropertyMappingBuilder.cs +++ b/src/MiniExcel.OpenXml/FluentMapping/Configuration/PropertyMappingBuilder.cs @@ -1,5 +1,3 @@ -using System.Text.RegularExpressions; - namespace MiniExcelLib.OpenXml.FluentMapping.Configuration; internal partial class PropertyMappingBuilder : 
IPropertyMappingBuilder diff --git a/src/MiniExcel.OpenXml/FluentMapping/Helpers/ConversionHelper.cs b/src/MiniExcel.OpenXml/FluentMapping/Helpers/ConversionHelper.cs index 40089cae..a0838cd4 100644 --- a/src/MiniExcel.OpenXml/FluentMapping/Helpers/ConversionHelper.cs +++ b/src/MiniExcel.OpenXml/FluentMapping/Helpers/ConversionHelper.cs @@ -1,6 +1,4 @@ using System.Collections.Concurrent; -using System.Globalization; -using System.Reflection; namespace MiniExcelLib.OpenXml.FluentMapping.Helpers; diff --git a/src/MiniExcel.OpenXml/FluentMapping/Helpers/MappingMetadataExtractor.cs b/src/MiniExcel.OpenXml/FluentMapping/Helpers/MappingMetadataExtractor.cs index 0a260557..a8336013 100644 --- a/src/MiniExcel.OpenXml/FluentMapping/Helpers/MappingMetadataExtractor.cs +++ b/src/MiniExcel.OpenXml/FluentMapping/Helpers/MappingMetadataExtractor.cs @@ -1,6 +1,3 @@ -using System.Reflection; -using MiniExcelLib.Core.Helpers; -using MiniExcelLib.Core.Reflection; using MiniExcelLib.OpenXml.FluentMapping.Helpers; namespace MiniExcelLib.OpenXml.FluentMapping; diff --git a/src/MiniExcel.OpenXml/FluentMapping/MappingCellStream.cs b/src/MiniExcel.OpenXml/FluentMapping/MappingCellStream.cs index 403cd52c..f7ecf324 100644 --- a/src/MiniExcel.OpenXml/FluentMapping/MappingCellStream.cs +++ b/src/MiniExcel.OpenXml/FluentMapping/MappingCellStream.cs @@ -1,5 +1,3 @@ -using MiniExcelLib.Core.Abstractions; - namespace MiniExcelLib.OpenXml.FluentMapping; internal readonly struct MappingCellStream(IEnumerable items, CompiledMapping mapping, string[] columnLetters) : IMappingCellStream diff --git a/src/MiniExcel.OpenXml/FluentMapping/MappingCellStreamAdapter.cs b/src/MiniExcel.OpenXml/FluentMapping/MappingCellStreamAdapter.cs index 0596742e..2274d79a 100644 --- a/src/MiniExcel.OpenXml/FluentMapping/MappingCellStreamAdapter.cs +++ b/src/MiniExcel.OpenXml/FluentMapping/MappingCellStreamAdapter.cs @@ -1,6 +1,3 @@ -using MiniExcelLib.Core.Abstractions; -using MiniExcelLib.Core.Reflection; - namespace 
MiniExcelLib.OpenXml.FluentMapping; internal class MappingCellStreamAdapter(MappingCellStream cellStream, string[] columnLetters) diff --git a/src/MiniExcel.OpenXml/FluentMapping/MappingCompiler.cs b/src/MiniExcel.OpenXml/FluentMapping/MappingCompiler.cs index efc5b6c3..f6f36c15 100644 --- a/src/MiniExcel.OpenXml/FluentMapping/MappingCompiler.cs +++ b/src/MiniExcel.OpenXml/FluentMapping/MappingCompiler.cs @@ -1,9 +1,5 @@ -using System.Reflection; -using MiniExcelLib.Core.Helpers; -using MiniExcelLib.Core.Reflection; using MiniExcelLib.OpenXml.FluentMapping.Configuration; using MiniExcelLib.OpenXml.FluentMapping.Helpers; -using MiniExcelLib.OpenXml.Utils; namespace MiniExcelLib.OpenXml.FluentMapping; diff --git a/src/MiniExcel.OpenXml/FluentMapping/MappingReader.cs b/src/MiniExcel.OpenXml/FluentMapping/MappingReader.cs index 049fd3a5..1725adc1 100644 --- a/src/MiniExcel.OpenXml/FluentMapping/MappingReader.cs +++ b/src/MiniExcel.OpenXml/FluentMapping/MappingReader.cs @@ -1,3 +1,5 @@ +using MiniExcelLib.OpenXml.Reader; + namespace MiniExcelLib.OpenXml.FluentMapping; internal static partial class MappingReader where T : class, new() @@ -203,8 +205,7 @@ private static Dictionary InitializeCollections(CompiledMapping m else { // This should never happen with properly optimized mappings - throw new InvalidOperationException( - "OptimizedCollectionHelpers is null. Ensure the mapping was properly compiled and optimized."); + throw new InvalidOperationException("OptimizedCollectionHelpers is null. 
Ensure the mapping was properly compiled and optimized."); } return collections; @@ -469,4 +470,4 @@ private static bool HasAnyData(T item, CompiledMapping mapping) bool b => !b, _ => false }; -} \ No newline at end of file +} diff --git a/src/MiniExcel.OpenXml/FluentMapping/MappingTemplateProcessor.cs b/src/MiniExcel.OpenXml/FluentMapping/MappingTemplateProcessor.cs index 10779bd6..e407356f 100644 --- a/src/MiniExcel.OpenXml/FluentMapping/MappingTemplateProcessor.cs +++ b/src/MiniExcel.OpenXml/FluentMapping/MappingTemplateProcessor.cs @@ -1,27 +1,11 @@ -using System.Text; -using System.Xml; -using MiniExcelLib.OpenXml.Helpers; -using MiniExcelLib.OpenXml.Utils; -using Zomp.SyncMethodGenerator; - namespace MiniExcelLib.OpenXml.FluentMapping; internal partial struct MappingTemplateProcessor(CompiledMapping mapping) where T : class { [CreateSyncVersion] - public async Task ProcessSheetAsync( - Stream sourceStream, - Stream targetStream, - IEnumerator dataEnumerator, - CancellationToken cancellationToken) + public async Task ProcessSheetAsync(Stream sourceStream, Stream targetStream, IEnumerator dataEnumerator, CancellationToken cancellationToken) { - var readerSettings = new XmlReaderSettings - { - Async = true, - IgnoreWhitespace = false, - IgnoreComments = false, - CheckCharacters = false - }; + var readerSettings = XmlReaderHelper.GetXmlReaderSettings(); var writerSettings = new XmlWriterSettings { @@ -38,7 +22,6 @@ public async Task ProcessSheetAsync( var currentItem = dataEnumerator.MoveNext() ? dataEnumerator.Current : null; var currentItemIndex = currentItem is not null ? 
0 : -1; - // Track which rows have been written from the template var writtenRows = new HashSet(); diff --git a/src/MiniExcel.OpenXml/Models/ExcelRange.cs b/src/MiniExcel.OpenXml/Models/ExcelRange.cs index bea1fd20..e8ce6531 100644 --- a/src/MiniExcel.OpenXml/Models/ExcelRange.cs +++ b/src/MiniExcel.OpenXml/Models/ExcelRange.cs @@ -19,8 +19,8 @@ internal ExcelRangeElement(int startIndex, int endIndex) public class ExcelRange(int maxRow, int maxColumn) { - public string StartCell { get; internal set; } - public string EndCell { get; internal set; } + public string? StartCell { get; internal set; } + public string? EndCell { get; internal set; } public ExcelRangeElement Rows { get; } = new(1, maxRow); public ExcelRangeElement Columns { get; } = new(1, maxColumn); diff --git a/src/MiniExcel.OpenXml/Models/TableInfo.cs b/src/MiniExcel.OpenXml/Models/TableInfo.cs new file mode 100644 index 00000000..aa75a349 --- /dev/null +++ b/src/MiniExcel.OpenXml/Models/TableInfo.cs @@ -0,0 +1,17 @@ +namespace MiniExcelLib.OpenXml.Models; + +public class TableInfo +{ + internal TableInfo(string name, IEnumerable columns, string? referenceCells, bool hiddenHeader) + { + Name = name; + Columns = [..columns]; + ReferenceCells = referenceCells; + HiddenHeader = hiddenHeader; + } + + public string Name { get; private set; } + public string[] Columns { get; private set; } + public string? 
ReferenceCells { get; private set; } + public bool HiddenHeader { get; private set; } +} diff --git a/src/MiniExcel.OpenXml/Picture/OpenXmlPictureImplement.cs b/src/MiniExcel.OpenXml/Picture/OpenXmlPictureImplement.cs index 9e3113e8..0e658898 100644 --- a/src/MiniExcel.OpenXml/Picture/OpenXmlPictureImplement.cs +++ b/src/MiniExcel.OpenXml/Picture/OpenXmlPictureImplement.cs @@ -1,4 +1,5 @@ using System.Drawing; +using MiniExcelLib.OpenXml.Reader; namespace MiniExcelLib.OpenXml.Picture; @@ -27,7 +28,7 @@ public static async Task AddPictureAsync(Stream excelStream, CancellationToken c #else using var archive = new ZipArchive(excelStream, ZipArchiveMode.Update, true); #endif - var rels = await reader.GetWorkbookRelsAsync(excelArchive.EntryCollection, cancellationToken).ConfigureAwait(false); + var rels = await OpenXmlReader.GetWorkbookRelsAsync(excelArchive.EntryCollection, cancellationToken).ConfigureAwait(false); var sheetEntries = rels?.ToList() ?? []; // Group images by sheet diff --git a/src/MiniExcel.OpenXml/Reader/OpenXmlReader.Comments.cs b/src/MiniExcel.OpenXml/Reader/OpenXmlReader.Comments.cs new file mode 100644 index 00000000..dc0bfb44 --- /dev/null +++ b/src/MiniExcel.OpenXml/Reader/OpenXmlReader.Comments.cs @@ -0,0 +1,154 @@ +namespace MiniExcelLib.OpenXml.Reader; + +internal partial class OpenXmlReader +{ + private static readonly XNamespace NsRel = Schemas.OpenXmlPackageRelationships; + private static readonly XNamespace Ns18Tc = Schemas.SpreadsheetmlXmlX18Tc; + private static readonly XNamespace NsMain = Schemas.SpreadsheetmlXmlMain; + private static readonly XNamespace Ns14R = Schemas.SpreadsheetmlXmlX14R; + + [CreateSyncVersion] + internal async Task ReadCommentsAsync(string? 
sheetName, CancellationToken cancellationToken = default) + { + if (string.IsNullOrEmpty(sheetName)) + throw new ArgumentException("sheetName cannot be null or empty", nameof(sheetName)); + + SetWorkbookRels(Archive.EntryCollection); + var sheetRecord = _sheetRecords?.SingleOrDefault(s => s.Name.Equals(sheetName, StringComparison.CurrentCultureIgnoreCase)); + if (sheetRecord?.Path?.Split('/')[^1] is not { } sheetFile) + throw new InvalidDataException($"There is no sheet named {sheetName}"); + + if (Archive.GetEntry($"xl/worksheets/_rels/{sheetFile}.rels") is not { } rel) + return new CommentResultSet(sheetName, [], []); + + var stream = await rel.OpenAsync(cancellationToken).ConfigureAwait(false); + await using var disposableStream = stream.ConfigureAwait(false); + + var relDoc = await XDocument.LoadAsync(stream, LoadOptions.None, cancellationToken).ConfigureAwait(false); + HashSet refCells = []; + + var people = await GetAuthorsAsync(cancellationToken).ConfigureAwait(false); + var commentThreads = await GetThreadedCommentsAsync(relDoc, refCells, people, cancellationToken).ConfigureAwait(false); + var notes = await GetNotesAsync(relDoc, refCells, cancellationToken).ConfigureAwait(false); + + return new CommentResultSet(sheetName, commentThreads, notes); + } + + [CreateSyncVersion] + private async Task> GetAuthorsAsync(CancellationToken cancellationToken) + { + if (Archive.GetEntry(ExcelFileNames.Person) is not { } persons) + return []; + + var personStream = await persons.OpenAsync(cancellationToken).ConfigureAwait(false); + await using var disposablePersonStream = personStream.ConfigureAwait(false); + + var personDoc = await XDocument.LoadAsync(personStream, LoadOptions.None, cancellationToken).ConfigureAwait(false); + var personElements = personDoc.Root?.Elements(Ns18Tc + "person"); + + return personElements + ?.Select(p => new Author + { + Id = Guid.Parse(p.Attribute("id")!.Value), + DisplayName = p.Attribute("displayName")?.Value is { } name and not "" ? 
name : "???", + ProviderId = p.Attribute("providerId")?.Value, + }) + .ToList() ?? []; + } + + [CreateSyncVersion] + private async Task> GetNotesAsync(XDocument relDoc, HashSet refCells, CancellationToken cancellationToken) + { + var noteRels = relDoc.Root?.Elements(NsRel + "Relationship"); + var notesElement = noteRels?.FirstOrDefault(x => x.Attribute("Type")?.Value == Schemas.SpreadsheetmlXmlCommentsRelationship); + var notesTarget = notesElement?.Attribute("Target"); + var notesPath = notesTarget?.Value.TrimStart('.', '/'); + + if (Archive.GetEntry($"xl/{notesPath}") is not { } noteEntry) + return []; + + var noteEntryStream = await noteEntry.OpenAsync(cancellationToken).ConfigureAwait(false); + await using var disposableNoteEntryStream = noteEntryStream.ConfigureAwait(false); + + var doc = await XDocument.LoadAsync(noteEntryStream, LoadOptions.None, cancellationToken).ConfigureAwait(false); + + var authorElements = doc.Root?.Element(NsMain + "authors")?.Elements(NsMain + "author"); + var authors = authorElements?.Select(a => a.Value).ToArray(); + + var commentElements = doc.Root + ?.Element(NsMain + "commentList") + ?.Elements(NsMain + "comment"); + + return commentElements + ?.Where(c => !refCells.Contains(c.Attribute("ref")?.Value)) + .Select(c => new NoteComment + { + Id = Guid.TryParse(c.Attribute(Ns14R + "uid")?.Value.Trim('{', '}'), out var noteId) ? noteId : Guid.Empty, + Author = int.TryParse(c.Attribute("authorId")?.Value, out var authorId) ? authors?.ElementAtOrDefault(authorId) : "", + ReferenceCell = c.Attribute("ref")?.Value, + Text = string.Join("", GetTextFromComment(c)) + }) + .ToList() ?? 
[]; + } + + [CreateSyncVersion] + private async Task> GetThreadedCommentsAsync(XDocument relDoc, HashSet refCells, ICollection people, CancellationToken cancellationToken) + { + var threadedCommentRels = relDoc.Root?.Elements(NsRel + "Relationship"); + var threadedCommentsElement = threadedCommentRels?.FirstOrDefault(x => x.Attribute("Type")?.Value == Schemas.SpreadsheetmlXmlThreadedCommentRelationship); + var threadedCommentsTarget = threadedCommentsElement?.Attribute("Target"); + var threadedCommentsPath = threadedCommentsTarget?.Value.TrimStart('.', '/'); + + if (Archive.GetEntry($"xl/{threadedCommentsPath}") is not { } threadEntry) + return []; + + var threadEntryStream = await threadEntry.OpenAsync(cancellationToken).ConfigureAwait(false); + await using var disposableThreadEntryStream = threadEntryStream.ConfigureAwait(false); + + var doc = await XDocument.LoadAsync(threadEntryStream, LoadOptions.None, cancellationToken).ConfigureAwait(false); + + var commentThreadElements = doc.Root?.Elements(Ns18Tc + "threadedComment"); + var commentThreads = commentThreadElements + ?.Where(tc => tc.Attribute("parentId") is null) + .Select(tc => new ThreadedComment + { + Id = Guid.Parse(tc.Attribute("id")!.Value.Trim('{', '}')), + Author = people.FirstOrDefault(p => p.Id == (Guid.TryParse(tc.Attribute("personId")?.Value, out var person) ? person : Guid.Empty)), + CreatedAt = DateTime.Parse(tc.Attribute("dT")!.Value, CultureInfo.InvariantCulture), + ReferenceCell = tc.Attribute("ref")?.Value!, + Text = tc.Value, + Resolved = tc.Attribute("done")?.Value is not (null or "0") + }) + .ToList() ?? 
[]; + + var replyElements = doc.Root?.Elements(Ns18Tc + "threadedComment"); + var replies = replyElements + ?.Where(tc => tc.Attribute("parentId") is not null) + .Select(tc => new ThreadedCommentReply + { + Id = Guid.Parse(tc.Attribute("id")!.Value.Trim('{', '}')), + ParentId = Guid.Parse(tc.Attribute("parentId")!.Value), + Author = people.FirstOrDefault(p => p.Id == Guid.Parse(tc.Attribute("personId")!.Value)), + CreatedAt = DateTime.Parse(tc.Attribute("dT")!.Value, CultureInfo.InvariantCulture), + Text = tc.Value + }) + .ToLookup(x => x.ParentId); + + foreach (var thread in commentThreads) + { + refCells.Add(thread.ReferenceCell); + + if (replies is not null) + thread.ThreadedComments = replies[thread.Id].ToList(); + } + + return commentThreads; + } + + private static IEnumerable GetTextFromComment(XElement? comment) + { + return comment?.Element(NsMain + "text") is { } textElement + ? textElement.Descendants(NsMain + "t").Select(t => t.Value) + : []; + } +} diff --git a/src/MiniExcel.OpenXml/Reader/OpenXmlReader.Mapped.cs b/src/MiniExcel.OpenXml/Reader/OpenXmlReader.Mapped.cs new file mode 100644 index 00000000..6ef18057 --- /dev/null +++ b/src/MiniExcel.OpenXml/Reader/OpenXmlReader.Mapped.cs @@ -0,0 +1,137 @@ +using MiniExcelLib.OpenXml.Styles; + +namespace MiniExcelLib.OpenXml.Reader; + +internal partial class OpenXmlReader +{ + /// + /// Direct mapped query that bypasses dictionary creation for better performance + /// + [CreateSyncVersion] + internal async IAsyncEnumerable QueryMappedAsync(string? sheetName, [EnumeratorCancellation] CancellationToken cancellationToken = default) + { + cancellationToken.ThrowIfCancellationRequested(); + + const bool withoutCr = false; + var sheetEntry = GetSheetEntry(sheetName); + + MergeCells? 
mergeCells = null; + if (_config.FillMergedCells) + { + var mergeCellsResult = await TryGetMergeCellsAsync(sheetEntry, cancellationToken).ConfigureAwait(false); + if (mergeCellsResult.Success) + mergeCells = mergeCellsResult.MergeCells; + } + + // Direct XML reading without dictionary creation + var xmlSettings = XmlReaderHelper.GetXmlReaderSettings(); + + var sheetStream = await sheetEntry.OpenAsync(cancellationToken).ConfigureAwait(false); + await using var disposableSheetStream = sheetStream.ConfigureAwait(false); + + using var reader = XmlReader.Create(sheetStream, xmlSettings); + + if (!reader.IsStartElement("worksheet", Ns)) + yield break; + + if (!await reader.ReadFirstContentAsync(cancellationToken).ConfigureAwait(false)) + yield break; + + while (!reader.EOF) + { + if (reader.IsStartElement("sheetData", Ns)) + { + if (!await reader.ReadFirstContentAsync(cancellationToken).ConfigureAwait(false)) + continue; + + int rowIndex = -1; + while (!reader.EOF) + { + if (reader.IsStartElement("row", Ns)) + { + if (int.TryParse(reader.GetAttribute("r"), out int arValue)) + rowIndex = arValue - 1; // The row attribute is 1-based + else + rowIndex++; + + // Read row directly into mapped structure + await foreach (var mappedRow in ReadMappedRowAsync(reader, rowIndex, withoutCr, mergeCells, cancellationToken).ConfigureAwait(false)) + { + yield return mappedRow; + } + } + else if (!await reader.SkipContentAsync(cancellationToken).ConfigureAwait(false)) + { + break; + } + } + } + else if (!await reader.SkipContentAsync(cancellationToken).ConfigureAwait(false)) + { + break; + } + } + } + + [CreateSyncVersion] + private async IAsyncEnumerable ReadMappedRowAsync( + XmlReader reader, + int rowIndex, + bool withoutCr, + MergeCells? 
mergeCells, + [EnumeratorCancellation] CancellationToken cancellationToken = default) + { + if (!await reader.ReadFirstContentAsync(cancellationToken).ConfigureAwait(false)) + { + // Empty row + yield return new MappedRow(rowIndex); + yield break; + } + + var row = new MappedRow(rowIndex); + var columnIndex = withoutCr ? -1 : 0; + + while (!reader.EOF) + { + if (reader.IsStartElement("c", Ns)) + { + var aS = reader.GetAttribute("s"); + var aR = reader.GetAttribute("r"); + var aT = reader.GetAttribute("t"); + + var cellAndColumn = await ReadCellAndSetColumnIndexAsync(reader, columnIndex, withoutCr, 0, aR, aT, cancellationToken).ConfigureAwait(false); + var cellValue = cellAndColumn.CellValue; + columnIndex = cellAndColumn.ColumnIndex; + + if (_config.FillMergedCells && mergeCells is not null) + { + if (mergeCells.MergesValues.ContainsKey(aR)) + { + mergeCells.MergesValues[aR] = cellValue; + } + else if (mergeCells.MergesMap.TryGetValue(aR, out var mergeKey)) + { + mergeCells.MergesValues.TryGetValue(mergeKey, out cellValue); + } + } + + if (!string.IsNullOrEmpty(aS)) // Custom style + { + if (int.TryParse(aS, NumberStyles.Any, CultureInfo.InvariantCulture, out var styleIndex)) + { + _style ??= await OpenXmlStyles.CreateAsync(Archive, cancellationToken).ConfigureAwait(false); + cellValue = _style.ConvertValueByStyleFormat(styleIndex, cellValue); + } + } + + row.SetCell(columnIndex, cellValue); + } + else if (!await reader.SkipContentAsync(cancellationToken).ConfigureAwait(false)) + { + break; + } + } + + yield return row; + } +} diff --git a/src/MiniExcel.OpenXml/Reader/OpenXmlReader.MergeCells.cs b/src/MiniExcel.OpenXml/Reader/OpenXmlReader.MergeCells.cs new file mode 100644 index 00000000..4c260002 --- /dev/null +++ b/src/MiniExcel.OpenXml/Reader/OpenXmlReader.MergeCells.cs @@ -0,0 +1,66 @@ +namespace MiniExcelLib.OpenXml.Reader; + +internal partial class OpenXmlReader +{ + [CreateSyncVersion] + internal static async Task<(bool Success, MergeCells? 
MergeCells)> TryGetMergeCellsAsync(ZipArchiveEntry sheetEntry, CancellationToken cancellationToken = default) + { + cancellationToken.ThrowIfCancellationRequested(); + + var xmlSettings = XmlReaderHelper.GetXmlReaderSettings(); + var mergeCells = new MergeCells(); + + var sheetStream = await sheetEntry.OpenAsync(cancellationToken).ConfigureAwait(false); + await using var disposableSheetStream = sheetStream.ConfigureAwait(false); + + using var reader = XmlReader.Create(sheetStream, xmlSettings); + + if (!reader.IsStartElement("worksheet", Ns)) + return (false, null); + + while (await reader.ReadAsync().ConfigureAwait(false)) + { + if (!reader.IsStartElement("mergeCells", Ns)) + continue; + + if (!await reader.ReadFirstContentAsync(cancellationToken).ConfigureAwait(false)) + return (false, null); + + while (!reader.EOF) + { + if (reader.IsStartElement("mergeCell", Ns)) + { + // Only a well-formed, parsable "start:end" ref is expanded; anything else is ignored, but the element is still skipped below. + var refs = reader.GetAttribute("ref")?.Split(':'); + if (refs is [var first, var last] && + CellReferenceConverter.TryParseCellReference(first, out var x1, out var y1) && + CellReferenceConverter.TryParseCellReference(last, out var x2, out var y2)) + { + mergeCells.MergesValues.Add(first, null); + + // Map every cell of the range except its first cell (the ref's start) back to that first cell + var isFirst = true; + for (int x = x1; x <= x2; x++) + { + for (int y = y1; y <= y2; y++) + { + if (!isFirst) + mergeCells.MergesMap.Add(CellReferenceConverter.GetCellFromCoordinates(x, y), first); + isFirst = false; + } + } + } + + // Always advance past the current mergeCell; leaving the reader in place would make this loop inspect the same element forever + await reader.SkipContentAsync(cancellationToken).ConfigureAwait(false); + } + else if (!await reader.SkipContentAsync(cancellationToken).ConfigureAwait(false)) + { + break; + } + } + } + + return (true, mergeCells); + } +} diff --git a/src/MiniExcel.OpenXml/Reader/OpenXmlReader.Tables.cs b/src/MiniExcel.OpenXml/Reader/OpenXmlReader.Tables.cs new file mode 100644 index 00000000..7ed37456 --- /dev/null +++ b/src/MiniExcel.OpenXml/Reader/OpenXmlReader.Tables.cs @@ -0,0 +1,147 @@ +namespace MiniExcelLib.OpenXml.Reader; + +internal 
partial class OpenXmlReader +{ + [CreateSyncVersion] + internal IAsyncEnumerable QueryTableAsync(string sheetName, string tableName, CancellationToken cancellationToken = default) + where T : class, new() + { + var query = QueryTableAsync(sheetName, tableName, true, cancellationToken); + return MiniExcelMapper.MapQueryAsync(query, 0, false, _config.TrimColumnNames, _config, XmlHelper.DecodeString, cancellationToken); + } + + [CreateSyncVersion] + internal async IAsyncEnumerable> QueryTableAsync(string sheetName, string tableName, bool prependHeaders, [EnumeratorCancellation] CancellationToken cancellationToken = default) + { + TableInfo? table = null; + await foreach (var item in GetTableInfosAsync(sheetName, cancellationToken).ConfigureAwait(false)) + { + if (item.Name.Equals(tableName, StringComparison.OrdinalIgnoreCase)) + { + table = item; + break; + } + } + + if (table is null) + throw new InvalidDataException($"The table {tableName} was not found."); + + if (table.ReferenceCells?.Split(':') is not [var start, var end] || + !CellReferenceConverter.TryParseCellReference(start, out var startCol, out var startRow) || + !CellReferenceConverter.TryParseCellReference(end, out var endCol, out var endRow)) + { + throw new InvalidDataException("A valid cell range could not be extracted from the table metadata."); + } + + if (!table.HiddenHeader) + startRow++; + + if (prependHeaders) + { + var headers = ExpandoHelper.CreateEmptyByIndices(endCol - 1, startCol - 1); + var columnCount = Math.Min(headers.Count, table.Columns.Length); + + for (int i = 0; i < columnCount; i++) + { + var index = CellReferenceConverter.GetAlphabeticalIndex(startCol + i - 1); + headers[index] = table.Columns[i]; + } + + yield return headers; + } + + await foreach (var row in QueryRangeAsync(false, sheetName, startRow, startCol, endRow, endCol, cancellationToken).ConfigureAwait(false)) + { + if (!prependHeaders) + { + for (var i = 0; i < table.Columns.Length; i++) + { + var oldHeader = 
CellReferenceConverter.GetAlphabeticalIndex(i + startCol - 1); + if (row.TryGetValue(oldHeader, out var cellValue)) + { + var newHeader = table.Columns[i]; + row[newHeader] = cellValue; + if (newHeader != oldHeader) + { + row.Remove(oldHeader); + } + } + } + } + + yield return row; + } + } + + [CreateSyncVersion] + private async IAsyncEnumerable GetTableInfosAsync(string sheetName, [EnumeratorCancellation] CancellationToken cancellationToken = default) + { + var rels = await GetWorkbookRelsAsync(Archive.EntryCollection, cancellationToken).ConfigureAwait(false); + if (rels?.Find(x => x.Name.Equals(sheetName, StringComparison.OrdinalIgnoreCase)) is not { Path: { } path }) + throw new InvalidDataException($"Worksheet {sheetName} was not found."); + + List tables = []; + var sheetFilename = path.Split('/')[^1]; + + if (Archive.GetEntry($"xl/worksheets/_rels/{sheetFilename}.rels") is { } entry) + { + var entryStream = await entry.OpenAsync(cancellationToken).ConfigureAwait(false); + await using var disposableEntryStream = entryStream.ConfigureAwait(false); + + var readerSettings = XmlReaderHelper.GetXmlReaderSettings(); + using var reader = XmlReader.Create(entryStream, readerSettings); + + if (!reader.ReadToFollowing("Relationship")) + yield break; + + do + { + if (reader.GetAttribute("Type") == Schemas.SpreadsheetmlXmlTableRelationship) + { + if (reader.GetAttribute("Target") is { } target && + target.Split('/').LastOrDefault() is { } table) + { + tables.Add(table); + } + } + } + while(reader.ReadToNextSibling("Relationship")); + } + + foreach (var table in tables) + { + if (Archive.GetEntry($"xl/tables/{table}") is not { } tableEntry) + continue; + + var entryStream = await tableEntry.OpenAsync(cancellationToken).ConfigureAwait(false); + await using var disposableEntryStream = entryStream.ConfigureAwait(false); + using var reader = XmlReader.Create(entryStream, XmlReaderHelper.GetXmlReaderSettings()); + + if (!reader.ReadToFollowing("table")) + continue; + + if 
(reader.GetAttribute("name") is not { } tableName || + reader.GetAttribute("ref") is not { } @ref) + { + continue; + } + + var headerIsHidden = reader.GetAttribute("headerRowCount") == "0"; + if (!reader.ReadToDescendant("tableColumn")) + continue; + + List columns = []; + var colCount = 0; + + do + { + var colName = reader.GetAttribute("name") ?? $"Column{colCount}"; + columns.Add(colName); + colCount++; + } + while (reader.ReadToNextSibling("tableColumn")); + + yield return new TableInfo(tableName, [..columns], @ref, headerIsHidden); + } + } +} diff --git a/src/MiniExcel.OpenXml/OpenXmlReader.cs b/src/MiniExcel.OpenXml/Reader/OpenXmlReader.cs similarity index 66% rename from src/MiniExcel.OpenXml/OpenXmlReader.cs rename to src/MiniExcel.OpenXml/Reader/OpenXmlReader.cs index be3b2236..f8061cfa 100644 --- a/src/MiniExcel.OpenXml/OpenXmlReader.cs +++ b/src/MiniExcel.OpenXml/Reader/OpenXmlReader.cs @@ -3,9 +3,9 @@ using MiniExcelMapper = MiniExcelLib.Core.Reflection.MiniExcelMapper; using XmlReaderHelper = MiniExcelLib.OpenXml.Utils.XmlReaderHelper; -namespace MiniExcelLib.OpenXml; +namespace MiniExcelLib.OpenXml.Reader; -internal partial class OpenXmlReader : IMiniExcelReader +internal sealed partial class OpenXmlReader : IMiniExcelReader { private static readonly string[] Ns = [Schemas.SpreadsheetmlXmlMain, Schemas.SpreadsheetmlXmlStrictNs]; private static readonly string[] RelationshiopNs = [Schemas.SpreadsheetmlXmlRelationships, Schemas.SpreadsheetmlXmlStrictRelationships]; @@ -47,13 +47,12 @@ internal static async Task CreateAsync(Stream stream, IMiniExcelC { sheetName ??= MiniExcelPropertyHelper.GetExcelSheetInfo(typeof(T), _config)?.ExcelSheetName; var query = QueryAsync(false, sheetName, startCell, cancellationToken); + if (!CellReferenceConverter.TryParseCellReference(startCell, out _, out var rowOffset)) - { throw new InvalidDataException($"Value {startCell} is not a valid cell reference."); - } //Todo: Find a way if possible to remove the 'hasHeader' 
parameter to check whether or not to include - // the first row in the result set in favor of modifying the already present 'useHeaderRow' to do the same job + // the first row in the result set in favor of modifying the already present 'hasHeaderRow' to do the same job return MiniExcelMapper.MapQueryAsync(query, rowOffset, mapHeaderAsData, _config.TrimColumnNames, _config, XmlHelper.DecodeString, cancellationToken); } @@ -141,26 +140,29 @@ internal static async Task CreateAsync(Stream stream, IMiniExcelC } [CreateSyncVersion] - private async IAsyncEnumerable> InternalQueryRangeAsync(bool useHeaderRow, string? sheetName, int startRowIndex, int startColumnIndex, int? endRowIndex, int? endColumnIndex, [EnumeratorCancellation] CancellationToken cancellationToken = default) + private async IAsyncEnumerable> InternalQueryRangeAsync(bool hasHeaderRow, string? sheetName, int startRowIndex, int startColumnIndex, int? endRowIndex, int? endColumnIndex, [EnumeratorCancellation] CancellationToken cancellationToken = default) { cancellationToken.ThrowIfCancellationRequested(); - var xmlSettings = XmlReaderHelper.GetXmlReaderSettings( -#if SYNC_ONLY - false -#else - true -#endif - ); - + var xmlSettings = XmlReaderHelper.GetXmlReaderSettings(); var sheetEntry = GetSheetEntry(sheetName); // TODO: need to optimize performance // Q. why need 3 times openstream merge one open read? A. no, zipstream can't use position = 0 - var mergeCellsContext = new MergeCellsContext(); - if (_config.FillMergedCells && !await TryGetMergeCellsAsync(sheetEntry, mergeCellsContext, cancellationToken).ConfigureAwait(false)) - yield break; + MergeCells? 
mergeCells = null; + if (_config.FillMergedCells) + { + var mergeCellsResult = await TryGetMergeCellsAsync(sheetEntry, cancellationToken).ConfigureAwait(false); + if (mergeCellsResult.Success) + { + mergeCells = mergeCellsResult.MergeCells; + } + else + { + yield break; + } + } var maxRowColumnIndexResult = await TryGetMaxRowColumnIndexAsync(sheetEntry, cancellationToken).ConfigureAwait(false); if (!maxRowColumnIndexResult.IsSuccess) @@ -219,15 +221,16 @@ internal static async Task CreateAsync(Stream stream, IMiniExcelC break; } - await foreach (var row in QueryRowAsync(reader, isFirstRow, startRowIndex, nextRowIndex, - rowIndex, startColumnIndex, endColumnIndex, maxColumnIndex, - withoutCr, useHeaderRow, headRows, mergeCellsContext.MergeCells, - cancellationToken).ConfigureAwait(false)) + var query = QueryRowAsync(reader, isFirstRow, startRowIndex, nextRowIndex, rowIndex, + startColumnIndex, endColumnIndex, maxColumnIndex, withoutCr, hasHeaderRow, headRows, + mergeCells, cancellationToken); + + await foreach (var row in query.ConfigureAwait(false)) { if (isFirstRow) { isFirstRow = false; // for startcell logic - if (useHeaderRow) + if (hasHeaderRow) continue; } @@ -271,7 +274,7 @@ internal static async Task CreateAsync(Stream stream, IMiniExcelC { for (int i = expectedRowIndex; i < rowIndex; i++) { - yield return GetCell(hasHeaderRow, maxColumnIndex, headRows, startColumnIndex); + yield return GetHeaders(hasHeaderRow, maxColumnIndex, headRows, startColumnIndex); } } } @@ -280,11 +283,11 @@ internal static async Task CreateAsync(Stream stream, IMiniExcelC if (!await reader.ReadFirstContentAsync(cancellationToken).ConfigureAwait(false) && !_config.IgnoreEmptyRows) { //Fill in case of self closed empty row tag eg. 
- yield return GetCell(hasHeaderRow, maxColumnIndex, headRows, startColumnIndex); + yield return GetHeaders(hasHeaderRow, maxColumnIndex, headRows, startColumnIndex); yield break; } - var cell = GetCell(hasHeaderRow, maxColumnIndex, headRows, startColumnIndex); + var cell = GetHeaders(hasHeaderRow, maxColumnIndex, headRows, startColumnIndex); var columnIndex = withoutCr ? -1 : 0; while (!reader.EOF) { @@ -320,7 +323,7 @@ internal static async Task CreateAsync(Stream stream, IMiniExcelC xfIndex = styleIndex; // only when have s attribute then load styles xml data - _style ??= new OpenXmlStyles(Archive); + _style ??= await OpenXmlStyles.CreateAsync(Archive, cancellationToken).ConfigureAwait(false); cellValue = _style.ConvertValueByStyleFormat(xfIndex, cellValue); } @@ -372,16 +375,16 @@ private ZipArchiveEntry GetSheetEntry(string? sheetName) return sheetEntry; } - private static IDictionary GetCell(bool useHeaderRow, int maxColumnIndex, Dictionary headRows, int startColumnIndex) + private static IDictionary GetHeaders(bool hasHeaderRow, int maxColumnIndex, Dictionary headRows, int startColumnIndex) { - return useHeaderRow + return hasHeaderRow ? ExpandoHelper.CreateEmptyByHeaders(headRows) : ExpandoHelper.CreateEmptyByIndices(maxColumnIndex, startColumnIndex); } - private static void SetCellsValueAndHeaders(object? cellValue, bool useHeaderRow, Dictionary headRows, bool isFirstRow, IDictionary cell, int columnIndex) + private static void SetCellsValueAndHeaders(object? 
cellValue, bool hasHeaderRow, Dictionary headRows, bool isFirstRow, IDictionary cell, int columnIndex) { - if (!useHeaderRow) + if (!hasHeaderRow) { //if not using First Head then using A,B,C as index cell[CellReferenceConverter.GetAlphabeticalIndex(columnIndex)] = cellValue; @@ -407,10 +410,10 @@ private async Task SetSharedStringsAsync(CancellationToken cancellationToken = d if (SharedStrings is { Count: > 0 }) return; - + if (Archive.GetEntry(ExcelFileNames.SharedStrings) is not { } sharedStringsEntry) return; - + var stream = await sharedStringsEntry.OpenAsync(cancellationToken).ConfigureAwait(false); await using var disposableStream = stream.ConfigureAwait(false); @@ -441,13 +444,7 @@ private void SetWorkbookRels(ReadOnlyCollection entries) [CreateSyncVersion] private static async IAsyncEnumerable ReadWorkbookAsync(ReadOnlyCollection entries, [EnumeratorCancellation] CancellationToken cancellationToken = default) { - var xmlSettings = XmlReaderHelper.GetXmlReaderSettings( -#if SYNC_ONLY - false -#else - true -#endif - ); + var xmlSettings = XmlReaderHelper.GetXmlReaderSettings(); var entry = entries.Single(w => w.FullName == ExcelFileNames.Workbook); var stream = await entry.OpenAsync(cancellationToken).ConfigureAwait(false); @@ -528,35 +525,28 @@ await reader.SkipAsync() } [CreateSyncVersion] - internal async Task?> GetWorkbookRelsAsync(ReadOnlyCollection entries, CancellationToken cancellationToken = default) + internal static async Task?> GetWorkbookRelsAsync(ReadOnlyCollection entries, CancellationToken cancellationToken = default) { - var xmlSettings = XmlReaderHelper.GetXmlReaderSettings( -#if SYNC_ONLY - false -#else - true -#endif - ); - var sheetRecords = await ReadWorkbookAsync(entries, cancellationToken) .CreateListAsync(cancellationToken) .ConfigureAwait(false); var entry = entries.Single(w => w.FullName == ExcelFileNames.WorkbookRels); - var stream = await entry.OpenAsync(cancellationToken).ConfigureAwait(false); await using var disposableStream = 
stream.ConfigureAwait(false); - using var reader = XmlReader.Create(stream, xmlSettings); + var readerSettings = XmlReaderHelper.GetXmlReaderSettings(); + using var reader = XmlReader.Create(stream, readerSettings); if (!XmlReaderHelper.IsStartElement(reader, "Relationships", Schemas.OpenXmlPackageRelationships)) return null; + if (!await reader.ReadFirstContentAsync(cancellationToken).ConfigureAwait(false)) return null; while (!reader.EOF) { - if (XmlReaderHelper.IsStartElement(reader, "Relationship", Schemas.OpenXmlPackageRelationships)) + if (reader.IsStartElement("Relationship", Schemas.OpenXmlPackageRelationships)) { var rid = reader.GetAttribute("Id"); foreach (var sheet in sheetRecords.Where(sh => sh.Rid == rid)) @@ -720,14 +710,7 @@ internal async Task> GetDimensionsAsync(CancellationToken canc { cancellationToken.ThrowIfCancellationRequested(); - var xmlSettings = XmlReaderHelper.GetXmlReaderSettings( -#if SYNC_ONLY - false -#else - true -#endif - ); - + var xmlSettings = XmlReaderHelper.GetXmlReaderSettings(); var ranges = new List(); var sheets = Archive.EntryCollection.Where(e => @@ -877,13 +860,7 @@ internal static async Task TryGetMaxRowColumnIndexAs { cancellationToken.ThrowIfCancellationRequested(); - var xmlSettings = XmlReaderHelper.GetXmlReaderSettings( -#if SYNC_ONLY - false -#else - true -#endif - ); + var xmlSettings = XmlReaderHelper.GetXmlReaderSettings(); bool withoutCr = false; int maxRowIndex = -1; @@ -997,380 +974,14 @@ internal static async Task TryGetMaxRowColumnIndexAs return new GetMaxRowColumnIndexResult(true, withoutCr, maxRowIndex, maxColumnIndex); } - internal class MergeCellsContext - { - public MergeCells? 
MergeCells { get; set; } - } - - - [CreateSyncVersion] - internal static async Task TryGetMergeCellsAsync(ZipArchiveEntry sheetEntry, MergeCellsContext mergeCellsContext, CancellationToken cancellationToken = default) - { - cancellationToken.ThrowIfCancellationRequested(); - - var xmlSettings = XmlReaderHelper.GetXmlReaderSettings( -#if SYNC_ONLY - false -#else - true -#endif - ); - var mergeCells = new MergeCells(); - - var sheetStream = await sheetEntry.OpenAsync(cancellationToken).ConfigureAwait(false); - await using var disposableSheetStream = sheetStream.ConfigureAwait(false); - - using var reader = XmlReader.Create(sheetStream, xmlSettings); - - if (!reader.IsStartElement("worksheet", Ns)) - return false; - - while (await reader.ReadAsync().ConfigureAwait(false)) - { - if (!reader.IsStartElement("mergeCells", Ns)) - continue; - - if (!await reader.ReadFirstContentAsync(cancellationToken).ConfigureAwait(false)) - return false; - - while (!reader.EOF) - { - if (reader.IsStartElement("mergeCell", Ns)) - { - var refAttr = reader.GetAttribute("ref"); - var refs = refAttr.Split(':'); - if (refs.Length == 1) - continue; - - CellReferenceConverter.TryParseCellReference(refs[0], out var x1, out var y1); - CellReferenceConverter.TryParseCellReference(refs[1], out var x2, out var y2); - - mergeCells.MergesValues.Add(refs[0], null); - - // foreach range - var isFirst = true; - for (int x = x1; x <= x2; x++) - { - for (int y = y1; y <= y2; y++) - { - if (!isFirst) - mergeCells.MergesMap.Add(CellReferenceConverter.GetCellFromCoordinates(x, y), refs[0]); - isFirst = false; - } - } - - await reader.SkipContentAsync(cancellationToken).ConfigureAwait(false); - } - else if (!await reader.SkipContentAsync(cancellationToken).ConfigureAwait(false)) - { - break; - } - } - } - - mergeCellsContext.MergeCells = mergeCells; - return true; - } - - [CreateSyncVersion] - internal async Task ReadCommentsAsync(string? 
sheetName, CancellationToken cancellationToken = default) - { - if (string.IsNullOrEmpty(sheetName)) - throw new ArgumentException("sheetName cannot be null or empty", nameof(sheetName)); - - XNamespace nsRel = Schemas.OpenXmlPackageRelationships; - XNamespace ns18Tc = Schemas.SpreadsheetmlXmlX18Tc; - XNamespace nsMain = Schemas.SpreadsheetmlXmlMain; - XNamespace ns14R = Schemas.SpreadsheetmlXmlX14R; - - SetWorkbookRels(Archive.EntryCollection); - var sheetRecord = _sheetRecords?.SingleOrDefault(s => s.Name.Equals(sheetName, StringComparison.CurrentCultureIgnoreCase)); - if (sheetRecord?.Path?.Split('/')[^1] is not { } sheetFile) - throw new InvalidDataException($"There is no sheet named {sheetName}"); - - List people = []; - if (Archive.GetEntry(ExcelFileNames.Person) is { } persons) - { - var personStream = await persons.OpenAsync(cancellationToken).ConfigureAwait(false); - await using var disposablePersonStream = personStream.ConfigureAwait(false); - - var personDoc = await XDocument.LoadAsync(personStream, LoadOptions.None, cancellationToken).ConfigureAwait(false); - var personElements = personDoc.Root?.Elements(ns18Tc + "person"); - people = personElements - ?.Select(p => new Author - { - Id = Guid.Parse(p.Attribute("id")!.Value), - DisplayName = p.Attribute("displayName")?.Value is { } name and not "" ? name : "???", - ProviderId = p.Attribute("providerId")?.Value, - }) - .ToList() ?? 
[]; - } - - if (Archive.GetEntry($"xl/worksheets/_rels/{sheetFile}.rels") is not { } rel) - return new CommentResultSet(sheetName, [], []); - - var stream = await rel.OpenAsync(cancellationToken).ConfigureAwait(false); - await using var disposableStream = stream.ConfigureAwait(false); - - var relDoc = await XDocument.LoadAsync(stream, LoadOptions.None, cancellationToken).ConfigureAwait(false); - - var threadedCommentRels = relDoc.Root?.Elements(nsRel + "Relationship"); - var threadedCommentsElement = threadedCommentRels?.FirstOrDefault(x => x.Attribute("Type")?.Value == Schemas.SpreadsheetmlXmlThreadedCommentRelationship); - var threadedCommentsTarget = threadedCommentsElement?.Attribute("Target"); - var threadedCommentsPath = threadedCommentsTarget?.Value.TrimStart('.', '/'); - - var noteRels = relDoc.Root?.Elements(nsRel + "Relationship"); - var notesElement = noteRels?.FirstOrDefault(x => x.Attribute("Type")?.Value == Schemas.SpreadsheetmlXmlCommentsRelationship); - var notesTarget = notesElement?.Attribute("Target"); - var notesPath = notesTarget?.Value.TrimStart('.', '/'); - - List commentThreads = []; - List notes = []; - HashSet refCells = []; - if (Archive.GetEntry($"xl/{threadedCommentsPath}") is { } threadEntry) - { - var threadEntryStream = await threadEntry.OpenAsync(cancellationToken).ConfigureAwait(false); - await using var disposableThreadEntryStream = threadEntryStream.ConfigureAwait(false); - - var doc = await XDocument.LoadAsync(threadEntryStream, LoadOptions.None, cancellationToken).ConfigureAwait(false); - - var commentThreadElements = doc.Root?.Elements(ns18Tc + "threadedComment"); - commentThreads = commentThreadElements - ?.Where(tc => tc.Attribute("parentId") is null) - .Select(tc => new ThreadedComment - { - Id = Guid.Parse(tc.Attribute("id")!.Value.Trim('{', '}')), - Author = people.FirstOrDefault(p => p.Id == (Guid.TryParse(tc.Attribute("personId")?.Value, out var person) ? 
person : Guid.Empty)), - CreatedAt = DateTime.Parse(tc.Attribute("dT")!.Value, CultureInfo.InvariantCulture), - ReferenceCell = tc.Attribute("ref")?.Value!, - Text = tc.Value, - Resolved = tc.Attribute("done")?.Value is not (null or "0") - }) - .ToList() ?? []; - - var replyElements = doc.Root?.Elements(ns18Tc + "threadedComment"); - var replies = replyElements - ?.Where(tc => tc.Attribute("parentId") is not null) - .Select(tc => new ThreadedCommentReply - { - Id = Guid.Parse(tc.Attribute("id")!.Value.Trim('{', '}')), - ParentId = Guid.Parse(tc.Attribute("parentId")!.Value), - Author = people.FirstOrDefault(p => p.Id == Guid.Parse(tc.Attribute("personId")!.Value)), - CreatedAt = DateTime.Parse(tc.Attribute("dT")!.Value, CultureInfo.InvariantCulture), - Text = tc.Value - }) - .ToLookup(x => x.ParentId); - - if (replies is not null) - { - foreach (var thread in commentThreads) - { - thread.ThreadedComments = replies[thread.Id].ToList(); - } - } - - refCells = [..commentThreads.Select(x => x.ReferenceCell)]; - } - - if (Archive.GetEntry($"xl/{notesPath}") is { } noteEntry) - { - var noteEntryStream = await noteEntry.OpenAsync(cancellationToken).ConfigureAwait(false); - await using var disposableNoteEntryStream = noteEntryStream.ConfigureAwait(false); - - var doc = await XDocument.LoadAsync(noteEntryStream, LoadOptions.None, cancellationToken).ConfigureAwait(false); - - var authorElements = doc.Root?.Element(nsMain + "authors")?.Elements(nsMain + "author"); - var authors = authorElements?.Select(a => a.Value).ToArray(); - - var commentElements = doc.Root - ?.Element(nsMain + "commentList") - ?.Elements(nsMain + "comment"); - - notes = commentElements - ?.Where(c => !refCells.Contains(c.Attribute("ref")?.Value)) - .Select(c => new NoteComment - { - Id = Guid.TryParse(c.Attribute(ns14R + "uid")?.Value.Trim('{', '}'), out var noteId) ? noteId : Guid.Empty, - Author = int.TryParse(c.Attribute("authorId")?.Value, out var authorId) ? 
authors?.ElementAtOrDefault(authorId) : "", - ReferenceCell = c.Attribute("ref")?.Value, - Text = string.Join("", GetTextFromComment(c)) - }) - .ToList() ?? []; - } - - return new CommentResultSet(sheetName, commentThreads, notes); - - IEnumerable GetTextFromComment(XElement? comment) - { - return comment?.Element(nsMain + "text") is { } textElement - ? textElement.Descendants(nsMain + "t").Select(t => t.Value) - : []; - } - } - - /// - /// Direct mapped query that bypasses dictionary creation for better performance - /// - [CreateSyncVersion] - internal async IAsyncEnumerable QueryMappedAsync( - string? sheetName, - [EnumeratorCancellation] CancellationToken cancellationToken = default) - { - cancellationToken.ThrowIfCancellationRequested(); - - var sheetEntry = GetSheetEntry(sheetName); - var withoutCr = false; - - var mergeCellsContext = new MergeCellsContext(); - if (_config.FillMergedCells) - { - await TryGetMergeCellsAsync(sheetEntry, mergeCellsContext, cancellationToken).ConfigureAwait(false); - } - var mergeCells = _config.FillMergedCells ? 
mergeCellsContext.MergeCells : null; - - // Direct XML reading without dictionary creation - var xmlSettings = new XmlReaderSettings - { - CheckCharacters = false, - IgnoreWhitespace = true, - IgnoreComments = true, - XmlResolver = null, - Async = true - }; - - var sheetStream = await sheetEntry.OpenAsync(cancellationToken).ConfigureAwait(false); - await using var disposableSheetStream = sheetStream.ConfigureAwait(false); - - using var reader = XmlReader.Create(sheetStream, xmlSettings); - - if (!reader.IsStartElement("worksheet", Ns)) - yield break; - - if (!await reader.ReadFirstContentAsync(cancellationToken).ConfigureAwait(false)) - yield break; - - while (!reader.EOF) - { - if (reader.IsStartElement("sheetData", Ns)) - { - if (!await reader.ReadFirstContentAsync(cancellationToken).ConfigureAwait(false)) - continue; - - int rowIndex = -1; - while (!reader.EOF) - { - if (reader.IsStartElement("row", Ns)) - { - if (int.TryParse(reader.GetAttribute("r"), out int arValue)) - rowIndex = arValue - 1; // The row attribute is 1-based - else - rowIndex++; - - // Read row directly into mapped structure - await foreach (var mappedRow in ReadMappedRowAsync(reader, rowIndex, withoutCr, mergeCells, cancellationToken).ConfigureAwait(false)) - { - yield return mappedRow; - } - } - else if (!await reader.SkipContentAsync(cancellationToken).ConfigureAwait(false)) - { - break; - } - } - } - else if (!await reader.SkipContentAsync(cancellationToken).ConfigureAwait(false)) - { - break; - } - } - } - - [CreateSyncVersion] - private async IAsyncEnumerable ReadMappedRowAsync( - XmlReader reader, - int rowIndex, - bool withoutCr, - MergeCells? mergeCells, - [EnumeratorCancellation] CancellationToken cancellationToken = default) - { - if (!await reader.ReadFirstContentAsync(cancellationToken).ConfigureAwait(false)) - { - // Empty row - yield return new MappedRow(rowIndex); - yield break; - } - - var row = new MappedRow(rowIndex); - var columnIndex = withoutCr ? 
-1 : 0; - - while (!reader.EOF) - { - if (reader.IsStartElement("c", Ns)) - { - var aS = reader.GetAttribute("s"); - var aR = reader.GetAttribute("r"); - var aT = reader.GetAttribute("t"); - - var cellAndColumn = await ReadCellAndSetColumnIndexAsync(reader, columnIndex, withoutCr, 0, aR, aT, cancellationToken).ConfigureAwait(false); - var cellValue = cellAndColumn.CellValue; - columnIndex = cellAndColumn.ColumnIndex; - - if (_config.FillMergedCells && mergeCells is not null) - { - if (mergeCells.MergesValues.ContainsKey(aR)) - { - mergeCells.MergesValues[aR] = cellValue; - } - else if (mergeCells.MergesMap.TryGetValue(aR, out var mergeKey)) - { - mergeCells.MergesValues.TryGetValue(mergeKey, out cellValue); - } - } - - if (!string.IsNullOrEmpty(aS)) // Custom style - { - if (int.TryParse(aS, NumberStyles.Any, CultureInfo.InvariantCulture, out var styleIndex)) - { - _style ??= new OpenXmlStyles(Archive); - cellValue = _style.ConvertValueByStyleFormat(styleIndex, cellValue); - } - } - - row.SetCell(columnIndex, cellValue); - } - else if (!await reader.SkipContentAsync(cancellationToken).ConfigureAwait(false)) - { - break; - } - } - - yield return row; - } - public void Dispose() { - Dispose(true); - GC.SuppressFinalize(this); - } + if (_disposed) + return; - protected void Dispose(bool disposing) - { - if (!_disposed) - { - if (disposing) - { - if (SharedStrings is SharedStringsDiskCache cache) - { - cache.Dispose(); - } - } + if (SharedStrings is SharedStringsDiskCache cache) + cache.Dispose(); - _disposed = true; - } - } - - ~OpenXmlReader() - { - Dispose(false); + _disposed = true; } } diff --git a/src/MiniExcel.OpenXml/Styles/Builder/SheetStyleBuildContext.cs b/src/MiniExcel.OpenXml/Styles/Builder/SheetStyleBuildContext.cs index 83d3ccbe..35cd1826 100644 --- a/src/MiniExcel.OpenXml/Styles/Builder/SheetStyleBuildContext.cs +++ b/src/MiniExcel.OpenXml/Styles/Builder/SheetStyleBuildContext.cs @@ -49,7 +49,7 @@ public async Task CreateAsync(SheetStyleElementInfos 
generatedElementInfos, Canc { var oldStyleXmlStream = await styleEntry.OpenAsync(cancellationToken).ConfigureAwait(false); await using var disposableStream = oldStyleXmlStream.ConfigureAwait(false); - using var reader = XmlReader.Create(oldStyleXmlStream, XmlReaderHelper.GetXmlReaderSettings(isAsync)); + using var reader = XmlReader.Create(oldStyleXmlStream, XmlReaderHelper.GetXmlReaderSettings()); infos = await ReadSheetStyleElementInfosAsync(reader, cancellationToken).ConfigureAwait(false); } @@ -67,20 +67,13 @@ public async Task InitializeAsync(SheetStyleElementInfos generatedElementInfos, if (_initialized) throw new InvalidOperationException("The context has already been initialized."); - const bool isAsync = -#if SYNC_ONLY - false; -#else - true; -#endif - GeneratedElementInfos = generatedElementInfos; _oldStyleXmlZipEntry = _archive.Mode == ZipArchiveMode.Update ? _archive.Entries.SingleOrDefault(s => s.FullName == ExcelFileNames.Styles) : null; - var xmlReaderSettings = XmlReaderHelper.GetXmlReaderSettings(isAsync); + var xmlReaderSettings = XmlReaderHelper.GetXmlReaderSettings(); if (_oldStyleXmlZipEntry is not null) { var oldStyleXmlStream = await _oldStyleXmlZipEntry.OpenAsync(cancellationToken).ConfigureAwait(false); @@ -104,7 +97,16 @@ public async Task InitializeAsync(SheetStyleElementInfos generatedElementInfos, } _newXmlWriterStream = await _newStyleXmlZipEntry.OpenAsync(cancellationToken).ConfigureAwait(false); - NewXmlWriter = XmlWriter.Create(_newXmlWriterStream, new XmlWriterSettings { Indent = true, Encoding = _encoding, Async = isAsync }); + NewXmlWriter = XmlWriter.Create( + _newXmlWriterStream, + new XmlWriterSettings + { + Indent = true, + Encoding = _encoding, +#if !SYNC_ONLY + Async = true +#endif + }); _initialized = true; } diff --git a/src/MiniExcel.OpenXml/Styles/OpenXmlStyles.cs b/src/MiniExcel.OpenXml/Styles/OpenXmlStyles.cs index 70937eb3..110d7885 100644 --- a/src/MiniExcel.OpenXml/Styles/OpenXmlStyles.cs +++ 
b/src/MiniExcel.OpenXml/Styles/OpenXmlStyles.cs @@ -2,7 +2,7 @@ namespace MiniExcelLib.OpenXml.Styles; -internal class OpenXmlStyles +internal partial class OpenXmlStyles { private static readonly string[] Ns = [Schemas.SpreadsheetmlXmlMain, Schemas.SpreadsheetmlXmlStrictNs]; @@ -10,22 +10,30 @@ internal class OpenXmlStyles private readonly Dictionary _cellStyleXfs = new(); private readonly Dictionary _customFormats = new(); - public OpenXmlStyles(OpenXmlZip zip) + private OpenXmlStyles() { } + + [CreateSyncVersion] + internal static async Task CreateAsync(OpenXmlZip zip, CancellationToken cancellationToken = default) { - using var reader = zip.GetXmlReader(ExcelFileNames.Styles); - if (reader is null) - throw new InvalidDataException("The OpenXml styles could not be found, the file might be malformed."); - - if (!reader.IsStartElement("styleSheet", Ns)) - return; - if (!reader.ReadFirstContent()) - return; + if (zip.GetEntry(ExcelFileNames.Styles) is not { } entry) + throw new InvalidDataException("The OpenXml styles.xml file could not be found, the document might be malformed."); + + var entryStream = await entry.OpenAsync(cancellationToken).ConfigureAwait(false); + await using var disposableEntryStream = entryStream.ConfigureAwait(false); + using var reader = XmlReader.Create(entryStream, XmlReaderHelper.GetXmlReaderSettings()); + var openXmlStyles = new OpenXmlStyles(); + if (!reader.IsStartElement("styleSheet", Ns) || + !await reader.ReadFirstContentAsync(cancellationToken).ConfigureAwait(false)) + { + return openXmlStyles; + } + while (!reader.EOF) { if (reader.IsStartElement("cellXfs", Ns)) { - if (!XmlReaderHelper.ReadFirstContent(reader)) + if (!await reader.ReadFirstContentAsync(cancellationToken).ConfigureAwait(false)) continue; var index = 0; @@ -35,17 +43,17 @@ public OpenXmlStyles(OpenXmlZip zip) { int.TryParse(reader.GetAttribute("xfId"), out var xfId); int.TryParse(reader.GetAttribute("numFmtId"), out var numFmtId); - _cellXfs.Add(index, new 
StyleRecord() { XfId = xfId, NumFmtId = numFmtId }); - reader.Skip(); + openXmlStyles._cellXfs.Add(index, new StyleRecord { XfId = xfId, NumFmtId = numFmtId }); + await reader.SkipAsync().ConfigureAwait(false); index++; } - else if (!reader.SkipContent()) + else if (!await reader.SkipContentAsync(cancellationToken).ConfigureAwait(false)) break; } } else if (reader.IsStartElement("cellStyleXfs", Ns)) { - if (!reader.ReadFirstContent()) + if (!await reader.ReadFirstContentAsync(cancellationToken).ConfigureAwait(false)) continue; var index = 0; @@ -56,11 +64,11 @@ public OpenXmlStyles(OpenXmlZip zip) int.TryParse(reader.GetAttribute("xfId"), out var xfId); int.TryParse(reader.GetAttribute("numFmtId"), out var numFmtId); - _cellStyleXfs.Add(index, new StyleRecord() { XfId = xfId, NumFmtId = numFmtId }); - reader.Skip(); + openXmlStyles._cellStyleXfs.Add(index, new StyleRecord() { XfId = xfId, NumFmtId = numFmtId }); + await reader.SkipAsync().ConfigureAwait(false); index++; } - else if (!reader.SkipContent()) + else if (!await reader.SkipContentAsync(cancellationToken).ConfigureAwait(false)) { break; } @@ -68,7 +76,7 @@ public OpenXmlStyles(OpenXmlZip zip) } else if (reader.IsStartElement("numFmts", Ns)) { - if (!reader.ReadFirstContent()) + if (!await reader.ReadFirstContentAsync(cancellationToken).ConfigureAwait(false)) continue; while (!reader.EOF) @@ -85,20 +93,22 @@ public OpenXmlStyles(OpenXmlZip zip) type = typeof(DateTime?); } - _customFormats.TryAdd(numFmtId, new NumberFormatString(formatCode, type)); - reader.Skip(); + openXmlStyles._customFormats.TryAdd(numFmtId, new NumberFormatString(formatCode, type)); + await reader.SkipAsync().ConfigureAwait(false); } - else if (!reader.SkipContent()) + else if (!await reader.SkipContentAsync(cancellationToken).ConfigureAwait(false)) { break; } } } - else if (!reader.SkipContent()) + else if (!await reader.SkipContentAsync(cancellationToken).ConfigureAwait(false)) { break; } } + + return openXmlStyles; } internal 
NumberFormatString? GetStyleFormat(int index) diff --git a/src/MiniExcel.OpenXml/Templates/OpenXmlTemplate.Impl.cs b/src/MiniExcel.OpenXml/Templates/OpenXmlTemplate.Impl.cs index fa170553..367704b2 100644 --- a/src/MiniExcel.OpenXml/Templates/OpenXmlTemplate.Impl.cs +++ b/src/MiniExcel.OpenXml/Templates/OpenXmlTemplate.Impl.cs @@ -112,7 +112,7 @@ private void GetMergeCells(XElement worksheet) } } - private static IEnumerable NewParseConditionalFormatRanges(XElement worksheet) + private static IEnumerable ParseConditionalFormatRanges(XElement worksheet) { var conditionalFormatting = worksheet.Element(SpreadsheetNs + "conditionalFormatting"); if (conditionalFormatting is null) @@ -173,7 +173,7 @@ private async Task WriteSheetXmlAsync(XmlWriter writer, XElement worksheet, XEle { // TODO: Can we make this less complex? - var conditionalFormatRanges = NewParseConditionalFormatRanges(worksheet).ToList(); + var conditionalFormatRanges = ParseConditionalFormatRanges(worksheet).ToList(); var newConditionalFormatRanges = new List(); newConditionalFormatRanges.AddRange(conditionalFormatRanges); diff --git a/src/MiniExcel.OpenXml/Templates/OpenXmlTemplate.MergeCells.cs b/src/MiniExcel.OpenXml/Templates/OpenXmlTemplate.MergeCells.cs index 1ff7f05d..334db75e 100644 --- a/src/MiniExcel.OpenXml/Templates/OpenXmlTemplate.MergeCells.cs +++ b/src/MiniExcel.OpenXml/Templates/OpenXmlTemplate.MergeCells.cs @@ -1,3 +1,5 @@ +using MiniExcelLib.OpenXml.Reader; + namespace MiniExcelLib.OpenXml.Templates; internal partial class OpenXmlTemplate diff --git a/src/MiniExcel.OpenXml/Templates/OpenXmlTemplate.cs b/src/MiniExcel.OpenXml/Templates/OpenXmlTemplate.cs index d14b36b5..e7d71c91 100644 --- a/src/MiniExcel.OpenXml/Templates/OpenXmlTemplate.cs +++ b/src/MiniExcel.OpenXml/Templates/OpenXmlTemplate.cs @@ -1,3 +1,4 @@ +using MiniExcelLib.OpenXml.Reader; using CalcChainHelper = MiniExcelLib.OpenXml.Utils.CalcChainHelper; namespace MiniExcelLib.OpenXml.Templates; diff --git 
a/src/MiniExcel.OpenXml/Utils/OpenXmlZip.cs b/src/MiniExcel.OpenXml/Utils/OpenXmlZip.cs index 28387d13..f32027d4 100644 --- a/src/MiniExcel.OpenXml/Utils/OpenXmlZip.cs +++ b/src/MiniExcel.OpenXml/Utils/OpenXmlZip.cs @@ -5,13 +5,6 @@ namespace MiniExcelLib.OpenXml.Utils; /// Copied & modified from ExcelDataReader ZipWorker @MIT License internal sealed partial class OpenXmlZip : IDisposable, IAsyncDisposable { - private static readonly XmlReaderSettings XmlSettings = new() - { - IgnoreComments = true, - IgnoreWhitespace = true, - XmlResolver = null, - }; - private bool _disposed; internal ZipArchive ZipFile { get; } @@ -51,10 +44,6 @@ internal static async Task CreateAsync(Stream fileStream, ZipArchive public ZipArchiveEntry? GetEntry(string path) => Entries.GetValueOrDefault(path); - public XmlReader? GetXmlReader(string path) => GetEntry(path) is { } entry - ? XmlReader.Create(entry.Open(), XmlSettings) - : null; - public void Dispose() { diff --git a/src/MiniExcel.OpenXml/Utils/XmlReaderHelper.cs b/src/MiniExcel.OpenXml/Utils/XmlReaderHelper.cs index 2266bfb8..dacd9e0b 100644 --- a/src/MiniExcel.OpenXml/Utils/XmlReaderHelper.cs +++ b/src/MiniExcel.OpenXml/Utils/XmlReaderHelper.cs @@ -141,13 +141,7 @@ private static async Task ReadRichTextRunAsync(this XmlReader reader, Ca [CreateSyncVersion] public static async IAsyncEnumerable GetSharedStringsAsync(Stream stream, [EnumeratorCancellation]CancellationToken cancellationToken = default, params string[] nss) { - var xmlSettings = GetXmlReaderSettings( -#if SYNC_ONLY - false -#else - true -#endif - ); + var xmlSettings = GetXmlReaderSettings(); using var reader = XmlReader.Create(stream, xmlSettings); if (!reader.IsStartElement("sst", nss)) @@ -170,11 +164,14 @@ public static async IAsyncEnumerable GetSharedStringsAsync(Stream stream } } - internal static XmlReaderSettings GetXmlReaderSettings(bool async) => new() + internal static XmlReaderSettings GetXmlReaderSettings(bool forceSynchronous = false) => new() { + 
CheckCharacters = false, IgnoreComments = true, IgnoreWhitespace = true, XmlResolver = null, - Async = async +#if !SYNC_ONLY + Async = !forceSynchronous +#endif }; } diff --git a/src/MiniExcel.OpenXml/Writer/OpenXmlWriter.CopyInsert.cs b/src/MiniExcel.OpenXml/Writer/OpenXmlWriter.CopyInsert.cs index 24b0f3aa..cef48071 100644 --- a/src/MiniExcel.OpenXml/Writer/OpenXmlWriter.CopyInsert.cs +++ b/src/MiniExcel.OpenXml/Writer/OpenXmlWriter.CopyInsert.cs @@ -1,3 +1,4 @@ +using MiniExcelLib.OpenXml.Reader; using MiniExcelLib.OpenXml.Styles.Builder; namespace MiniExcelLib.OpenXml.Writer; @@ -48,7 +49,7 @@ public async Task CopyAndInsertAsync(bool overwriteSheet = false, IProgress using var disposableNewArchive = _archive; #endif using var reader = await OpenXmlReader.CreateAsync(_oldStream!, _configuration, cancellationToken: cancellationToken).ConfigureAwait(false); - var rels = await reader.GetWorkbookRelsAsync(_oldArchive!.Entries, cancellationToken).ConfigureAwait(false) ?? []; + var rels = await OpenXmlReader.GetWorkbookRelsAsync(_oldArchive!.Entries, cancellationToken).ConfigureAwait(false) ?? []; _sheets.AddRange(rels .OrderBy(sheet => sheet.Id) diff --git a/src/MiniExcel.OpenXml/Writer/OpenXmlWriter.cs b/src/MiniExcel.OpenXml/Writer/OpenXmlWriter.cs index e59694df..a0af25a3 100644 --- a/src/MiniExcel.OpenXml/Writer/OpenXmlWriter.cs +++ b/src/MiniExcel.OpenXml/Writer/OpenXmlWriter.cs @@ -1,9 +1,10 @@ using MiniExcelLib.Core.WriteAdapters; +using MiniExcelLib.OpenXml.Reader; using MiniExcelLib.OpenXml.Styles.Builder; namespace MiniExcelLib.OpenXml.Writer; -internal partial class OpenXmlWriter : IMiniExcelWriter +internal sealed partial class OpenXmlWriter : IMiniExcelWriter { private static readonly UTF8Encoding Utf8WithBom = new(true); @@ -105,7 +106,7 @@ public async Task InsertAsync(bool overwriteSheet = false, IProgress? 
await using var sbc = _sheetStyleBuilderContext.ConfigureAwait(false); using var reader = await OpenXmlReader.CreateAsync(_stream, _configuration, cancellationToken: cancellationToken).ConfigureAwait(false); - var rels = await reader.GetWorkbookRelsAsync(_archive.Entries, cancellationToken).ConfigureAwait(false) ?? []; + var rels = await OpenXmlReader.GetWorkbookRelsAsync(_archive.Entries, cancellationToken).ConfigureAwait(false) ?? []; _sheets.AddRange(rels .OrderBy(sheet => sheet.Id) @@ -674,7 +675,7 @@ private async Task CreateZipEntryAsync(string path, string? contentType, string [CreateSyncVersion] /* Todo: this method is not very efficient, but workbook.xml is generally a very small file so at the moment it's not worth over-optimizing it. - Also, consider adding active sheet as one of the editable properties. */ + Also, consider adding active sheet as one of the editable properties.*/ internal async Task AlterWorksheetAsync(string sheetName, string? newSheetName, int? newSheetIndex, SheetState? 
newSheetState, CancellationToken cancellationToken = default) { if (newSheetName is null && newSheetIndex is null && newSheetState is null) diff --git a/tests/MiniExcel.OpenXml.Tests/MiniExcelIssueAsyncTests.cs b/tests/MiniExcel.OpenXml.Tests/MiniExcelIssueAsyncTests.cs index 981feab2..be0bad0c 100644 --- a/tests/MiniExcel.OpenXml.Tests/MiniExcelIssueAsyncTests.cs +++ b/tests/MiniExcel.OpenXml.Tests/MiniExcelIssueAsyncTests.cs @@ -953,7 +953,7 @@ public async Task Issue193() }; await _excelTemplater.FillTemplateAsync(path, templatePath, value); - foreach (var sheetName in await _excelImporter.GetSheetNamesAsync(path)) + foreach (var sheetName in await _excelImporter.GetSheetNamesAsync(path)) { var rows = await _excelImporter.QueryAsync(path, sheetName: sheetName).ToListAsync(); Assert.Equal(9, rows.Count); diff --git a/tests/MiniExcel.OpenXml.Tests/MiniExcelOpenXmlConfigurationTest.cs b/tests/MiniExcel.OpenXml.Tests/MiniExcelOpenXmlConfigurationTest.cs index 251d7396..ca540042 100644 --- a/tests/MiniExcel.OpenXml.Tests/MiniExcelOpenXmlConfigurationTest.cs +++ b/tests/MiniExcel.OpenXml.Tests/MiniExcelOpenXmlConfigurationTest.cs @@ -23,7 +23,7 @@ public async Task DisableWriteFilePathTest() await _excelExporter.ExportAsync(path, value, configuration: new OpenXmlConfiguration { EnableWriteFilePath = false }, overwriteFile: true); Assert.True(File.Exists(path)); - var rows = await _excelImporter.QueryAsync(path).CreateListAsync(); + var rows = await _excelImporter.QueryAsync(path).ToListAsync(); Assert.True(rows.All(x => x.Img is null or [])); } diff --git a/tests/MiniExcel.OpenXml.Tests/Tables/MiniExcelOpenXmlTableAsyncTests.cs b/tests/MiniExcel.OpenXml.Tests/Tables/MiniExcelOpenXmlTableAsyncTests.cs new file mode 100644 index 00000000..4dad49db --- /dev/null +++ b/tests/MiniExcel.OpenXml.Tests/Tables/MiniExcelOpenXmlTableAsyncTests.cs @@ -0,0 +1,132 @@ +using MiniExcelLib.Tests.Common.Utils; + +namespace MiniExcelLib.OpenXml.Tests.Tables; + +public class 
MiniExcelOpenXmlTableAsyncTests +{ + private readonly OpenXmlImporter _excelImporter = MiniExcel.Importers.GetOpenXmlImporter(); + + /// + /// Tests querying a named table from a file path with dynamic results. + /// + [Fact] + public async Task QueryTableAsync_FromFilePath_ReturnsDynamicRows() + { + // Arrange + var path = PathHelper.GetFile("xlsx/TestQueryTable.xlsx"); + + // Act + var rows = await _excelImporter.QueryTableAsync(path, "Sheet1", "Table1").ToListAsync(); + + // Assert + Assert.Equal(3, rows.Count); + Assert.Equal("aaa", rows[0].Col1); + Assert.Equal(123D, rows[0].Col2); + Assert.Equal(new DateTime(2026, 5, 17), rows[0].Col3); + } + + /// + /// Tests querying a named table from a stream with dynamic results. + /// + [Fact] + public async Task QueryTableAsync_FromStream_ReturnsDynamicRows() + { + // Arrange + var path = PathHelper.GetFile("xlsx/TestQueryTable.xlsx"); + await using var stream = File.OpenRead(path); + + // Act + var rows = await _excelImporter.QueryTableAsync(stream, "Sheet1", "Table1").ToListAsync(); + + // Assert + Assert.Equal(3, rows.Count); + Assert.Equal("bbb", rows[1].Col1); + Assert.Equal(456D, rows[1].Col2); + Assert.Equal(new DateTime(2026, 5, 18), rows[1].Col3); + } + + /// + /// Tests querying a named table from a file path with strongly-typed results. + /// + [Fact] + public async Task QueryTableAsync_Generic_FromFilePath_ReturnsTypedRows() + { + // Arrange + var path = PathHelper.GetFile("xlsx/TestQueryTable.xlsx"); + + // Act + var rows = await _excelImporter.QueryTableAsync(path, "Sheet1", "Table1").ToListAsync(); + + // Assert + Assert.Equal(3, rows.Count); + Assert.Equal("aaa", rows[0].Col1); + Assert.Equal(123D, rows[0].Col2); + Assert.Equal(new DateTime(2026, 5, 17), rows[0].Col3); + } + + /// + /// Tests querying a named table from a stream with strongly-typed results. 
+ /// + [Fact] + public async Task QueryTableAsync_Generic_FromStream_ReturnsTypedRows() + { + // Arrange + var path = PathHelper.GetFile("xlsx/TestQueryTable.xlsx"); + await using var stream = File.OpenRead(path); + + // Act + var rows = await _excelImporter.QueryTableAsync(stream).ToListAsync(); + + // Assert + Assert.Equal(3, rows.Count); + Assert.Equal("ccc", rows[2].Col1); + Assert.Equal(789D, rows[2].Col2); + Assert.Equal(new DateTime(2026, 5, 19), rows[2].Col3); + } + + /// + /// Tests querying multiple tables from the same sheet. + /// + [Fact] + public async Task QueryTableAsync_MultipleTablesInSheet_ReturnsCorrectTableData() + { + // Arrange + var path = PathHelper.GetFile("xlsx/TestQueryTable.xlsx"); + + // Act + var table1 = await _excelImporter.QueryTableAsync(path).ToListAsync(); + var table2 = await _excelImporter.QueryTableAsync(path, "Sheet1", "Table2").ToListAsync(); + + // Assert + Assert.NotEmpty(table1); + Assert.NotEmpty(table2); + + // Assert + Assert.Equal(3, table1.Count); + Assert.Equal("aaa", table1[0].Col1); + Assert.Equal(123D, table1[0].Col2); + Assert.Equal(new DateTime(2026, 5, 17), table1[0].Col3); + + Assert.Equal(2, table2.Count); + Assert.Equal("test", table2[0].Prop1); + Assert.Equal(11D, table2[0].Prop2); + Assert.Equal("aaa", table2[0].Prop3); + Assert.Equal(new TimeSpan(10, 30, 0), table2[0].Prop4.TimeOfDay); + } + + /// + /// Tests QueryTableAsync with custom sheet and table names. 
+ /// + [Fact] + public async Task QueryTableAsync_WithCustomSheetAndTableNames_ReturnsCorrectData() + { + // Arrange + var path = PathHelper.GetFile("xlsx/TestQueryTable.xlsx"); + + // Act + var rows = await _excelImporter.QueryTableAsync(path, "CustomSheet", "CustomTable").ToListAsync(); + + // Assert + Assert.NotEmpty(rows); + } +} diff --git a/tests/MiniExcel.OpenXml.Tests/Tables/MiniExcelOpenXmlTableTests.cs b/tests/MiniExcel.OpenXml.Tests/Tables/MiniExcelOpenXmlTableTests.cs new file mode 100644 index 00000000..f282521a --- /dev/null +++ b/tests/MiniExcel.OpenXml.Tests/Tables/MiniExcelOpenXmlTableTests.cs @@ -0,0 +1,133 @@ +using MiniExcelLib.Tests.Common.Utils; + +namespace MiniExcelLib.OpenXml.Tests.Tables; + +public class MiniExcelOpenXmlTableTests +{ + private readonly OpenXmlImporter _excelImporter = MiniExcel.Importers.GetOpenXmlImporter(); + + + /// + /// Tests querying a named table from a file path with dynamic results. + /// + [Fact] + public void QueryTable_FromFilePath_ReturnsDynamicRows() + { + // Arrange + var path = PathHelper.GetFile("xlsx/TestQueryTable.xlsx"); + + // Act + var rows = _excelImporter.QueryTable(path, "Sheet1", "Table1").ToList(); + + // Assert + Assert.Equal(3, rows.Count); + Assert.Equal("aaa", rows[0].Col1); + Assert.Equal(123D, rows[0].Col2); + Assert.Equal(new DateTime(2026, 5, 17), rows[0].Col3); + } + + /// + /// Tests querying a named table from a stream with dynamic results. + /// + [Fact] + public void QueryTable_FromStream_ReturnsDynamicRows() + { + // Arrange + var path = PathHelper.GetFile("xlsx/TestQueryTable.xlsx"); + using var stream = File.OpenRead(path); + + // Act + var rows = _excelImporter.QueryTable(stream, "Sheet1", "Table1").ToList(); + + // Assert + Assert.Equal(3, rows.Count); + Assert.Equal("bbb", rows[1].Col1); + Assert.Equal(456D, rows[1].Col2); + Assert.Equal(new DateTime(2026, 5, 18), rows[1].Col3); + } + + /// + /// Tests querying a named table from a file path with strongly-typed results. 
+ /// + [Fact] + public void QueryTable_Generic_FromFilePath_ReturnsTypedRows() + { + // Arrange + var path = PathHelper.GetFile("xlsx/TestQueryTable.xlsx"); + + // Act + var rows = _excelImporter.QueryTable(path, "Sheet1", "Table1").ToList(); + + // Assert + Assert.Equal(3, rows.Count); + Assert.Equal("aaa", rows[0].Col1); + Assert.Equal(123D, rows[0].Col2); + Assert.Equal(new DateTime(2026, 5, 17), rows[0].Col3); + } + + /// + /// Tests querying a named table from a stream with strongly-typed results. + /// + [Fact] + public void QueryTable_Generic_FromStream_ReturnsTypedRows() + { + // Arrange + var path = PathHelper.GetFile("xlsx/TestQueryTable.xlsx"); + using var stream = File.OpenRead(path); + + // Act + var rows = _excelImporter.QueryTable(stream).ToList(); + + // Assert + Assert.Equal(3, rows.Count); + Assert.Equal("ccc", rows[2].Col1); + Assert.Equal(789D, rows[2].Col2); + Assert.Equal(new DateTime(2026, 5, 19), rows[2].Col3); + } + + /// + /// Tests querying multiple tables from the same sheet. + /// + [Fact] + public void QueryTable_MultipleTablesInSheet_ReturnsCorrectTableData() + { + // Arrange + var path = PathHelper.GetFile("xlsx/TestQueryTable.xlsx"); + + // Act + var table1 = _excelImporter.QueryTable(path).ToList(); + var table2 = _excelImporter.QueryTable(path, "Sheet1", "Table2").ToList(); + + // Assert + Assert.NotEmpty(table1); + Assert.NotEmpty(table2); + + // Assert + Assert.Equal(3, table1.Count); + Assert.Equal("aaa", table1[0].Col1); + Assert.Equal(123D, table1[0].Col2); + Assert.Equal(new DateTime(2026, 5, 17), table1[0].Col3); + + Assert.Equal(2, table2.Count); + Assert.Equal("test", table2[0].Prop1); + Assert.Equal(11D, table2[0].Prop2); + Assert.Equal("aaa", table2[0].Prop3); + Assert.Equal(new TimeSpan(10, 30, 0), table2[0].Prop4.TimeOfDay); + } + + /// + /// Tests QueryTable with custom sheet and table names. 
+ /// + [Fact] + public void QueryTable_WithCustomSheetAndTableNames_ReturnsCorrectData() + { + // Arrange + var path = PathHelper.GetFile("xlsx/TestQueryTable.xlsx"); + + // Act + var rows = _excelImporter.QueryTable(path, "CustomSheet", "CustomTable").ToList(); + + // Assert + Assert.NotEmpty(rows); + } +} diff --git a/tests/MiniExcel.OpenXml.Tests/Tables/Models.cs b/tests/MiniExcel.OpenXml.Tests/Tables/Models.cs new file mode 100644 index 00000000..674e6df6 --- /dev/null +++ b/tests/MiniExcel.OpenXml.Tests/Tables/Models.cs @@ -0,0 +1,8 @@ +namespace MiniExcelLib.OpenXml.Tests.Tables; + +internal class QueryTableTestModel +{ + public string? Col1 { get; set; } + public int Col2 { get; set; } + public DateTime Col3 { get; set; } +} diff --git a/tests/data/xlsx/TestQueryTable.xlsx b/tests/data/xlsx/TestQueryTable.xlsx new file mode 100644 index 00000000..c5039015 Binary files /dev/null and b/tests/data/xlsx/TestQueryTable.xlsx differ