From 9091e1d6c4426223932391b5f771bf414e43914a Mon Sep 17 00:00:00 2001 From: alinpahontu2912 Date: Fri, 10 Apr 2026 12:32:39 +0200 Subject: [PATCH 1/3] set up compression guide --- docs/fundamentals/toc.yml | 2 + .../zip-tar-best-practices/csharp/Program.cs | 254 ++++++++++++++ .../csharp/Project.csproj | 11 + docs/standard/io/zip-tar-best-practices.md | 313 ++++++++++++++++++ 4 files changed, 580 insertions(+) create mode 100644 docs/standard/io/snippets/zip-tar-best-practices/csharp/Program.cs create mode 100644 docs/standard/io/snippets/zip-tar-best-practices/csharp/Project.csproj create mode 100644 docs/standard/io/zip-tar-best-practices.md diff --git a/docs/fundamentals/toc.yml b/docs/fundamentals/toc.yml index 3394d08173dff..0f12cf034b3b4 100644 --- a/docs/fundamentals/toc.yml +++ b/docs/fundamentals/toc.yml @@ -967,6 +967,8 @@ items: href: ../standard/io/how-to-add-or-remove-access-control-list-entries.md - name: "How to: Compress and Extract Files" href: ../standard/io/how-to-compress-and-extract-files.md + - name: ZIP and TAR best practices + href: ../standard/io/zip-tar-best-practices.md - name: Composing Streams href: ../standard/io/composing-streams.md - name: "How to: Convert Between .NET Framework Streams and Windows Runtime Streams" diff --git a/docs/standard/io/snippets/zip-tar-best-practices/csharp/Program.cs b/docs/standard/io/snippets/zip-tar-best-practices/csharp/Program.cs new file mode 100644 index 0000000000000..557b1452a3c20 --- /dev/null +++ b/docs/standard/io/snippets/zip-tar-best-practices/csharp/Program.cs @@ -0,0 +1,254 @@ +using System.Formats.Tar; +using System.IO.Compression; +// +void SafeExtractEntry(ZipArchiveEntry entry, string destinationPath, long maxDecompressedSize) +{ + // Check the declared uncompressed size first (can be spoofed, but is a fast first check). 
+ if (entry.Length > maxDecompressedSize) + { + throw new InvalidOperationException( + $"Entry '{entry.FullName}' declares size {entry.Length}, exceeding limit {maxDecompressedSize}."); + } + + using Stream source = entry.Open(); + using FileStream destination = File.Create(destinationPath); + + byte[] buffer = new byte[81920]; + long totalBytesRead = 0; + int bytesRead; + + while ((bytesRead = source.Read(buffer, 0, buffer.Length)) > 0) + { + totalBytesRead += bytesRead; + if (totalBytesRead > maxDecompressedSize) + { + throw new InvalidOperationException( + $"Extraction of '{entry.FullName}' exceeded limit of {maxDecompressedSize} bytes."); + } + destination.Write(buffer, 0, bytesRead); + } +} +// + +// +void SafeExtractArchive(ZipArchive archive, string destinationDir, + long maxTotalSize, int maxEntryCount) +{ + // Some zip bombs contain millions of tiny entries (e.g., "42.zip"). + if (archive.Entries.Count > maxEntryCount) + { + throw new InvalidOperationException("Archive contains an excessive number of entries."); + } + + long totalExtracted = 0; + foreach (ZipArchiveEntry entry in archive.Entries) + { + totalExtracted += entry.Length; + if (totalExtracted > maxTotalSize) + { + throw new InvalidOperationException( + $"Archive total decompressed size exceeds the allowed limit of {maxTotalSize} bytes."); + } + // ... extract each entry with per-entry limits too + } +} +// + +// +void ValidatePaths(ZipArchive archive, string destinationDir) +{ + string fullDestDir = Path.GetFullPath(destinationDir); + if (!fullDestDir.EndsWith(Path.DirectorySeparatorChar)) + fullDestDir += Path.DirectorySeparatorChar; + + foreach (ZipArchiveEntry entry in archive.Entries) + { + string destPath = Path.GetFullPath(Path.Combine(fullDestDir, entry.FullName)); + + if (!destPath.StartsWith(fullDestDir, StringComparison.Ordinal)) + throw new IOException( + $"Entry '{entry.FullName}' would extract outside the destination directory."); + + // ... 
safe to extract + } +} +// + +// +void DangerousExtract(string extractDir) +{ + // ⚠️ DANGEROUS: entry.FullName could contain "../" sequences + using ZipArchive archive = ZipFile.OpenRead("archive.zip"); + foreach (ZipArchiveEntry entry in archive.Entries) + { + string destinationPath = Path.Combine(extractDir, entry.FullName); + entry.ExtractToFile(destinationPath, overwrite: true); // NO path validation! + } +} +// + +// +void SafeExtractZip(string archivePath, string destinationDir, + long maxTotalSize, long maxEntrySize, int maxEntryCount) +{ + // Resolve the destination to an absolute path and ensure it ends with a + // directory separator. This trailing separator is essential — without it, + // the StartsWith check below could be tricked by paths like + // "/safe-dir-evil/" matching "/safe-dir". + string fullDestDir = Path.GetFullPath(destinationDir); + if (!fullDestDir.EndsWith(Path.DirectorySeparatorChar)) + fullDestDir += Path.DirectorySeparatorChar; + + Directory.CreateDirectory(fullDestDir); + + using var archive = new ZipArchive(File.OpenRead(archivePath), ZipArchiveMode.Read); + + // Check the entry count up front. ZIP central directory is read eagerly, + // so archive.Entries.Count is available immediately without iterating. + if (archive.Entries.Count > maxEntryCount) + throw new InvalidOperationException("Archive contains too many entries."); + + long totalSize = 0; + foreach (ZipArchiveEntry entry in archive.Entries) + { + // Enforce per-entry and cumulative size limits using the declared + // uncompressed size. Note: this value is read from the archive header + // and could be spoofed by a malicious archive — for defense in depth, + // also monitor actual bytes read during decompression (see the zip + // bomb section for a streaming size check example). 
+ totalSize += entry.Length; + if (entry.Length > maxEntrySize) + throw new InvalidOperationException( + $"Entry '{entry.FullName}' exceeds per-entry size limit."); + if (totalSize > maxTotalSize) + throw new InvalidOperationException("Archive exceeds total size limit."); + + // Resolve the full destination path using Path.GetFullPath, which + // normalizes away any "../" segments. Then verify the result still + // starts with the destination directory. + string destPath = Path.GetFullPath(Path.Combine(fullDestDir, entry.FullName)); + if (!destPath.StartsWith(fullDestDir, StringComparison.Ordinal)) + throw new IOException( + $"Entry '{entry.FullName}' would extract outside the destination."); + + // ZIP uses a convention where directory entries have names ending in '/'. + // Path.GetFileName returns empty for these, so we use that to + // distinguish directories from files. + if (string.IsNullOrEmpty(Path.GetFileName(destPath))) + { + Directory.CreateDirectory(destPath); + } + else + { + // Ensure the parent directory exists before extracting the file. + Directory.CreateDirectory(Path.GetDirectoryName(destPath)!); + entry.ExtractToFile(destPath, overwrite: false); + } + } +} +// + +// +void SafeExtractTar(Stream archiveStream, string destinationDir, + long maxTotalSize, long maxEntrySize, int maxEntryCount) +{ + // Same trailing-separator technique as the ZIP example. + string fullDestDir = Path.GetFullPath(destinationDir); + if (!fullDestDir.EndsWith(Path.DirectorySeparatorChar)) + fullDestDir += Path.DirectorySeparatorChar; + + Directory.CreateDirectory(fullDestDir); + + using var reader = new TarReader(archiveStream); + TarEntry? entry; + long totalSize = 0; + int entryCount = 0; + + // TAR has no central directory — entries are read one at a time. + // GetNextEntry() returns null when the archive is exhausted. 
+ while ((entry = reader.GetNextEntry()) is not null) + { + if (++entryCount > maxEntryCount) + throw new InvalidOperationException("Archive contains too many entries."); + + if (entry.Length > maxEntrySize) + throw new InvalidOperationException( + $"Entry '{entry.Name}' exceeds per-entry size limit."); + totalSize += entry.Length; + if (totalSize > maxTotalSize) + throw new InvalidOperationException("Archive exceeds total size limit."); + + // Symbolic links and hard links can be used to write files outside the + // extraction directory or to overwrite sensitive files. The safest + // approach for untrusted input is to skip them entirely. + if (entry.EntryType is TarEntryType.SymbolicLink or TarEntryType.HardLink) + continue; + + // Global extended attributes are PAX metadata entries that apply to all + // subsequent entries. They contain no file data and should be skipped. + if (entry.EntryType is TarEntryType.GlobalExtendedAttributes) + continue; + + // Normalize and validate the path, same as the ZIP example. + string destPath = Path.GetFullPath(Path.Join(fullDestDir, entry.Name)); + if (!destPath.StartsWith(fullDestDir, StringComparison.Ordinal)) + throw new IOException( + $"Entry '{entry.Name}' would extract outside the destination."); + + if (entry.EntryType is TarEntryType.Directory) + { + Directory.CreateDirectory(destPath); + } + else if (entry.DataStream is not null) + { + // For file entries, copy the data stream to a new file. + // We use entry.DataStream directly instead of ExtractToFile because + // ExtractToFile rejects symbolic and hard link entries (already + // filtered above) and requires a file path rather than a stream. 
+ Directory.CreateDirectory(Path.GetDirectoryName(destPath)!); + using var fileStream = File.Create(destPath); + entry.DataStream.CopyTo(fileStream); + } + } +} +// + +// +void StreamingModify() +{ + // ✅ Streaming approach for large archives + using var input = new ZipArchive(File.OpenRead("large.zip"), ZipArchiveMode.Read); + using var output = new ZipArchive(File.Create("modified.zip"), ZipArchiveMode.Create); + + foreach (var entry in input.Entries) + { + if (ShouldKeep(entry)) + { + var newEntry = output.CreateEntry(entry.FullName); + using var src = entry.Open(); + using var dst = newEntry.Open(); + src.CopyTo(dst); + } + } +} + +bool ShouldKeep(ZipArchiveEntry entry) => true; +// + +// +void TarStreamingRead(Stream archiveStream) +{ + using var reader = new TarReader(archiveStream); + TarEntry? entry; + while ((entry = reader.GetNextEntry()) is not null) + { + if (entry.DataStream is not null) + { + string safePath = "output.bin"; + // Copy now — the stream becomes invalid after the next GetNextEntry() call + using var fileStream = File.Create(safePath); + entry.DataStream.CopyTo(fileStream); + } + } +} +// diff --git a/docs/standard/io/snippets/zip-tar-best-practices/csharp/Project.csproj b/docs/standard/io/snippets/zip-tar-best-practices/csharp/Project.csproj new file mode 100644 index 0000000000000..7cbe0892651d0 --- /dev/null +++ b/docs/standard/io/snippets/zip-tar-best-practices/csharp/Project.csproj @@ -0,0 +1,11 @@ + + + + Exe + net9.0 + enable + enable + + + + diff --git a/docs/standard/io/zip-tar-best-practices.md b/docs/standard/io/zip-tar-best-practices.md new file mode 100644 index 0000000000000..450aa07ac4b1a --- /dev/null +++ b/docs/standard/io/zip-tar-best-practices.md @@ -0,0 +1,313 @@ +--- +title: Best practices for ZIP and TAR archives +description: Learn best practices for working with ZIP and TAR archives in .NET, including API selection, trusted and untrusted extraction patterns, memory management, and platform considerations. 
+ms.date: 04/10/2026 +ai-usage: ai-assisted +dev_langs: + - "csharp" +helpviewer_keywords: + - "I/O [.NET], compression" + - "compression" + - "ZIP" + - "TAR" + - "zip bomb" + - "path traversal" + - "Zip Slip" + - "archive security" +--- + +# Best practices for working with ZIP and TAR archives in .NET + +.NET provides built-in support for two of the most common archive formats: + +- **ZIP** (`System.IO.Compression`): A compressed archive format that bundles multiple files and directories into a single file. ZIP supports per-entry compression (Deflate, Deflate64, Stored). The primary types are <xref:System.IO.Compression.ZipArchive> for reading and writing archives, <xref:System.IO.Compression.ZipFile> for file-based convenience methods, and `ZipFileExtensions` for extraction helpers. +- **TAR** (`System.Formats.Tar`): A Unix-origin archive format that stores files, directories, and metadata (permissions, ownership, timestamps) without compression. .NET supports the V7, UStar, PAX, and GNU formats. The primary types are <xref:System.Formats.Tar.TarReader> and <xref:System.Formats.Tar.TarWriter> for streaming access, and <xref:System.Formats.Tar.TarFile> for file-based convenience methods. TAR is often combined with a compression layer (for example, `GZipStream` for `.tar.gz` files). + +This article helps you choose the right API, use the convenience methods effectively for trusted input, and safely handle untrusted archives. + +## Choose the right API + +.NET offers two tiers of archive APIs. Pick the tier that matches your scenario. + +### Convenience APIs (one-shot operations) + +- `ZipFile.CreateFromDirectory` / `ZipFile.ExtractToDirectory`—create or extract an entire archive in one call. +- `TarFile.CreateFromDirectory` / `TarFile.ExtractToDirectory`—same for TAR. +- Best for: simple workflows with trusted input, quick scripts, build tooling. + +### Streaming APIs (entry-by-entry control) + +- `ZipArchive`—open an archive, iterate entries, read or write selectively. +- `TarReader` / `TarWriter`—sequential entry-by-entry access. +- Best for: large archives, selective extraction, untrusted input, custom processing.
+ +If you control the archive source (your own build output, known-safe backups), the convenience APIs are the simplest choice. If the archive comes from an external source (user uploads, downloads, network transfers), use the streaming APIs with the safety checks described in this article. + +## Work with trusted archives + +When the archive source is known and trusted, the convenience methods give you a safe, one-line extraction path: + +- `ZipFile.ExtractToDirectory` and `TarFile.ExtractToDirectory` handle path validation automatically—they sanitize entry names, resolve paths, and check directory boundaries. +- Default overwrite behavior is `false`. Always be explicit: + +```csharp +// ✅ Explicit — default is safe +ZipFile.ExtractToDirectory("archive.zip", destDir, overwriteFiles: false); +TarFile.ExtractToDirectory("archive.tar", destDir, overwriteFiles: false); +``` + +- When overwriting is enabled during ZIP extraction, .NET extracts to a temporary file first and only replaces the target after successful extraction—this approach prevents partial corruption if the extraction fails. + +> [!NOTE] +> The convenience methods don't limit decompressed size or entry count. If that matters even for trusted input (for example, very large archives), use the streaming approach described in [Handle untrusted archives safely](#handle-untrusted-archives-safely). + +## Handle untrusted archives safely + +For untrusted input—user uploads, third-party downloads, or network transfers—iterate over entries manually and enforce your own safety checks. The following subsections describe what you need to enforce and why. + +### What the convenience methods don't protect you from + +`ExtractToDirectory` handles path traversal validation, but it doesn't enforce size limits, entry count limits, or filter dangerous TAR entry types. 
A small compressed file can expand to terabytes of data (known as a *zip bomb*), and TAR archives can contain symbolic links that escape the extraction directory. You must handle these yourself when processing untrusted input. + +### Enforce size and entry count limits + +Neither `ZipArchive` nor `TarReader` limits the total uncompressed size or the number of entries extracted, and neither do the `ExtractToDirectory` convenience methods. You must enforce these limits yourself. + +> [!IMPORTANT] +> A small compressed file can expand to terabytes of data—this is known as a *zip bomb*. Always enforce limits on decompressed size and entry count when extracting untrusted archives. + +#### Track decompressed size during extraction + +:::code language="csharp" source="./snippets/zip-tar-best-practices/csharp/Program.cs" id="SafeExtractEntry"::: + +#### Track aggregate size and entry count + +:::code language="csharp" source="./snippets/zip-tar-best-practices/csharp/Program.cs" id="SafeExtractArchive"::: + +> [!TIP] +> The same approach applies to TAR archives. Since TAR files are read entry-by-entry via `TarReader.GetNextEntry()`, track both the cumulative data size and entry count as you iterate. + +### Validate destination paths (low-level APIs only) + +When you use the streaming APIs, you're responsible for validating every entry's destination path. The low-level APIs perform no path validation at all. + +For every entry, resolve the destination to an absolute path and verify it falls within your target directory: + +:::code language="csharp" source="./snippets/zip-tar-best-practices/csharp/Program.cs" id="PathValidation"::: + +Key points: + +- `Path.GetFullPath()` resolves relative segments like `../` into an absolute path. +- The `StartsWith` check ensures the resolved path is still inside the destination. +- The trailing directory separator on `fullDestDir` is critical—without it, a path like `/safe-dir-evil/file` would incorrectly match `/safe-dir`. 
+ +> [!NOTE] +> `ExtractToDirectory` handles path traversal for you—the runtime sanitizes entry names, resolves paths with `Path.GetFullPath()`, and verifies them with `StartsWith`. You're using the streaming APIs here because of the size-limits issue described above. + +> [!WARNING] +> The following APIs leave you completely unprotected against path traversal. You must validate paths yourself before calling them. + +- `ZipArchiveEntry.ExtractToFile()` writes to whatever path you give it—no sanitization, no boundary check. +- `ZipArchiveEntry.Open()` returns a raw `Stream`—the caller decides where to write. +- `TarEntry.ExtractToFile()` writes to the given path without validating it against any directory boundary. + +**Vulnerable pattern—DO NOT USE without validation:** + +:::code language="csharp" source="./snippets/zip-tar-best-practices/csharp/Program.cs" id="VulnerablePattern"::: + +### Handle symbolic and hard links (TAR) + +TAR archives support symbolic links and hard links, which introduce attack vectors beyond basic path traversal: + +- **Symlink escape:** A symlink entry points to an arbitrary location (for example, `/etc/`), then a subsequent file entry relative to the symlink directory writes through the link to that external location. +- **Hard link to sensitive file:** A hard link target references a file outside the extraction directory, allowing reads or overwrites. + +The safest approach for untrusted archives is to skip link entries entirely: + +```csharp +if (entry.EntryType is TarEntryType.SymbolicLink or TarEntryType.HardLink) + continue; // Skip link entries for untrusted input +``` + +If your use case requires hard links but you want to avoid filesystem-level hard links, `TarHardLinkMode.CopyContents` copies the file content instead of creating a real hard link. This approach eliminates hard-link-based attacks and produces more portable output on Windows. 
+ +For reference, `TarFile.ExtractToDirectory` validates both the entry path and link target path against the destination directory boundary. If either resolves outside, an `IOException` is thrown. `TarEntry.ExtractToFile()` rejects symbolic and hard link entries entirely—it throws `InvalidOperationException`. + +### Complete safe extraction examples + +Combine path traversal validation, size limits, entry count limits, and link handling in a single extraction loop. + +#### ZIP—complete safe extraction + +The following method extracts a ZIP archive while enforcing all recommended safety checks: + +:::code language="csharp" source="./snippets/zip-tar-best-practices/csharp/Program.cs" id="SafeExtractZip"::: + +#### TAR—complete safe extraction + +TAR extraction differs from ZIP in several ways: entries are read sequentially (there's no central directory), link entries need explicit handling, and the `DataStream` must be consumed before advancing to the next entry. + +:::code language="csharp" source="./snippets/zip-tar-best-practices/csharp/Program.cs" id="SafeExtractTar"::: + +## Memory and performance considerations + +### ZipArchiveMode.Update loads entries into memory + +Don't use `ZipArchiveMode.Update` for large or untrusted archives. When you open a `ZipArchive` in `Update` mode, each entry's uncompressed data is loaded into a `MemoryStream` when that entry is accessed. The runtime requires a seekable stream for Update mode and decompresses each entry fully into memory to support in-place modifications. For large or malicious archives, this behavior can cause `OutOfMemoryException`. + +Additionally, when you open a `ZipArchive` in `Read` mode with an **unseekable** stream (for example, a network stream), the runtime copies the entire stream into a `MemoryStream` up front to enable seeking through the central directory. 
+ +```csharp +// ⚠️ In Update mode, each entry is decompressed into memory when accessed +using var archive = new ZipArchive(stream, ZipArchiveMode.Update); +``` + +**Recommendation:** Only use `Update` mode for archives you trust and know are small enough to fit in memory. For large archives, create a new archive and selectively copy entries: + +:::code language="csharp" source="./snippets/zip-tar-best-practices/csharp/Program.cs" id="StreamingApproach"::: + +### TAR streaming model + +`TarReader` reads entries one at a time and doesn't buffer the entire archive. However, for unseekable streams, each entry's `DataStream` is only valid until the next `GetNextEntry()` call. If you need to retain entry data, copy it immediately: + +:::code language="csharp" source="./snippets/zip-tar-best-practices/csharp/Program.cs" id="TarStreaming"::: + +### Thread safety + +`ZipArchive` isn't thread-safe. The internal state—entry lists, stream positions, and disposal flags—isn't synchronized. Don't read or write entries from multiple threads concurrently. If you need parallel processing, open a separate `ZipArchive` instance per thread, or synchronize access externally. + +`TarReader` and `TarWriter` are likewise not designed for concurrent use. Each operates on a single underlying stream with sequential access semantics. + +## Platform considerations + +### Unix file permissions + +- **ZIP:** Unix permissions are stored in the upper 16 bits of `ExternalAttributes`. When extracting on Unix, the runtime restores ownership permissions (read/write/execute for user/group/other), subject to the process umask. Permissions aren't applied if the upper bits are zero—this happens when the ZIP was created on Windows, because the Windows runtime sets `DefaultFileExternalAttributes` to `0`. On Windows, these attributes are always ignored during extraction. 
+- **TAR:** The `TarEntry.Mode` property represents `UnixFileMode` and can store all 12 permission bits (read/write/execute for user/group/other, plus SetUID, SetGID, and StickyBit). However, during **regular file extraction**, only the 9 ownership bits (rwx for user/group/other) are applied—SetUID, SetGID, and StickyBit are explicitly stripped for security. Directories, block devices, character devices, and FIFOs receive the full `Mode` value including SetUID, SetGID, and StickyBit. + +### Special entry types (TAR) + +Block devices, character devices, and FIFOs can only be created on Unix. Extracting these on Windows throws an exception. Elevated privileges are required to create block and character device entries. + +### File name sanitization differs by platform + +On Windows, the runtime replaces control characters and `"*:<>?|` with underscores via `ArchivingUtils.SanitizeEntryFilePath()`. On Unix, only null characters are replaced. Archive entries with names like `file:name.txt` are renamed to `file_name.txt` on Windows but extracted as-is on Unix. + +## Data integrity + +ZIP entries include a CRC-32 checksum that you can use to verify data hasn't been corrupted or tampered with. + +Starting with .NET 11, the runtime validates CRC-32 checksums automatically when reading ZIP entries. When you read an entry's data stream to completion, the runtime compares the computed CRC of the decompressed data against the checksum stored in the archive. If they don't match, an `InvalidDataException` is thrown. + +> [!NOTE] +> In prior versions of .NET, no CRC validation was performed on read. The runtime computed CRC values when writing entries (for storage in the archive), but never verified them during extraction. If you're targeting a runtime older than .NET 11, be aware that corrupt or tampered entries are silently accepted. 
+ +> [!NOTE] +> CRC-32 isn't a cryptographic hash—it detects accidental corruption but doesn't protect against intentional tampering by a sophisticated attacker. + +## Untrusted metadata + +### ZIP comments and extra fields + +Treat all archive metadata as untrusted input. ZIP archives can contain attacker-controlled metadata beyond entry names: + +- **Archive and entry comments** are arbitrary strings encoded using either Code Page 437 or UTF-8 (depending on the language encoding flag). If your application displays or processes comments, sanitize them appropriately. +- **Extra fields** are binary key-value pairs attached to each entry. The runtime preserves unknown extra fields and trailing data when reading and writing archives in `Update` mode—they're round-tripped as-is. If your application reads or interprets extra fields, validate their contents. +- **Entry name encoding** defaults to the system codepage for entries without the language encoding flag (EFS) set, and UTF-8 when EFS is set. The ZIP specification defines Code Page 437 as the default, but in practice, most tools (including the Windows Shell zip tool) use the local system codepage instead, and .NET follows the same behavior. When interoperating with archives from other tools, mismatched encodings can produce garbled file names. Use the `entryNameEncoding` parameter on `ZipArchive` to override encoding when needed, but be aware the override affects all entries uniformly. + +### TAR header-driven memory allocation + +TAR entry headers contain size fields that the parser uses to allocate buffers. A malicious TAR archive can declare an extremely large size for a PAX extended attributes block or a GNU long-path entry, causing the parser to attempt a large memory allocation. The runtime does include a `ValidateSize()` guard that rejects allocations exceeding `Array.MaxLength` (~2 GB), so allocations aren't completely unbounded—but values up to ~2 GB can still cause significant memory pressure. 
Your entry-count and per-entry-size limits (described in [Enforce size and entry count limits](#enforce-size-and-entry-count-limits)) also help mitigate this risk, since these metadata entries are counted and sized like regular entries. + +## Encryption considerations (preview) + +> [!NOTE] +> ZIP encryption support (ZipCrypto and WinZip AES) is a preview feature that isn't yet publicly available. The APIs described in this section are subject to change. + +.NET 11 adds support for reading and writing encrypted ZIP archives using WinZip-compatible encryption. The `ZipEncryptionMethod` enum specifies the encryption method: + +| Value | Description | +|-------|-------------| +| `None` | No encryption. | +| `ZipCrypto` | Legacy ZIP encryption. Use only for backward compatibility—vulnerable to known-plaintext attacks. | +| `Aes128` | WinZip AES-128. | +| `Aes192` | WinZip AES-192. | +| `Aes256` | WinZip AES-256. **Recommended**—strongest available option. | +| `Unknown` | Returned when the entry uses PKWare strong encryption, which .NET doesn't support. | + +### Choose AES-256 for new archives + +When creating encrypted entries, always prefer `Aes256`. `ZipCrypto` is a legacy method with known cryptographic weaknesses and shouldn't be relied upon for security—use it only when interoperating with tools that don't support WinZip AES. 
+ +```csharp +// ⚠️ Weak encryption — use only for backward compatibility +archive.CreateEntry("file.txt", "password", ZipEncryptionMethod.ZipCrypto); + +// ✅ Prefer AES-256 +archive.CreateEntry("file.txt", "password", ZipEncryptionMethod.Aes256); +``` + +### Read encrypted entries + +Use `ZipArchiveEntry.EncryptionMethod` to check the encryption method, and provide a password to `Open`: + +```csharp +using ZipArchive archive = ZipFile.OpenRead("encrypted.zip"); + +foreach (ZipArchiveEntry entry in archive.Entries) +{ + if (entry.EncryptionMethod == ZipEncryptionMethod.Unknown) + { + // PKWare strong encryption — not supported by .NET + continue; + } + + using Stream stream = entry.Open("myPassword"); + // ... read the decrypted data +} +``` + +Attempting to open an entry that uses PKWare strong encryption (`ZipEncryptionMethod.Unknown`) throws `NotSupportedException`. + +### Convenience methods with encryption + +New option types let you pass a password and encryption method to the convenience APIs: + +```csharp +// Extract an encrypted archive +ZipFile.ExtractToDirectory("encrypted.zip", destDir, new ZipExtractionOptions +{ + Password = "myPassword".AsMemory(), + OverwriteFiles = false +}); + +// Create an encrypted archive +ZipFile.CreateFromDirectory(sourceDir, "encrypted.zip", new ZipFileCreationOptions +{ + Password = "myPassword".AsMemory(), + EncryptionMethod = ZipEncryptionMethod.Aes256, + CompressionLevel = CompressionLevel.Optimal +}); +``` + +## Security checklist + +Before deploying code that handles archives from untrusted sources, verify you've addressed each of the following: + +- [ ] **Manual iteration:** Don't use `ExtractToDirectory` for untrusted input—iterate entries manually to enforce all limits. +- [ ] **Path traversal:** Validate all destination paths with `Path.GetFullPath()` + `StartsWith()`. +- [ ] **Decompression bombs:** Enforce limits on decompressed size (per-entry and total) and entry count. 
+- [ ] **Symlink/hardlink attacks (TAR):** Validate link targets resolve within the destination, or skip link entries entirely. +- [ ] **Memory limits:** Avoid `ZipArchiveMode.Update` for large untrusted archives. Avoid `Read` mode with unseekable streams from untrusted sources. +- [ ] **Thread safety:** Don't share `ZipArchive`, `TarReader`, or `TarWriter` instances across threads. +- [ ] **Untrusted metadata:** Treat entry names, comments, and extra fields as untrusted input. Sanitize before display or processing. +- [ ] **File name validation:** On Windows, guard against reserved names (`CON`, `PRN`, `AUX`, `NUL`). +- [ ] **Overwrite behavior:** Default to `overwrite: false`. +- [ ] **Resource disposal:** Always dispose `ZipArchive`, `TarReader`, `TarWriter`, and their streams. + +## See also + +- +- +- +- From 9de945668eb5b8c03d17b4440aa73a7ad9c13f1b Mon Sep 17 00:00:00 2001 From: alinpahontu2912 Date: Fri, 10 Apr 2026 20:28:46 +0200 Subject: [PATCH 2/3] address comments --- .../zip-tar-best-practices/csharp/Program.cs | 58 ++++++++------- docs/standard/io/zip-tar-best-practices.md | 73 ++++++++----------- 2 files changed, 61 insertions(+), 70 deletions(-) diff --git a/docs/standard/io/snippets/zip-tar-best-practices/csharp/Program.cs b/docs/standard/io/snippets/zip-tar-best-practices/csharp/Program.cs index 557b1452a3c20..2f35a194a997d 100644 --- a/docs/standard/io/snippets/zip-tar-best-practices/csharp/Program.cs +++ b/docs/standard/io/snippets/zip-tar-best-practices/csharp/Program.cs @@ -3,30 +3,15 @@ // void SafeExtractEntry(ZipArchiveEntry entry, string destinationPath, long maxDecompressedSize) { - // Check the declared uncompressed size first (can be spoofed, but is a fast first check). + // The runtime enforces that entry.Open() will never produce more than + // entry.Length bytes, so checking the declared size is sufficient. 
if (entry.Length > maxDecompressedSize) { throw new InvalidOperationException( $"Entry '{entry.FullName}' declares size {entry.Length}, exceeding limit {maxDecompressedSize}."); } - using Stream source = entry.Open(); - using FileStream destination = File.Create(destinationPath); - - byte[] buffer = new byte[81920]; - long totalBytesRead = 0; - int bytesRead; - - while ((bytesRead = source.Read(buffer, 0, buffer.Length)) > 0) - { - totalBytesRead += bytesRead; - if (totalBytesRead > maxDecompressedSize) - { - throw new InvalidOperationException( - $"Extraction of '{entry.FullName}' exceeded limit of {maxDecompressedSize} bytes."); - } - destination.Write(buffer, 0, bytesRead); - } + entry.ExtractToFile(destinationPath, overwrite: false); } // @@ -82,7 +67,7 @@ void DangerousExtract(string extractDir) foreach (ZipArchiveEntry entry in archive.Entries) { string destinationPath = Path.Combine(extractDir, entry.FullName); - entry.ExtractToFile(destinationPath, overwrite: true); // NO path validation! + entry.ExtractToFile(destinationPath, overwrite: true); // May write outside of `extractDir` } } // @@ -131,16 +116,16 @@ void SafeExtractZip(string archivePath, string destinationDir, throw new IOException( $"Entry '{entry.FullName}' would extract outside the destination."); - // ZIP uses a convention where directory entries have names ending in '/'. - // Path.GetFileName returns empty for these, so we use that to - // distinguish directories from files. + // By convention, directory entries in ZIP archives have names ending + // in '/'. Path.GetFileName returns empty for these, so we use that + // to distinguish directories from files. if (string.IsNullOrEmpty(Path.GetFileName(destPath))) { Directory.CreateDirectory(destPath); } else { - // Ensure the parent directory exists before extracting the file. + // Create the parent directory and any missing intermediate directories. 
Directory.CreateDirectory(Path.GetDirectoryName(destPath)!); entry.ExtractToFile(destPath, overwrite: false); } @@ -201,10 +186,7 @@ void SafeExtractTar(Stream archiveStream, string destinationDir, } else if (entry.DataStream is not null) { - // For file entries, copy the data stream to a new file. - // We use entry.DataStream directly instead of ExtractToFile because - // ExtractToFile rejects symbolic and hard link entries (already - // filtered above) and requires a file path rather than a stream. + // Create the parent directory and any missing intermediate directories. Directory.CreateDirectory(Path.GetDirectoryName(destPath)!); using var fileStream = File.Create(destPath); entry.DataStream.CopyTo(fileStream); @@ -213,6 +195,28 @@ void SafeExtractTar(Stream archiveStream, string destinationDir, } // +// +bool IsLinkTargetSafe(TarEntry entry, string fullDestDir) +{ + string resolvedTarget; + + if (entry.EntryType is TarEntryType.SymbolicLink) + { + // Symlink targets are relative to the symlink's own parent directory, or absolute. + string entryDir = Path.GetDirectoryName( + Path.GetFullPath(Path.Join(fullDestDir, entry.Name)))!; + resolvedTarget = Path.GetFullPath(Path.Join(entryDir, entry.LinkName)); + } + else + { + // Hard link targets are relative to the destination directory root. 
+ resolvedTarget = Path.GetFullPath(Path.Join(fullDestDir, entry.LinkName)); + } + + return resolvedTarget.StartsWith(fullDestDir, StringComparison.Ordinal); +} +// + // void StreamingModify() { diff --git a/docs/standard/io/zip-tar-best-practices.md b/docs/standard/io/zip-tar-best-practices.md index 450aa07ac4b1a..f390e8dd4051d 100644 --- a/docs/standard/io/zip-tar-best-practices.md +++ b/docs/standard/io/zip-tar-best-practices.md @@ -47,16 +47,9 @@ If you control the archive source (your own build output, known-safe backups), t When the archive source is known and trusted, the convenience methods give you a safe, one-line extraction path: -- `ZipFile.ExtractToDirectory` and `TarFile.ExtractToDirectory` handle path validation automatically—they sanitize entry names, resolve paths, and check directory boundaries. -- Default overwrite behavior is `false`. Always be explicit: - -```csharp -// ✅ Explicit — default is safe -ZipFile.ExtractToDirectory("archive.zip", destDir, overwriteFiles: false); -TarFile.ExtractToDirectory("archive.tar", destDir, overwriteFiles: false); -``` - -- When overwriting is enabled during ZIP extraction, .NET extracts to a temporary file first and only replaces the target after successful extraction—this approach prevents partial corruption if the extraction fails. +- `ZipFile.ExtractToDirectory` and `TarFile.ExtractToDirectory` handle path validation automatically. They sanitize entry names, resolve paths, and check directory boundaries. +- `ZipFile.ExtractToDirectory` has overloads that default to not overwriting existing files. All `TarFile.ExtractToDirectory` overloads require the `overwriteFiles` parameter, so you must always choose explicitly. +- When overwriting is enabled during ZIP extraction, .NET extracts to a temporary file first and only replaces the target after successful extraction. This prevents partial corruption if the extraction fails. > [!NOTE] > The convenience methods don't limit decompressed size or entry count. 
If that matters even for trusted input (for example, very large archives), use the streaming approach described in [Handle untrusted archives safely](#handle-untrusted-archives-safely). @@ -67,7 +60,7 @@ For untrusted input—user uploads, third-party downloads, or network transfers ### What the convenience methods don't protect you from -`ExtractToDirectory` handles path traversal validation, but it doesn't enforce size limits, entry count limits, or filter dangerous TAR entry types. A small compressed file can expand to terabytes of data (known as a *zip bomb*), and TAR archives can contain symbolic links that escape the extraction directory. You must handle these yourself when processing untrusted input. +`ExtractToDirectory` handles path traversal validation (including symbolic link targets in TAR), but it doesn't enforce size limits or entry count limits. A small compressed file can expand to terabytes of data (known as a *zip bomb*). You must enforce these limits yourself when processing untrusted input. ### Enforce size and entry count limits @@ -76,7 +69,7 @@ Neither `ZipArchive` nor `TarReader` limits the total uncompressed size or the n > [!IMPORTANT] > A small compressed file can expand to terabytes of data—this is known as a *zip bomb*. Always enforce limits on decompressed size and entry count when extracting untrusted archives. -#### Track decompressed size during extraction +#### Enforce per-entry size limits :::code language="csharp" source="./snippets/zip-tar-best-practices/csharp/Program.cs" id="SafeExtractEntry"::: @@ -87,7 +80,7 @@ Neither `ZipArchive` nor `TarReader` limits the total uncompressed size or the n > [!TIP] > The same approach applies to TAR archives. Since TAR files are read entry-by-entry via `TarReader.GetNextEntry()`, track both the cumulative data size and entry count as you iterate. 
-### Validate destination paths (low-level APIs only) +### Validate destination paths When you use the streaming APIs, you're responsible for validating every entry's destination path. The low-level APIs perform no path validation at all. @@ -101,9 +94,6 @@ Key points: - The `StartsWith` check ensures the resolved path is still inside the destination. - The trailing directory separator on `fullDestDir` is critical—without it, a path like `/safe-dir-evil/file` would incorrectly match `/safe-dir`. -> [!NOTE] -> `ExtractToDirectory` handles path traversal for you—the runtime sanitizes entry names, resolves paths with `Path.GetFullPath()`, and verifies them with `StartsWith`. You're using the streaming APIs here because of the size-limits issue described above. - > [!WARNING] > The following APIs leave you completely unprotected against path traversal. You must validate paths yourself before calling them. @@ -120,7 +110,7 @@ Key points: TAR archives support symbolic links and hard links, which introduce attack vectors beyond basic path traversal: - **Symlink escape:** A symlink entry points to an arbitrary location (for example, `/etc/`), then a subsequent file entry relative to the symlink directory writes through the link to that external location. -- **Hard link to sensitive file:** A hard link target references a file outside the extraction directory, allowing reads or overwrites. +- **Hard link to sensitive file:** A hard link target references a file outside the extraction directory. Because a hard link shares the same inode as the original, any code that later opens the hard link for writing modifies the original file's contents. Simply overwriting the hard link (for example, with `File.Create`) replaces the directory entry and does not affect the original. 
The safest approach for untrusted archives is to skip link entries entirely: @@ -129,7 +119,11 @@ if (entry.EntryType is TarEntryType.SymbolicLink or TarEntryType.HardLink) continue; // Skip link entries for untrusted input ``` -If your use case requires hard links but you want to avoid filesystem-level hard links, `TarHardLinkMode.CopyContents` copies the file content instead of creating a real hard link. This approach eliminates hard-link-based attacks and produces more portable output on Windows. +If you need to preserve links, validate that the link target resolves within your destination directory before creating it: + +:::code language="csharp" source="./snippets/zip-tar-best-practices/csharp/Program.cs" id="ValidateSymlink"::: + +If your use case requires hard links but you want to avoid filesystem-level hard links, `TarHardLinkMode.CopyContents` copies the file content instead of creating a real hard link. This eliminates hard-link-based attacks and produces more portable output on Windows. For reference, `TarFile.ExtractToDirectory` validates both the entry path and link target path against the destination directory boundary. If either resolves outside, an `IOException` is thrown. `TarEntry.ExtractToFile()` rejects symbolic and hard link entries entirely—it throws `InvalidOperationException`. @@ -153,12 +147,12 @@ TAR extraction differs from ZIP in several ways: entries are read sequentially ( ### ZipArchiveMode.Update loads entries into memory -Don't use `ZipArchiveMode.Update` for large or untrusted archives. When you open a `ZipArchive` in `Update` mode, each entry's uncompressed data is loaded into a `MemoryStream` when that entry is accessed. The runtime requires a seekable stream for Update mode and decompresses each entry fully into memory to support in-place modifications. For large or malicious archives, this behavior can cause `OutOfMemoryException`. +Don't use `ZipArchiveMode.Update` for large or untrusted archives. 
When you open a `ZipArchive` in `Update` mode and call `Open()` or `OpenAsync()` on an entry, its uncompressed data is loaded into a `MemoryStream` to support in-place modifications. Accessing entry metadata (such as `FullName`, `Length`, or `ExternalAttributes`) does not trigger decompression. For large or malicious archives, opening entry content streams can cause `OutOfMemoryException`. Additionally, when you open a `ZipArchive` in `Read` mode with an **unseekable** stream (for example, a network stream), the runtime copies the entire stream into a `MemoryStream` up front to enable seeking through the central directory. ```csharp -// ⚠️ In Update mode, each entry is decompressed into memory when accessed +// Update mode: calling entry.Open() loads the full entry into memory using var archive = new ZipArchive(stream, ZipArchiveMode.Update); ``` @@ -168,22 +162,22 @@ using var archive = new ZipArchive(stream, ZipArchiveMode.Update); ### TAR streaming model -`TarReader` reads entries one at a time and doesn't buffer the entire archive. However, for unseekable streams, each entry's `DataStream` is only valid until the next `GetNextEntry()` call. If you need to retain entry data, copy it immediately: +`TarReader` reads entries one at a time and doesn't buffer the entire archive. However, for unseekable streams, each entry's `DataStream` is only valid until the next `GetNextEntry()` call. If you need to retain entry data, either copy it immediately or pass `copyContents: true` to `GetNextEntry()`, which copies the entry data into a separate `MemoryStream` that remains valid after advancing: :::code language="csharp" source="./snippets/zip-tar-best-practices/csharp/Program.cs" id="TarStreaming"::: ### Thread safety -`ZipArchive` isn't thread-safe. The internal state—entry lists, stream positions, and disposal flags—isn't synchronized. Don't read or write entries from multiple threads concurrently. 
If you need parallel processing, open a separate `ZipArchive` instance per thread, or synchronize access externally. - -`TarReader` and `TarWriter` are likewise not designed for concurrent use. Each operates on a single underlying stream with sequential access semantics. +`ZipArchive`, `TarReader`, and `TarWriter` are not thread-safe. Don't access an instance from multiple threads concurrently. If you need parallel processing, use a separate instance per thread or synchronize access externally. ## Platform considerations ### Unix file permissions -- **ZIP:** Unix permissions are stored in the upper 16 bits of `ExternalAttributes`. When extracting on Unix, the runtime restores ownership permissions (read/write/execute for user/group/other), subject to the process umask. Permissions aren't applied if the upper bits are zero—this happens when the ZIP was created on Windows, because the Windows runtime sets `DefaultFileExternalAttributes` to `0`. On Windows, these attributes are always ignored during extraction. -- **TAR:** The `TarEntry.Mode` property represents `UnixFileMode` and can store all 12 permission bits (read/write/execute for user/group/other, plus SetUID, SetGID, and StickyBit). However, during **regular file extraction**, only the 9 ownership bits (rwx for user/group/other) are applied—SetUID, SetGID, and StickyBit are explicitly stripped for security. Directories, block devices, character devices, and FIFOs receive the full `Mode` value including SetUID, SetGID, and StickyBit. +- **ZIP:** Unix permissions are stored in the upper 16 bits of `ExternalAttributes`. When extracting on Unix via `ExtractToDirectory` or `ExtractToFile`, the runtime restores ownership permissions (read/write/execute for user/group/other), subject to the process umask. SetUID, SetGID, and StickyBit are stripped. Permissions are not applied if the upper bits are zero. 
This happens when the ZIP was created on Windows, because the Windows runtime sets `DefaultFileExternalAttributes` to `0`. On Windows, these attributes are always ignored during extraction. +- **TAR:** The `TarEntry.Mode` property represents `UnixFileMode` and can store all 12 permission bits (read/write/execute for user/group/other, plus SetUID, SetGID, and StickyBit). When extracting on Unix via `ExtractToDirectory` or `ExtractToFile`, the runtime applies only the 9 ownership bits (rwx for user/group/other), subject to the process umask. SetUID, SetGID, and StickyBit are stripped for security. + +When processing untrusted archives, validate `TarEntry.Mode` before extracting. An archive could set executable permissions on files that should not be executable. ### Special entry types (TAR) @@ -191,13 +185,13 @@ Block devices, character devices, and FIFOs can only be created on Unix. Extract ### File name sanitization differs by platform -On Windows, the runtime replaces control characters and `"*:<>?|` with underscores via `ArchivingUtils.SanitizeEntryFilePath()`. On Unix, only null characters are replaced. Archive entries with names like `file:name.txt` are renamed to `file_name.txt` on Windows but extracted as-is on Unix. +On Windows, when using `ExtractToDirectory`, the runtime replaces control characters and ``"*:<>?|`` with underscores in entry names. On Unix, only null characters are replaced. Archive entries with names like `file:name.txt` are renamed to `file_name.txt` on Windows but extracted as-is on Unix. The per-entry APIs (`Open()`, `ExtractToFile()`) do not perform any name sanitization. ## Data integrity ZIP entries include a CRC-32 checksum that you can use to verify data hasn't been corrupted or tampered with. -Starting with .NET 11, the runtime validates CRC-32 checksums automatically when reading ZIP entries. 
When you read an entry's data stream to completion, the runtime compares the computed CRC of the decompressed data against the checksum stored in the archive. If they don't match, an `InvalidDataException` is thrown. +Starting with .NET 11, the runtime validates CRC-32 checksums automatically when reading ZIP entries. When you read an entry's data stream to completion, the runtime compares the computed CRC of the decompressed data against the checksum stored in the archive. If they don't match, an `InvalidDataException` is thrown. .NET 11 also validates CRC-32 checksums in TAR entry headers. > [!NOTE] > In prior versions of .NET, no CRC validation was performed on read. The runtime computed CRC values when writing entries (for storage in the archive), but never verified them during extraction. If you're targeting a runtime older than .NET 11, be aware that corrupt or tampered entries are silently accepted. @@ -209,20 +203,14 @@ Starting with .NET 11, the runtime validates CRC-32 checksums automatically when ### ZIP comments and extra fields -Treat all archive metadata as untrusted input. ZIP archives can contain attacker-controlled metadata beyond entry names: - -- **Archive and entry comments** are arbitrary strings encoded using either Code Page 437 or UTF-8 (depending on the language encoding flag). If your application displays or processes comments, sanitize them appropriately. -- **Extra fields** are binary key-value pairs attached to each entry. The runtime preserves unknown extra fields and trailing data when reading and writing archives in `Update` mode—they're round-tripped as-is. If your application reads or interprets extra fields, validate their contents. -- **Entry name encoding** defaults to the system codepage for entries without the language encoding flag (EFS) set, and UTF-8 when EFS is set. 
The ZIP specification defines Code Page 437 as the default, but in practice, most tools (including the Windows Shell zip tool) use the local system codepage instead, and .NET follows the same behavior. When interoperating with archives from other tools, mismatched encodings can produce garbled file names. Use the `entryNameEncoding` parameter on `ZipArchive` to override encoding when needed, but be aware the override affects all entries uniformly. - -### TAR header-driven memory allocation - -TAR entry headers contain size fields that the parser uses to allocate buffers. A malicious TAR archive can declare an extremely large size for a PAX extended attributes block or a GNU long-path entry, causing the parser to attempt a large memory allocation. The runtime does include a `ValidateSize()` guard that rejects allocations exceeding `Array.MaxLength` (~2 GB), so allocations aren't completely unbounded—but values up to ~2 GB can still cause significant memory pressure. Your entry-count and per-entry-size limits (described in [Enforce size and entry count limits](#enforce-size-and-entry-count-limits)) also help mitigate this risk, since these metadata entries are counted and sized like regular entries. +- **Archive and entry comments** are arbitrary strings. If your application displays or processes comments, sanitize them appropriately. +- **Extra fields** are binary key-value pairs attached to each entry. The runtime preserves unknown extra fields and trailing data when reading and writing archives in `Update` mode and round-trips them as-is. If your application reads or interprets extra fields, validate their contents. +- **Entry name encoding:** when writing, the runtime uses ASCII for entry names that contain only printable characters (32-126) and UTF-8 (with the language encoding flag set) for names that contain other characters. 
When reading without a custom encoding, entries are decoded as UTF-8 whether or not the language encoding flag is set. Use the `entryNameEncoding` parameter on `ZipArchive` to override encoding when needed, but be aware the override affects all entries uniformly. -## Encryption considerations (preview) +## Encryption considerations (.NET 11+) > [!NOTE] -> ZIP encryption support (ZipCrypto and WinZip AES) is a preview feature that isn't yet publicly available. The APIs described in this section are subject to change. +> ZIP encryption support (ZipCrypto and WinZip AES) is new in .NET 11. .NET 11 adds support for reading and writing encrypted ZIP archives using WinZip-compatible encryption. The `ZipEncryptionMethod` enum specifies the encryption method: @@ -233,7 +221,7 @@ TAR entry headers contain size fields that the parser uses to allocate buffers. | `Aes128` | WinZip AES-128. | | `Aes192` | WinZip AES-192. | | `Aes256` | WinZip AES-256. **Recommended**—strongest available option. | -| `Unknown` | Returned when the entry uses PKWare strong encryption, which .NET doesn't support. | +| `Unknown` | Returned when the entry uses an encryption method that .NET does not support. | ### Choose AES-256 for new archives @@ -258,7 +246,7 @@ foreach (ZipArchiveEntry entry in archive.Entries) { if (entry.EncryptionMethod == ZipEncryptionMethod.Unknown) { - // PKWare strong encryption — not supported by .NET + // Unsupported encryption method, skip this entry continue; } @@ -267,7 +255,7 @@ foreach (ZipArchiveEntry entry in archive.Entries) } ``` -Attempting to open an entry that uses PKWare strong encryption (`ZipEncryptionMethod.Unknown`) throws `NotSupportedException`. +Attempting to open an entry that uses an unsupported encryption method (`ZipEncryptionMethod.Unknown`) throws `NotSupportedException`. 
### Convenience methods with encryption @@ -301,7 +289,6 @@ Before deploying code that handles archives from untrusted sources, verify you'v - [ ] **Memory limits:** Avoid `ZipArchiveMode.Update` for large untrusted archives. Avoid `Read` mode with unseekable streams from untrusted sources. - [ ] **Thread safety:** Don't share `ZipArchive`, `TarReader`, or `TarWriter` instances across threads. - [ ] **Untrusted metadata:** Treat entry names, comments, and extra fields as untrusted input. Sanitize before display or processing. -- [ ] **File name validation:** On Windows, guard against reserved names (`CON`, `PRN`, `AUX`, `NUL`). - [ ] **Overwrite behavior:** Default to `overwrite: false`. - [ ] **Resource disposal:** Always dispose `ZipArchive`, `TarReader`, `TarWriter`, and their streams. From aa7cc00c8bad9c724420d714b71b93f027d73a5b Mon Sep 17 00:00:00 2001 From: alinpahontu2912 Date: Thu, 16 Apr 2026 10:30:44 +0200 Subject: [PATCH 3/3] address comments --- .../zip-tar-best-practices/csharp/Program.cs | 24 ++++++++++--------- docs/standard/io/zip-tar-best-practices.md | 21 +++++++++------- 2 files changed, 25 insertions(+), 20 deletions(-) diff --git a/docs/standard/io/snippets/zip-tar-best-practices/csharp/Program.cs b/docs/standard/io/snippets/zip-tar-best-practices/csharp/Program.cs index 2f35a194a997d..2ce4693a24ad4 100644 --- a/docs/standard/io/snippets/zip-tar-best-practices/csharp/Program.cs +++ b/docs/standard/io/snippets/zip-tar-best-practices/csharp/Program.cs @@ -48,13 +48,11 @@ void ValidatePaths(ZipArchive archive, string destinationDir) foreach (ZipArchiveEntry entry in archive.Entries) { - string destPath = Path.GetFullPath(Path.Combine(fullDestDir, entry.FullName)); + string destPath = Path.GetFullPath(Path.Join(fullDestDir, entry.FullName)); if (!destPath.StartsWith(fullDestDir, StringComparison.Ordinal)) throw new IOException( $"Entry '{entry.FullName}' would extract outside the destination directory."); - - // ... 
safe to extract } } // @@ -111,7 +109,7 @@ void SafeExtractZip(string archivePath, string destinationDir, // Resolve the full destination path using Path.GetFullPath, which // normalizes away any "../" segments. Then verify the result still // starts with the destination directory. - string destPath = Path.GetFullPath(Path.Combine(fullDestDir, entry.FullName)); + string destPath = Path.GetFullPath(Path.Join(fullDestDir, entry.FullName)); if (!destPath.StartsWith(fullDestDir, StringComparison.Ordinal)) throw new IOException( $"Entry '{entry.FullName}' would extract outside the destination."); @@ -184,12 +182,11 @@ void SafeExtractTar(Stream archiveStream, string destinationDir, { Directory.CreateDirectory(destPath); } - else if (entry.DataStream is not null) + else if (entry.EntryType is TarEntryType.RegularFile or TarEntryType.V7RegularFile or TarEntryType.ContiguousFile) { // Create the parent directory and any missing intermediate directories. Directory.CreateDirectory(Path.GetDirectoryName(destPath)!); - using var fileStream = File.Create(destPath); - entry.DataStream.CopyTo(fileStream); + entry.ExtractToFile(destPath, overwrite: false); } } } @@ -240,19 +237,24 @@ void StreamingModify() // // -void TarStreamingRead(Stream archiveStream) +void TarStreamingRead(Stream archiveStream, string destDir) { using var reader = new TarReader(archiveStream); TarEntry? entry; while ((entry = reader.GetNextEntry()) is not null) { + // DataStream is only valid until the next GetNextEntry() call, + // so consume or copy the data before advancing. 
if (entry.DataStream is not null) { - string safePath = "output.bin"; - // Copy now — the stream becomes invalid after the next GetNextEntry() call - using var fileStream = File.Create(safePath); + string destPath = Path.Join(destDir, entry.Name); + using var fileStream = File.Create(destPath); entry.DataStream.CopyTo(fileStream); } } + + // Alternatively, pass copyContents: true to retain entry data + // in a separate MemoryStream that remains valid after advancing: + // entry = reader.GetNextEntry(copyContents: true); } // diff --git a/docs/standard/io/zip-tar-best-practices.md b/docs/standard/io/zip-tar-best-practices.md index f390e8dd4051d..375ccaf282f76 100644 --- a/docs/standard/io/zip-tar-best-practices.md +++ b/docs/standard/io/zip-tar-best-practices.md @@ -37,12 +37,15 @@ This article helps you choose the right API, use the convenience methods effecti ### Streaming APIs (entry-by-entry control) -- `ZipArchive`—open an archive, iterate entries, read or write selectively. -- `TarReader` / `TarWriter`—sequential entry-by-entry access. +- `ZipArchive`—open an archive, iterate entries, read or write selectively. Use `ZipArchiveEntry.ExtractToFile` to extract individual entries, or `ZipArchive.ExtractToDirectory` to extract all entries from an already-opened archive. +- `TarReader` / `TarWriter`—sequential entry-by-entry access. Use `TarEntry.ExtractToFile` to extract individual entries. - Best for: large archives, selective extraction, untrusted input, custom processing. If you control the archive source (your own build output, known-safe backups), the convenience APIs are the simplest choice. If the archive comes from an external source (user uploads, downloads, network transfers), use the streaming APIs with the safety checks described in this article. +> [!CAUTION] +> A ZIP archive primarily transmits files, while a TAR archive transmits a filesystem topology, including file types, symbolic links, hard links, permissions, and other metadata. 
This gives the TAR extraction process much more control over how data is represented on disk. Because these structures are meaningful to the filesystem, an adversary can influence security-impacting behaviors beyond filenames and file contents. Exercise extra caution when processing untrusted TAR archives. + ## Work with trusted archives When the archive source is known and trusted, the convenience methods give you a safe, one-line extraction path: @@ -52,7 +55,7 @@ When the archive source is known and trusted, the convenience methods give you a - When overwriting is enabled during ZIP extraction, .NET extracts to a temporary file first and only replaces the target after successful extraction. This prevents partial corruption if the extraction fails. > [!NOTE] -> The convenience methods don't limit decompressed size or entry count. If that matters even for trusted input (for example, very large archives), use the streaming approach described in [Handle untrusted archives safely](#handle-untrusted-archives-safely). +> The convenience methods don't enforce size limits, entry count limits, or other policies needed for safe extraction of untrusted archives. If that matters even for trusted input (for example, very large archives), use the streaming approach described in [Handle untrusted archives safely](#handle-untrusted-archives-safely). ## Handle untrusted archives safely @@ -145,11 +148,11 @@ TAR extraction differs from ZIP in several ways: entries are read sequentially ( ## Memory and performance considerations -### ZipArchiveMode.Update loads entries into memory +### ZipArchive memory usage -Don't use `ZipArchiveMode.Update` for large or untrusted archives. When you open a `ZipArchive` in `Update` mode and call `Open()` or `OpenAsync()` on an entry, its uncompressed data is loaded into a `MemoryStream` to support in-place modifications. Accessing entry metadata (such as `FullName`, `Length`, or `ExternalAttributes`) does not trigger decompression. 
For large or malicious archives, opening entry content streams can cause `OutOfMemoryException`. +Don't use `ZipArchiveMode.Update` for large or untrusted archives. When you open a `ZipArchive` in `Update` mode and call `Open()` or `OpenAsync()` on an entry, its uncompressed data is loaded into a `MemoryStream` to support in-place modifications. Accessing entry metadata (such as `FullName`, `Length`, or `ExternalAttributes`) does not trigger decompression. For large or malicious archives, opening entry content streams can cause `OutOfMemoryException`. Check `ZipArchiveEntry.Length` before calling `Open()` to avoid decompressing unexpectedly large entries. -Additionally, when you open a `ZipArchive` in `Read` mode with an **unseekable** stream (for example, a network stream), the runtime copies the entire stream into a `MemoryStream` up front to enable seeking through the central directory. +Additionally, when you open a `ZipArchive` in `Read` mode with an **unseekable** stream (for example, a network stream), the runtime buffers the entire archive contents in memory to enable seeking through the central directory. ```csharp // Update mode: calling entry.Open() loads the full entry into memory @@ -177,7 +180,7 @@ using var archive = new ZipArchive(stream, ZipArchiveMode.Update); - **ZIP:** Unix permissions are stored in the upper 16 bits of `ExternalAttributes`. When extracting on Unix via `ExtractToDirectory` or `ExtractToFile`, the runtime restores ownership permissions (read/write/execute for user/group/other), subject to the process umask. SetUID, SetGID, and StickyBit are stripped. Permissions are not applied if the upper bits are zero. This happens when the ZIP was created on Windows, because the Windows runtime sets `DefaultFileExternalAttributes` to `0`. On Windows, these attributes are always ignored during extraction. 
- **TAR:** The `TarEntry.Mode` property represents `UnixFileMode` and can store all 12 permission bits (read/write/execute for user/group/other, plus SetUID, SetGID, and StickyBit). When extracting on Unix via `ExtractToDirectory` or `ExtractToFile`, the runtime applies only the 9 ownership bits (rwx for user/group/other), subject to the process umask. SetUID, SetGID, and StickyBit are stripped for security. -When processing untrusted archives, validate `TarEntry.Mode` before extracting. An archive could set executable permissions on files that should not be executable. +When processing untrusted archives, be aware that extracted files may have executable permissions set by the archive author. Untrusted archives could contain malicious executable files. ### Special entry types (TAR) @@ -185,11 +188,11 @@ Block devices, character devices, and FIFOs can only be created on Unix. Extract ### File name sanitization differs by platform -On Windows, when using `ExtractToDirectory`, the runtime replaces control characters and ``"*:<>?|`` with underscores in entry names. On Unix, only null characters are replaced. Archive entries with names like `file:name.txt` are renamed to `file_name.txt` on Windows but extracted as-is on Unix. The per-entry APIs (`Open()`, `ExtractToFile()`) do not perform any name sanitization. +On Windows, when using `ExtractToDirectory`, the runtime replaces control characters and ``"*:<>?|`` with underscores in entry names. On Unix, only null characters are replaced. Archive entries with names like `file:name.txt` are renamed to `file_name.txt` on Windows but extracted as-is on Unix. The per-entry APIs (`Open()`, `ExtractToFile()`) do not perform any name sanitization, so when using them with entry names from untrusted archives, validate the name and path before extracting (as shown in the [Validate destination paths](#validate-destination-paths) section). 
## Data integrity -ZIP entries include a CRC-32 checksum that you can use to verify data hasn't been corrupted or tampered with. +Both ZIP and TAR formats include checksums that you can use to detect accidental data corruption. These checksums aren't cryptographic, so they don't protect against deliberate tampering. Starting with .NET 11, the runtime validates CRC-32 checksums automatically when reading ZIP entries. When you read an entry's data stream to completion, the runtime compares the computed CRC of the decompressed data against the checksum stored in the archive. If they don't match, an `InvalidDataException` is thrown. .NET 11 also validates CRC-32 checksums in TAR entry headers.