// minDeflateSize is the minimum payload size in bytes before compression is
// attempted.
const minDeflateSize uint32 = 512

// incompressibleTypes lists media types (lowercased, without parameters) that
// are already compressed or otherwise unlikely to benefit from zlib-deflate.
var incompressibleTypes = map[string]bool{
	// images
	"image/jpeg": true,
	"image/png":  true,
	"image/gif":  true,
	"image/webp": true,
	"image/heic": true,
	"image/avif": true,
	"image/apng": true,
	// audio
	"audio/aac":  true,
	"audio/mpeg": true,
	"audio/ogg":  true,
	"audio/opus": true,
	"audio/webm": true,
	// video
	"video/h264": true,
	"video/h265": true,
	"video/h266": true,
	"video/ogg":  true,
	"video/vp8":  true,
	"video/vp9":  true,
	"video/webm": true,
	// archives / compressed containers
	"application/gzip":         true,
	"application/zip":          true,
	"application/epub+zip":     true,
	"application/octet-stream": true,
	// zip-based office formats
	"application/vnd.oasis.opendocument.presentation":                           true,
	"application/vnd.oasis.opendocument.spreadsheet":                            true,
	"application/vnd.oasis.opendocument.text":                                   true,
	"application/vnd.openxmlformats-officedocument.presentationml.presentation": true,
	"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":         true,
	"application/vnd.openxmlformats-officedocument.wordprocessingml.document":   true,
	"application/vnd.amazon.ebook":                                              true,
	// fonts (compressed)
	"font/woff":  true,
	"font/woff2": true,
	// pdf (internally compressed)
	"application/pdf": true,
	// 3d models (compressed containers)
	"model/3mf":          true,
	"model/gltf-binary":  true,
	"model/vnd.usdz+zip": true,
}

// shouldCompress reports whether compression should be attempted for a payload
// with the given media type and size. It returns false for payloads smaller
// than minDeflateSize and for media types listed in incompressibleTypes.
//
// The media type is matched case-insensitively. Parameters (everything from
// the first ';') and whitespace surrounding the base type are ignored, so
// "Image/PNG ; q=1" and " image/png" are both treated as "image/png".
// Unknown media types default to true.
func shouldCompress(mediaType string, dataSize uint32) bool {
	if dataSize < minDeflateSize {
		return false
	}
	base := mediaType
	if i := strings.IndexByte(base, ';'); i >= 0 {
		base = base[:i]
	}
	base = strings.ToLower(strings.TrimSpace(base))
	return !incompressibleTypes[base]
}

// deflateSampleSize is the number of bytes sampled from the start of a file
// to estimate compressibility before committing to a full-file compression
// pass. Chosen large enough for zlib to find patterns but small enough to be
// fast even on very large files.
const deflateSampleSize = 8192

// probeSample compresses up to deflateSampleSize bytes (or srcSize bytes, if
// smaller) read from the current position of src into memory and reports
// whether the ratio looks promising (compressed < 80% of the sample).
//
// On success src is seeked back to the start of the file. An error is
// returned if fewer bytes than the sample length can be read, if compression
// fails, or if the seek fails; src's position is unspecified in that case.
func probeSample(src *os.File, srcSize uint32) (bool, error) {
	sampleLen := int64(deflateSampleSize)
	if int64(srcSize) < sampleLen {
		sampleLen = int64(srcSize)
	}

	var buf bytes.Buffer
	zw := zlib.NewWriter(&buf)
	if _, err := io.CopyN(zw, src, sampleLen); err != nil {
		// Best-effort close: the copy error is the one worth reporting.
		_ = zw.Close()
		return false, err
	}
	// Close flushes the remaining compressed bytes, so buf.Len() is only
	// meaningful afterwards.
	if err := zw.Close(); err != nil {
		return false, err
	}

	if _, err := src.Seek(0, io.SeekStart); err != nil {
		return false, err
	}

	return int64(buf.Len()) < sampleLen*8/10, nil
}

// tryCompress compresses the file at srcPath using zlib-deflate and writes the
// result to a temporary file. When srcSize is larger than deflateSampleSize it
// first compresses a prefix sample to estimate compressibility, avoiding a
// full pass over files that won't compress well.
//
// It returns worthwhile=true only when the compressed output is less than 80%
// of srcSize (at least a 20% reduction); dstPath and compressedSize are only
// meaningful in that case. When not worthwhile, or on error, the temporary
// file is removed and dstPath is empty. When worthwhile the caller is
// responsible for removing the file at dstPath.
func tryCompress(srcPath string, srcSize uint32) (dstPath string, compressedSize uint32, worthwhile bool, err error) {
	src, err := os.Open(srcPath)
	if err != nil {
		return "", 0, false, err
	}
	defer src.Close()

	// For files larger than the sample size, probe a prefix first.
	if srcSize > deflateSampleSize {
		promising, perr := probeSample(src, srcSize)
		if perr != nil {
			return "", 0, false, perr
		}
		if !promising {
			return "", 0, false, nil
		}
	}

	dst, err := os.CreateTemp("", "fmsg-deflate-*")
	if err != nil {
		return "", 0, false, err
	}
	dstName := dst.Name()

	// The temp file is removed on every exit path except the successful,
	// worthwhile one, where ownership passes to the caller.
	keep := false
	defer func() {
		if !keep {
			_ = os.Remove(dstName) // best-effort cleanup
		}
	}()

	zw := zlib.NewWriter(dst)
	if _, err := io.Copy(zw, src); err != nil {
		// Best-effort closes: the copy error is the one worth reporting.
		_ = zw.Close()
		_ = dst.Close()
		return "", 0, false, err
	}
	if err := zw.Close(); err != nil {
		_ = dst.Close()
		return "", 0, false, err
	}
	if err := dst.Close(); err != nil {
		return "", 0, false, err
	}

	fi, err := os.Stat(dstName)
	if err != nil {
		return "", 0, false, err
	}

	// Compare in int64: srcSize*8 overflows uint32 once srcSize reaches 2^29,
	// which would make the threshold wrap to a meaningless value. Because the
	// threshold is below 2^32, any size that passes also fits in uint32.
	threshold := int64(srcSize) * 8 / 10
	if fi.Size() >= threshold {
		return "", 0, false, nil
	}

	keep = true
	return dstName, uint32(fi.Size()), true, nil
}
"application/vnd.ms-excel", + "application/vnd.ms-powerpoint", + "image/svg+xml", + "audio/midi", + "model/obj", + "model/step", + "model/stl", + } + for _, mt := range compressible { + if !shouldCompress(mt, 1024) { + t.Errorf("shouldCompress(%q, 1024) = false, want true", mt) + } + } +} + +func TestShouldDeflate_IncompressibleTypes(t *testing.T) { + skip := []string{ + "image/jpeg", + "image/png", + "image/gif", + "image/webp", + "image/heic", + "image/avif", + "image/apng", + "audio/aac", + "audio/mpeg", + "audio/ogg", + "audio/opus", + "audio/webm", + "video/H264", + "video/H265", + "video/H266", + "video/ogg", + "video/VP8", + "video/VP9", + "video/webm", + "application/gzip", + "application/zip", + "application/epub+zip", + "application/octet-stream", + "application/pdf", + "application/vnd.oasis.opendocument.presentation", + "application/vnd.oasis.opendocument.spreadsheet", + "application/vnd.oasis.opendocument.text", + "application/vnd.openxmlformats-officedocument.presentationml.presentation", + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + "application/vnd.amazon.ebook", + "font/woff", + "font/woff2", + "model/3mf", + "model/gltf-binary", + "model/vnd.usdz+zip", + } + for _, mt := range skip { + if shouldCompress(mt, 1024) { + t.Errorf("shouldCompress(%q, 1024) = true, want false", mt) + } + } +} + +func TestShouldDeflate_SmallPayload(t *testing.T) { + sizes := []uint32{0, 1, 100, 511} + for _, sz := range sizes { + if shouldCompress("text/plain;charset=UTF-8", sz) { + t.Errorf("shouldCompress(text/plain, %d) = true, want false", sz) + } + } +} + +func TestShouldDeflate_EdgeCases(t *testing.T) { + // Exactly at threshold: should attempt + if !shouldCompress("text/plain;charset=UTF-8", 512) { + t.Error("shouldDeflate at threshold 512 should return true") + } + // Unknown type: default to try compression + if !shouldCompress("application/x-custom", 1024) { 
+ t.Error("shouldDeflate for unknown type should return true") + } + // Type with parameters should match base type + if shouldCompress("application/pdf; charset=utf-8", 1024) { + t.Error("shouldDeflate should strip parameters and match application/pdf") + } + // Case insensitive + if shouldCompress("VIDEO/H264", 1024) { + t.Error("shouldDeflate should be case-insensitive") + } +} + +// --- tryDeflate tests --- + +func writeTempFile(t *testing.T, data []byte) string { + t.Helper() + f, err := os.CreateTemp("", "deflate-test-*") + if err != nil { + t.Fatal(err) + } + if _, err := f.Write(data); err != nil { + f.Close() + os.Remove(f.Name()) + t.Fatal(err) + } + f.Close() + return f.Name() +} + +func TestTryDeflate_CompressibleData(t *testing.T) { + original := []byte(strings.Repeat("hello world, this is compressible text data! ", 100)) + srcPath := writeTempFile(t, original) + defer os.Remove(srcPath) + + dstPath, cSize, worthwhile, err := tryCompress(srcPath, uint32(len(original))) + if err != nil { + t.Fatal(err) + } + if !worthwhile { + t.Fatal("expected compression to be worthwhile for repetitive text") + } + defer os.Remove(dstPath) + + if cSize >= uint32(len(original))*8/10 { + t.Errorf("compressed size %d not < 80%% of original %d", cSize, len(original)) + } + + // Verify the compressed file decompresses to the original data + f, err := os.Open(dstPath) + if err != nil { + t.Fatal(err) + } + defer f.Close() + + zr, err := zlib.NewReader(f) + if err != nil { + t.Fatal(err) + } + decompressed, err := io.ReadAll(zr) + zr.Close() + if err != nil { + t.Fatal(err) + } + if !bytes.Equal(decompressed, original) { + t.Error("decompressed data does not match original") + } +} + +func TestTryDeflate_IncompressibleData(t *testing.T) { + // Random bytes are effectively incompressible + data := make([]byte, 2048) + if _, err := rand.Read(data); err != nil { + t.Fatal(err) + } + srcPath := writeTempFile(t, data) + defer os.Remove(srcPath) + + _, _, worthwhile, err := 
tryCompress(srcPath, uint32(len(data))) + if err != nil { + t.Fatal(err) + } + if worthwhile { + t.Error("expected compression of random data to not be worthwhile") + } +} + +func TestTryDeflate_RoundTrip(t *testing.T) { + original := []byte(strings.Repeat("Round-trip test data with enough repetition to compress well. ", 50)) + srcPath := writeTempFile(t, original) + defer os.Remove(srcPath) + + dstPath, cSize, worthwhile, err := tryCompress(srcPath, uint32(len(original))) + if err != nil { + t.Fatal(err) + } + if !worthwhile { + t.Fatal("expected compression to be worthwhile") + } + defer os.Remove(dstPath) + + // Read compressed file + compressed, err := os.ReadFile(dstPath) + if err != nil { + t.Fatal(err) + } + if uint32(len(compressed)) != cSize { + t.Errorf("compressed file size %d != reported size %d", len(compressed), cSize) + } + + // Decompress and verify + zr, err := zlib.NewReader(bytes.NewReader(compressed)) + if err != nil { + t.Fatal(err) + } + decompressed, err := io.ReadAll(zr) + zr.Close() + if err != nil { + t.Fatal(err) + } + if !bytes.Equal(decompressed, original) { + t.Errorf("round-trip mismatch: got %d bytes, want %d bytes", len(decompressed), len(original)) + } +} + +func TestTryDeflate_CleanupOnNotWorthwhile(t *testing.T) { + // Random data won't compress well — the temp file should be removed + data := make([]byte, 2048) + if _, err := rand.Read(data); err != nil { + t.Fatal(err) + } + srcPath := writeTempFile(t, data) + defer os.Remove(srcPath) + + dstPath, _, worthwhile, err := tryCompress(srcPath, uint32(len(data))) + if err != nil { + t.Fatal(err) + } + if worthwhile { + defer os.Remove(dstPath) + t.Fatal("expected not worthwhile for random data") + } + // dstPath should be empty and no leaked temp file + if dstPath != "" { + t.Errorf("expected empty dstPath when not worthwhile, got %q", dstPath) + } +} + +func TestTryDeflate_ProbeRejectsLargeIncompressible(t *testing.T) { + // A file larger than deflateSampleSize filled with random 
bytes should be + // rejected by the sample probe without writing a full compressed file. + data := make([]byte, deflateSampleSize+4096) + if _, err := rand.Read(data); err != nil { + t.Fatal(err) + } + srcPath := writeTempFile(t, data) + defer os.Remove(srcPath) + + _, _, worthwhile, err := tryCompress(srcPath, uint32(len(data))) + if err != nil { + t.Fatal(err) + } + if worthwhile { + t.Error("expected probe to reject large random data") + } +} + +func TestTryDeflate_ProbeAcceptsLargeCompressible(t *testing.T) { + // A file larger than deflateSampleSize filled with repetitive text should + // pass the probe and compress the full file successfully. + data := []byte(strings.Repeat("probe compressible test data! ", 1000)) + if len(data) <= deflateSampleSize { + t.Fatalf("test data %d bytes not larger than sample size %d", len(data), deflateSampleSize) + } + srcPath := writeTempFile(t, data) + defer os.Remove(srcPath) + + dstPath, cSize, worthwhile, err := tryCompress(srcPath, uint32(len(data))) + if err != nil { + t.Fatal(err) + } + if !worthwhile { + t.Fatal("expected large compressible data to be worthwhile") + } + defer os.Remove(dstPath) + + if cSize >= uint32(len(data))*8/10 { + t.Errorf("compressed size %d not < 80%% of original %d", cSize, len(data)) + } + + // Verify round-trip + f, err := os.Open(dstPath) + if err != nil { + t.Fatal(err) + } + defer f.Close() + zr, err := zlib.NewReader(f) + if err != nil { + t.Fatal(err) + } + decompressed, err := io.ReadAll(zr) + zr.Close() + if err != nil { + t.Fatal(err) + } + if !bytes.Equal(decompressed, data) { + t.Error("decompressed data does not match original") + } +} + +// --- Hash determinism tests --- + +func TestGetMessageHash_WithDeflate(t *testing.T) { + // Create repetitive data that compresses well + original := []byte(strings.Repeat("deflate hash test data ", 100)) + srcPath := writeTempFile(t, original) + defer os.Remove(srcPath) + + // Compress it + dstPath, cSize, worthwhile, err := 
tryCompress(srcPath, uint32(len(original))) + if err != nil { + t.Fatal(err) + } + if !worthwhile { + t.Fatal("expected compression to be worthwhile") + } + defer os.Remove(dstPath) + + // Build header with deflate flag pointing at compressed file + h := &FMsgHeader{ + Version: 1, + Flags: FlagDeflate, + From: FMsgAddress{User: "alice", Domain: "example.com"}, + To: []FMsgAddress{{User: "bob", Domain: "other.com"}}, + Topic: "test", + Type: "text/plain;charset=UTF-8", + Size: cSize, + Filepath: dstPath, + } + + msgHash, err := h.GetMessageHash() + if err != nil { + t.Fatal(err) + } + + // Manually compute expected: SHA-256(encoded header + decompressed data) + expected := sha256.New() + expected.Write(h.Encode()) + expected.Write(original) + expectedHash := expected.Sum(nil) + + if !bytes.Equal(msgHash, expectedHash) { + t.Errorf("hash mismatch:\n got %x\n want %x", msgHash, expectedHash) + } +} + +func TestGetMessageHash_WithoutDeflate(t *testing.T) { + original := []byte(strings.Repeat("no deflate hash test ", 100)) + srcPath := writeTempFile(t, original) + defer os.Remove(srcPath) + + h := &FMsgHeader{ + Version: 1, + Flags: 0, + From: FMsgAddress{User: "alice", Domain: "example.com"}, + To: []FMsgAddress{{User: "bob", Domain: "other.com"}}, + Topic: "test", + Type: "text/plain;charset=UTF-8", + Size: uint32(len(original)), + Filepath: srcPath, + } + + msgHash, err := h.GetMessageHash() + if err != nil { + t.Fatal(err) + } + + expected := sha256.New() + expected.Write(h.Encode()) + expected.Write(original) + expectedHash := expected.Sum(nil) + + if !bytes.Equal(msgHash, expectedHash) { + t.Errorf("hash mismatch:\n got %x\n want %x", msgHash, expectedHash) + } +} + +func TestGetMessageHash_DeflateChangesHash(t *testing.T) { + // The same data produces different message hashes depending on whether + // it is deflated, because the header bytes differ (flags and size fields). 
+ original := []byte(strings.Repeat("deflate vs plain ", 100)) + srcPath := writeTempFile(t, original) + defer os.Remove(srcPath) + + dstPath, cSize, worthwhile, err := tryCompress(srcPath, uint32(len(original))) + if err != nil { + t.Fatal(err) + } + if !worthwhile { + t.Fatal("expected compression to be worthwhile") + } + defer os.Remove(dstPath) + + base := FMsgHeader{ + Version: 1, + From: FMsgAddress{User: "alice", Domain: "example.com"}, + To: []FMsgAddress{{User: "bob", Domain: "other.com"}}, + Topic: "test", + Type: "text/plain;charset=UTF-8", + } + + // Hash without deflate + plain := base + plain.Flags = 0 + plain.Size = uint32(len(original)) + plain.Filepath = srcPath + hashPlain, err := plain.GetMessageHash() + if err != nil { + t.Fatal(err) + } + + // Hash with deflate + deflated := base + deflated.Flags = FlagDeflate + deflated.Size = cSize + deflated.Filepath = dstPath + hashDeflated, err := deflated.GetMessageHash() + if err != nil { + t.Fatal(err) + } + + if bytes.Equal(hashPlain, hashDeflated) { + t.Error("expected different hashes for deflated vs non-deflated wire representations") + } +} + +func TestGetMessageHash_AttachmentDeflate(t *testing.T) { + msgData := []byte("short message body that fits in a file") + msgPath := writeTempFile(t, msgData) + defer os.Remove(msgPath) + + attOriginal := []byte(strings.Repeat("attachment data for compression test ", 100)) + attSrcPath := writeTempFile(t, attOriginal) + defer os.Remove(attSrcPath) + + attDstPath, attCSize, worthwhile, err := tryCompress(attSrcPath, uint32(len(attOriginal))) + if err != nil { + t.Fatal(err) + } + if !worthwhile { + t.Fatal("expected attachment compression to be worthwhile") + } + defer os.Remove(attDstPath) + + h := &FMsgHeader{ + Version: 1, + Flags: 0, + From: FMsgAddress{User: "alice", Domain: "example.com"}, + To: []FMsgAddress{{User: "bob", Domain: "other.com"}}, + Topic: "test", + Type: "text/plain;charset=UTF-8", + Size: uint32(len(msgData)), + Filepath: msgPath, + 
Attachments: []FMsgAttachmentHeader{ + { + Flags: 1 << 1, // attachment deflate bit + Type: "text/csv", + Filename: "data.csv", + Size: attCSize, + Filepath: attDstPath, + }, + }, + } + + msgHash, err := h.GetMessageHash() + if err != nil { + t.Fatal(err) + } + + // Manually compute: SHA-256(header + msg data + decompressed attachment) + expected := sha256.New() + expected.Write(h.Encode()) + expected.Write(msgData) + expected.Write(attOriginal) + expectedHash := expected.Sum(nil) + + if !bytes.Equal(msgHash, expectedHash) { + t.Errorf("attachment hash mismatch:\n got %x\n want %x", msgHash, expectedHash) + } +} + +// --- Encode flag tests --- + +func TestEncode_DeflateFlag(t *testing.T) { + h := &FMsgHeader{ + Version: 1, + Flags: FlagDeflate, + From: FMsgAddress{User: "alice", Domain: "example.com"}, + To: []FMsgAddress{{User: "bob", Domain: "other.com"}}, + Topic: "test", + Type: "text/plain;charset=UTF-8", + } + b := h.Encode() + if b[1]&FlagDeflate == 0 { + t.Error("deflate flag bit (5) not set in encoded header flags byte") + } +} + +func TestEncode_AttachmentDeflateFlag(t *testing.T) { + h := &FMsgHeader{ + Version: 1, + Flags: 0, + From: FMsgAddress{User: "alice", Domain: "example.com"}, + To: []FMsgAddress{{User: "bob", Domain: "other.com"}}, + Topic: "test", + Type: "text/plain;charset=UTF-8", + Attachments: []FMsgAttachmentHeader{ + {Flags: 1 << 1, Type: "text/plain", Filename: "test.txt", Size: 100}, + }, + } + b := h.Encode() + // The encoded header ends with attachment headers. Find the attachment + // flags byte: it's the first byte after the attachment count byte. + // The attachment count is at len(b) - (1 + 1 + len("text/plain") + 1 + len("test.txt") + 4) - 1 + // Simpler: just verify the flags byte value appears in the output. + // The attachment count byte (1) followed by attachment flags byte (0x02). 
+ found := false + for i := 0; i < len(b)-1; i++ { + if b[i] == 1 && b[i+1] == (1<<1) { // count=1, flags=0x02 + found = true + break + } + } + if !found { + t.Error("attachment deflate flag bit (1) not found in encoded header") + } +} diff --git a/src/sender.go b/src/sender.go index 1db7148..687d9d4 100644 --- a/src/sender.go +++ b/src/sender.go @@ -304,6 +304,42 @@ func deliverMessage(target pendingTarget) { return } + // Try zlib-deflate compression for message data and attachment data. + // Compressed temp files are cleaned up after delivery completes. + var deflateCleanup []string + defer func() { + for _, p := range deflateCleanup { + _ = os.Remove(p) + } + }() + if shouldCompress(h.Type, h.Size) { + dp, cs, ok, derr := tryCompress(h.Filepath, h.Size) + if derr != nil { + log.Printf("WARN: sender: compress msg data for msg %d: %s", target.MsgID, derr) + } else if ok { + log.Printf("INFO: sender: compressed msg %d data: %d -> %d bytes", target.MsgID, h.Size, cs) + deflateCleanup = append(deflateCleanup, dp) + h.Filepath = dp + h.Size = cs + h.Flags |= FlagDeflate + } + } + for i := range h.Attachments { + att := &h.Attachments[i] + if shouldCompress(att.Type, att.Size) { + dp, cs, ok, derr := tryCompress(att.Filepath, att.Size) + if derr != nil { + log.Printf("WARN: sender: compress attachment %s for msg %d: %s", att.Filename, target.MsgID, derr) + } else if ok { + log.Printf("INFO: sender: compressed msg %d attachment %s: %d -> %d bytes", target.MsgID, att.Filename, att.Size, cs) + deflateCleanup = append(deflateCleanup, dp) + att.Filepath = dp + att.Size = cs + att.Flags |= 1 << 1 + } + } + } + // Ensure sha256 is populated for outgoing messages so future pid lookups // (e.g. add-to notifications referencing this message) can find it. msgHash, err := h.GetMessageHash()