From 8e172db3242ea1537c4bebbbb0037f5c1d030731 Mon Sep 17 00:00:00 2001 From: Mark Mennell Date: Sun, 12 Apr 2026 18:38:28 +0800 Subject: [PATCH 1/3] implement compression --- src/deflate.go | 110 ++++++++++ src/deflate_test.go | 484 ++++++++++++++++++++++++++++++++++++++++++++ src/sender.go | 36 ++++ 3 files changed, 630 insertions(+) create mode 100644 src/deflate.go create mode 100644 src/deflate_test.go diff --git a/src/deflate.go b/src/deflate.go new file mode 100644 index 0000000..64c3c1d --- /dev/null +++ b/src/deflate.go @@ -0,0 +1,110 @@ +package main + +import ( + "compress/zlib" + "io" + "os" + "strings" +) + +// minDeflateSize is the minimum payload size in bytes before compression is +// attempted. Below this, zlib framing overhead likely outweighs savings. +const minDeflateSize uint32 = 512 + +// incompressibleTypes lists media types (lowercased, without parameters) that +// are already compressed or otherwise unlikely to benefit from zlib-deflate. +var incompressibleTypes = map[string]bool{ + // images + "image/jpeg": true, "image/png": true, "image/gif": true, + "image/webp": true, "image/heic": true, "image/avif": true, + "image/apng": true, + // audio + "audio/aac": true, "audio/mpeg": true, "audio/ogg": true, + "audio/opus": true, "audio/webm": true, + // video + "video/h264": true, "video/h265": true, "video/h266": true, + "video/ogg": true, "video/vp8": true, "video/vp9": true, + "video/webm": true, + // archives / compressed containers + "application/gzip": true, "application/zip": true, + "application/epub+zip": true, + "application/octet-stream": true, + // zip-based office formats + "application/vnd.oasis.opendocument.presentation": true, + "application/vnd.oasis.opendocument.spreadsheet": true, + "application/vnd.oasis.opendocument.text": true, + "application/vnd.openxmlformats-officedocument.presentationml.presentation": true, + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": true, + 
"application/vnd.openxmlformats-officedocument.wordprocessingml.document": true, + "application/vnd.amazon.ebook": true, + // fonts (compressed) + "font/woff": true, "font/woff2": true, + // pdf (internally compressed) + "application/pdf": true, + // 3d models (compressed containers) + "model/3mf": true, "model/gltf-binary": true, + "model/vnd.usdz+zip": true, +} + +// shouldDeflate reports whether compression should be attempted for a payload +// with the given media type and size. It returns false for payloads that are +// too small or use a media type known to be already compressed. +func shouldDeflate(mediaType string, dataSize uint32) bool { + if dataSize < minDeflateSize { + return false + } + t := strings.ToLower(mediaType) + if i := strings.IndexByte(t, ';'); i >= 0 { + t = strings.TrimRight(t[:i], " ") + } + return !incompressibleTypes[t] +} + +// tryDeflate compresses the file at srcPath using zlib-deflate and writes the +// result to a temporary file. It returns worthwhile=true only when the +// compressed output is less than 90% of the original size (at least a 10% +// reduction). When not worthwhile the temporary file is removed. 
+func tryDeflate(srcPath string, srcSize uint32) (dstPath string, compressedSize uint32, worthwhile bool, err error) { + src, err := os.Open(srcPath) + if err != nil { + return "", 0, false, err + } + defer src.Close() + + dst, err := os.CreateTemp("", "fmsg-deflate-*") + if err != nil { + return "", 0, false, err + } + dstName := dst.Name() + + zw := zlib.NewWriter(dst) + if _, err := io.Copy(zw, src); err != nil { + _ = zw.Close() + _ = dst.Close() + _ = os.Remove(dstName) + return "", 0, false, err + } + if err := zw.Close(); err != nil { + _ = dst.Close() + _ = os.Remove(dstName) + return "", 0, false, err + } + if err := dst.Close(); err != nil { + _ = os.Remove(dstName) + return "", 0, false, err + } + + fi, err := os.Stat(dstName) + if err != nil { + _ = os.Remove(dstName) + return "", 0, false, err + } + + cSize := uint32(fi.Size()) + if cSize >= srcSize*9/10 { + _ = os.Remove(dstName) + return "", 0, false, nil + } + + return dstName, cSize, true, nil +} diff --git a/src/deflate_test.go b/src/deflate_test.go new file mode 100644 index 0000000..f9e5d5f --- /dev/null +++ b/src/deflate_test.go @@ -0,0 +1,484 @@ +package main + +import ( + "bytes" + "compress/zlib" + "crypto/rand" + "crypto/sha256" + "io" + "os" + "strings" + "testing" +) + +// --- shouldDeflate tests --- + +func TestShouldDeflate_TextTypes(t *testing.T) { + compressible := []string{ + "text/plain;charset=UTF-8", + "text/html", + "text/markdown", + "text/csv", + "text/css", + "text/javascript", + "text/calendar", + "text/vcard", + "text/plain;charset=US-ASCII", + "text/plain;charset=UTF-16", + "application/json", + "application/xml", + "application/xhtml+xml", + "application/rtf", + "application/x-tar", + "application/msword", + "application/vnd.ms-excel", + "application/vnd.ms-powerpoint", + "image/svg+xml", + "audio/midi", + "model/obj", + "model/step", + "model/stl", + } + for _, mt := range compressible { + if !shouldDeflate(mt, 1024) { + t.Errorf("shouldDeflate(%q, 1024) = false, want 
true", mt) + } + } +} + +func TestShouldDeflate_IncompressibleTypes(t *testing.T) { + skip := []string{ + "image/jpeg", + "image/png", + "image/gif", + "image/webp", + "image/heic", + "image/avif", + "image/apng", + "audio/aac", + "audio/mpeg", + "audio/ogg", + "audio/opus", + "audio/webm", + "video/H264", + "video/H265", + "video/H266", + "video/ogg", + "video/VP8", + "video/VP9", + "video/webm", + "application/gzip", + "application/zip", + "application/epub+zip", + "application/octet-stream", + "application/pdf", + "application/vnd.oasis.opendocument.presentation", + "application/vnd.oasis.opendocument.spreadsheet", + "application/vnd.oasis.opendocument.text", + "application/vnd.openxmlformats-officedocument.presentationml.presentation", + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + "application/vnd.amazon.ebook", + "font/woff", + "font/woff2", + "model/3mf", + "model/gltf-binary", + "model/vnd.usdz+zip", + } + for _, mt := range skip { + if shouldDeflate(mt, 1024) { + t.Errorf("shouldDeflate(%q, 1024) = true, want false", mt) + } + } +} + +func TestShouldDeflate_SmallPayload(t *testing.T) { + sizes := []uint32{0, 1, 100, 511} + for _, sz := range sizes { + if shouldDeflate("text/plain;charset=UTF-8", sz) { + t.Errorf("shouldDeflate(text/plain, %d) = true, want false", sz) + } + } +} + +func TestShouldDeflate_EdgeCases(t *testing.T) { + // Exactly at threshold: should attempt + if !shouldDeflate("text/plain;charset=UTF-8", 512) { + t.Error("shouldDeflate at threshold 512 should return true") + } + // Unknown type: default to try compression + if !shouldDeflate("application/x-custom", 1024) { + t.Error("shouldDeflate for unknown type should return true") + } + // Type with parameters should match base type + if shouldDeflate("application/pdf; charset=utf-8", 1024) { + t.Error("shouldDeflate should strip parameters and match application/pdf") + } + // Case 
insensitive + if shouldDeflate("VIDEO/H264", 1024) { + t.Error("shouldDeflate should be case-insensitive") + } +} + +// --- tryDeflate tests --- + +func writeTempFile(t *testing.T, data []byte) string { + t.Helper() + f, err := os.CreateTemp("", "deflate-test-*") + if err != nil { + t.Fatal(err) + } + if _, err := f.Write(data); err != nil { + f.Close() + os.Remove(f.Name()) + t.Fatal(err) + } + f.Close() + return f.Name() +} + +func TestTryDeflate_CompressibleData(t *testing.T) { + original := []byte(strings.Repeat("hello world, this is compressible text data! ", 100)) + srcPath := writeTempFile(t, original) + defer os.Remove(srcPath) + + dstPath, cSize, worthwhile, err := tryDeflate(srcPath, uint32(len(original))) + if err != nil { + t.Fatal(err) + } + if !worthwhile { + t.Fatal("expected compression to be worthwhile for repetitive text") + } + defer os.Remove(dstPath) + + if cSize >= uint32(len(original))*9/10 { + t.Errorf("compressed size %d not < 90%% of original %d", cSize, len(original)) + } + + // Verify the compressed file decompresses to the original data + f, err := os.Open(dstPath) + if err != nil { + t.Fatal(err) + } + defer f.Close() + + zr, err := zlib.NewReader(f) + if err != nil { + t.Fatal(err) + } + decompressed, err := io.ReadAll(zr) + zr.Close() + if err != nil { + t.Fatal(err) + } + if !bytes.Equal(decompressed, original) { + t.Error("decompressed data does not match original") + } +} + +func TestTryDeflate_IncompressibleData(t *testing.T) { + // Random bytes are effectively incompressible + data := make([]byte, 2048) + if _, err := rand.Read(data); err != nil { + t.Fatal(err) + } + srcPath := writeTempFile(t, data) + defer os.Remove(srcPath) + + _, _, worthwhile, err := tryDeflate(srcPath, uint32(len(data))) + if err != nil { + t.Fatal(err) + } + if worthwhile { + t.Error("expected compression of random data to not be worthwhile") + } +} + +func TestTryDeflate_RoundTrip(t *testing.T) { + original := []byte(strings.Repeat("Round-trip test data 
with enough repetition to compress well. ", 50)) + srcPath := writeTempFile(t, original) + defer os.Remove(srcPath) + + dstPath, cSize, worthwhile, err := tryDeflate(srcPath, uint32(len(original))) + if err != nil { + t.Fatal(err) + } + if !worthwhile { + t.Fatal("expected compression to be worthwhile") + } + defer os.Remove(dstPath) + + // Read compressed file + compressed, err := os.ReadFile(dstPath) + if err != nil { + t.Fatal(err) + } + if uint32(len(compressed)) != cSize { + t.Errorf("compressed file size %d != reported size %d", len(compressed), cSize) + } + + // Decompress and verify + zr, err := zlib.NewReader(bytes.NewReader(compressed)) + if err != nil { + t.Fatal(err) + } + decompressed, err := io.ReadAll(zr) + zr.Close() + if err != nil { + t.Fatal(err) + } + if !bytes.Equal(decompressed, original) { + t.Errorf("round-trip mismatch: got %d bytes, want %d bytes", len(decompressed), len(original)) + } +} + +func TestTryDeflate_CleanupOnNotWorthwhile(t *testing.T) { + // Random data won't compress well — the temp file should be removed + data := make([]byte, 2048) + if _, err := rand.Read(data); err != nil { + t.Fatal(err) + } + srcPath := writeTempFile(t, data) + defer os.Remove(srcPath) + + dstPath, _, worthwhile, err := tryDeflate(srcPath, uint32(len(data))) + if err != nil { + t.Fatal(err) + } + if worthwhile { + defer os.Remove(dstPath) + t.Fatal("expected not worthwhile for random data") + } + // dstPath should be empty and no leaked temp file + if dstPath != "" { + t.Errorf("expected empty dstPath when not worthwhile, got %q", dstPath) + } +} + +// --- Hash determinism tests --- + +func TestGetMessageHash_WithDeflate(t *testing.T) { + // Create repetitive data that compresses well + original := []byte(strings.Repeat("deflate hash test data ", 100)) + srcPath := writeTempFile(t, original) + defer os.Remove(srcPath) + + // Compress it + dstPath, cSize, worthwhile, err := tryDeflate(srcPath, uint32(len(original))) + if err != nil { + t.Fatal(err) + } + 
if !worthwhile { + t.Fatal("expected compression to be worthwhile") + } + defer os.Remove(dstPath) + + // Build header with deflate flag pointing at compressed file + h := &FMsgHeader{ + Version: 1, + Flags: FlagDeflate, + From: FMsgAddress{User: "alice", Domain: "example.com"}, + To: []FMsgAddress{{User: "bob", Domain: "other.com"}}, + Topic: "test", + Type: "text/plain;charset=UTF-8", + Size: cSize, + Filepath: dstPath, + } + + msgHash, err := h.GetMessageHash() + if err != nil { + t.Fatal(err) + } + + // Manually compute expected: SHA-256(encoded header + decompressed data) + expected := sha256.New() + expected.Write(h.Encode()) + expected.Write(original) + expectedHash := expected.Sum(nil) + + if !bytes.Equal(msgHash, expectedHash) { + t.Errorf("hash mismatch:\n got %x\n want %x", msgHash, expectedHash) + } +} + +func TestGetMessageHash_WithoutDeflate(t *testing.T) { + original := []byte(strings.Repeat("no deflate hash test ", 100)) + srcPath := writeTempFile(t, original) + defer os.Remove(srcPath) + + h := &FMsgHeader{ + Version: 1, + Flags: 0, + From: FMsgAddress{User: "alice", Domain: "example.com"}, + To: []FMsgAddress{{User: "bob", Domain: "other.com"}}, + Topic: "test", + Type: "text/plain;charset=UTF-8", + Size: uint32(len(original)), + Filepath: srcPath, + } + + msgHash, err := h.GetMessageHash() + if err != nil { + t.Fatal(err) + } + + expected := sha256.New() + expected.Write(h.Encode()) + expected.Write(original) + expectedHash := expected.Sum(nil) + + if !bytes.Equal(msgHash, expectedHash) { + t.Errorf("hash mismatch:\n got %x\n want %x", msgHash, expectedHash) + } +} + +func TestGetMessageHash_DeflateChangesHash(t *testing.T) { + // The same data produces different message hashes depending on whether + // it is deflated, because the header bytes differ (flags and size fields). 
+ original := []byte(strings.Repeat("deflate vs plain ", 100)) + srcPath := writeTempFile(t, original) + defer os.Remove(srcPath) + + dstPath, cSize, worthwhile, err := tryDeflate(srcPath, uint32(len(original))) + if err != nil { + t.Fatal(err) + } + if !worthwhile { + t.Fatal("expected compression to be worthwhile") + } + defer os.Remove(dstPath) + + base := FMsgHeader{ + Version: 1, + From: FMsgAddress{User: "alice", Domain: "example.com"}, + To: []FMsgAddress{{User: "bob", Domain: "other.com"}}, + Topic: "test", + Type: "text/plain;charset=UTF-8", + } + + // Hash without deflate + plain := base + plain.Flags = 0 + plain.Size = uint32(len(original)) + plain.Filepath = srcPath + hashPlain, err := plain.GetMessageHash() + if err != nil { + t.Fatal(err) + } + + // Hash with deflate + deflated := base + deflated.Flags = FlagDeflate + deflated.Size = cSize + deflated.Filepath = dstPath + hashDeflated, err := deflated.GetMessageHash() + if err != nil { + t.Fatal(err) + } + + if bytes.Equal(hashPlain, hashDeflated) { + t.Error("expected different hashes for deflated vs non-deflated wire representations") + } +} + +func TestGetMessageHash_AttachmentDeflate(t *testing.T) { + msgData := []byte("short message body that fits in a file") + msgPath := writeTempFile(t, msgData) + defer os.Remove(msgPath) + + attOriginal := []byte(strings.Repeat("attachment data for compression test ", 100)) + attSrcPath := writeTempFile(t, attOriginal) + defer os.Remove(attSrcPath) + + attDstPath, attCSize, worthwhile, err := tryDeflate(attSrcPath, uint32(len(attOriginal))) + if err != nil { + t.Fatal(err) + } + if !worthwhile { + t.Fatal("expected attachment compression to be worthwhile") + } + defer os.Remove(attDstPath) + + h := &FMsgHeader{ + Version: 1, + Flags: 0, + From: FMsgAddress{User: "alice", Domain: "example.com"}, + To: []FMsgAddress{{User: "bob", Domain: "other.com"}}, + Topic: "test", + Type: "text/plain;charset=UTF-8", + Size: uint32(len(msgData)), + Filepath: msgPath, + 
Attachments: []FMsgAttachmentHeader{ + { + Flags: 1 << 1, // attachment deflate bit + Type: "text/csv", + Filename: "data.csv", + Size: attCSize, + Filepath: attDstPath, + }, + }, + } + + msgHash, err := h.GetMessageHash() + if err != nil { + t.Fatal(err) + } + + // Manually compute: SHA-256(header + msg data + decompressed attachment) + expected := sha256.New() + expected.Write(h.Encode()) + expected.Write(msgData) + expected.Write(attOriginal) + expectedHash := expected.Sum(nil) + + if !bytes.Equal(msgHash, expectedHash) { + t.Errorf("attachment hash mismatch:\n got %x\n want %x", msgHash, expectedHash) + } +} + +// --- Encode flag tests --- + +func TestEncode_DeflateFlag(t *testing.T) { + h := &FMsgHeader{ + Version: 1, + Flags: FlagDeflate, + From: FMsgAddress{User: "alice", Domain: "example.com"}, + To: []FMsgAddress{{User: "bob", Domain: "other.com"}}, + Topic: "test", + Type: "text/plain;charset=UTF-8", + } + b := h.Encode() + if b[1]&FlagDeflate == 0 { + t.Error("deflate flag bit (5) not set in encoded header flags byte") + } +} + +func TestEncode_AttachmentDeflateFlag(t *testing.T) { + h := &FMsgHeader{ + Version: 1, + Flags: 0, + From: FMsgAddress{User: "alice", Domain: "example.com"}, + To: []FMsgAddress{{User: "bob", Domain: "other.com"}}, + Topic: "test", + Type: "text/plain;charset=UTF-8", + Attachments: []FMsgAttachmentHeader{ + {Flags: 1 << 1, Type: "text/plain", Filename: "test.txt", Size: 100}, + }, + } + b := h.Encode() + // The encoded header ends with attachment headers. Find the attachment + // flags byte: it's the first byte after the attachment count byte. + // The attachment count is at len(b) - (1 + 1 + len("text/plain") + 1 + len("test.txt") + 4) - 1 + // Simpler: just verify the flags byte value appears in the output. + // The attachment count byte (1) followed by attachment flags byte (0x02). 
+ found := false + for i := 0; i < len(b)-1; i++ { + if b[i] == 1 && b[i+1] == (1<<1) { // count=1, flags=0x02 + found = true + break + } + } + if !found { + t.Error("attachment deflate flag bit (1) not found in encoded header") + } +} diff --git a/src/sender.go b/src/sender.go index 1db7148..3010624 100644 --- a/src/sender.go +++ b/src/sender.go @@ -304,6 +304,42 @@ func deliverMessage(target pendingTarget) { return } + // Try zlib-deflate compression for message data and attachment data. + // Compressed temp files are cleaned up after delivery completes. + var deflateCleanup []string + defer func() { + for _, p := range deflateCleanup { + _ = os.Remove(p) + } + }() + if shouldDeflate(h.Type, h.Size) { + dp, cs, ok, derr := tryDeflate(h.Filepath, h.Size) + if derr != nil { + log.Printf("WARN: sender: deflate msg data for msg %d: %s", target.MsgID, derr) + } else if ok { + log.Printf("INFO: sender: deflated msg %d data: %d -> %d bytes", target.MsgID, h.Size, cs) + deflateCleanup = append(deflateCleanup, dp) + h.Filepath = dp + h.Size = cs + h.Flags |= FlagDeflate + } + } + for i := range h.Attachments { + att := &h.Attachments[i] + if shouldDeflate(att.Type, att.Size) { + dp, cs, ok, derr := tryDeflate(att.Filepath, att.Size) + if derr != nil { + log.Printf("WARN: sender: deflate attachment %s for msg %d: %s", att.Filename, target.MsgID, derr) + } else if ok { + log.Printf("INFO: sender: deflated msg %d attachment %s: %d -> %d bytes", target.MsgID, att.Filename, att.Size, cs) + deflateCleanup = append(deflateCleanup, dp) + att.Filepath = dp + att.Size = cs + att.Flags |= 1 << 1 + } + } + } + // Ensure sha256 is populated for outgoing messages so future pid lookups // (e.g. add-to notifications referencing this message) can find it. 
msgHash, err := h.GetMessageHash() From 9952f71a9b58258d4786df442c271c5a7631e9ce Mon Sep 17 00:00:00 2001 From: Mark Mennell Date: Sun, 12 Apr 2026 18:40:48 +0800 Subject: [PATCH 2/3] rm hyperbole --- src/deflate.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/deflate.go b/src/deflate.go index 64c3c1d..20d6590 100644 --- a/src/deflate.go +++ b/src/deflate.go @@ -8,7 +8,7 @@ import ( ) // minDeflateSize is the minimum payload size in bytes before compression is -// attempted. Below this, zlib framing overhead likely outweighs savings. +// attempted. const minDeflateSize uint32 = 512 // incompressibleTypes lists media types (lowercased, without parameters) that From d6706ce3b8aacce29e1d8ec95011077ade6ec7d4 Mon Sep 17 00:00:00 2001 From: Mark Mennell Date: Mon, 13 Apr 2026 00:06:37 +0800 Subject: [PATCH 3/3] deflate vs. compress wording --- src/deflate.go | 63 ++++++++++++++++++++++++---- src/deflate_test.go | 100 +++++++++++++++++++++++++++++++++++--------- src/sender.go | 16 +++---- 3 files changed, 144 insertions(+), 35 deletions(-) diff --git a/src/deflate.go b/src/deflate.go index 20d6590..68b85a7 100644 --- a/src/deflate.go +++ b/src/deflate.go @@ -1,6 +1,7 @@ package main import ( + "bytes" "compress/zlib" "io" "os" @@ -46,10 +47,10 @@ var incompressibleTypes = map[string]bool{ "model/vnd.usdz+zip": true, } -// shouldDeflate reports whether compression should be attempted for a payload +// shouldCompress reports whether compression should be attempted for a payload // with the given media type and size. It returns false for payloads that are // too small or use a media type known to be already compressed. 
-func shouldDeflate(mediaType string, dataSize uint32) bool { +func shouldCompress(mediaType string, dataSize uint32) bool { if dataSize < minDeflateSize { return false } @@ -60,17 +61,63 @@ func shouldDeflate(mediaType string, dataSize uint32) bool { return !incompressibleTypes[t] } -// tryDeflate compresses the file at srcPath using zlib-deflate and writes the -// result to a temporary file. It returns worthwhile=true only when the -// compressed output is less than 90% of the original size (at least a 10% -// reduction). When not worthwhile the temporary file is removed. -func tryDeflate(srcPath string, srcSize uint32) (dstPath string, compressedSize uint32, worthwhile bool, err error) { +// deflateSampleSize is the number of bytes sampled from the start of a file +// to estimate compressibility before committing to a full-file compression +// pass. Chosen large enough for zlib to find patterns but small enough to be +// fast even on very large files. +const deflateSampleSize = 8192 + +// probeSample compresses up to deflateSampleSize bytes from the start of src +// and reports whether the ratio looks promising (compressed < 80% of input). +// src is seeked back to the start on return. +func probeSample(src *os.File, srcSize uint32) (bool, error) { + sampleLen := int64(deflateSampleSize) + if int64(srcSize) < sampleLen { + sampleLen = int64(srcSize) + } + + var buf bytes.Buffer + zw := zlib.NewWriter(&buf) + if _, err := io.CopyN(zw, src, sampleLen); err != nil { + _ = zw.Close() + return false, err + } + if err := zw.Close(); err != nil { + return false, err + } + + if _, err := src.Seek(0, io.SeekStart); err != nil { + return false, err + } + + return int64(buf.Len()) < sampleLen*8/10, nil +} + +// tryCompress compresses the file at srcPath using zlib-deflate and writes the +// result to a temporary file. 
For files larger than deflateSampleSize it first +// compresses a prefix sample to estimate compressibility, avoiding a full pass +// over files that won't compress well. It returns worthwhile=true only when +// the compressed output is less than 80% of the original size (at least a 20% +// reduction). When not worthwhile the temporary file is removed. When +// worthwhile the caller is responsible for removing the file at dstPath. +func tryCompress(srcPath string, srcSize uint32) (dstPath string, compressedSize uint32, worthwhile bool, err error) { src, err := os.Open(srcPath) if err != nil { return "", 0, false, err } defer src.Close() + // For files larger than the sample size, probe a prefix first. + if srcSize > deflateSampleSize { + promising, err := probeSample(src, srcSize) + if err != nil { + return "", 0, false, err + } + if !promising { + return "", 0, false, nil + } + } + dst, err := os.CreateTemp("", "fmsg-deflate-*") if err != nil { return "", 0, false, err @@ -101,7 +148,7 @@ func tryDeflate(srcPath string, srcSize uint32) (dstPath string, compressedSize } cSize := uint32(fi.Size()) - if cSize >= srcSize*9/10 { + if fi.Size() >= int64(srcSize)*8/10 { // int64 math: uint32 srcSize*8 wraps above 512 MiB, and cSize is truncated past 4 GiB _ = os.Remove(dstName) return "", 0, false, nil } diff --git a/src/deflate_test.go b/src/deflate_test.go index f9e5d5f..f3e7b56 100644 --- a/src/deflate_test.go +++ b/src/deflate_test.go @@ -40,8 +40,8 @@ func TestShouldDeflate_TextTypes(t *testing.T) { "model/stl", } for _, mt := range compressible { - if !shouldDeflate(mt, 1024) { - t.Errorf("shouldDeflate(%q, 1024) = false, want true", mt) + if !shouldCompress(mt, 1024) { + t.Errorf("shouldCompress(%q, 1024) = false, want true", mt) } } } @@ -86,8 +86,8 @@ func TestShouldDeflate_IncompressibleTypes(t *testing.T) { "model/vnd.usdz+zip", } for _, mt := range skip { - if shouldDeflate(mt, 1024) { - t.Errorf("shouldDeflate(%q, 1024) = true, want false", mt) + if shouldCompress(mt, 1024) { + t.Errorf("shouldCompress(%q, 1024) = true, want false", mt) } } } @@ -95,27 
+95,27 @@ func TestShouldDeflate_IncompressibleTypes(t *testing.T) { func TestShouldDeflate_SmallPayload(t *testing.T) { sizes := []uint32{0, 1, 100, 511} for _, sz := range sizes { - if shouldDeflate("text/plain;charset=UTF-8", sz) { - t.Errorf("shouldDeflate(text/plain, %d) = true, want false", sz) + if shouldCompress("text/plain;charset=UTF-8", sz) { + t.Errorf("shouldCompress(text/plain, %d) = true, want false", sz) } } } func TestShouldDeflate_EdgeCases(t *testing.T) { // Exactly at threshold: should attempt - if !shouldDeflate("text/plain;charset=UTF-8", 512) { + if !shouldCompress("text/plain;charset=UTF-8", 512) { t.Error("shouldDeflate at threshold 512 should return true") } // Unknown type: default to try compression - if !shouldDeflate("application/x-custom", 1024) { + if !shouldCompress("application/x-custom", 1024) { t.Error("shouldDeflate for unknown type should return true") } // Type with parameters should match base type - if shouldDeflate("application/pdf; charset=utf-8", 1024) { + if shouldCompress("application/pdf; charset=utf-8", 1024) { t.Error("shouldDeflate should strip parameters and match application/pdf") } // Case insensitive - if shouldDeflate("VIDEO/H264", 1024) { + if shouldCompress("VIDEO/H264", 1024) { t.Error("shouldDeflate should be case-insensitive") } } @@ -142,7 +142,7 @@ func TestTryDeflate_CompressibleData(t *testing.T) { srcPath := writeTempFile(t, original) defer os.Remove(srcPath) - dstPath, cSize, worthwhile, err := tryDeflate(srcPath, uint32(len(original))) + dstPath, cSize, worthwhile, err := tryCompress(srcPath, uint32(len(original))) if err != nil { t.Fatal(err) } @@ -151,8 +151,8 @@ func TestTryDeflate_CompressibleData(t *testing.T) { } defer os.Remove(dstPath) - if cSize >= uint32(len(original))*9/10 { - t.Errorf("compressed size %d not < 90%% of original %d", cSize, len(original)) + if cSize >= uint32(len(original))*8/10 { + t.Errorf("compressed size %d not < 80%% of original %d", cSize, len(original)) } // Verify 
the compressed file decompresses to the original data @@ -185,7 +185,7 @@ func TestTryDeflate_IncompressibleData(t *testing.T) { srcPath := writeTempFile(t, data) defer os.Remove(srcPath) - _, _, worthwhile, err := tryDeflate(srcPath, uint32(len(data))) + _, _, worthwhile, err := tryCompress(srcPath, uint32(len(data))) if err != nil { t.Fatal(err) } @@ -199,7 +199,7 @@ func TestTryDeflate_RoundTrip(t *testing.T) { srcPath := writeTempFile(t, original) defer os.Remove(srcPath) - dstPath, cSize, worthwhile, err := tryDeflate(srcPath, uint32(len(original))) + dstPath, cSize, worthwhile, err := tryCompress(srcPath, uint32(len(original))) if err != nil { t.Fatal(err) } @@ -241,7 +241,7 @@ func TestTryDeflate_CleanupOnNotWorthwhile(t *testing.T) { srcPath := writeTempFile(t, data) defer os.Remove(srcPath) - dstPath, _, worthwhile, err := tryDeflate(srcPath, uint32(len(data))) + dstPath, _, worthwhile, err := tryCompress(srcPath, uint32(len(data))) if err != nil { t.Fatal(err) } @@ -255,6 +255,68 @@ func TestTryDeflate_CleanupOnNotWorthwhile(t *testing.T) { } } +func TestTryDeflate_ProbeRejectsLargeIncompressible(t *testing.T) { + // A file larger than deflateSampleSize filled with random bytes should be + // rejected by the sample probe without writing a full compressed file. + data := make([]byte, deflateSampleSize+4096) + if _, err := rand.Read(data); err != nil { + t.Fatal(err) + } + srcPath := writeTempFile(t, data) + defer os.Remove(srcPath) + + _, _, worthwhile, err := tryCompress(srcPath, uint32(len(data))) + if err != nil { + t.Fatal(err) + } + if worthwhile { + t.Error("expected probe to reject large random data") + } +} + +func TestTryDeflate_ProbeAcceptsLargeCompressible(t *testing.T) { + // A file larger than deflateSampleSize filled with repetitive text should + // pass the probe and compress the full file successfully. + data := []byte(strings.Repeat("probe compressible test data! 
", 1000)) + if len(data) <= deflateSampleSize { + t.Fatalf("test data %d bytes not larger than sample size %d", len(data), deflateSampleSize) + } + srcPath := writeTempFile(t, data) + defer os.Remove(srcPath) + + dstPath, cSize, worthwhile, err := tryCompress(srcPath, uint32(len(data))) + if err != nil { + t.Fatal(err) + } + if !worthwhile { + t.Fatal("expected large compressible data to be worthwhile") + } + defer os.Remove(dstPath) + + if cSize >= uint32(len(data))*8/10 { + t.Errorf("compressed size %d not < 80%% of original %d", cSize, len(data)) + } + + // Verify round-trip + f, err := os.Open(dstPath) + if err != nil { + t.Fatal(err) + } + defer f.Close() + zr, err := zlib.NewReader(f) + if err != nil { + t.Fatal(err) + } + decompressed, err := io.ReadAll(zr) + zr.Close() + if err != nil { + t.Fatal(err) + } + if !bytes.Equal(decompressed, data) { + t.Error("decompressed data does not match original") + } +} + // --- Hash determinism tests --- func TestGetMessageHash_WithDeflate(t *testing.T) { @@ -264,7 +326,7 @@ func TestGetMessageHash_WithDeflate(t *testing.T) { defer os.Remove(srcPath) // Compress it - dstPath, cSize, worthwhile, err := tryDeflate(srcPath, uint32(len(original))) + dstPath, cSize, worthwhile, err := tryCompress(srcPath, uint32(len(original))) if err != nil { t.Fatal(err) } @@ -339,7 +401,7 @@ func TestGetMessageHash_DeflateChangesHash(t *testing.T) { srcPath := writeTempFile(t, original) defer os.Remove(srcPath) - dstPath, cSize, worthwhile, err := tryDeflate(srcPath, uint32(len(original))) + dstPath, cSize, worthwhile, err := tryCompress(srcPath, uint32(len(original))) if err != nil { t.Fatal(err) } @@ -390,7 +452,7 @@ func TestGetMessageHash_AttachmentDeflate(t *testing.T) { attSrcPath := writeTempFile(t, attOriginal) defer os.Remove(attSrcPath) - attDstPath, attCSize, worthwhile, err := tryDeflate(attSrcPath, uint32(len(attOriginal))) + attDstPath, attCSize, worthwhile, err := tryCompress(attSrcPath, uint32(len(attOriginal))) if err != 
nil { t.Fatal(err) } diff --git a/src/sender.go b/src/sender.go index 3010624..687d9d4 100644 --- a/src/sender.go +++ b/src/sender.go @@ -312,12 +312,12 @@ func deliverMessage(target pendingTarget) { _ = os.Remove(p) } }() - if shouldDeflate(h.Type, h.Size) { - dp, cs, ok, derr := tryDeflate(h.Filepath, h.Size) + if shouldCompress(h.Type, h.Size) { + dp, cs, ok, derr := tryCompress(h.Filepath, h.Size) if derr != nil { - log.Printf("WARN: sender: deflate msg data for msg %d: %s", target.MsgID, derr) + log.Printf("WARN: sender: compress msg data for msg %d: %s", target.MsgID, derr) } else if ok { - log.Printf("INFO: sender: deflated msg %d data: %d -> %d bytes", target.MsgID, h.Size, cs) + log.Printf("INFO: sender: compressed msg %d data: %d -> %d bytes", target.MsgID, h.Size, cs) deflateCleanup = append(deflateCleanup, dp) h.Filepath = dp h.Size = cs @@ -326,12 +326,12 @@ func deliverMessage(target pendingTarget) { } for i := range h.Attachments { att := &h.Attachments[i] - if shouldDeflate(att.Type, att.Size) { - dp, cs, ok, derr := tryDeflate(att.Filepath, att.Size) + if shouldCompress(att.Type, att.Size) { + dp, cs, ok, derr := tryCompress(att.Filepath, att.Size) if derr != nil { - log.Printf("WARN: sender: deflate attachment %s for msg %d: %s", att.Filename, target.MsgID, derr) + log.Printf("WARN: sender: compress attachment %s for msg %d: %s", att.Filename, target.MsgID, derr) } else if ok { - log.Printf("INFO: sender: deflated msg %d attachment %s: %d -> %d bytes", target.MsgID, att.Filename, att.Size, cs) + log.Printf("INFO: sender: compressed msg %d attachment %s: %d -> %d bytes", target.MsgID, att.Filename, att.Size, cs) deflateCleanup = append(deflateCleanup, dp) att.Filepath = dp att.Size = cs