From c24fc986779d7d713a6833700a7105d8516378be Mon Sep 17 00:00:00 2001
From: Andrew Schwegler <schwegler@gmail.com>
Date: Sun, 14 Jun 2026 14:55:45 -0500
Subject: [PATCH 1/2] feat: Add Google Keep JSON export support and note
 deduplication

---
 src/config/formats.js |  41 ++++++++------
 src/main.js           | 123 ++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 144 insertions(+), 20 deletions(-)

diff --git a/src/config/formats.js b/src/config/formats.js
index 0b85806..aea5614 100644
--- a/src/config/formats.js
+++ b/src/config/formats.js
@@ -86,22 +86,33 @@ export function detectFormat(mainFilename, fileList = []) {
         throw new Error("Gzip (.tgz) archives are not supported. Please use standard .zip files.");
     }
 
-    // 2. Hard Matches
-    if (ext === 'enex') return 'enex';
-    if (ext === 'json' && fileList.length <= 1) return 'json';
-    if (ext === 'md') return 'markdown';
-    if (ext === 'html' || ext === '_keep') return 'keep';
-
-    // 3. Zip Content Scanning
-    if (ext === 'zip') {
-        const hasHtml = fileList.some(f => f.endsWith('.html'));
-        const hasMd = fileList.some(f => f.endsWith('.md'));
-        const hasCsv = fileList.some(f => f.endsWith('.csv'));
-        
-        if (hasMd && hasCsv) return 'notion';
-        if (hasHtml) return 'keep'; // Google Takeout is mostly HTMLs
-        return 'markdown'; // Default zip assumption
+    // 2. Hard Matches for single files
+    if (fileList.length <= 1) {
+        if (ext === 'enex') return 'enex';
+        if (ext === 'json') return 'json';
+        if (ext === 'md') return 'markdown';
+        if (ext === 'html' || ext === '_keep') return 'keep';
     }
+
+    // 3. Batch / Zip Scanning
+    const hasHtml = fileList.some(f => f.endsWith('.html'));
+    const hasJson = fileList.some(f => f.endsWith('.json'));
+    const hasMd = fileList.some(f => f.endsWith('.md'));
+    const hasCsv = fileList.some(f => f.endsWith('.csv'));
+    const hasEnex = fileList.some(f => f.endsWith('.enex'));
+
+    if (hasMd && hasCsv) return 'notion';
+    if (hasEnex) return 'enex';
+    
+    // Check if Google Keep Takeout
+    // Keep takeout contains HTML/JSON notes, and often a "Keep" folder or "archive_browser.html"
+    const hasKeepPath = fileList.some(f => f.toLowerCase().includes('keep/') || f.toLowerCase().includes('keep\\'));
+    const hasArchiveBrowser = fileList.some(f => f.includes('archive_browser.html'));
+    
+    if (hasKeepPath || hasArchiveBrowser) return 'keep';
+    if (hasHtml && !hasMd) return 'keep';
+    if (hasJson && !hasMd) return 'keep';
+    if (hasMd) return 'markdown';
     
     return 'unknown';
 }
\ No newline at end of file
diff --git a/src/main.js b/src/main.js
index aaf0c5e..9f0466a 100644
--- a/src/main.js
+++ b/src/main.js
@@ -273,11 +273,19 @@ function finalizeBatch(sourceIndex, entries) {
     state.detectedFormat = detectFormat(primaryName, allNames);
 
     // Auto-Select ONLY Visible Files
+    const jsonPaths = new Set(state.allEntries.filter(e => e.path.endsWith('.json')).map(e => e.path));
     taggedEntries.forEach(e => {
         let isVisible = false;
         if (!e.name.startsWith('.')) {
             if (isImage(e.name)) isVisible = true;
-            else if (state.detectedFormat === 'keep' && e.name.endsWith('.html')) isVisible = true;
+            else if (state.detectedFormat === 'keep') {
+                if (e.name.endsWith('.json') && e.name !== 'archive_browser.html') {
+                    isVisible = true;
+                } else if (e.name.endsWith('.html') && e.name !== 'archive_browser.html') {
+                    const correspondingJson = e.path.substring(0, e.path.length - 5) + '.json';
+                    if (!jsonPaths.has(correspondingJson)) isVisible = true;
+                }
+            }
             else if (state.detectedFormat === 'markdown' && e.name.endsWith('.md')) isVisible = true;
             else if (state.detectedFormat === 'enex' && e.name.endsWith('.enex')) isVisible = true;
             else if (state.detectedFormat === 'json' && e.name.endsWith('.json')) isVisible = true;
@@ -298,13 +306,23 @@ function renderList() {
     els.fileList.innerHTML = '';
     
     // Filter view based on detected format + Images
+    const jsonPaths = new Set(state.allEntries.filter(e => e.path.endsWith('.json')).map(e => e.path));
     const displayEntries = state.allEntries.filter(e => {
         if (e.name.startsWith('.')) return false;
         
         const isImg = isImage(e.name);
         if (isImg) return true; // Always show images if they were accepted
 
-        if (state.detectedFormat === 'keep') return e.name.endsWith('.html');
+        if (state.detectedFormat === 'keep') {
+            if (e.name.endsWith('.json') && e.name !== 'archive_browser.html') {
+                return true;
+            }
+            if (e.name.endsWith('.html') && e.name !== 'archive_browser.html') {
+                const correspondingJson = e.path.substring(0, e.path.length - 5) + '.json';
+                return !jsonPaths.has(correspondingJson);
+            }
+            return false;
+        }
         if (state.detectedFormat === 'markdown' || state.detectedFormat === 'notion') return e.name.endsWith('.md');
         if (state.detectedFormat === 'enex') return e.name.endsWith('.enex');
         if (state.detectedFormat === 'json') return e.name.endsWith('.json');
@@ -384,10 +402,20 @@ function toggleSelectAll() {
     // Since we don't store ID in DOM, we rely on state sync.
     // Let's re-calculate visible IDs.
     
+    const jsonPaths = new Set(state.allEntries.filter(e => e.path.endsWith('.json')).map(e => e.path));
     const visibleEntries = state.allEntries.filter(e => {
         if (e.name.startsWith('.')) return false;
         if (isImage(e.name)) return true;
-        if (state.detectedFormat === 'keep') return e.name.endsWith('.html');
+        if (state.detectedFormat === 'keep') {
+            if (e.name.endsWith('.json') && e.name !== 'archive_browser.html') {
+                return true;
+            }
+            if (e.name.endsWith('.html') && e.name !== 'archive_browser.html') {
+                const correspondingJson = e.path.substring(0, e.path.length - 5) + '.json';
+                return !jsonPaths.has(correspondingJson);
+            }
+            return false;
+        }
         if (state.detectedFormat === 'markdown') return e.name.endsWith('.md');
         if (state.detectedFormat === 'enex') return e.name.endsWith('.enex');
         if (state.detectedFormat === 'json') return e.name.endsWith('.json');
@@ -515,7 +543,13 @@ async function finishConversion(contentMap, binaryMap, dateMap = {}) {
         Object.entries(contentMap).forEach(([path, content]) => {
             try {
                 let note = null;
-                if (source === 'keep') note = parseKeepHtml(content);
+                if (source === 'keep') {
+                    if (path.endsWith('.json')) {
+                        note = parseKeepJson(content);
+                    } else {
+                        note = parseKeepHtml(content);
+                    }
+                }
                 else if (source === 'enex') note = parseEnex(content);
                 else if (source === 'markdown') note = fromMarkdown(content);
                 else if (source === 'json') note = JSON.parse(content);
@@ -628,6 +662,9 @@ async function generateEnexWithResources(notes, binaryMap) {
             }
         }
         
+        content = content.replace(/<input[^>]*type="checkbox"[^>]*checked="true"[^>]*\/?>/gi, '<en-todo checked="true"/>');
+        content = content.replace(/<input[^>]*type="checkbox"[^>]*checked[^>]*\/?>/gi, '<en-todo checked="true"/>');
+        content = content.replace(/<input[^>]*type="checkbox"[^>]*\/?>/gi, '<en-todo/>');
         content = content.replace(/<br>/g, '<br/>');
         content = content.replace(/<img[^>]*>/gi, '');
         
@@ -638,6 +675,16 @@ async function generateEnexWithResources(notes, binaryMap) {
         const createdTs = toEnexDate(note.created) || ts;
         const updatedTs = toEnexDate(note.updated) || createdTs;
 
+        let tagsXml = '';
+        if (note.tags && Array.isArray(note.tags)) {
+            note.tags.forEach(t => {
+                const cleanTag = t.replace(/[<>&'"]/g, c => {
+                    switch(c){case '<':return '&lt;';case '>':return '&gt;';case '&':return '&amp;';case "'":return '&apos;';case '"':return '&quot;';}
+                });
+                tagsXml += `  <tag>${cleanTag}</tag>\n`;
+            });
+        }
+
         xml += `
 <note>
   <title>${title}</title>
@@ -646,13 +693,79 @@ async function generateEnexWithResources(notes, binaryMap) {
 <en-note>${content}</en-note>]]></content>
   <created>${createdTs}</created>
   <updated>${updatedTs}</updated>
-  ${resourcesXml}
+  ${tagsXml}  ${resourcesXml}
 </note>`;
     }
     xml += `\n</en-export>`;
     return xml;
 }
 
+/**
+ * Convert one Google Keep Takeout note (JSON text) into the converter's note object.
+ * Note text is HTML-escaped so characters like "<" or "&" cannot corrupt the output markup.
+ * Checklist notes (listContent) take precedence over plain textContent.
+ *
+ * @param {string} content - Raw JSON text of a single Keep note file.
+ * @returns {object} Note: title, content (HTML), textContent, tags, created/updated
+ *     (ISO strings or null), isArchived/isPinned/isTrashed flags, and attachments.
+ * @throws {SyntaxError} If content is not valid JSON.
+ */
+function parseKeepJson(content) {
+    const data = JSON.parse(content);
+
+    // Escape the characters that are significant in HTML/XML text nodes.
+    const escapeHtml = (text) => String(text || '')
+        .replace(/&/g, '&amp;')
+        .replace(/</g, '&lt;')
+        .replace(/>/g, '&gt;');
+
+    // Checklist notes become a <ul> of checkbox inputs; plain notes become escaped text.
+    let htmlContent = '';
+    if (Array.isArray(data.listContent)) {
+        const items = data.listContent.map((item) => {
+            const checkedAttr = item.isChecked ? ' checked="true"' : '';
+            // Escape item text too, so list notes get the same protection as plain-text notes.
+            return `<li><input type="checkbox"${checkedAttr}/> ${escapeHtml(item.text)}</li>`;
+        });
+        htmlContent = `<ul>${items.join('')}</ul>`;
+    } else if (data.textContent) {
+        htmlContent = escapeHtml(data.textContent).replace(/\n/g, '<br/>');
+    }
+
+    // Labels map to tags; labels without a name are dropped.
+    const tags = Array.isArray(data.labels)
+        ? data.labels.filter((label) => label.name).map((label) => label.name)
+        : [];
+
+    // Only attachments that reference a file are kept; the MIME type defaults to JPEG.
+    const attachments = Array.isArray(data.attachments)
+        ? data.attachments
+            .filter((att) => att.filePath)
+            .map((att) => ({ filePath: att.filePath, mimeType: att.mimetype || 'image/jpeg' }))
+        : [];
+
+    // Timestamps are microseconds since the epoch; Date takes milliseconds.
+    // Missing or unparseable values yield null instead of throwing from toISOString().
+    const usecToIso = (usec) => {
+        if (!usec) return null;
+        const date = new Date(usec / 1000);
+        return Number.isNaN(date.getTime()) ? null : date.toISOString();
+    };
+
+    return {
+        title: data.title || '',
+        content: htmlContent,
+        textContent: data.textContent || '',
+        tags,
+        created: usecToIso(data.createdTimestampUsec),
+        updated: usecToIso(data.userEditedTimestampUsec),
+        isArchived: !!data.isArchived,
+        isPinned: !!data.isPinned,
+        isTrashed: !!data.isTrashed,
+        attachments,
+    };
+}
+
 // --- UTILS ---
 
 function isImage(name) {

From c4bbbfcc3dfa02b3280323496264423fadcf1a3d Mon Sep 17 00:00:00 2001
From: Andrew Schwegler <schwegler@gmail.com>
Date: Sun, 14 Jun 2026 15:12:31 -0500
Subject: [PATCH 2/2] fix(worker): Resolve UTF-8 emoji and tag encoding
 corruption

---
 src/modules/worker.js | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/modules/worker.js b/src/modules/worker.js
index 2993ab5..c29d352 100644
--- a/src/modules/worker.js
+++ b/src/modules/worker.js
@@ -41,7 +41,8 @@ self.onmessage = async (e) => {
                         if (isImage) {
                             binaryMap[path] = await entry.async('arraybuffer');
                         } else {
-                            contentMap[path] = await entry.async('string');
+                            const bytes = await entry.async('uint8array');
+                            contentMap[path] = new TextDecoder('utf-8').decode(bytes);
                         }
                         if (entry.date) dateMap[path] = entry.date.toISOString();
                     } catch (err) {