From c24fc986779d7d713a6833700a7105d8516378be Mon Sep 17 00:00:00 2001 From: Andrew Schwegler Date: Sun, 14 Jun 2026 14:55:45 -0500 Subject: [PATCH 1/2] feat: Add Google Keep JSON export support and note deduplication --- src/config/formats.js | 41 ++++++++------ src/main.js | 123 ++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 144 insertions(+), 20 deletions(-) diff --git a/src/config/formats.js b/src/config/formats.js index 0b85806..aea5614 100644 --- a/src/config/formats.js +++ b/src/config/formats.js @@ -86,22 +86,33 @@ export function detectFormat(mainFilename, fileList = []) { throw new Error("Gzip (.tgz) archives are not supported. Please use standard .zip files."); } - // 2. Hard Matches - if (ext === 'enex') return 'enex'; - if (ext === 'json' && fileList.length <= 1) return 'json'; - if (ext === 'md') return 'markdown'; - if (ext === 'html' || ext === '_keep') return 'keep'; - - // 3. Zip Content Scanning - if (ext === 'zip') { - const hasHtml = fileList.some(f => f.endsWith('.html')); - const hasMd = fileList.some(f => f.endsWith('.md')); - const hasCsv = fileList.some(f => f.endsWith('.csv')); - - if (hasMd && hasCsv) return 'notion'; - if (hasHtml) return 'keep'; // Google Takeout is mostly HTMLs - return 'markdown'; // Default zip assumption + // 2. Hard Matches for single files + if (fileList.length <= 1) { + if (ext === 'enex') return 'enex'; + if (ext === 'json') return 'json'; + if (ext === 'md') return 'markdown'; + if (ext === 'html' || ext === '_keep') return 'keep'; } + + // 3. Batch / Zip Scanning + const hasHtml = fileList.some(f => f.endsWith('.html')); + const hasJson = fileList.some(f => f.endsWith('.json')); + const hasMd = fileList.some(f => f.endsWith('.md')); + const hasCsv = fileList.some(f => f.endsWith('.csv')); + const hasEnex = fileList.some(f => f.endsWith('.enex')); + + if (hasMd && hasCsv) return 'notion'; + if (hasEnex) return 'enex'; + + // Check if Google Keep Takeout + // Keep takeout contains HTML/JSON notes, and often a "Keep" folder or "archive_browser.html" + const hasKeepPath = fileList.some(f => f.toLowerCase().includes('keep/') || f.toLowerCase().includes('keep\\')); + const hasArchiveBrowser = fileList.some(f => f.includes('archive_browser.html')); + + if (hasKeepPath || hasArchiveBrowser) return 'keep'; + if (hasHtml && !hasMd) return 'keep'; + if (hasJson && !hasMd) return 'keep'; + if (hasMd) return 'markdown'; return 'unknown'; } \ No newline at end of file diff --git a/src/main.js b/src/main.js index aaf0c5e..9f0466a 100644 --- a/src/main.js +++ b/src/main.js @@ -273,11 +273,19 @@ function finalizeBatch(sourceIndex, entries) { state.detectedFormat = detectFormat(primaryName, allNames); // Auto-Select ONLY Visible Files + const jsonPaths = new Set(state.allEntries.filter(e => e.path.endsWith('.json')).map(e => e.path)); taggedEntries.forEach(e => { let isVisible = false; if (!e.name.startsWith('.')) { if (isImage(e.name)) isVisible = true; - else if (state.detectedFormat === 'keep' && e.name.endsWith('.html')) isVisible = true; + else if (state.detectedFormat === 'keep') { + if (e.name.endsWith('.json') && e.name !== 'archive_browser.html') { + isVisible = true; + } else if (e.name.endsWith('.html') && e.name !== 'archive_browser.html') { + const correspondingJson = e.path.substring(0, e.path.length - 5) + '.json'; + if (!jsonPaths.has(correspondingJson)) isVisible = true; + } + } else if (state.detectedFormat === 'markdown' && e.name.endsWith('.md')) isVisible = true; else if (state.detectedFormat === 'enex' && e.name.endsWith('.enex')) isVisible = true; else if (state.detectedFormat === 'json' && e.name.endsWith('.json')) isVisible = true; @@ -298,13 +306,23 @@ function renderList() { els.fileList.innerHTML = ''; // Filter view based on detected format + Images + const jsonPaths = new Set(state.allEntries.filter(e => e.path.endsWith('.json')).map(e => e.path)); const displayEntries = state.allEntries.filter(e => { if (e.name.startsWith('.')) return false; const isImg = isImage(e.name); if (isImg) return true; // Always show images if they were accepted - if (state.detectedFormat === 'keep') return e.name.endsWith('.html'); + if (state.detectedFormat === 'keep') { + if (e.name.endsWith('.json') && e.name !== 'archive_browser.html') { + return true; + } + if (e.name.endsWith('.html') && e.name !== 'archive_browser.html') { + const correspondingJson = e.path.substring(0, e.path.length - 5) + '.json'; + return !jsonPaths.has(correspondingJson); + } + return false; + } if (state.detectedFormat === 'markdown' || state.detectedFormat === 'notion') return e.name.endsWith('.md'); if (state.detectedFormat === 'enex') return e.name.endsWith('.enex'); if (state.detectedFormat === 'json') return e.name.endsWith('.json'); @@ -384,10 +402,20 @@ function toggleSelectAll() { // Since we don't store ID in DOM, we rely on state sync. // Let's re-calculate visible IDs. + const jsonPaths = new Set(state.allEntries.filter(e => e.path.endsWith('.json')).map(e => e.path)); const visibleEntries = state.allEntries.filter(e => { if (e.name.startsWith('.')) return false; if (isImage(e.name)) return true; - if (state.detectedFormat === 'keep') return e.name.endsWith('.html'); + if (state.detectedFormat === 'keep') { + if (e.name.endsWith('.json') && e.name !== 'archive_browser.html') { + return true; + } + if (e.name.endsWith('.html') && e.name !== 'archive_browser.html') { + const correspondingJson = e.path.substring(0, e.path.length - 5) + '.json'; + return !jsonPaths.has(correspondingJson); + } + return false; + } if (state.detectedFormat === 'markdown') return e.name.endsWith('.md'); if (state.detectedFormat === 'enex') return e.name.endsWith('.enex'); if (state.detectedFormat === 'json') return e.name.endsWith('.json'); @@ -515,7 +543,13 @@ async function finishConversion(contentMap, binaryMap, dateMap = {}) { Object.entries(contentMap).forEach(([path, content]) => { try { let note = null; - if (source === 'keep') note = parseKeepHtml(content); + if (source === 'keep') { + if (path.endsWith('.json')) { + note = parseKeepJson(content); + } else { + note = parseKeepHtml(content); + } + } else if (source === 'enex') note = parseEnex(content); else if (source === 'markdown') note = fromMarkdown(content); else if (source === 'json') note = JSON.parse(content); @@ -628,6 +662,9 @@ async function generateEnexWithResources(notes, binaryMap) { } } + content = content.replace(/]*type="checkbox"[^>]*checked="true"[^>]*\/?>/gi, ''); + content = content.replace(/]*type="checkbox"[^>]*checked[^>]*\/?>/gi, ''); + content = content.replace(/]*type="checkbox"[^>]*\/?>/gi, ''); content = content.replace(/
/g, '
'); content = content.replace(/]*>/gi, ''); @@ -638,6 +675,16 @@ async function generateEnexWithResources(notes, binaryMap) { const createdTs = toEnexDate(note.created) || ts; const updatedTs = toEnexDate(note.updated) || createdTs; + let tagsXml = ''; + if (note.tags && Array.isArray(note.tags)) { + note.tags.forEach(t => { + const cleanTag = t.replace(/[<>&'"]/g, c => { + switch(c){case '<':return '<';case '>':return '>';case '&':return '&';case "'":return ''';case '"':return '"';} + }); + tagsXml += ` ${cleanTag}\n`; + }); + } + xml += ` ${title} @@ -646,13 +693,79 @@ async function generateEnexWithResources(notes, binaryMap) { ${content}]]> ${createdTs} ${updatedTs} - ${resourcesXml} + ${tagsXml} ${resourcesXml} `; } xml += `\n`; return xml; } +function parseKeepJson(content) { + const data = JSON.parse(content); + + // Map textContent or listContent to content (HTML string) + let htmlContent = ''; + if (data.listContent && Array.isArray(data.listContent)) { + htmlContent = '
    '; + data.listContent.forEach(item => { + const checkedAttr = item.isChecked ? ' checked="true"' : ''; + htmlContent += `
  • ${item.text || ''}
  • `; + }); + htmlContent += '
'; + } else if (data.textContent) { + // Convert text content: escape HTML, replace newlines with
+ const escaped = (data.textContent || '') + .replace(/&/g, '&') + .replace(//g, '>'); + htmlContent = escaped.replace(/\n/g, '
'); + } + + // Map labels to tags + const tags = []; + if (data.labels && Array.isArray(data.labels)) { + data.labels.forEach(l => { + if (l.name) tags.push(l.name); + }); + } + + // Map attachments + const attachments = []; + if (data.attachments && Array.isArray(data.attachments)) { + data.attachments.forEach(att => { + if (att.filePath) { + attachments.push({ + filePath: att.filePath, + mimeType: att.mimetype || 'image/jpeg' + }); + } + }); + } + + // Map dates (microseconds timestamps to ISO string) + let created = null; + let updated = null; + if (data.createdTimestampUsec) { + created = new Date(data.createdTimestampUsec / 1000).toISOString(); + } + if (data.userEditedTimestampUsec) { + updated = new Date(data.userEditedTimestampUsec / 1000).toISOString(); + } + + return { + title: data.title || '', + content: htmlContent, + textContent: data.textContent || '', + tags: tags, + created: created, + updated: updated, + isArchived: !!data.isArchived, + isPinned: !!data.isPinned, + isTrashed: !!data.isTrashed, + attachments: attachments + }; +} + // --- UTILS --- function isImage(name) { From c4bbbfcc3dfa02b3280323496264423fadcf1a3d Mon Sep 17 00:00:00 2001 From: Andrew Schwegler Date: Sun, 14 Jun 2026 15:12:31 -0500 Subject: [PATCH 2/2] fix(worker): Resolve UTF-8 emoji and tag encoding corruption --- src/modules/worker.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/modules/worker.js b/src/modules/worker.js index 2993ab5..c29d352 100644 --- a/src/modules/worker.js +++ b/src/modules/worker.js @@ -41,7 +41,8 @@ self.onmessage = async (e) => { if (isImage) { binaryMap[path] = await entry.async('arraybuffer'); } else { - contentMap[path] = await entry.async('string'); + const bytes = await entry.async('uint8array'); + contentMap[path] = new TextDecoder('utf-8').decode(bytes); } if (entry.date) dateMap[path] = entry.date.toISOString(); } catch (err) {