diff --git a/src/config/formats.js b/src/config/formats.js
index 0b85806..aea5614 100644
--- a/src/config/formats.js
+++ b/src/config/formats.js
@@ -86,22 +86,33 @@ export function detectFormat(mainFilename, fileList = []) {
throw new Error("Gzip (.tgz) archives are not supported. Please use standard .zip files.");
}
- // 2. Hard Matches
- if (ext === 'enex') return 'enex';
- if (ext === 'json' && fileList.length <= 1) return 'json';
- if (ext === 'md') return 'markdown';
- if (ext === 'html' || ext === '_keep') return 'keep';
-
- // 3. Zip Content Scanning
- if (ext === 'zip') {
- const hasHtml = fileList.some(f => f.endsWith('.html'));
- const hasMd = fileList.some(f => f.endsWith('.md'));
- const hasCsv = fileList.some(f => f.endsWith('.csv'));
-
- if (hasMd && hasCsv) return 'notion';
- if (hasHtml) return 'keep'; // Google Takeout is mostly HTMLs
- return 'markdown'; // Default zip assumption
+ // 2. Hard Matches for single files
+ if (fileList.length <= 1) {
+ if (ext === 'enex') return 'enex';
+ if (ext === 'json') return 'json';
+ if (ext === 'md') return 'markdown';
+ if (ext === 'html' || ext === '_keep') return 'keep';
}
+
+ // 3. Batch / Zip Scanning
+ const hasHtml = fileList.some(f => f.endsWith('.html'));
+ const hasJson = fileList.some(f => f.endsWith('.json'));
+ const hasMd = fileList.some(f => f.endsWith('.md'));
+ const hasCsv = fileList.some(f => f.endsWith('.csv'));
+ const hasEnex = fileList.some(f => f.endsWith('.enex'));
+
+ if (hasMd && hasCsv) return 'notion';
+ if (hasEnex) return 'enex';
+
+ // Check whether this is a Google Keep Takeout export.
+ // A Keep Takeout contains HTML/JSON notes, and often a "Keep" folder or an "archive_browser.html" file.
+ const hasKeepPath = fileList.some(f => f.toLowerCase().includes('keep/') || f.toLowerCase().includes('keep\\'));
+ const hasArchiveBrowser = fileList.some(f => f.includes('archive_browser.html'));
+
+ if (hasKeepPath || hasArchiveBrowser) return 'keep';
+ if (hasHtml && !hasMd) return 'keep';
+ if (hasJson && !hasMd) return 'keep';
+ if (hasMd) return 'markdown';
return 'unknown';
}
\ No newline at end of file
diff --git a/src/main.js b/src/main.js
index aaf0c5e..9f0466a 100644
--- a/src/main.js
+++ b/src/main.js
@@ -273,11 +273,19 @@ function finalizeBatch(sourceIndex, entries) {
state.detectedFormat = detectFormat(primaryName, allNames);
// Auto-Select ONLY Visible Files
+ const jsonPaths = new Set(state.allEntries.filter(e => e.path.endsWith('.json')).map(e => e.path));
taggedEntries.forEach(e => {
let isVisible = false;
if (!e.name.startsWith('.')) {
if (isImage(e.name)) isVisible = true;
- else if (state.detectedFormat === 'keep' && e.name.endsWith('.html')) isVisible = true;
+ else if (state.detectedFormat === 'keep') {
+ if (e.name.endsWith('.json') && e.name !== 'archive_browser.html') {
+ isVisible = true;
+ } else if (e.name.endsWith('.html') && e.name !== 'archive_browser.html') {
+ const correspondingJson = e.path.substring(0, e.path.length - 5) + '.json';
+ if (!jsonPaths.has(correspondingJson)) isVisible = true;
+ }
+ }
else if (state.detectedFormat === 'markdown' && e.name.endsWith('.md')) isVisible = true;
else if (state.detectedFormat === 'enex' && e.name.endsWith('.enex')) isVisible = true;
else if (state.detectedFormat === 'json' && e.name.endsWith('.json')) isVisible = true;
@@ -298,13 +306,23 @@ function renderList() {
els.fileList.innerHTML = '';
// Filter view based on detected format + Images
+ const jsonPaths = new Set(state.allEntries.filter(e => e.path.endsWith('.json')).map(e => e.path));
const displayEntries = state.allEntries.filter(e => {
if (e.name.startsWith('.')) return false;
const isImg = isImage(e.name);
if (isImg) return true; // Always show images if they were accepted
- if (state.detectedFormat === 'keep') return e.name.endsWith('.html');
+ if (state.detectedFormat === 'keep') {
+ if (e.name.endsWith('.json') && e.name !== 'archive_browser.html') {
+ return true;
+ }
+ if (e.name.endsWith('.html') && e.name !== 'archive_browser.html') {
+ const correspondingJson = e.path.substring(0, e.path.length - 5) + '.json';
+ return !jsonPaths.has(correspondingJson);
+ }
+ return false;
+ }
if (state.detectedFormat === 'markdown' || state.detectedFormat === 'notion') return e.name.endsWith('.md');
if (state.detectedFormat === 'enex') return e.name.endsWith('.enex');
if (state.detectedFormat === 'json') return e.name.endsWith('.json');
@@ -384,10 +402,20 @@ function toggleSelectAll() {
// Since we don't store ID in DOM, we rely on state sync.
// Let's re-calculate visible IDs.
+ const jsonPaths = new Set(state.allEntries.filter(e => e.path.endsWith('.json')).map(e => e.path));
const visibleEntries = state.allEntries.filter(e => {
if (e.name.startsWith('.')) return false;
if (isImage(e.name)) return true;
- if (state.detectedFormat === 'keep') return e.name.endsWith('.html');
+ if (state.detectedFormat === 'keep') {
+ if (e.name.endsWith('.json') && e.name !== 'archive_browser.html') {
+ return true;
+ }
+ if (e.name.endsWith('.html') && e.name !== 'archive_browser.html') {
+ const correspondingJson = e.path.substring(0, e.path.length - 5) + '.json';
+ return !jsonPaths.has(correspondingJson);
+ }
+ return false;
+ }
if (state.detectedFormat === 'markdown') return e.name.endsWith('.md');
if (state.detectedFormat === 'enex') return e.name.endsWith('.enex');
if (state.detectedFormat === 'json') return e.name.endsWith('.json');
@@ -515,7 +543,13 @@ async function finishConversion(contentMap, binaryMap, dateMap = {}) {
Object.entries(contentMap).forEach(([path, content]) => {
try {
let note = null;
- if (source === 'keep') note = parseKeepHtml(content);
+ if (source === 'keep') {
+ if (path.endsWith('.json')) {
+ note = parseKeepJson(content);
+ } else {
+ note = parseKeepHtml(content);
+ }
+ }
else if (source === 'enex') note = parseEnex(content);
else if (source === 'markdown') note = fromMarkdown(content);
else if (source === 'json') note = JSON.parse(content);
@@ -628,6 +662,9 @@ async function generateEnexWithResources(notes, binaryMap) {
}
}
+ content = content.replace(/]*type="checkbox"[^>]*checked="true"[^>]*\/?>/gi, '
/g, '
');
content = content.replace(/]*>/gi, '');
@@ -638,6 +675,16 @@ async function generateEnexWithResources(notes, binaryMap) {
const createdTs = toEnexDate(note.created) || ts;
const updatedTs = toEnexDate(note.updated) || createdTs;
+ let tagsXml = '';
+ if (note.tags && Array.isArray(note.tags)) {
+ note.tags.forEach(t => {
+ const cleanTag = t.replace(/[<>&'"]/g, c => {
+ switch(c){case '<':return '<';case '>':return '>';case '&':return '&';case "'":return ''';case '"':return '"';}
+ });
+ tagsXml += `