diff --git a/src/webfetch/content.ts b/src/webfetch/content.ts index d16fb86..d102379 100644 --- a/src/webfetch/content.ts +++ b/src/webfetch/content.ts @@ -169,12 +169,12 @@ function extractReadableArticle(html: string, url: string): ReadableArticle | un }; break; } + if (explicitArticle) return explicitArticle; const article = new Readability(dom.window.document, { charThreshold: 80, keepClasses: false, }).parse(); - if (explicitArticle) return explicitArticle; if (!article?.content || !article.textContent) return undefined; return { title: normalizePlainText( diff --git a/test/webfetch-explicit-article.test.ts b/test/webfetch-explicit-article.test.ts new file mode 100644 index 0000000..dc84bb0 --- /dev/null +++ b/test/webfetch-explicit-article.test.ts @@ -0,0 +1,45 @@ +import { beforeEach, describe, expect, it, vi } from "vitest"; + +const readability = vi.hoisted(() => ({ + parse: vi.fn(() => { + throw new Error("Readability should not run for explicit article matches"); + }), +})); + +vi.mock("@mozilla/readability", () => ({ + Readability: class { + parse(): unknown { + return readability.parse(); + } + }, +})); + +import { htmlToMarkdown } from "../src/webfetch/content.js"; + +function explicitArticleHtml(): string { + return ` + +
+Explicit article body has enough words to pass the direct article selector threshold.
+