From ab88328792b326e6a39ecc04cf78ac970c9eb2f5 Mon Sep 17 00:00:00 2001 From: Ava Silver Date: Sun, 5 Oct 2025 16:01:33 -0400 Subject: [PATCH 1/3] [FEAT] add substack pre parser for cleaner layout --- .../main/java/me/ash/reader/infrastructure/rss/RssHelper.kt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/app/src/main/java/me/ash/reader/infrastructure/rss/RssHelper.kt b/app/src/main/java/me/ash/reader/infrastructure/rss/RssHelper.kt index 2d23dc9fc..0d89e09be 100644 --- a/app/src/main/java/me/ash/reader/infrastructure/rss/RssHelper.kt +++ b/app/src/main/java/me/ash/reader/infrastructure/rss/RssHelper.kt @@ -105,6 +105,12 @@ constructor( if (h1Element != null && h1Element.hasText() && h1Element.text() == title) { h1Element.remove() } + if (link.contains("substack.com")) { + it.selectFirst("label")?.remove() + it.selectFirst("[data-component-name=\"PreformattedTextBlockToDOM\"]")?.unwrap() + it.selectFirst("pre")?.attr("style", "white-space: pre-wrap;") + return@let it.toString().replace("pre", "div") + } articleContent.toString() } ?: throw IOException("articleContent is null") } else throw IOException(response.message) From 1616312b975163c621ad839f2e45e024e27834ec Mon Sep 17 00:00:00 2001 From: Ava Silver Date: Sun, 5 Oct 2025 17:12:10 -0400 Subject: [PATCH 2/3] [FEAT] find with label text --- .../main/java/me/ash/reader/infrastructure/rss/RssHelper.kt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/app/src/main/java/me/ash/reader/infrastructure/rss/RssHelper.kt b/app/src/main/java/me/ash/reader/infrastructure/rss/RssHelper.kt index 0d89e09be..a7f71cf3f 100644 --- a/app/src/main/java/me/ash/reader/infrastructure/rss/RssHelper.kt +++ b/app/src/main/java/me/ash/reader/infrastructure/rss/RssHelper.kt @@ -36,6 +36,8 @@ import org.jsoup.Jsoup val enclosureRegex = """""".toRegex() val imgRegex = """img.*?src=(["'])((?!data).*?)\1""".toRegex(RegexOption.DOT_MATCHES_ALL) +const val substackLabelText = """Text within this block will maintain its original spacing when published""" + /** Some operations on RSS. */ class RssHelper @Inject @@ -106,7 +108,7 @@ constructor( h1Element.remove() } if (link.contains("substack.com")) { - it.selectFirst("label")?.remove() + it.select("label").find { label -> label.text().equals(substackLabelText, true) }?.remove() it.selectFirst("[data-component-name=\"PreformattedTextBlockToDOM\"]")?.unwrap() it.selectFirst("pre")?.attr("style", "white-space: pre-wrap;") return@let it.toString().replace("pre", "div") From 6c93a96f8c3e0f5f10c853522f074531050d6039 Mon Sep 17 00:00:00 2001 From: Ava Silver Date: Sun, 5 Oct 2025 20:38:53 -0400 Subject: [PATCH 3/3] [FEAT] simplifications/fixes --- .../java/me/ash/reader/infrastructure/rss/RssHelper.kt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/app/src/main/java/me/ash/reader/infrastructure/rss/RssHelper.kt b/app/src/main/java/me/ash/reader/infrastructure/rss/RssHelper.kt index a7f71cf3f..e0251d7ec 100644 --- a/app/src/main/java/me/ash/reader/infrastructure/rss/RssHelper.kt +++ b/app/src/main/java/me/ash/reader/infrastructure/rss/RssHelper.kt @@ -108,10 +108,10 @@ constructor( h1Element.remove() } if (link.contains("substack.com")) { - it.select("label").find { label -> label.text().equals(substackLabelText, true) }?.remove() - it.selectFirst("[data-component-name=\"PreformattedTextBlockToDOM\"]")?.unwrap() - it.selectFirst("pre")?.attr("style", "white-space: pre-wrap;") - return@let it.toString().replace("pre", "div") + it.select("label:contains($substackLabelText)")?.remove() + it.select("pre")?.attr("style", "white-space: pre-wrap;") + // manually replace the pre tags to not mess up the formatting (like .tagName() does) + return@let it.toString().replace("