diff --git a/lib/docs/filters/couchdb/clean_html.rb b/lib/docs/filters/couchdb/clean_html.rb
new file mode 100644
index 0000000000..0616a2097f
--- /dev/null
+++ b/lib/docs/filters/couchdb/clean_html.rb
@@ -0,0 +1,41 @@
+module Docs
+  class Couchdb
+    # Cleans up scraped CouchDB pages: strips section numbering from headings
+    # and turns wrapped code samples into bare, language-tagged <pre> nodes.
+    class CleanHtmlFilter < Filter
+      # Leading section number such as "1.2.3. ".
+      SECTION_NUMBER = /\A\s*(?:\d+\.)+\s*/
+
+      # Rewrites headings and code blocks in place and returns `doc`.
+      def call
+        css('h1', 'h2', 'h3').each do |node|
+          node.content = clean_heading(node.content)
+        end
+
+        css('pre').each do |node|
+          node.content = node.content.strip
+
+          # The language class is read from the grandparent wrapper. A <pre>
+          # whose grandparent is not an element is tagged but left in place
+          # instead of raising.
+          wrapper = node.parent&.parent
+          wrapped = wrapper&.element?
+          bash = wrapped && wrapper.classes.include?('highlight-bash')
+          node['data-language'] = bash ? 'bash' : 'javascript'
+
+          wrapper.replace(node) if wrapped
+        end
+
+        doc
+      end
+
+      private
+
+      # Removes non-ASCII characters and only the leading section number, so
+      # dots inside the title itself survive.
+      def clean_heading(text)
+        text.gsub(/\P{ASCII}/, '').sub(SECTION_NUMBER, '').strip
+      end
+    end
+  end
+end
diff --git a/lib/docs/filters/couchdb/entries.rb b/lib/docs/filters/couchdb/entries.rb
new file mode 100644
index 0000000000..3d03a9616e
--- /dev/null
+++ b/lib/docs/filters/couchdb/entries.rb
@@ -0,0 +1,65 @@
+module Docs
+  class Couchdb
+    # Derives entry names, types and per-section entries for CouchDB pages.
+    class EntriesFilter < Docs::EntriesFilter
+      # Entry type keyed by the slug's leading segment (the text before the
+      # first '-' or '/').
+      SLUG_MAP = {
+        'api' => 'API',
+        'json' => 'JSON Structures',
+        'cluster' => 'Cluster Management',
+        'replication' => 'Replication',
+        'maintenance' => 'Maintenance',
+        'partitioned' => 'Partitioned Databases'
+      }.freeze
+
+      # Page names whose nested sections each become an additional entry.
+      BREAKUP_PAGES = [
+        'JSON Structure Reference',
+        'Design Documents',
+        'Partitioned Databases'
+      ].freeze
+
+      # Leading section number such as "1.2.3. ".
+      SECTION_NUMBER = /\A\s*(?:\d+\.)+\s*/
+
+      # Entry name: the <h1> text without section number or non-ASCII characters.
+      def get_name
+        clean_heading(at_css('h1').content)
+      end
+
+      # Entry type: fixed for design-document pages, otherwise looked up by
+      # slug prefix, falling back to the page name.
+      def get_type
+        if slug.start_with?('ddocs/views')
+          'Views'
+        elsif slug.start_with?('ddocs')
+          'Design Documents'
+        else
+          SLUG_MAP[slug[/^(.+?)[-\/]/, 1]] || name
+        end
+      end
+
+      # Returns [name, section id] pairs, one per nested section that has an
+      # <h2>, for the pages listed in BREAKUP_PAGES; [] for every other page.
+      def additional_entries
+        return [] unless BREAKUP_PAGES.include?(name)
+
+        css('section > section').each_with_object([]) do |section, entries|
+          heading = section.at_css('h2')
+          next unless heading
+
+          entries << [clean_heading(heading.content), section['id']]
+        end
+      end
+
+      private
+
+      # Removes non-ASCII characters and only the leading section number, so
+      # dots inside the title itself survive.
+      def clean_heading(text)
+        text.gsub(/\P{ASCII}/, '').sub(SECTION_NUMBER, '').strip
+      end
+    end
+  end
+end
diff --git a/lib/docs/scrapers/couchdb.rb b/lib/docs/scrapers/couchdb.rb
new file mode 100644
index 0000000000..a077e195a1
--- /dev/null
+++ b/lib/docs/scrapers/couchdb.rb
@@ -0,0 +1,41 @@
+module Docs
+  class Couchdb < UrlScraper
+    self.name = 'CouchDB'
+    self.type = 'couchdb'
+    self.root_path = 'index.html'
+
+    self.links = {
+      home: 'https://couchdb.apache.org/',
+      code: 'https://github.com/apache/couchdb'
+    }
+
+    html_filters.push 'couchdb/clean_html', 'couchdb/entries'
+
+    options[:container] = 'div[itemprop=articleBody]'
+    options[:only_patterns] = [
+      /api\//,
+      /cluster\//,
+      /ddocs\//,
+      /replication\//,
+      /maintenance\//,
+      /partitioned-dbs\//,
+      /json-structure/
+    ]
+    options[:rate_limit] = 50 # Docs are subject to Cloudflare limiters.
+    options[:attribution] = <<-HTML
+      Copyright © 2025 The Apache Software Foundation — Licensed under the Apache License 2.0
+    HTML
+
+    version '3.5' do
+      self.release = '3.5.1'
+      self.base_url = "https://docs.couchdb.org/en/#{self.release}"
+    end
+
+    # Takes the last whitespace-separated token of the download pane's <h2>
+    # on the CouchDB home page as the latest version.
+    def get_latest_version(opts)
+      doc = fetch_doc('https://couchdb.apache.org/', opts)
+      doc.at_css('.download-pane > h2').content.split(' ').last
+    end
+  end
+end
diff --git a/public/icons/docs/couchdb/16.png b/public/icons/docs/couchdb/16.png
new file mode 100644
index 0000000000..7cae0c8b51
Binary files /dev/null and b/public/icons/docs/couchdb/16.png differ
diff --git a/public/icons/docs/couchdb/16@2x.png b/public/icons/docs/couchdb/16@2x.png
new file mode 100644
index 0000000000..81dc55b202
Binary files /dev/null and b/public/icons/docs/couchdb/16@2x.png differ
diff --git a/public/icons/docs/couchdb/SOURCE b/public/icons/docs/couchdb/SOURCE
new file mode 100644
index 0000000000..86560405ed
--- /dev/null
+++ b/public/icons/docs/couchdb/SOURCE
@@ -0,0 +1 @@
+https://docs.couchdb.org/en/stable/_static/favicon.ico