From 4a063eb4cbf77d2746ae5ea1d74901fb2352cba7 Mon Sep 17 00:00:00 2001
From: dhuck <dhuck@tinycact.us>
Date: Wed, 15 Mar 2023 17:58:57 -0500
Subject: [PATCH 1/3] lisp comment parsing

---
 comment_parser/parsers/lisp_parser.py         | 59 +++++++++++++++++++
 .../parsers/tests/lisp_parser_test.py         | 37 ++++++++++++
 2 files changed, 96 insertions(+)
 create mode 100644 comment_parser/parsers/lisp_parser.py
 create mode 100644 comment_parser/parsers/tests/lisp_parser_test.py
diff --git a/comment_parser/parsers/lisp_parser.py b/comment_parser/parsers/lisp_parser.py
new file mode 100644
index 0000000..54a30bc
--- /dev/null
+++ b/comment_parser/parsers/lisp_parser.py
@@ -0,0 +1,59 @@
+#!/usr/bin/python
+"""This module provides support for parsing the Lisp family of languages.
+
+Works with:
+  Lisp
+  Scheme
+  Racket
+  Clojure (not including the (comment) form)
+  ... and other languages which use the leading ; as the comment form
+"""
+
+import re
+from bisect import bisect_left
+from typing import List 
+from comment_parser.parsers import common 
+
+def extract_comments(code: str) -> List[common.Comment]:
+  """Extracts a list of comments from a given Lisp family source code.
+
+  Comments are represented with the Comment class found in the common module.
+  Lisp family comments come in a single form: everything from a `;` to the end
+  of its line. Dialects give meaning to the number of leading `;` (one for an
+  inline remark, two or more for documentation), but this parser does not
+  differentiate: it consumes every leading `;` and returns the text after
+  them. A `;` inside a string literal (which may contain backslash escapes and
+  span several lines) or directly after a backslash (a character literal) is
+  not a comment.
+
+  Block comments (`#| ... |#`) and Clojure's `(comment ...)` form are not
+  recognized.
+
+  Args:
+    code (str): String containing code to extract comments from.
+  Returns:
+    List[common.Comment]: list of comments in the order that they appear in
+      the code, each with its 1-based line number.
+  """
+  # Strings and escaped characters are listed first so that any `;` inside
+  # them is consumed before the comment alternative gets a chance to match.
+  pattern = r"""
+    (?P<literal> \"(?:[^\"\\]|\\[\s\S])*\") |
+    (?P<escaped> \\.) |
+    (?P<single> ;+(?P<single_content>.*)$)
+  """
+  compiled = re.compile(pattern, re.VERBOSE | re.MULTILINE)
+
+  # Offsets of every newline; the number of them that precede a match is the
+  # zero-based line on which the match starts.
+  newline_offsets = [match.start() for match in re.finditer(r"\n", code)]
+
+  comments = []
+  for match in compiled.finditer(code):
+    comment_content = match.group("single_content")
+    if comment_content is None:
+      continue  # A string or an escaped character, not a comment.
+    line_no = bisect_left(newline_offsets, match.start())
+    comments.append(common.Comment(comment_content, line_no + 1))
+
+  return comments
\ No newline at end of file
diff --git a/comment_parser/parsers/tests/lisp_parser_test.py b/comment_parser/parsers/tests/lisp_parser_test.py
new file mode 100644
index 0000000..4467661
--- /dev/null
+++ b/comment_parser/parsers/tests/lisp_parser_test.py
@@ -0,0 +1,37 @@
+#!/usr/bin/python
+"""Tests for comment_parser.parsers.lisp_parser.py"""
+
+import unittest
+from comment_parser.parsers import common
+from comment_parser.parsers import lisp_parser
+
+class LispParerTest(unittest.TestCase):
+
+  def testSimpleMain(self):
+    # A comment line followed by a line of code yields exactly one comment.
+    source = '; this is a comment\n(format t "Hello, World!")'
+    want = [common.Comment(' this is a comment', 1, False)]
+    self.assertEqual(lisp_parser.extract_comments(source), want)
+
+  def testSingleLineComment(self):
+    # Input that is nothing but a comment, with no trailing newline.
+    source = '; single line comment'
+    want = [common.Comment(' single line comment', 1, False)]
+    self.assertEqual(lisp_parser.extract_comments(source), want)
+
+  def testSingleLineCommentInStringLiteral(self):
+    # A `;` inside a string literal must not be reported as a comment.
+    source = '(format t "; this is not a comment")'
+    self.assertEqual(lisp_parser.extract_comments(source), [])
+
+  def testMultipleCommentCharacters(self):
+    # Every leading `;` is stripped from the returned comment text.
+    source = ';; this is a comment'
+    want = [common.Comment(' this is a comment', 1, False)]
+    self.assertEqual(lisp_parser.extract_comments(source), want)
+
+  def testCommentsAfterLine(self):
+    # A comment that trails code on the same line is still found.
+    source = '(t format "Hello World") ; this is a comment'
+    want = [common.Comment(' this is a comment', 1, False)]
+    self.assertEqual(lisp_parser.extract_comments(source), want)
\ No newline at end of file

From e559fd5accbbf9be458e53c5fc2ca458914fd18f Mon Sep 17 00:00:00 2001
From: dhuck <dhuck@tinycact.us>
Date: Wed, 15 Mar 2023 20:14:52 -0500
Subject: [PATCH 2/3] added clojure and lisp

---
 comment_parser/comment_parser.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/comment_parser/comment_parser.py b/comment_parser/comment_parser.py
index 25c9b44..05bb41e 100755
--- a/comment_parser/comment_parser.py
+++ b/comment_parser/comment_parser.py
@@ -33,6 +33,7 @@
 from comment_parser.parsers import python_parser
 from comment_parser.parsers import ruby_parser
 from comment_parser.parsers import shell_parser
+from comment_parser.parsers import lisp_parser
 
 MIME_MAP = {
     'application/javascript': js_parser,  # Javascript
@@ -48,6 +49,8 @@
     'text/x-script.python': python_parser,  # Python
     'text/x-shellscript': shell_parser,  # Unix shell
     'text/xml': html_parser,  # XML
+    'text/x-lisp': lisp_parser,  # Lisp
+    'text/x-clojure': lisp_parser,  # Clojure
 }
 
 

From 0a7d2e5fecc4d99d4c13a379351bebf85eb5519d Mon Sep 17 00:00:00 2001
From: dhuck <dhuck@tinycact.us>
Date: Wed, 15 Mar 2023 20:31:22 -0500
Subject: [PATCH 3/3] add racket and scheme

---
 comment_parser/comment_parser.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/comment_parser/comment_parser.py b/comment_parser/comment_parser.py
index 05bb41e..d0e7177 100755
--- a/comment_parser/comment_parser.py
+++ b/comment_parser/comment_parser.py
@@ -51,6 +51,8 @@
     'text/xml': html_parser,  # XML
     'text/x-lisp': lisp_parser,  # Lisp
     'text/x-clojure': lisp_parser,  # Clojure
+    'text/x-racket': lisp_parser,  # Racket
+    'text/x-scheme': lisp_parser,  # Scheme
 }