diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index d3716aa6..017c6814 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -1,13 +1,21 @@ name: main -on: [push] +on: + push: + branches: + - master + pull_request: + types: + - opened + - reopened + - synchronize jobs: build-and-test: strategy: matrix: - python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] + python-version: ["3.10", "3.11", "3.12"] runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v6 - name: git setup id: git-setup run: | @@ -18,23 +26,22 @@ jobs: - name: conda env run: | - wget -O Mambaforge.sh "https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-$(uname)-$(uname -m).sh" - curl -L -O "https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-$(uname)-$(uname -m).sh" - bash Mambaforge.sh -b -p "${HOME}/conda" + wget -O Miniforge3.sh "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh" + bash Miniforge3.sh -b -p "${HOME}/conda" source "${HOME}/conda/etc/profile.d/conda.sh" - source "${HOME}/conda/etc/profile.d/mamba.sh" which conda conda config --system --add channels defaults conda config --system --add channels bioconda conda config --system --add channels conda-forge conda config --system --set channel_priority strict - mamba create -y -n gffutils-env \ + conda create -y -n gffutils-env \ python=${{ matrix.python-version }} \ bedtools conda activate gffutils-env - python setup.py clean sdist - (cd dist && pip install gffutils-*.tar.gz) + python -m pip install build + python -m build + (cd dist && python -m pip install gffutils-*.tar.gz) cd $TMPDIR python -c "import gffutils; print(gffutils.__file__)" conda deactivate @@ -42,11 +49,10 @@ jobs: - name: run unit tests run: | source "${HOME}/conda/etc/profile.d/conda.sh" - source "${HOME}/conda/etc/profile.d/mamba.sh" - conda activate gffutils-env - pip install pytest 
hypothesis biopython pybedtools - pytest -v --doctest-modules gffutils + conda install -y bedtools + python -m pip install -e '.[optional,test]' + pytest conda deactivate - name: doctests @@ -61,9 +67,8 @@ jobs: if: ${{ (matrix.python-version != 3.8) }} run: | source "${HOME}/conda/etc/profile.d/conda.sh" - source "${HOME}/conda/etc/profile.d/mamba.sh" - mamba install -y -n gffutils-env --file docs-requirements.txt conda activate gffutils-env + python -m pip install -e '.[docs]' (cd doc && make clean doctest) conda deactivate @@ -72,7 +77,6 @@ jobs: if: ${{ (matrix.python-version != 3.8) }} run: | source "${HOME}/conda/etc/profile.d/conda.sh" - source "${HOME}/conda/etc/profile.d/mamba.sh" conda activate gffutils-env (cd doc && make html) conda deactivate @@ -83,7 +87,6 @@ jobs: --branch gh-pages "https://x-acess-token:${{ secrets.GITHUB_TOKEN }}@github.com/$GITHUB_REPOSITORY" \ /tmp/docs - # clean it out and add newly-built docs rm -rf /tmp/docs/* cp -r doc/build/html/* /tmp/docs @@ -102,7 +105,7 @@ jobs: - name: push artifact if: ${{ (matrix.python-version == 3.9) }} - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v6 with: name: doc path: /tmp/docs @@ -110,7 +113,7 @@ jobs: - name: push docs to gh-pages branch # Push docs to gh-pages if this test is running on master branch, and # restrict to a single Python version. 
- if: ${{ (github.ref == 'refs/heads/master') && (matrix.python-version == 3.9) }} + if: ${{ (github.ref == 'refs/heads/master') && (matrix.python-version == 3.12) }} run: | cd /tmp/docs git push "https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/$GITHUB_REPOSITORY" gh-pages diff --git a/.gitignore b/.gitignore index 29a95722..335fa300 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +env/ *.swo *gfffeature.so *.swp diff --git a/MANIFEST.in b/MANIFEST.in index 06f76dad..ae40da46 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,12 +1,6 @@ include README.rst include requirements.txt include LICENSE -recursive-include docs/source *.rst -recursive-include docs/source *.py -recursive-include docs/source/images * -recursive-include doc/source/_templates * -include docs/Makefile -include docs/make.bat include gffutils/test/data/c_elegans_WS199_ann_gff.txt include gffutils/test/data/c_elegans_WS199_dna_shortened.fa include gffutils/test/data/c_elegans_WS199_shortened_gff.txt diff --git a/doc/source/api.rst b/doc/source/api.rst index 2f9adefe..309a689a 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -117,10 +117,10 @@ Integration with other tools :toctree: autodocs :nosignatures: - gffutils.biopython_integration.to_seqfeature - gffutils.biopython_integration.from_seqfeature - gffutils.pybedtools_integration.tsses - gffutils.pybedtools_integration.to_bedtool + biopython_integration.to_seqfeature + biopython_integration.from_seqfeature + pybedtools_integration.tsses + pybedtools_integration.to_bedtool @@ -131,10 +131,10 @@ Utilities :toctree: autodocs :nosignatures: - gffutils.helpers.asinterval - gffutils.helpers.merge_attributes - gffutils.helpers.sanitize_gff_db - gffutils.helpers.annotate_gff_db - gffutils.helpers.infer_dialect - gffutils.helpers.example_filename - gffutils.inspect.inspect + helpers.asinterval + helpers.merge_attributes + helpers.sanitize_gff_db + helpers.annotate_gff_db + helpers.infer_dialect + helpers.example_filename + 
inspect.inspect diff --git a/doc/source/changelog.rst b/doc/source/changelog.rst index f1f7545b..5c7b503f 100644 --- a/doc/source/changelog.rst +++ b/doc/source/changelog.rst @@ -3,6 +3,52 @@ Change log ========== + +v0.14 +----- + +- If a value contained a semicolon there would be unexpected behavior (reported + in `#212 `__). This is solved + by adding a new entry to the dialect, ``semicolon in quotes```, and running + the necessary regular expression only when inferring dialect, or, if + ``semicolon in quotes`` is ``True``, on every feature. In the latter case, + this can dramatically increase the parsing time, since in Python regular + expressions are relatively slow, but it does correctly parse. Thanks to + @DevangThakkar for the fix. +- While working on that, refactored the attributes parsing to make it clearer + to follow along, and added more tests. The refactoring fixed some subtle bugs + on corner cases: + - Previously, for features with repeated keys, the ``order`` key of dialects + would list the repeated keys each time they appeared (i.e., the list had + duplicates) which could result in undetermined behavior. The ``order`` key + is now unique and only the first occurrence of a repeated key will be added + to the order. + - Previously, the ``ensembl_gtf.txt`` example file had a leading *space* in + front of the attributes. This looks to be an error in the creation of the + example file in the first place, but had previously parsed fine. Now the + parser (correctly) mis-handles it. Since I'm unaware of any cases in the + wild that have a leading space, I actually consider the new parsing, which + complains about the space, to be more correct. + - Added tests to directly inspect the inferred dialects for the test cases. +- Preserve GFF directives when ``create_db()`` imports from a file path, + matching the behavior for string-backed iterators and fixing + `#213 `__. This was due to + a different path through the code when using a `pathlib.Path` object. 
In + addition to this fix, `pathlib.Path` objects are now converted to `str` + throughout the code base with ``os.fspath`` where appropriate. +- CI, testing, and docs infrastructure updates (miniforge instead of + mambaforge; GitHub Action version bumps; skip biopython test if it's not + installed (`#233 `__); reduce build errors for docs) +- Fix `#224 `__, which was caused + by changes to the ``argh`` package used for the command-line tool. +- Address `#242 `__ (typo in docstring) +- Migrate to using ``pyproject.toml`` for packaging. This changes how versions are calculated + and reported, and removes the need for ``setup.py``. Version is only ever + recorded in ``pyproject.toml``; ``version.py`` gets the installed version or + parses the TOML if not installed; ``setup.py`` just calls ``setup()`` with no + arguments since everything has been migrated to ``pyproject.toml``. + + v0.13 ----- diff --git a/doc/source/conf.py b/doc/source/conf.py index c65c4a28..2b85647c 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -53,5 +53,3 @@ templates_path = ['_templates'] exclude_patterns = [] html_theme = 'sphinx_rtd_theme' -html_static_path = ['_static'] -html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] diff --git a/doc/source/dialect.rst b/doc/source/dialect.rst index b02d7c6a..aed018b9 100644 --- a/doc/source/dialect.rst +++ b/doc/source/dialect.rst @@ -38,7 +38,8 @@ A GTF dialect might look like this:: 'multival separator': ',', 'quoted GFF2 values': True, 'repeated keys': False, - 'trailing semicolon': True} + 'trailing semicolon': True, + 'semicolon_in_quotes': False} In contrast, a GFF dialect might look like this:: @@ -49,7 +50,9 @@ In contrast, a GFF dialect might look like this:: 'multival separator': ',', 'quoted GFF2 values': False, 'repeated keys': False, - 'trailing semicolon': False} + 'trailing semicolon': False, + 'semicolon_in_quotes': False} + As other real-world files are brought to the attention of the developers, it's likely that 
more entries will be added to the dialect. diff --git a/doc/source/examples.rst b/doc/source/examples.rst index 6b631236..54d8e45a 100644 --- a/doc/source/examples.rst +++ b/doc/source/examples.rst @@ -235,7 +235,7 @@ data upon import into the database: ... return x -Now we can supply this tranform function to :func:`create_db`: +Now we can supply this transform function to :func:`create_db`: >>> fn = gffutils.example_filename('ensembl_gtf.txt') >>> db = gffutils.create_db(fn, ":memory:", @@ -643,8 +643,8 @@ attributes to have the same format. To help with this, we can use the >>> dialect = helpers.infer_dialect( ... 'Transcript "B0019.1" ; WormPep "WP:CE40797" ; Note "amx-2" ; Prediction_status "Partially_confirmed" ; Gene "WBGene00000138" ; CDS "B0019.1" ; WormPep "WP:CE40797" ; Note "amx-2" ; Prediction_status "Partially_confirmed" ; Gene "WBGene00000138"', ... ) ->>> print(dialect) -{'leading semicolon': False, 'trailing semicolon': False, 'quoted GFF2 values': True, 'field separator': ' ; ', 'keyval separator': ' ', 'multival separator': ',', 'fmt': 'gtf', 'repeated keys': True, 'order': ['Transcript', 'WormPep', 'Note', 'Prediction_status', 'Gene', 'CDS', 'WormPep', 'Note', 'Prediction_status', 'Gene']} +>>> print({k: v for k, v in sorted(dialect.items())}) +{'field separator': ' ; ', 'fmt': 'gtf', 'keyval separator': ' ', 'leading semicolon': False, 'multival separator': ',', 'order': ['Transcript', 'WormPep', 'Note', 'Prediction_status', 'Gene', 'CDS'], 'quoted GFF2 values': True, 'repeated keys': True, 'semicolon in quotes': False, 'trailing semicolon': False} >>> db.dialect = dialect diff --git a/doc/source/index.rst b/doc/source/index.rst index 24f0d48c..8bf7cb76 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -6,9 +6,9 @@ Introduction ============ :mod:`gffutils` is a Python package for working with `GFF -`_ and `GTF -`_ files in a hierarchical manner. 
It -allows operations which would be complicated or time-consuming using +`__ +and `GTF `_ files in a hierarchical manner. +It allows operations which would be complicated or time-consuming using a text-file-only approach. Below is a short demonstration of :mod:`gffutils`. For the full documentation, diff --git a/gffutils/__init__.py b/gffutils/__init__.py index 0cf7853f..0242cb5f 100644 --- a/gffutils/__init__.py +++ b/gffutils/__init__.py @@ -5,3 +5,14 @@ from gffutils.helpers import example_filename from gffutils.exceptions import FeatureNotFoundError, DuplicateIDError from gffutils.version import version as __version__ + +__all__ = [ + "__version__", + "create_db", + "FeatureDB", + "Feature", + "DataIterator", + "example_filename", + "FeatureNotFoundError", + "DuplicateIDError", +] diff --git a/gffutils/constants.py b/gffutils/constants.py index 901e7146..2543e64d 100644 --- a/gffutils/constants.py +++ b/gffutils/constants.py @@ -127,6 +127,12 @@ # vs # ID=001; Name=gene1 "field separator": ";", + # Sometimes there are semicolons inside quotes that break things, e.g., + # + # note "Evidence 1a: Function1, Function2" + # vs + # note "Evidence 1a: Function; PubMedId: 123, 456" + "semicolon in quotes": False, # Usually "=" for GFF3; " " for GTF, e.g., # # gene_id "GENE1" diff --git a/gffutils/contrib/plotting.py b/gffutils/contrib/plotting.py index 55e3c333..bfaa9032 100644 --- a/gffutils/contrib/plotting.py +++ b/gffutils/contrib/plotting.py @@ -1,11 +1,11 @@ +import warnings + from gffutils.helpers import asinterval try: from pybedtools.contrib.plotting import Track except ImportError: - import warnings - - warnings.warn("Please install pybedtools for plotting.") + Track = None class Gene(object): @@ -49,6 +49,9 @@ def __init__( UTRs, CDSs are. Padding is essentially "full" minus the largest height (CDS, 0.9, by default). 
""" + if Track is None: + warnings.warn("Please install pybedtools for plotting.") + raise ImportError("pybedtools is required for gffutils.contrib.plotting") self.heights = {"transcript": 0.2, "utrs": 0.5, "cds": 0.9, "full": 1.0} self.kwargs = kwargs diff --git a/gffutils/create.py b/gffutils/create.py index e137c1af..6e3fbd24 100644 --- a/gffutils/create.py +++ b/gffutils/create.py @@ -76,6 +76,9 @@ def __init__( Base class for _GFFDBCreator and _GTFDBCreator; see create_db() function for docs """ + if isinstance(dbfn, os.PathLike): + dbfn = os.fspath(dbfn) + self._keep_tempfiles = _keep_tempfiles if force_merge_fields is None: force_merge_fields = [] diff --git a/gffutils/feature.py b/gffutils/feature.py index 6db720ff..e833774c 100644 --- a/gffutils/feature.py +++ b/gffutils/feature.py @@ -1,4 +1,5 @@ from pyfaidx import Fasta +import os import simplejson as json from gffutils import constants from gffutils import helpers @@ -383,6 +384,8 @@ def sequence(self, fasta, use_strand=True): ------- string """ + if isinstance(fasta, os.PathLike): + fasta = os.fspath(fasta) if isinstance(fasta, str): fasta = Fasta(fasta, as_raw=False) diff --git a/gffutils/gffwriter.py b/gffutils/gffwriter.py index c3026927..895cf372 100644 --- a/gffutils/gffwriter.py +++ b/gffutils/gffwriter.py @@ -3,6 +3,7 @@ ## import tempfile import shutil +import os from time import strftime, localtime from gffutils.version import version @@ -33,6 +34,8 @@ class GFFWriter: """ def __init__(self, out, with_header=True, in_place=False): + if isinstance(out, os.PathLike): + out = os.fspath(out) self.out = out self.with_header = with_header self.in_place = in_place diff --git a/gffutils/helpers.py b/gffutils/helpers.py index 234994db..7418d29a 100644 --- a/gffutils/helpers.py +++ b/gffutils/helpers.py @@ -498,6 +498,8 @@ def is_gff_db(db_fname): For now, rely on .db extension. 
""" + if isinstance(db_fname, os.PathLike): + db_fname = os.fspath(db_fname) if not os.path.isfile(db_fname): return False if db_fname.endswith(".db"): @@ -519,6 +521,8 @@ def canonical_transcripts(db, fasta_filename): """ import pyfaidx + if isinstance(fasta_filename, os.PathLike): + fasta_filename = os.fspath(fasta_filename) fasta = pyfaidx.Fasta(fasta_filename, as_raw=False) for gene in db.features_of_type("gene"): @@ -580,6 +584,8 @@ def get_gff_db(gff_fname, ext=".db"): load that. Otherwise, create a named temporary file, serialize the db to that, and return the loaded database. """ + if isinstance(gff_fname, os.PathLike): + gff_fname = os.fspath(gff_fname) if not os.path.isfile(gff_fname): # Not sure how we should deal with errors normally in # gffutils -- Ryan? diff --git a/gffutils/interface.py b/gffutils/interface.py index 9216cd39..74e37069 100644 --- a/gffutils/interface.py +++ b/gffutils/interface.py @@ -161,6 +161,9 @@ def __init__( # db. from gffutils import create + if isinstance(dbfn, os.PathLike): + dbfn = os.fspath(dbfn) + if isinstance(dbfn, create._DBCreator): self.conn = dbfn.conn self.dbfn = dbfn.dbfn @@ -454,27 +457,7 @@ def _relation( completely_within=False, limit=None, ): - - # The following docstring will be included in the parents() and - # children() docstrings to maintain consistency, since they both - # delegate to this method. - """ - Parameters - ---------- - - id : string or a Feature object - - level : None or int - - If `level=None` (default), then return all children regardless - of level. If `level` is an integer, then constrain to just that - level. - {_method_doc} - - Returns - ------- - A generator object that yields :class:`Feature` objects. - """ + """Internal implementation for parent/child relationship queries.""" if isinstance(id, Feature): id = id.id @@ -521,7 +504,22 @@ def children( ): """ Return children of feature `id`. 
- {_relation_docstring} + + Parameters + ---------- + + id : string or a Feature object + + level : None or int + + If `level=None` (default), then return all related children + regardless of level. If `level` is an integer, then constrain to + just that level. + {_method_doc} + + Returns + ------- + A generator object that yields :class:`Feature` objects. """ return self._relation( id, @@ -547,7 +545,22 @@ def parents( ): """ Return parents of feature `id`. - {_relation_docstring} + + Parameters + ---------- + + id : string or a Feature object + + level : None or int + + If `level=None` (default), then return all related parents + regardless of level. If `level` is an integer, then constrain to + just that level. + {_method_doc} + + Returns + ------- + A generator object that yields :class:`Feature` objects. """ return self._relation( id, @@ -1285,7 +1298,7 @@ def create_introns( with open('tmp.gtf', 'w') as fout: for intron in db.create_introns(**intron_kwargs): - fout.write(str(intron) + "\n") + fout.write(str(intron) + "\\n") db.update(gffutils.DataIterator('tmp.gtf'), **create_kwargs) """ @@ -1999,11 +2012,6 @@ def seqids(self): for (i,) in c: yield i - # Recycle the docs for _relation so they stay consistent between parents() - # and children() - children.__doc__ = children.__doc__.format(_relation_docstring=_relation.__doc__) - parents.__doc__ = parents.__doc__.format(_relation_docstring=_relation.__doc__) - # Add the docs for methods that call helpers.make_query() for method in [parents, children, features_of_type, all_features]: method.__doc__ = method.__doc__.format(_method_doc=_method_doc) diff --git a/gffutils/iterators.py b/gffutils/iterators.py index ed0c8389..bdd27477 100644 --- a/gffutils/iterators.py +++ b/gffutils/iterators.py @@ -279,6 +279,8 @@ def DataIterator( provided, you should probably also use `force_dialect_check=False` and `checklines=0` but this is not enforced. 
""" + if isinstance(data, os.PathLike): + data = os.fspath(data) if isinstance(data, _BaseIterator): return data diff --git a/gffutils/parser.py b/gffutils/parser.py index 058423ad..4ff740a5 100644 --- a/gffutils/parser.py +++ b/gffutils/parser.py @@ -1,9 +1,8 @@ # Portions copied over from BCBio.GFF.GFFParser import re -import copy import collections -import urllib +from urllib import parse from gffutils import constants from gffutils.exceptions import AttributeStringError @@ -16,7 +15,27 @@ ch.setFormatter(formatter) logger.addHandler(ch) -gff3_kw_pat = re.compile(r"\w+=") +# Regex for each separator that will be tested +quoted_semicolon_patterns = dict() + +for sep in (" ; ", "; ", ";"): + quoted_semicolon_patterns[sep] = re.compile( + rf""" + {re.escape(sep)} # The separator we're considering (escaped for VERBOSE mode) + (?= # Positive lookahead: does remaining content match? + (?: # Start non-capturing group + [^"] # Either: match any character that is NOT a quote + | # OR + "[^"]*" # Match a complete quoted string, specifically: + # - opening quote ", followed by + # - zero or more non-quote characters [^"]* + # - followed by closing quote " + )* # Repeat the above pattern zero or more times + $ # Until we reach the end of the string + ) # End of lookahead + """, + re.VERBOSE, + ) # Encoding/decoding notes # ----------------------- @@ -50,9 +69,9 @@ # # See also issue #98. # -# Note that spaces are NOT encoded. Some GFF files have spaces encoded; in -# these cases round-trip invariance will not hold since the %20 will be decoded -# but not re-encoded. +# Note that spaces are NOT supposed to be encoded. Yet some GFF files have +# spaces encoded anyway; in these cases round-trip invariance will not hold +# since the %20 will be decoded but not re-encoded. 
_to_quote = "\n\t\r%;=&," _to_quote += "".join([chr(i) for i in range(32)]) _to_quote += chr(127) @@ -74,6 +93,235 @@ def __missing__(self, b): quoter = Quoter() +def _split_keyvals(keyval_str, dialect=None): + """ + Dialect detection requires partially parsing the attributes. + """ + from gffutils import feature + + quals = feature.dict_class() + + if not keyval_str: + return quals, dialect + + infer_dialect = False + if dialect is None: + infer_dialect = True + dialect = {} + + # No known cases yet of different multival separator + dialect["multival separator"] = "," + + # Detection for these dialect fields can work on the full attribute + # string. Other detection needs to wait until we've further parsed the + # attributes. + if infer_dialect: + dialect["trailing semicolon"] = keyval_str[-1] == ";" + dialect["leading semicolon"] = keyval_str[0] == ";" + semicolon_in_quotes = False + sep = None + for sep in (" ; ", "; ", ";"): + parts = keyval_str.split(sep) + if len(parts) > 1: + # If naive split differs from more expensive regex, we infer there was + # a semicolon within quoted value and we'll have to use the expensive + # method later + parts_regex = re.split(quoted_semicolon_patterns[sep], keyval_str) + if parts != parts_regex: + semicolon_in_quotes = True + break + dialect["semicolon in quotes"] = semicolon_in_quotes + dialect["field separator"] = sep + + if dialect["trailing semicolon"]: + keyval_str = keyval_str.rstrip(";") + + if dialect["leading semicolon"]: + keyval_str = keyval_str.lstrip(";") + + if dialect["semicolon in quotes"]: + parts = re.split( + quoted_semicolon_patterns[dialect["field separator"]], keyval_str + ) + else: + parts = keyval_str.split(dialect["field separator"]) + + # The next stage of dialect inference works on the 'parts' -- unsplit + # keyval pairs -- like: + # + # parts = ["ID=001", "Name=gene1"] + # + # or + # + # parts = ["gene_id ENSG001", "gene_biotype protein_coding"] + # + if infer_dialect: + dialect["fmt"] = "gff3" + + 
# Note: so far, have not found cases where we need to check more than + # the first item + if "=" in parts[0]: + dialect["fmt"] = "gff3" + dialect["keyval separator"] = "=" + else: + dialect["fmt"] = "gtf" + dialect["keyval separator"] = " " + + # Now we split + # + # parts = ["ID=001", "Name=gene1"] + # + # into + # + # key_val_tuples = [("ID", "001"), ("Name", "gene1")] + # + # in a dialect-dependent manner. + kvsep = dialect["keyval separator"] + key_val_tuples = [p.split(kvsep) for p in parts] + + # With the split keys we can detect whether any are repeated + if infer_dialect: + keys = [i[0] for i in key_val_tuples] + dialect["repeated keys"] = len(keys) != len(set(keys)) + + # For dialect detection, this will help figure out if there is + # inconsistent quoting across values. It will only be used in the loop + # below if infer_dialect is True + quoted_values = [] + + # Now work splitting the keys if needed. + for i in key_val_tuples: + + if len(i) == 2: + # Easy, on-spec case + key, val = i + + elif len(i) == 1: + # By convention, no value becomes an empty string, e.g. when done + # parsing, + # + # "ID=001;is_gene;" + # + # will end up as: + # + # {"ID": "001", "is_gene": ""} + key = i[0] + val = "" + + else: + # Multiple *spaces* within quoted values are joined back together + # without requiring a regex, in contrast to when there's *field* + # separator like a semicolon in the values. 
+ # + # That is: + # + # attributes = 'gene_description "an important gene"; gene_id "g001"' + # + # when split on spaces, becomes + # + # key_val_tuples = [("gene_description", "an", "important", "gene"), ("gene_id", "g001")] + # + # so here when we only keep the first token as a key, that first + # key/val pair will become: + # + # { + # "gene_description": ["an important gene"], + # "gene_id": ["g001"], + # } + # + # Another pathological case, this time for GFF3: + # + # Alias=SGN-M1347;ID=T0028;Note=marker name(s): T0028 SGN-M1347 |identity=99.58|escore=2e-126 + # + # will become the following: + # + # { + # "Alias": ["SGN-M1347"], + # "ID": ["T0028"], + # "Note": ["marker name(s): T0028 SGN-M1347 |identity=99.58|escore=2e-126"], + # } + # + key = i[0] + val = kvsep.join(i[1:]) + + # By convention all values are lists, even if there's only one value + # (or even no values) + if key not in quals: + quals[key] = [] + + # This will run on every value, accumulating in quoted_values to check + # later for consistency + if infer_dialect: + quoted = len(val) > 0 and val[0] == '"' and val[-1] == '"' + quoted_values.append(quoted) + dialect["quoted GFF2 values"] = quoted + + if dialect["quoted GFF2 values"] and val: + val = val.strip('"') + + if val: + # For repeated keys dialect, don't split on an internal comma. That is, + # + # attributes = 'db_xref="g01,g02"; db_xref="XYZ"' + # + # becomes: + # + # { + # "db_xref": ["g01,g02", "XYZ"] + # } + # + if dialect.get("repeated keys"): + quals[key].append(val) + + # Otherwise, split but only if it's a comma without a space. So: + # + # attributes = 'db_xref="g01,g02"' + # + # becomes + # { + # "db_xref": ["g01", "g02"] + # } + # but + # + # attributes = 'description="kinase, subunit 1"' + # ^ note the space here + # becomes + # { + # "description": ["kinase, subunit 1"] + # } + # + else: + # E.g. 
the "kinase, subunit 1" example above + if ", " in val: + quals[key].append(val) + else: + quals[key].extend(val.split(",")) + + # If there was inconsistent quoting, we fall back to "not quoted" so + # as to avoid incorrectly stripping off first and last quotes. + if infer_dialect and len(set(quoted_values)) > 1: + # Prior behavior was to use whatever the first value used + dialect["quoted GFF2 values"] = quoted_values[0] + + # Though there could be an argument for considering quotes in mixed + # cases to be part of the string, though technically they should be + # %-encoded if so. + # dialect["quoted GFF2 values"] = False + + # Handle unquoting of %-encoded values + if not constants.ignore_url_escape_characters and dialect["fmt"] == "gff3": + for key, vals in quals.items(): + unquoted = [parse.unquote(v) for v in vals] + quals[key] = unquoted + + # Now that we're not supporting old Python versions we can rely on dict + # insertion order + if infer_dialect: + dialect["order"] = list(quals.keys()) + + return quals, dialect + + def _reconstruct(keyvals, dialect, keep_order=False, sort_attribute_values=False): """ Reconstructs the original attributes string according to the dialect. @@ -156,6 +404,20 @@ def sort_key(x): part = key else: if dialect["fmt"] == "gtf": + # By convention, GTF attributes with no value are reconstructed + # with an empty string. E.g.: + # 'gene_id "gene1"; is_gene;' + # + # becomes + # + # { + # "gene_id": "gene1", + # "is_gene": "" + # } + # + # and is printed as: + # + # 'gene_id "gene1"; is_gene "";' part = dialect["keyval separator"].join([key, '""']) else: part = key @@ -169,207 +431,3 @@ def sort_key(x): parts_str += ";" return parts_str - - -# TODO: -# Cythonize -- profiling shows that the bulk of the time is spent on this -# function... 
-def _split_keyvals(keyval_str, dialect=None): - """ - Given the string attributes field of a GFF-like line, split it into an - attributes dictionary and a "dialect" dictionary which contains information - needed to reconstruct the original string. - - Lots of logic here to handle all the corner cases. - - If `dialect` is None, then do all the logic to infer a dialect from this - attribute string. - - Otherwise, use the provided dialect (and return it at the end). - """ - - def _unquote_quals(quals, dialect): - """ - Handles the unquoting (decoding) of percent-encoded characters. - - See notes on encoding/decoding above. - """ - if not constants.ignore_url_escape_characters and dialect["fmt"] == "gff3": - for key, vals in quals.items(): - unquoted = [urllib.parse.unquote(v) for v in vals] - quals[key] = unquoted - return quals - - infer_dialect = False - if dialect is None: - # Make a copy of default dialect so it can be modified as needed - dialect = copy.copy(constants.dialect) - infer_dialect = True - from gffutils import feature - - quals = feature.dict_class() - if not keyval_str: - return quals, dialect - - # If a dialect was provided, then use that directly. 
- if not infer_dialect: - if dialect["trailing semicolon"]: - keyval_str = keyval_str.rstrip(";") - - parts = keyval_str.split(dialect["field separator"]) - - kvsep = dialect["keyval separator"] - if dialect["leading semicolon"]: - pieces = [] - for p in parts: - if p and p[0] == ";": - p = p[1:] - pieces.append(p.strip().split(kvsep)) - key_vals = [(p[0], " ".join(p[1:])) for p in pieces] - - if dialect["fmt"] == "gff3": - key_vals = [p.split(kvsep) for p in parts] - else: - leadingsemicolon = dialect["leading semicolon"] - pieces = [] - for i, p in enumerate(parts): - if i == 0 and leadingsemicolon: - p = p[1:] - pieces.append(p.strip().split(kvsep)) - key_vals = [(p[0], " ".join(p[1:])) for p in pieces] - - quoted = dialect["quoted GFF2 values"] - for item in key_vals: - # Easy if it follows spec - if len(item) == 2: - key, val = item - - # Only key provided? - elif len(item) == 1: - key = item[0] - val = "" - - else: - key = item[0] - val = dialect["keyval separator"].join(item[1:]) - - try: - quals[key] - except KeyError: - quals[key] = [] - - if quoted: - if len(val) > 0 and val[0] == '"' and val[-1] == '"': - val = val[1:-1] - - if val: - # TODO: if there are extra commas for a value, just use empty - # strings - # quals[key].extend([v for v in val.split(',') if v]) - vals = val.split(",") - quals[key].extend(vals) - - quals = _unquote_quals(quals, dialect) - return quals, dialect - - # If we got here, then we need to infer the dialect.... - # - # Reset the order to an empty list so that it will only be populated with - # keys that are found in the file. - dialect["order"] = [] - - # ensembl GTF has trailing semicolon - if keyval_str[-1] == ";": - keyval_str = keyval_str[:-1] - dialect["trailing semicolon"] = True - - # GFF2/GTF has a semicolon with at least one space after it. - # Spaces can be on both sides (e.g. wormbase) - # GFF3 works with no spaces. - # So split on the first one we can recognize... 
- for sep in (" ; ", "; ", ";"): - parts = keyval_str.split(sep) - if len(parts) > 1: - dialect["field separator"] = sep - break - - # Is it GFF3? They have key-vals separated by "=" - if gff3_kw_pat.match(parts[0]): - key_vals = [p.split("=") for p in parts] - dialect["fmt"] = "gff3" - dialect["keyval separator"] = "=" - - # Otherwise, key-vals separated by space. Key is first item. - else: - dialect["keyval separator"] = " " - pieces = [] - for p in parts: - # Fix misplaced semicolons in keys in some GFF2 files - if p and p[0] == ";": - p = p[1:] - dialect["leading semicolon"] = True - pieces.append(p.strip().split(" ")) - key_vals = [(p[0], " ".join(p[1:])) for p in pieces] - - for item in key_vals: - - # Easy if it follows spec - if len(item) == 2: - key, val = item - - # Only key provided? - elif len(item) == 1: - key = item[0] - val = "" - - # Pathological cases where values of a key have within them the key-val - # separator, e.g., - # Alias=SGN-M1347;ID=T0028;Note=marker name(s): T0028 SGN-M1347 |identity=99.58|escore=2e-126 - # ^ ^ - else: - key = item[0] - val = dialect["keyval separator"].join(item[1:]) - - # Is the key already in there? - if key in quals: - dialect["repeated keys"] = True - else: - quals[key] = [] - - # Remove quotes in GFF2 - if len(val) > 0 and val[0] == '"' and val[-1] == '"': - val = val[1:-1] - dialect["quoted GFF2 values"] = True - if val: - - # TODO: if there are extra commas for a value, just use empty - # strings - # quals[key].extend([v for v in val.split(',') if v]) - - # See issue #198, where commas within a description can incorrectly - # cause the dialect inference to conclude that there are not - # repeated keys. - # - # More description in PR #208. 
- if dialect["repeated keys"]: - quals[key].append(val) - else: - vals = val.split(",") - - # If anything starts with a leading space, then we infer that - # it was part of a description or some other typographical - # interpretation, not a character to split multiple vals on -- - # and append the original val rather than the split vals. - if any([i[0] == " " for i in vals if i]): - quals[key].append(val) - else: - quals[key].extend(vals) - - # keep track of the order of keys - dialect["order"].append(key) - - if (dialect["keyval separator"] == " ") and (dialect["quoted GFF2 values"]): - dialect["fmt"] = "gtf" - - quals = _unquote_quals(quals, dialect) - return quals, dialect diff --git a/gffutils/pybedtools_integration.py b/gffutils/pybedtools_integration.py index 5c5c2b90..e01e4911 100644 --- a/gffutils/pybedtools_integration.py +++ b/gffutils/pybedtools_integration.py @@ -113,7 +113,7 @@ def tsses( if they overlap (as in the first two): - >>> print(tsses(db)) # doctest: +NORMALIZE_WHITESPACE + >>> print(gffutils.pybedtools_integration.tsses(db)) # doctest: +NORMALIZE_WHITESPACE chr2L gffutils_derived transcript_TSS 7529 7529 . + . gene_id "FBgn0031208"; transcript_id "FBtr0300689"; chr2L gffutils_derived transcript_TSS 7529 7529 . + . gene_id "FBgn0031208"; transcript_id "FBtr0300690"; chr2L gffutils_derived transcript_TSS 11000 11000 . - . gene_id "Fk_gene_1"; transcript_id "transcript_Fk_gene_1"; @@ -124,7 +124,7 @@ def tsses( Default merging, showing the first two TSSes merged and reported as a single unique TSS for the gene. Note the conversion to BED: - >>> x = tsses(db, merge_overlapping=True) + >>> x = gffutils.pybedtools_integration.tsses(db, merge_overlapping=True) >>> print(x) # doctest: +NORMALIZE_WHITESPACE chr2L 7528 7529 FBgn0031208 . + chr2L 10999 11000 Fk_gene_1 . - @@ -135,7 +135,7 @@ def tsses( be easier to parse than the original GTF or GFF file. With no merging specified, we must add `as_bed6=True` to see the names in BED format. 
- >>> x = tsses(db, attrs=['gene_id', 'transcript_id'], as_bed6=True) + >>> x = gffutils.pybedtools_integration.tsses(db, attrs=['gene_id', 'transcript_id'], as_bed6=True) >>> print(x) # doctest: +NORMALIZE_WHITESPACE chr2L 7528 7529 FBgn0031208:FBtr0300689 . + chr2L 7528 7529 FBgn0031208:FBtr0300690 . + @@ -145,7 +145,7 @@ def tsses( Use a 3kb merge distance so the last 2 features are merged together: - >>> x = tsses(db, merge_overlapping=True, merge_kwargs=dict(d=3000)) + >>> x = gffutils.pybedtools_integration.tsses(db, merge_overlapping=True, merge_kwargs=dict(d=3000)) >>> print(x) # doctest: +NORMALIZE_WHITESPACE chr2L 7528 7529 FBgn0031208 . + chr2L 10999 12500 Fk_gene_1,Fk_gene_2 . - @@ -154,7 +154,7 @@ def tsses( The set of unique TSSes for each gene, +1kb upstream and 500bp downstream: - >>> x = tsses(db, merge_overlapping=True) + >>> x = gffutils.pybedtools_integration.tsses(db, merge_overlapping=True) >>> x = x.slop(l=1000, r=500, s=True, genome='dm3') >>> print(x) # doctest: +NORMALIZE_WHITESPACE chr2L 6528 8029 FBgn0031208 . + diff --git a/gffutils/scripts/gffutils-cli b/gffutils/scripts/gffutils-cli index 051b76d5..70a882b5 100755 --- a/gffutils/scripts/gffutils-cli +++ b/gffutils/scripts/gffutils-cli @@ -76,7 +76,7 @@ def fetch(db, ids): (like grep -v)''') @arg('--exclude-self', help='''Use this to suppress reporting the IDs you've provided.''') -def children(db, ids, limit=None, exclude=None, exclude_self=False): +def children(db, ids, *, limit=None, exclude=None, exclude_self=False): """ Fetch children from the database according to ID. """ @@ -110,7 +110,7 @@ def children(db, ids, limit=None, exclude=None, exclude_self=False): (like grep -v)''') @arg('--exclude-self', help='''Use this to suppress reporting the IDs you've provided.''') -def parents(db, ids, limit=None, exclude=None, exclude_self=False): +def parents(db, ids, *, limit=None, exclude=None, exclude_self=False): """ Fetch parents from the database according to ID. 
""" @@ -167,7 +167,7 @@ def common(db): @arg('--disable-infer-transcripts', help='''Disable inferring of transcript extents for GTF files. Use this if your GTF file already has "transcript" featuretypes''') -def create(filename, output=None, force=False, quiet=False, merge="merge", +def create(filename, *, output=None, force=False, quiet=False, merge="merge", disable_infer_genes=False, disable_infer_transcripts=False): """ Create a database. @@ -198,7 +198,7 @@ def clean(filename): @arg('--in-place', help='''Sanitize file in-place: overwrites current file with sanitized version.''') -def sanitize(filename, +def sanitize(filename, *, in_memory=True, in_place=False): """ @@ -225,7 +225,7 @@ def sanitize(filename, @arg('filename', help='''GFF or GTF file to use.''') @arg('--in-place', help='''Remove duplicates in place (overwrite current file.)''') -def rmdups(filename, in_place=False): +def rmdups(filename, *, in_place=False): """ Remove duplicates from a GFF file. """ @@ -278,7 +278,7 @@ def convert(filename): @arg('--featuretype', help='''Restrict to a particular featuretype. This can be faster than doing a grep on the output, since it restricts the search space in the database''') -def search(db, text, featuretype=None): +def search(db, text, *, featuretype=None): """ Search the attributes. 
""" diff --git a/gffutils/test/attr_test_cases.py b/gffutils/test/attr_test_cases.py index b08afbe3..9ee1de96 100644 --- a/gffutils/test/attr_test_cases.py +++ b/gffutils/test/attr_test_cases.py @@ -15,36 +15,130 @@ """ + attrs = [ + dict( + str="ID=001;Name=gene1", + attrs={ + "ID": ["001"], + "Name": ["gene1"], + }, + ok=None, + dialect_mods={"order": ["ID", "Name"]}, + ), + dict( + str="ID=001;Name=gene1;", + attrs={ + "ID": ["001"], + "Name": ["gene1"], + }, + ok=None, + dialect_mods={"trailing semicolon": True, "order": ["ID", "Name"]}, + ), + dict( + str="ID=001; Name=gene1;", + attrs={ + "ID": ["001"], + "Name": ["gene1"], + }, + ok=None, + dialect_mods={ + "trailing semicolon": True, + "field separator": "; ", + "order": ["ID", "Name"], + }, + ), + dict( + str='ID="001"', + attrs={"ID": ["001"]}, + ok=None, + dialect_mods={ + "quoted GFF2 values": True, + "order": ["ID"], + }, + ), + dict( + str='ID="001"; Name="gene1"; types="a,b,c"', + attrs={"ID": ["001"], "Name": ["gene1"], "types": ["a", "b", "c"]}, + ok=None, + dialect_mods={ + "quoted GFF2 values": True, + "field separator": "; ", + "order": ["ID", "Name", "types"], + }, + ), + dict( + str='ID="001"; Name="gene1"; types="a"; types="b"; types="c"', + attrs={"ID": ["001"], "Name": ["gene1"], "types": ["a", "b", "c"]}, + ok=None, + dialect_mods={ + "quoted GFF2 values": True, + "field separator": "; ", + "repeated keys": True, + "order": ["ID", "Name", "types"], + }, + ), + dict( + str="Name=gene1;ID=001", + attrs={"Name": ["gene1"], "ID": ["001"]}, + ok=None, + dialect_mods={"order": ["Name", "ID"]}, + ), + dict( + str='gene_id "001";gene_name "gene1"', + attrs={"gene_id": ["001"], "gene_name": ["gene1"]}, + ok=None, + dialect_mods={ + "fmt": "gtf", + "quoted GFF2 values": True, + "keyval separator": " ", + "order": ["gene_id", "gene_name"], + }, + ), # c_elegans_WS199_shortened_gff.txt - ( - "count=1;gene=amx-2;sequence=SAGE:ggcagagtcttttggca;" "transcript=B0019.1", - { + dict( + 
str="count=1;gene=amx-2;sequence=SAGE:ggcagagtcttttggca;transcript=B0019.1", + attrs={ "count": ["1"], "gene": ["amx-2"], "sequence": ["SAGE:ggcagagtcttttggca"], "transcript": ["B0019.1"], }, - None, + ok=None, + dialect_mods={"order": ["count", "gene", "sequence", "transcript"]}, ), # ensembl_gtf.txt - ( - 'gene_id "Y74C9A.6"; transcript_id "Y74C9A.6"; exon_number "1"; ' - 'gene_name "Y74C9A.6"; transcript_name "NR_001477.2";', - { + dict( + str=( + 'gene_id "Y74C9A.6"; transcript_id "Y74C9A.6"; exon_number "1"; gene_name "Y74C9A.6"; transcript_name "NR_001477.2";' + ), + attrs={ "gene_id": ["Y74C9A.6"], "transcript_id": ["Y74C9A.6"], "exon_number": ["1"], "gene_name": ["Y74C9A.6"], "transcript_name": ["NR_001477.2"], }, - None, + ok=None, + dialect_mods={ + "trailing semicolon": True, + "fmt": "gtf", + "keyval separator": " ", + "field separator": "; ", + "quoted GFF2 values": True, + "order": [ + "gene_id", + "transcript_id", + "exon_number", + "gene_name", + "transcript_name", + ], + }, ), # F3-unique-3.v2.gff - ( - "g=A3233312322232122211;i=1;p=1.000;q=23,12,18,17,10,24,19,14,27,9,23" - ",9,16,20,11,7,8,4,4,14;u=0,0,0,1", - { + dict( + str="g=A3233312322232122211;i=1;p=1.000;q=23,12,18,17,10,24,19,14,27,9,23,9,16,20,11,7,8,4,4,14;u=0,0,0,1", + attrs={ "g": ["A3233312322232122211"], "i": ["1"], "p": ["1.000"], @@ -72,20 +166,27 @@ ], "u": ["0", "0", "0", "1"], }, - None, + ok=None, + dialect_mods={"order": ["g", "i", "p", "q", "u"]}, ), # glimmer_nokeyval.gff3 - ( - "ID=GL0000006;Name=GL0000006;Lack 3'-end;", - {"ID": ["GL0000006"], "Name": ["GL0000006"], "Lack 3'-end": []}, - None, + dict( + str="ID=GL0000006;Name=GL0000006;Lack 3'-end;", + attrs={"ID": ["GL0000006"], "Name": ["GL0000006"], "Lack 3'-end": []}, + ok=None, + dialect_mods={ + "order": ["ID", "Name", "Lack 3'-end"], + "trailing semicolon": True, + }, ), # hybrid1.gff3 - ( - "ID=A00469;Dbxref=AFFX-U133:205840_x_at,Locuslink:2688,Genbank-mRNA:" - 
"A00469,Swissprot:P01241,PFAM:PF00103,AFFX-U95:1332_f_at,Swissprot:" - "SOMA_HUMAN;Note=growth%20hormone%201;Alias=GH1", - { + dict( + str=( + "ID=A00469;Dbxref=AFFX-U133:205840_x_at,Locuslink:2688,Genbank-mRNA:" + "A00469,Swissprot:P01241,PFAM:PF00103,AFFX-U95:1332_f_at,Swissprot:" + "SOMA_HUMAN;Note=growth%20hormone%201;Alias=GH1" + ), + attrs={ "ID": ["A00469"], "Dbxref": [ "AFFX-U133:205840_x_at", @@ -99,9 +200,10 @@ "Note": ["growth hormone 1"], "Alias": ["GH1"], }, - "ID=A00469;Dbxref=AFFX-U133:205840_x_at,Locuslink:2688,Genbank-mRNA:" + ok="ID=A00469;Dbxref=AFFX-U133:205840_x_at,Locuslink:2688,Genbank-mRNA:" "A00469,Swissprot:P01241,PFAM:PF00103,AFFX-U95:1332_f_at,Swissprot:" "SOMA_HUMAN;Note=growth hormone 1;Alias=GH1", + dialect_mods={"order": ["ID", "Dbxref", "Note", "Alias"]}, ), # jgi_gff2.txt # @@ -109,19 +211,30 @@ # quoted but string values are. Only way to make this be invariant is to # keep track of the "flavor" of each attribute; not sure it's worth the # effort / processing time. - ( - 'name "fgenesh1_pg.C_chr_1000007"; transcriptId 873', - {"name": ["fgenesh1_pg.C_chr_1000007"], "transcriptId": ["873"]}, - 'name "fgenesh1_pg.C_chr_1000007"; transcriptId "873"', + dict( + str='name "fgenesh1_pg.C_chr_1000007"; transcriptId 873', + attrs={"name": ["fgenesh1_pg.C_chr_1000007"], "transcriptId": ["873"]}, + ok='name "fgenesh1_pg.C_chr_1000007"; transcriptId "873"', + dialect_mods={ + "order": ["name", "transcriptId"], + "quoted GFF2 values": True, + "keyval separator": " ", + "fmt": "gtf", + "field separator": "; ", + }, ), # mouse_extra_comma.gff3: extra comma line # # Note extra empty string in the dictionary's "Parent" field. 
# - ( - "Name=CDS:NC_000083.5:LOC100040603;Parent=XM_001475631.1,", - {"Name": ["CDS:NC_000083.5:LOC100040603"], "Parent": ["XM_001475631.1", ""]}, - None, + dict( + str="Name=CDS:NC_000083.5:LOC100040603;Parent=XM_001475631.1,", + attrs={ + "Name": ["CDS:NC_000083.5:LOC100040603"], + "Parent": ["XM_001475631.1", ""], + }, + ok=None, + dialect_mods={"order": ["Name", "Parent"]}, ), # mouse_extra_comma.gff3 # @@ -135,20 +248,23 @@ # # In both cases, the dictionary entry is simply an empty list; it's just in # the reconstruction where things get tricky. - ( - "ID=;Parent=XM_001475631.1", - {"ID": [], "Parent": ["XM_001475631.1"]}, - "ID;Parent=XM_001475631.1", + dict( + str="ID=;Parent=XM_001475631.1", + attrs={"ID": [], "Parent": ["XM_001475631.1"]}, + ok="ID;Parent=XM_001475631.1", + dialect_mods={"order": ["ID", "Parent"]}, ), # ncbi_gff3.txt - ( - "ID=NC_008596.1:speB:unknown_transcript_1;Parent=NC_008596.1:speB;" - "locus_tag=MSMEG_1072;EC_number=3.5.3.11;note=identified%20by%20mat" - "ch%20to%20protein%20family%20HMM%20PF00491%3B%20match%20to%20prote" - "in%20family%20HMM%20TIGR01230;transl_table=11;product=agmatinase;p" - "rotein_id=YP_885468.1;db_xref=GI:118469242;db_xref=GeneID:4535378;" - "exon_number=1", - { + dict( + str=( + "ID=NC_008596.1:speB:unknown_transcript_1;Parent=NC_008596.1:speB;" + "locus_tag=MSMEG_1072;EC_number=3.5.3.11;note=identified%20by%20mat" + "ch%20to%20protein%20family%20HMM%20PF00491%3B%20match%20to%20prote" + "in%20family%20HMM%20TIGR01230;transl_table=11;product=agmatinase;p" + "rotein_id=YP_885468.1;db_xref=GI:118469242;db_xref=GeneID:4535378;" + "exon_number=1" + ), + attrs={ "ID": ["NC_008596.1:speB:unknown_transcript_1"], "Parent": ["NC_008596.1:speB"], "locus_tag": ["MSMEG_1072"], @@ -164,18 +280,39 @@ "db_xref": ["GI:118469242", "GeneID:4535378"], "exon_number": ["1"], }, - "ID=NC_008596.1:speB:unknown_transcript_1;Parent=NC_008596.1:speB;" + ok="ID=NC_008596.1:speB:unknown_transcript_1;Parent=NC_008596.1:speB;" 
"locus_tag=MSMEG_1072;EC_number=3.5.3.11;note=identified by mat" "ch to protein family HMM PF00491%3B match to prote" "in family HMM TIGR01230;transl_table=11;product=agmatinase;p" "rotein_id=YP_885468.1;db_xref=GI:118469242;db_xref=GeneID:4535378;" "exon_number=1", + dialect_mods={ + "order": [ + "ID", + "Parent", + "locus_tag", + "EC_number", + "note", + "transl_table", + "product", + "protein_id", + "db_xref", + "exon_number", + ], + "repeated keys": True, + }, ), # wormbase_gff2_alt.txt # - ( - 'CDS "cr01.sctg102.wum.2.1"', - {"CDS": ["cr01.sctg102.wum.2.1"]}, - None, + dict( + str='CDS "cr01.sctg102.wum.2.1"', + attrs={"CDS": ["cr01.sctg102.wum.2.1"]}, + ok=None, + dialect_mods={ + "order": ["CDS"], + "quoted GFF2 values": True, + "keyval separator": " ", + "fmt": "gtf", + }, ), ] diff --git a/gffutils/test/data/ensembl_gtf.txt b/gffutils/test/data/ensembl_gtf.txt index f54f8fdd..88de6d51 100644 --- a/gffutils/test/data/ensembl_gtf.txt +++ b/gffutils/test/data/ensembl_gtf.txt @@ -1,33 +1,33 @@ -I snoRNA exon 3747 3909 . - . gene_id "Y74C9A.6"; transcript_id "Y74C9A.6"; exon_number "1"; gene_name "Y74C9A.6"; transcript_name "NR_001477.2"; -I protein_coding exon 12764812 12764949 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "1"; gene_name "amx-2"; transcript_name "B0019.1"; -I protein_coding CDS 12764812 12764937 . - 0 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "1"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; -I protein_coding start_codon 12764935 12764937 . - 0 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "1"; gene_name "amx-2"; transcript_name "B0019.1"; -I protein_coding exon 12764291 12764471 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "2"; gene_name "amx-2"; transcript_name "B0019.1"; -I protein_coding CDS 12764291 12764471 . 
- 0 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "2"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; -I protein_coding exon 12763979 12764102 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "3"; gene_name "amx-2"; transcript_name "B0019.1"; -I protein_coding CDS 12763979 12764102 . - 2 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "3"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; -I protein_coding exon 12763729 12763882 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "4"; gene_name "amx-2"; transcript_name "B0019.1"; -I protein_coding CDS 12763729 12763882 . - 1 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "4"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; -I protein_coding exon 12763448 12763655 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "5"; gene_name "amx-2"; transcript_name "B0019.1"; -I protein_coding CDS 12763448 12763655 . - 0 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "5"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; -I protein_coding exon 12763112 12763249 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "6"; gene_name "amx-2"; transcript_name "B0019.1"; -I protein_coding CDS 12763112 12763249 . - 2 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "6"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; -I protein_coding exon 12762648 12762806 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "7"; gene_name "amx-2"; transcript_name "B0019.1"; -I protein_coding CDS 12762648 12762806 . - 2 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "7"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; -I protein_coding exon 12762127 12762268 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "8"; gene_name "amx-2"; transcript_name "B0019.1"; -I protein_coding CDS 12762127 12762268 . 
- 2 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "8"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; -I protein_coding exon 12761799 12761953 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "9"; gene_name "amx-2"; transcript_name "B0019.1"; -I protein_coding CDS 12761799 12761953 . - 1 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "9"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; -I protein_coding exon 12761172 12761516 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "10"; gene_name "amx-2"; transcript_name "B0019.1"; -I protein_coding CDS 12761172 12761516 . - 2 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "10"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; -I protein_coding exon 12760834 12760904 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "11"; gene_name "amx-2"; transcript_name "B0019.1"; -I protein_coding CDS 12760834 12760904 . - 2 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "11"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; -I protein_coding exon 12760365 12760494 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "12"; gene_name "amx-2"; transcript_name "B0019.1"; -I protein_coding CDS 12760365 12760494 . - 0 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "12"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; -I protein_coding exon 12760227 12760319 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "13"; gene_name "amx-2"; transcript_name "B0019.1"; -I protein_coding CDS 12760227 12760319 . - 2 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "13"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; -I protein_coding exon 12759949 12760013 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "14"; gene_name "amx-2"; transcript_name "B0019.1"; -I protein_coding CDS 12759949 12760013 . 
- 2 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "14"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; -I protein_coding exon 12759579 12759828 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "15"; gene_name "amx-2"; transcript_name "B0019.1"; -I protein_coding CDS 12759748 12759828 . - 0 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "15"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; -I protein_coding stop_codon 12759745 12759747 . - 0 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "15"; gene_name "amx-2"; transcript_name "B0019.1"; +I snoRNA exon 3747 3909 . - . gene_id "Y74C9A.6"; transcript_id "Y74C9A.6"; exon_number "1"; gene_name "Y74C9A.6"; transcript_name "NR_001477.2"; +I protein_coding exon 12764812 12764949 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "1"; gene_name "amx-2"; transcript_name "B0019.1"; +I protein_coding CDS 12764812 12764937 . - 0 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "1"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; +I protein_coding start_codon 12764935 12764937 . - 0 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "1"; gene_name "amx-2"; transcript_name "B0019.1"; +I protein_coding exon 12764291 12764471 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "2"; gene_name "amx-2"; transcript_name "B0019.1"; +I protein_coding CDS 12764291 12764471 . - 0 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "2"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; +I protein_coding exon 12763979 12764102 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "3"; gene_name "amx-2"; transcript_name "B0019.1"; +I protein_coding CDS 12763979 12764102 . - 2 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "3"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; +I protein_coding exon 12763729 12763882 . - . 
gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "4"; gene_name "amx-2"; transcript_name "B0019.1"; +I protein_coding CDS 12763729 12763882 . - 1 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "4"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; +I protein_coding exon 12763448 12763655 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "5"; gene_name "amx-2"; transcript_name "B0019.1"; +I protein_coding CDS 12763448 12763655 . - 0 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "5"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; +I protein_coding exon 12763112 12763249 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "6"; gene_name "amx-2"; transcript_name "B0019.1"; +I protein_coding CDS 12763112 12763249 . - 2 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "6"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; +I protein_coding exon 12762648 12762806 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "7"; gene_name "amx-2"; transcript_name "B0019.1"; +I protein_coding CDS 12762648 12762806 . - 2 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "7"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; +I protein_coding exon 12762127 12762268 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "8"; gene_name "amx-2"; transcript_name "B0019.1"; +I protein_coding CDS 12762127 12762268 . - 2 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "8"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; +I protein_coding exon 12761799 12761953 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "9"; gene_name "amx-2"; transcript_name "B0019.1"; +I protein_coding CDS 12761799 12761953 . - 1 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "9"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; +I protein_coding exon 12761172 12761516 . - . 
gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "10"; gene_name "amx-2"; transcript_name "B0019.1"; +I protein_coding CDS 12761172 12761516 . - 2 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "10"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; +I protein_coding exon 12760834 12760904 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "11"; gene_name "amx-2"; transcript_name "B0019.1"; +I protein_coding CDS 12760834 12760904 . - 2 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "11"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; +I protein_coding exon 12760365 12760494 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "12"; gene_name "amx-2"; transcript_name "B0019.1"; +I protein_coding CDS 12760365 12760494 . - 0 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "12"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; +I protein_coding exon 12760227 12760319 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "13"; gene_name "amx-2"; transcript_name "B0019.1"; +I protein_coding CDS 12760227 12760319 . - 2 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "13"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; +I protein_coding exon 12759949 12760013 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "14"; gene_name "amx-2"; transcript_name "B0019.1"; +I protein_coding CDS 12759949 12760013 . - 2 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "14"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; +I protein_coding exon 12759579 12759828 . - . gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "15"; gene_name "amx-2"; transcript_name "B0019.1"; +I protein_coding CDS 12759748 12759828 . - 0 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "15"; gene_name "amx-2"; transcript_name "B0019.1"; protein_id "B0019.1"; +I protein_coding stop_codon 12759745 12759747 . 
- 0 gene_id "B0019.1"; transcript_id "B0019.1"; exon_number "15"; gene_name "amx-2"; transcript_name "B0019.1"; diff --git a/gffutils/test/parser_test.py b/gffutils/test/parser_test.py index ed578a7e..68b30481 100644 --- a/gffutils/test/parser_test.py +++ b/gffutils/test/parser_test.py @@ -53,17 +53,32 @@ def test_attrs_OK(item): (see attr_test_cases.py for details); `acceptable_reconstruction` handles those. """ - attr_str, attr_dict, acceptable_reconstruction = item - result, dialect = parser._split_keyvals(attr_str) + attr_str = item["str"] + attr_dict = item["attrs"] + acceptable_reconstruction = item["ok"] + dialect_mods = item["dialect_mods"] + + result, inferred_dialect = parser._split_keyvals(attr_str) result = dict(result) assert result == attr_dict, result - reconstructed = parser._reconstruct(result, dialect, keep_order=True) + reconstructed = parser._reconstruct(result, inferred_dialect, keep_order=True) if acceptable_reconstruction: assert reconstructed == acceptable_reconstruction, reconstructed else: assert reconstructed == attr_str, reconstructed + # Get the default dialect for comparison, and update it with any diffs + # indicated by the test case + default_dialect = constants.dialect.copy() + default_dialect.update(dialect_mods) + + print(inferred_dialect) + print(dialect_mods) + print(attr_str) + assert default_dialect == inferred_dialect + + def parser_smoke_test(): """ @@ -75,7 +90,7 @@ def parser_smoke_test(): parser.logger.setLevel(logging.CRITICAL) for filename in TEST_FILENAMES: p = iterators._FileIterator(filename) - for i in p: + for _ in p: continue @@ -93,7 +108,8 @@ def test_empty_recontruct(): def test_empty_split_keyvals(): attrs, dialect = parser._split_keyvals(keyval_str=None) assert attrs == feature.dict_class() - assert dialect == constants.dialect + # assert dialect == constants.dialect + assert dialect is None def test_repeated_keys_conflict(): @@ -115,7 +131,7 @@ def test_parser_from_string(): # # _StringIterator has been 
removed and is instead handled by DataIterator # creating a temp file and returning a _FileIterator. - return True + pass def test_valid_line_count(): diff --git a/gffutils/test/test_1.py b/gffutils/test/test_1.py index 2b88cc04..1780d996 100644 --- a/gffutils/test/test_1.py +++ b/gffutils/test/test_1.py @@ -14,10 +14,7 @@ import tempfile import http.server as SimpleHTTPServer -if sys.version_info.major == 3: - import socketserver as SocketServer -else: - import SocketServer +import socketserver as SocketServer import multiprocessing import json @@ -482,58 +479,51 @@ def test_sanitize_gff(): print("Sanitized GFF successfully.") -def test_region(): - +@pytest.mark.parametrize("kwargs,expected", [ + # previously failed, see issue #45 + (dict(seqid="chr2L", start=1, end=2e9, completely_within=True), 27), + (dict(region="chr2L", start=0), ValueError), + (dict(region="chr2L", end=0), ValueError), + (dict(region="chr2L", seqid=0), ValueError), + # these coords should catch everything + (dict(region="chr2L:7529-12500"), 27), + # stranded versions: + (dict(region="chr2L:7529-12500", strand="."), 0), + (dict(region="chr2L:7529-12500", strand="+"), 21), + (dict(region="chr2L:7529-12500", strand="-"), 6), + # different ways of selecting only that last exon in the last gene: + (dict(seqid="chr2L", start=11500, featuretype="exon"), 1), + (dict(seqid="chr2L", start=9500, featuretype="exon", strand="+"), 1), + # alternative method + (dict(seqid="chr2L", start=7529, end=12500), 27), + # since default completely_within=False, this catches anything that + # falls after 7680. So it only excludes the 5'UTR, which ends at 7679. 
+ (dict(seqid="chr2L", start=7680), 26), + # but completely_within=True will exclude the gene and mRNAs, first + # exon and the 5'UTR + (dict(seqid="chr2L", start=7680, completely_within=True), 22), + # similarly, this will *exclude* anything before 7680 + (dict(seqid="chr2L", end=7680), 5), + # and also similarly, this will only get us the 5'UTR which is the only + # feature falling completely before 7680 + (dict(seqid="chr2L", end=7680, completely_within=True), 1), + # and there's only features from chr2L in this file, so this catches + # everything too + (dict(region="chr2L"), 27), + # using seqid should work similarly to `region` with only chromosome + (dict(seqid="chr2L"), 27), + # nonexistent + (dict(region="nowhere"), 0), +]) +def test_region(kwargs, expected): db_fname = gffutils.example_filename("FBgn0031208.gff") db = gffutils.create_db(db_fname, ":memory:", keep_order=True) - def _check(item): - kwargs, expected = item - try: - obs = list(db.region(**kwargs)) - assert len(obs) == expected, "expected %s got %s" % (expected, len(obs)) - except expected: - pass - - regions = [ - # previously failed, see issue #45 - (dict(seqid="chr2L", start=1, end=2e9, completely_within=True), 27), - (dict(region="chr2L", start=0), ValueError), - (dict(region="chr2L", end=0), ValueError), - (dict(region="chr2L", seqid=0), ValueError), - # these coords should catch everything - (dict(region="chr2L:7529-12500"), 27), - # stranded versions: - (dict(region="chr2L:7529-12500", strand="."), 0), - (dict(region="chr2L:7529-12500", strand="+"), 21), - (dict(region="chr2L:7529-12500", strand="-"), 6), - # different ways of selecting only that last exon in the last gene: - (dict(seqid="chr2L", start=11500, featuretype="exon"), 1), - (dict(seqid="chr2L", start=9500, featuretype="exon", strand="+"), 1), - # alternative method - (dict(seqid="chr2L", start=7529, end=12500), 27), - # since default completely_within=False, this catches anything that - # falls after 7680. 
So it only excludes the 5'UTR, which ends at 7679. - (dict(seqid="chr2L", start=7680), 26), - # but completely_within=True will exclude the gene and mRNAs, first - # exon and the 5'UTR - (dict(seqid="chr2L", start=7680, completely_within=True), 22), - # similarly, this will *exclude* anything before 7680 - (dict(seqid="chr2L", end=7680), 5), - # and also similarly, this will only get us the 5'UTR which is the only - # feature falling completely before 7680 - (dict(seqid="chr2L", end=7680, completely_within=True), 1), - # and there's only features from chr2L in this file, so this catches - # everything too - (dict(region="chr2L"), 27), - # using seqid should work similarly to `region` with only chromosome - (dict(seqid="chr2L"), 27), - # nonexistent - (dict(region="nowhere"), 0), - ] - - for item in regions: - yield _check, item + try: + obs = list(db.region(**kwargs)) + assert len(obs) == expected, "expected %s got %s" % (expected, len(obs)) + except expected: + pass def test_nonascii(): diff --git a/gffutils/test/test_biopython_integration.py b/gffutils/test/test_biopython_integration.py index 58c5866a..e9f8e81d 100644 --- a/gffutils/test/test_biopython_integration.py +++ b/gffutils/test/test_biopython_integration.py @@ -1,6 +1,10 @@ from gffutils import example_filename import gffutils import gffutils.biopython_integration as bp +import pytest + +# Skip tests entirely if BioPython not available +pytest.importorskip('Bio') def test_roundtrip(): diff --git a/gffutils/test/test_issues.py b/gffutils/test/test_issues.py index 79996ba5..9e4686a0 100644 --- a/gffutils/test/test_issues.py +++ b/gffutils/test/test_issues.py @@ -6,9 +6,12 @@ import os import tempfile import difflib +from pathlib import Path from textwrap import dedent import gffutils from gffutils import feature +from gffutils import helpers +from gffutils.gffwriter import GFFWriter from gffutils import merge_criteria as mc import pytest @@ -200,7 +203,10 @@ def test_pr_144(): assert f.attributes["a"] == 
[""] assert str(f) == ". . . . . . . . a" g = gffutils.feature.feature_from_line(str(f)) - assert g == f + g.dialect["fmt"] = "gff3" + print(g.attributes) + print(g.dialect) + assert str(g) == str(f) def test_pr_172(): @@ -452,21 +458,43 @@ def test_issue_198(): assert f.attributes["description"] == ["WASP family homolog 7, pseudogene"] - # If we remove one of the db_xref keys, then the parser sees the comma and - # figures it's a multivalue key. + # If we remove one of the db_xref keys, then previously the parser saw the + # comma and figured it was a multivalue key, and split it. Now, it's + # correctly identified as a single-value key. + # + # Note that we still have gene_synonym as a repeated key. line = 'NC_000001.11 BestRefSeq gene 14362 29370 . - . gene_id "WASH7P"; transcript_id ""; db_xref "GeneID:653635"; description "WASP family homolog 7, pseudogene"; gbkey "Gene"; gene "WASH7P"; gene_biotype "transcribed_pseudogene"; gene_synonym "FAM39F"; gene_synonym "WASH5P"; pseudo "true";' f = feature.feature_from_line(line) + assert f.dialect["repeated keys"] # Previous result, note leading space --------------------------->| | # assert f.attributes['description'] == ['WASP family homolog 7', ' pseudogene'] + + # Current result: not split. assert f.attributes["description"] == ["WASP family homolog 7, pseudogene"] - # But removing that space before "pseudogene" means it's interpreted as - # a multivalue attribute + # Removing that space before "pseudogene" might mean it's a multivalue, but + # we decide on the convention that if keys are repeated at all, that wins. + # So we still don't split line = 'NC_000001.11 BestRefSeq gene 14362 29370 . - . 
gene_id "WASH7P"; transcript_id ""; db_xref "GeneID:653635"; description "WASP family homolog 7,pseudogene"; gbkey "Gene"; gene "WASH7P"; gene_biotype "transcribed_pseudogene"; gene_synonym "FAM39F"; gene_synonym "WASH5P"; pseudo "true";' f = feature.feature_from_line(line) + assert f.dialect["repeated keys"] + assert f.attributes["description"] == ["WASP family homolog 7,pseudogene"] + + # But if we get rid of all repeated keys, it's interpreted as multiple values + line = 'NC_000001.11 BestRefSeq gene 14362 29370 . - . gene_id "WASH7P"; transcript_id ""; db_xref "GeneID:653635"; description "WASP family homolog 7,pseudogene"; gbkey "Gene"; gene "WASH7P"; gene_biotype "transcribed_pseudogene"; gene_synonym "FAM39F"; pseudo "true";' + f = feature.feature_from_line(line) + assert not f.dialect["repeated keys"] assert f.attributes["description"] == ["WASP family homolog 7", "pseudogene"] + # ....but if there's a ", " (comma followed by space) instead of just + # comma, then it's not split. + line = 'NC_000001.11 BestRefSeq gene 14362 29370 . - . gene_id "WASH7P"; transcript_id ""; db_xref "GeneID:653635"; description "WASP family homolog 7, pseudogene"; gbkey "Gene"; gene "WASH7P"; gene_biotype "transcribed_pseudogene"; gene_synonym "FAM39F"; pseudo "true";' + f = feature.feature_from_line(line) + assert not f.dialect["repeated keys"] + assert f.attributes["description"] == ["WASP family homolog 7, pseudogene"] + + # Confirm behavior of corner cases like a trailing comma line = "chr17 RefSeq CDS 6806527 6806553 . + 0 Name=CDS:NC_000083.5:LOC100040603;Parent=XM_001475631.1," f = feature.feature_from_line(line) @@ -578,7 +606,7 @@ def _check(txt, expected_keys, dialect_trailing_semicolon): ) -def test_issue_213(): +def test_issue_213(tmp_path): # GFF header directives seem to be not parsed when building a db from # a file, even though it seems to work fine from a string. 
data = dedent( @@ -599,16 +627,73 @@ def test_issue_213(): db = gffutils.create_db(data, dbfn=":memory:", from_string=True, verbose=False) assert db.directives == ["gff-version 3"], db.directives - # Ensure they're parsed into the db from a file - tmp = tempfile.NamedTemporaryFile(delete=False).name + tmp = tmp_path / "issue_213.gff3" with open(tmp, "w") as fout: fout.write(data + "\n") - db = gffutils.create_db(tmp, ":memory:") - assert db.directives == ["gff-version 3"], db.directives - assert len(db.directives) == 1 - # Ensure they're parsed into the db from a file, and going to a file (to - # exactly replicate example in #213) - db = gffutils.create_db(tmp, dbfn="issue_213.db", force=True) - assert db.directives == ["gff-version 3"], db.directives - assert len(db.directives) == 1 + # Ensure they're parsed into the db from a file path for both str/Path. + for input_path in (str(tmp), tmp): + db = gffutils.create_db(input_path, ":memory:") + assert db.directives == ["gff-version 3"], db.directives + assert len(db.directives) == 1 + + # Ensure they're parsed into the db for all str/Path input-output + # combinations when both source and destination are file-backed. + for input_path, output_path in ( + (str(tmp), str(tmp_path / "issue_213_str_str.db")), + (str(tmp), tmp_path / "issue_213_str_path.db"), + (tmp, str(tmp_path / "issue_213_path_str.db")), + (tmp, tmp_path / "issue_213_path_path.db"), + ): + db = gffutils.create_db(input_path, dbfn=output_path, force=True) + assert db.directives == ["gff-version 3"], db.directives + assert len(db.directives) == 1 + + + +def test_pathlike_inputs(tmp_path): + """ + Ensure various functions work with Path and str. 
+ """ + gff = Path(gffutils.example_filename("FBgn0031208.gff")) + fasta = Path(gffutils.example_filename("dm6-chr2L.fa")) + db_path = tmp_path / "pathlike.db" + out_path = tmp_path / "pathlike.gff3" + staged_gff = tmp_path / "pathlike-input.gff3" + staged_gff.write_text(gff.read_text()) + staged_gff_db = Path("%s.%s" % (staged_gff, ".db")) + + db = gffutils.create_db(gff, db_path, force=True) + assert db.dbfn == os.fspath(db_path) + + reopened = gffutils.FeatureDB(db_path) + reopened.delete([], make_backup=True) + assert (tmp_path / "pathlike.db.bak").exists() + + writer = GFFWriter(out_path) + writer.write_rec(next(reopened.all_features())) + writer.close() + assert out_path.exists() + + assert helpers.is_gff_db(db_path) + gffutils.create_db(staged_gff, staged_gff_db, force=True) + assert helpers.get_gff_db(staged_gff) == os.fspath(staged_gff_db) + + seq = reopened["FBgn0031208"].sequence(fasta) + expected_seq = reopened["FBgn0031208"].sequence(os.fspath(fasta)) + assert seq == expected_seq + +def test_issue_212(): + + + data = dedent( + """ + NC_000964.3 RefSeq CDS 410 1747 . 
+ 0 gene_id "BSU_00010"; transcript_id "unassigned_transcript_1"; db_xref "EnsemblGenomes-Gn:BSU00010"; db_xref "EnsemblGenomes-Tr:CAB11777"; db_xref "GOA:P05648"; db_xref "InterPro:IPR001957"; db_xref "InterPro:IPR003593"; db_xref "InterPro:IPR010921"; db_xref "InterPro:IPR013159"; db_xref "InterPro:IPR013317"; db_xref "InterPro:IPR018312"; db_xref "InterPro:IPR020591"; db_xref "InterPro:IPR024633"; db_xref "InterPro:IPR027417"; db_xref "PDB:4TPS"; db_xref "SubtiList:BG10065"; db_xref "UniProtKB/Swiss-Prot:P05648"; db_xref "GenBank:NP_387882.1"; db_xref "GeneID:939978"; experiment "publication(s) with functional evidences, PMID:2167836, 2846289, 12682299, 16120674, 1779750, 28166228"; gbkey "CDS"; gene "dnaA"; locus_tag "BSU_00010"; note "Evidence 1a: Function from experimental evidences in the studied strain; PubMedId: 2167836, 2846289, 12682299, 16120674, 1779750, 28166228; Product type f : factor"; product "chromosomal replication initiator informational ATPase"; protein_id "NP_387882.1"; transl_table "11"; exon_number "1"; + """ + ) + inferred_dialect = gffutils.helpers.infer_dialect(data.split('\t')[-1]) + assert inferred_dialect["semicolon in quotes"] + + f = next(iter(gffutils.DataIterator(data, from_string=True, dialect=inferred_dialect))) + assert f.dialect["semicolon in quotes"] + assert f.attributes["note"] == ["Evidence 1a: Function from experimental evidences in the studied strain; PubMedId: 2167836, 2846289, 12682299, 16120674, 1779750, 28166228; Product type f : factor"] diff --git a/gffutils/version.py b/gffutils/version.py index 70fcf056..2508eeac 100644 --- a/gffutils/version.py +++ b/gffutils/version.py @@ -1 +1,30 @@ -version = "0.13" +import re +from importlib.metadata import PackageNotFoundError, version as distribution_version +from pathlib import Path + + +def _detect_version(): + """ + v0.14 migrated to pyproject.toml format, and the version is now only stored + there. If this package is installed, resolve the installed version. 
Otherwise, inspect pyproject.toml. + """ + try: + return distribution_version("gffutils") + except PackageNotFoundError: + pyproject = Path(__file__).resolve().parent.parent / "pyproject.toml" + try: + contents = pyproject.read_text(encoding="utf-8") + except OSError: + return "0+unknown" + + # tomllib is in py3.11+ and we're supporting earlier versions, so rely + # on regex here. Add "+unknown" to indicate possible divergence from + # the cloned checkout. + match = re.search(r'^version = "([^"]+)"$', contents, re.MULTILINE) + if match: + return match.group(1) + "+unknown" + return "0+unknown" + + +version = _detect_version() diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..2d737311 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,69 @@ +[build-system] +# setuptools 77.0.3 is when support for plain text values for `license` key, +# used below, is supported. +requires = ["setuptools>=77.0.3"] +build-backend = "setuptools.build_meta" + +[project] +name = "gffutils" +version = "0.14" +readme = { file = "README.rst", content-type = "text/x-rst" } +description = "Work with GFF and GTF files in a flexible database framework" +authors = [ + { name = "Ryan Dale", email = "ryan.dale@nih.gov" }, +] +license = "MIT" +license-files = ["LICENSE"] +requires-python = ">=3.8"  # NOTE(review): setuptools>=77 (build requirement above) no longer supports Python 3.8 — builds on 3.8 cannot resolve the build backend; confirm whether 3.8/3.9 support (also in classifiers) is still intended +dependencies = [ + "pyfaidx>=0.5.5.2", + "argh>=0.26.2", + "argcomplete>=1.9.4", + "simplejson", +] +classifiers = [ + "Intended Audience :: Science/Research", + "Topic :: Scientific/Engineering :: Bio-Informatics", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Topic :: Software Development :: Libraries :: Python Modules", +] +urls = { Homepage = "https://github.com/daler/gffutils" } + +[project.optional-dependencies] +test = [ + 
"pytest", +] +optional = [ + "biopython>=1.73", + "pybedtools>=0.8.0", +] +docs = [ + "make", + "numpydoc", + "sphinx", + "sphinx_rtd_theme", + "sphinx-autoapi", +] + +[tool.pytest.ini_options] +addopts = "-v --doctest-modules" +testpaths = ["gffutils"] +markers = [ + "slow: marks tests that are slow to run", +] + +[tool.setuptools] +include-package-data = true +script-files = ["gffutils/scripts/gffutils-cli"] + +[tool.setuptools.packages.find] +include = ["gffutils", "gffutils.*"] + +[tool.setuptools.package-data] +gffutils = ["test/data/*"] diff --git a/setup.py b/setup.py index ef56d1c6..1c12336c 100644 --- a/setup.py +++ b/setup.py @@ -1,37 +1,4 @@ - -import os -import sys +# v0.14 migrated to pyproject.toml. from setuptools import setup -version_py = os.path.join(os.path.dirname(__file__), 'gffutils', 'version.py') -version = open(version_py).read().strip().split('=')[-1].replace('"', '') -requirements = open(os.path.join(os.path.dirname(__file__), 'requirements.txt')).readlines() -setup( - name='gffutils', - version=version, - install_requires=requirements, - packages=['gffutils', 'gffutils.scripts', 'gffutils.test', - 'gffutils.test.data'], - scripts=['gffutils/scripts/gffutils-cli'], - author='Ryan Dale', - package_dir={'gffutils': 'gffutils'}, - package_data = {'gffutils': ['test/data/*']}, - description="Work with GFF and GTF files in a flexible " - "database framework", - long_description=open("README.rst").read(), - author_email='ryan.dale@nih.gov', - url='https://github.com/daler/gffutils', - classifiers=[ - 'Intended Audience :: Science/Research', - 'License :: OSI Approved :: MIT License', - 'Topic :: Scientific/Engineering :: Bio-Informatics', - 'Programming Language :: Python', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.8', - 'Programming Language :: Python :: 3.9', - 'Programming Language :: Python :: 3.10', - 'Programming Language :: Python :: 3.11', - 'Programming Language :: Python :: 3.12', - 'Topic :: 
Software Development :: Libraries :: Python Modules', - ], -) +setup()