diff --git a/.coverage b/.coverage
new file mode 100644
index 00000000..cb8c5ebe
Binary files /dev/null and b/.coverage differ
diff --git a/load_corpus.py b/load_corpus.py
index f325f30d..668e7524 100755
--- a/load_corpus.py
+++ b/load_corpus.py
@@ -18,18 +18,19 @@
     parser.add_argument('-f', action="store", help="optional list of features, either in json (generated by"
                                                    " Superstyl) or simple txt (one word per line)", default=False)
     parser.add_argument('-t', action='store', help="types of features (words, chars, affixes - "
-                                                   "as per Sapkota et al. 2015 - or pos). pos are currently"
-                                                   "only implemented for Modern English", type=str,
-                        default="words", choices=["words", "chars", "affixes", "pos"])
+                                                   "as per Sapkota et al. 2015 -, as well as lemma or pos, met_line, "
+                                                   "met_syll (those four last only for TEI files with proper annotation)"
+                                                   , type=str,
+                        default="words", choices=["words", "chars", "affixes", "pos", "lemma", "met_line", "met_syll"])
     parser.add_argument('-n', action='store', help="n grams lengths (default 1)", default=1, type=int)
     parser.add_argument('-k', action='store', help="How many most frequent?", default=5000, type=int)
     parser.add_argument('--freqs', action='store', help="relative, absolute or binarised freqs",
                         default="relative",
                         choices=["relative", "absolute", "binary"]
                         )
-    parser.add_argument('-x', action='store', help="format (txt, xml or tei) WARNING: only txt is fully implemented",
+    parser.add_argument('-x', action='store', help="format (txt, xml, tei, or txm) WARNING: only txt is fully implemented",
                         default="txt",
-                        choices=["txt", "xml", "tei"]
+                        choices=["txt", "xml", "tei", 'txm']
                         )
     parser.add_argument('--sampling', action='store_true', help="Sample the texts?", default=False)
     parser.add_argument('--sample_units', action='store', help="Units of length for sampling "
diff --git a/superstyl/load.py b/superstyl/load.py
index 4528c6f6..fe52683e 100644
--- a/superstyl/load.py
+++ b/superstyl/load.py
@@ -14,10 +14,11 @@ def load_corpus(data_paths, feat_list=None, feats="words", n=1, k=5000, freqsTyp
     Main function to load a corpus from a collection of file, and an optional list of features to extract.
     :param data_paths: paths to the source files
     :param feat_list: an optional list of features (as created by load_corpus), default None
-    :param feats: the type of features, one of 'words', 'chars', 'affixes, and 'POS'. Affixes are inspired by
-    Sapkota et al. 2015, and include space_prefix, space_suffix, prefix, suffix, and, if keep_pos, punctuation n-grams.
-    POS are currently only implemented for Modern English
-    TODO: add met_line, met_syll
+    :param feats: the type of features, one of 'words', 'chars', 'affixes', 'lemma', 'pos', 'met_line' and 'met_syll'.
+    Affixes are inspired by Sapkota et al. 2015, and include space_prefix, space_suffix, prefix, suffix, and,
+    if keep_punct, punctuation n-grams. From TEI, pos, lemma, met_line or met_syll can
+    be extracted; met_line is the prosodic (stress) annotation of a full verse; met_syll is a char n-gram of prosodic
+    annotation
     :param n: n grams lengths (default 1)
     :param k: How many most frequent? The function takes the rank of k (if k is smaller than the total number of features),
     gets its frequencies, and only include features of superior or equal total frequencies.
@@ -45,6 +46,12 @@ def load_corpus(data_paths, feat_list=None, feats="words", n=1, k=5000, freqsTyp
     :return a pandas dataFrame of text metadata and feature frequencies; a global list of features with their frequencies
     """
 
+    if feats in ('lemma', 'pos', 'met_line', 'met_syll') and format != 'tei':
+        raise ValueError("lemma, pos, met_line or met_syll are only possible with adequate tei format (@lemma, @pos, @met)")
+
+    if feats in ('met_line', 'met_syll') and units != 'lines':
+        raise ValueError("met_line or met_syll are only possible with tei format that includes lines and @met")
+
     embeddedFreqs = False
     if embedding:
         print(".......loading embedding.......")
@@ -81,7 +88,6 @@ def load_corpus(data_paths, feat_list=None, feats="words", n=1, k=5000, freqsTyp
 
     my_feats = [m[0] for m in feat_list] # keeping only the features without the frequencies
     myTexts = fex.get_counts(myTexts, feat_list=my_feats, feats=feats, n=n, freqsType=freqsType)
-
     if embedding:
         print(".......embedding counts.......")
         myTexts, my_feats = embed.get_embedded_counts(myTexts, my_feats, model, topn=neighbouring_size)
diff --git a/superstyl/load_from_config.py b/superstyl/load_from_config.py
index 103dd853..4bc7cca4 100644
--- a/superstyl/load_from_config.py
+++ b/superstyl/load_from_config.py
@@ -1,12 +1,11 @@
 import json
-import superstyl
 import pandas as pd
 import os
 import glob
 
 from superstyl.load import load_corpus
 
-def load_corpus_from_config(config_path):
+def load_corpus_from_config(config_path, is_test=False):
     """
     Load a corpus based on a JSON configuration file.
     
@@ -55,7 +54,7 @@ def load_corpus_from_config(config_path):
     
     # Get sampling parameters
     sampling_params = config.get('sampling', {})
-    
+
     # Use the first feature to create the base corpus with sampling
     feature_configs = config.get('features', [])
     if not feature_configs:
@@ -87,9 +86,9 @@ def load_corpus_from_config(config_path):
             'sampling': sampling_params.get('enabled', False),
             'units': sampling_params.get('units', 'words'),
             'size': sampling_params.get('sample_size', 3000),
-            'step': sampling_params.get('sample_step', None),
+            'step': sampling_params.get('step', None),
             'max_samples': sampling_params.get('max_samples', None),
-            'samples_random': sampling_params.get('sample_random', False),
+            'samples_random': sampling_params.get('samples_random', False),
             'keep_punct': feature_config.get('keep_punct', False),
             'keep_sym': feature_config.get('keep_sym', False),
             'no_ascii': feature_config.get('no_ascii', False),
@@ -115,6 +114,7 @@ def load_corpus_from_config(config_path):
         # Check for feature list file
         feat_list = None
         feat_list_path = feature_config.get('feat_list')
+        print(feat_list_path)
         if feat_list_path:
             if feat_list_path.endswith('.json'):
                 with open(feat_list_path, 'r') as f:
@@ -133,9 +133,9 @@ def load_corpus_from_config(config_path):
             'sampling': sampling_params.get('enabled', False),
             'units': sampling_params.get('units', 'words'),
             'size': sampling_params.get('sample_size', 3000),
-            'step': sampling_params.get('sample_step', None),
+            'step': sampling_params.get('step', None),
             'max_samples': sampling_params.get('max_samples', None),
-            'samples_random': sampling_params.get('sample_random', False),
+            'samples_random': sampling_params.get('samples_random', False),
             'keep_punct': config.get('keep_punct', False),
             'keep_sym': config.get('keep_sym', False),
             'no_ascii': config.get('no_ascii', False),
@@ -146,11 +146,17 @@ def load_corpus_from_config(config_path):
         }
         
         print(f"Loading {feature_name}...")
+
         corpus, features = load_corpus(paths, feat_list=feat_list, **params)
         
         # Store corpus and features
         corpora[feature_name] = corpus
-        feature_lists[feature_name] = features
+
+        if feat_list is not None and is_test:
+            feature_lists[feature_name] = feat_list
+        else:
+            feature_lists[feature_name] = features
+        
     
     # Create a merged dataset
     print("Creating merged dataset...")
@@ -170,6 +176,8 @@ def load_corpus_from_config(config_path):
     
     # Add features from each corpus
     for name, corpus in corpora.items():
+        single_feature = []
+
         feature_cols = [col for col in corpus.columns if col not in ['author', 'lang']]
         
         # Rename columns to avoid duplicates
@@ -181,8 +189,9 @@ def load_corpus_from_config(config_path):
         
         # Add features to the combined list with prefixes
         for feature in feature_lists[name]:
-            all_features.append((f"{name}_{feature[0]}", feature[1]))
+            single_feature.append((feature[0], feature[1]))
     
+        all_features.append(single_feature)
     # Return the merged corpus and combined feature list
     return merged, all_features
 
diff --git a/superstyl/preproc/features_extract.py b/superstyl/preproc/features_extract.py
index fe6ba5c0..d04d8c12 100755
--- a/superstyl/preproc/features_extract.py
+++ b/superstyl/preproc/features_extract.py
@@ -10,7 +10,7 @@ def count_features(text, feats ="words", n = 1):
     Get feature counts from  a text (words, chars or POS n-grams, or affixes(+punct if keep_punct),
     following Sapkota et al., NAACL 2015
     :param text: the source text
-    :param feats: the type of feats: words, chars, POS (supported only for English), or affixes
+    :param feats: the type of features, one of 'words', 'chars', 'affixes', 'lemma', 'pos', 'met_line' and 'met_syll'.
     :param n: the length of n-grams
     :return: features absolute frequencies in text as a counter, and the total of frequencies
     """
@@ -20,9 +20,9 @@ def count_features(text, feats ="words", n = 1):
         raise ValueError("Text cannot be empty.")
     if n < 1 or not isinstance(n, int):
         raise ValueError("n must be a positive integer.")
-    if feats not in ["words", "chars", "affixes", "pos", "met_line", "met_syll"]:
-        raise ValueError("Unsupported feature type. Choose from 'words', 'chars', 'affixes', 'met_line', 'met_syll', or 'pos'.")
-    if feats == "words":
+    if feats not in ["words", "chars", "affixes", "lemma", "pos", "met_line", "met_syll"]:
+        raise ValueError("Unsupported feature type. Choose from 'words', 'chars', 'affixes', 'met_line', 'met_syll', 'lemma' or 'pos'.")
+    if feats in ("words", "lemma", "pos"):
         tokens = nltk.tokenize.wordpunct_tokenize(text)
         if n > 1:
             tokens = ["_".join(t) for t in list(nltk.ngrams(tokens, n))]
@@ -46,20 +46,6 @@ def count_features(text, feats ="words", n = 1):
                                 ]
         tokens = affs + space_affs_and_punct
 
-    #POS in english with NLTK - need to propose spacy later on
-    elif feats == "pos":
-        try:
-            nltk.data.find('taggers/averaged_perceptron_tagger_eng')
-        except:
-            nltk.download('averaged_perceptron_tagger_eng')
-        words = nltk.tokenize.wordpunct_tokenize(text)
-        pos_tags = [pos for word, pos in nltk.pos_tag(words)]
-        if n > 1:
-            tokens = ["_".join(t) for t in list(nltk.ngrams(pos_tags, n))]
-        else:
-            tokens = pos_tags
-        total = len(tokens)
-
     elif feats == "met_line":
         tokens = text.split()
         if n > 1:
@@ -73,7 +59,7 @@ def count_features(text, feats ="words", n = 1):
 
     #Adding an error message in case some distracted guy like me would enter something wrong:
     else:
-        raise ValueError("Unsupported feature type. Choose from 'words', 'chars', 'affixes', 'met_line', 'met_syll' or 'pos'.")
+        raise ValueError("Unsupported feature type. Choose from 'words', 'chars', 'affixes', 'met_line', 'met_syll', 'lemmas' or 'pos'.")
 
     counts = Counter()
     counts.update(tokens)
@@ -108,7 +94,7 @@ def get_feature_list(myTexts, feats="words", n=1, freqsType="relative"):
     """
     :param myTexts: a 'myTexts' object, containing documents to be processed
     :param feat_list: a list of features to be selected
-    :param feats: type of feats (words, chars, affixes, POS, met_line, or met_syll)
+    :param feats: the type of features, one of 'words', 'chars', 'affixes', 'lemma', 'pos', 'met_line' and 'met_syll'.
     :param freqsType: "relative", "absolute" or "binary" frequencies
     :param n: n-grams length
     :return: list of features, with total frequency
@@ -142,14 +128,12 @@ def get_doc_frequency(myTexts):
     return feats_doc_freq
 
 
-
-
 def get_counts(myTexts, feat_list=None, feats = "words", n = 1, freqsType = "relative"):
     """
     Get counts for a collection of texts
     :param myTexts: the document collection
     :param feat_list: a list of features to be selected (None for all)
-    :param feats: the type of feats (words, chars, affixes, POS)
+    :param feats: the type of features, one of 'words', 'chars', 'affixes', 'lemma', 'pos', 'met_line' and 'met_syll'.
     :param n: the length of n-grams
     :param freqsType: relative, absolute or binarised freqs
     :return: the collection with, for each text, a 'wordCounts' dictionary
diff --git a/superstyl/preproc/pipe.py b/superstyl/preproc/pipe.py
index c86949da..7c675b68 100755
--- a/superstyl/preproc/pipe.py
+++ b/superstyl/preproc/pipe.py
@@ -1,5 +1,3 @@
-import unicodedata
-
 from lxml import etree
 import regex as re
 import unidecode
@@ -52,6 +50,153 @@ def XML_to_text(path):
         return aut, re.sub(r"\s+", " ", str(myxsl(my_doc)))
 
 
+def txm_to_units(path, units="lines", feats="words"):
+    """
+    Extract units from a TXM file (tei:w with txm:form, txm:lemma, txm:ana); in verse mode, NOMpro words are skipped.
+    :param path: path to TXM file
+    :param units: units to extract: "verses" (alias "lines"), one string per tei:l, or "words", one per tei:w
+    :param feats: features to extract ("words", "lemma", or "pos"); any other value falls back to the word form
+    :return: list of extracted units (one string per unit)
+    """
+    myxsl = etree.XML('''<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+    xmlns:tei="http://www.tei-c.org/ns/1.0" xmlns:txm="http://textometrie.org/1.0" 
+    version="1.0">
+
+    <xsl:output method="text"/>
+    <xsl:param name="units"></xsl:param>
+    <xsl:param name="feats"></xsl:param>
+
+    <xsl:template match="/">
+        <xsl:choose>
+            <xsl:when test="$units = 'verses'">
+                <xsl:apply-templates select="descendant::tei:l"/>
+            </xsl:when>
+            <xsl:when test="$units = 'words'">
+                <xsl:apply-templates select="descendant::tei:w"/>
+            </xsl:when>
+        </xsl:choose>
+    </xsl:template>
+
+    <xsl:template match="tei:l">
+        <xsl:apply-templates select="descendant::tei:w[
+            not(txm:ana[@type='#frpos'] = 'NOMpro')
+            ]"/>
+        <xsl:text>&#xA;</xsl:text>
+    </xsl:template>
+
+    <xsl:template match="tei:w">
+        <xsl:text> </xsl:text>
+        <xsl:choose>
+            <xsl:when test="$feats = 'lemma'">
+                <xsl:value-of select="txm:lemma"/>
+            </xsl:when>
+            <xsl:when test="$feats = 'pos'">
+                <xsl:value-of select="txm:ana[@type='#frpos']"/>
+            </xsl:when>
+            <xsl:otherwise>
+                <xsl:apply-templates select="txm:form"/>
+            </xsl:otherwise>
+        </xsl:choose>
+        <xsl:if test="$units = 'words'">
+            <!-- Then one word per line -->
+            <xsl:text>&#xA;</xsl:text>
+        </xsl:if>
+    </xsl:template>
+
+</xsl:stylesheet>''')
+    myxsl = etree.XSLT(myxsl)
+    units = "verses" if units == "lines" else units  # the stylesheet only tests 'verses'; "lines" used to yield nothing
+    with open(path, 'r') as f:
+        my_doc = etree.parse(f)
+
+    units_tokens = str(myxsl(my_doc, units=etree.XSLT.strparam(units), feats=etree.XSLT.strparam(feats))).splitlines()
+    return units_tokens
+
+def tei_to_units(path, feats="words", units="lines", keep_punct=False):
+    """Extract units from a TEI file: one string per tei:l ("verses", alias "lines") or per tei:w ("words"); feats picks
+    word text, @lemma, @pos or @met ("met_line"/"met_syll"); keep_punct=True keeps the '.' found in line-level @met."""
+    feats = "met" if feats in ("met_syll", "met_line") else feats  # both metrical variants read the same @met attribute
+    myxsl = etree.XML('''<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+        xmlns:tei="http://www.tei-c.org/ns/1.0"  
+        version="1.0">
+
+        <xsl:output method="text"/>
+
+        <xsl:param name="units"></xsl:param>
+        <xsl:param name="feats"></xsl:param>
+        <xsl:param name="keep_punct"></xsl:param>
+
+        <xsl:template match="/">
+            <xsl:choose>
+                <xsl:when test="$units = 'verses'">
+                    <xsl:apply-templates select="descendant::tei:l"/>
+                </xsl:when>
+                <xsl:when test="$units = 'words'">
+                    <xsl:apply-templates select="descendant::tei:w"/>
+                </xsl:when>
+            </xsl:choose>
+        </xsl:template>
+
+        <xsl:template match="tei:l">
+            <xsl:choose>
+                <xsl:when test="$feats = 'met'">
+                        <xsl:choose>
+                            <xsl:when test="$keep_punct = 'true'">
+                                <xsl:value-of select="@met"/>
+                            </xsl:when>
+                            <xsl:otherwise>
+                                <xsl:value-of select="translate(@met, '.', '')"/>
+                            </xsl:otherwise>
+                        </xsl:choose>
+                </xsl:when>
+                <xsl:otherwise>
+                    <xsl:apply-templates select="descendant::tei:w"/>
+                </xsl:otherwise>
+            </xsl:choose>
+            <xsl:text>&#xA;</xsl:text>
+        </xsl:template>
+
+        <xsl:template match="tei:w">
+            <xsl:text> </xsl:text>
+            <xsl:choose>
+                <xsl:when test="$feats = 'met'">
+                    <xsl:value-of select="@met"/>
+                </xsl:when>
+                <xsl:when test="$feats = 'lemma'">
+                    <xsl:value-of select="@lemma"/>
+                </xsl:when>
+                <xsl:when test="$feats = 'pos'">
+                    <xsl:value-of select="@pos"/>
+                </xsl:when>
+                <xsl:otherwise>
+                    <xsl:apply-templates/>
+                </xsl:otherwise>
+            </xsl:choose>
+            <xsl:if test="$units = 'words'">
+                <!-- Then one word per line -->
+                <xsl:text>&#xA;</xsl:text>
+            </xsl:if>
+        </xsl:template>
+
+    </xsl:stylesheet>''')
+    myxsl = etree.XSLT(myxsl)
+    units = "verses" if units == "lines" else units  # the stylesheet only tests 'verses'; "lines" used to yield nothing
+    with open(path, 'r') as f:
+        my_doc = etree.parse(f)
+    params = {"units": units, "feats": feats, "keep_punct": "true" if keep_punct else "false"}  # $keep_punct was never set
+    units_tokens = str(myxsl(my_doc, **{k: etree.XSLT.strparam(v) for k, v in params.items()})).splitlines()
+    return units_tokens
+
+def specialXML_to_text(path, format="tei", feats="words"):
+    """Return (author, text) for a "tei" or "txm" file; author is the file-name part before the first '_'."""
+    if format not in ("tei", "txm"):  # previously fell through to an UnboundLocalError on units_tokens
+        raise ValueError("format must be 'tei' or 'txm'")
+    aut = path.split('/')[-1].split("_")[0]
+    # Word-level extraction: one token per tei:w, re-joined below into a single whitespace-normalised string
+    to_units = tei_to_units if format == "tei" else txm_to_units
+    units_tokens = to_units(path, feats=feats, units="words")
+    return aut, re.sub(r"\s+", " ", ' '.join(units_tokens))
+
 def TXT_to_text(path):
     """
     Get main text from xml file
@@ -147,7 +292,7 @@ def load_texts(paths, identify_lang=False, feats="words", format="txt", keep_pun
     Loads a collection of documents into a 'myTexts' object for further processing.
     TODO: a proper class
     :param paths: path to docs
-    TODO: add feats!
+    :param feats: the type of features, one of 'words', 'chars', 'affixes', 'lemma', 'pos', 'met_line' and 'met_syll'.
     :param identify_lang: whether or not try to identify lang (default: False)
     :param format: format of the source files (implemented values: txt [default], xml)
     :param keep_punct: whether or not to keep punctuation and caps.
@@ -165,6 +310,9 @@ def load_texts(paths, identify_lang=False, feats="words", format="txt", keep_pun
         if format=='xml':
             aut, text = XML_to_text(path)
 
+        elif format in ('tei', 'txm'):  # elif: with a plain 'if', the else below re-read xml files through TXT_to_text
+            aut, text = specialXML_to_text(path, format=format, feats=feats)
+
         else:
             aut, text = TXT_to_text(path)
 
@@ -198,7 +346,7 @@ def get_samples(path, size, step=None, samples_random=False, max_samples=10,
     :param max_samples: maximum number of samples per author/clas
     :param units: the units to use, one of "words" or "verses"
     :param format: type of document, one of full text, TEI or simple XML (ONLY TEI and TXT IMPLEMENTED)
-    :param feats: the type of features,  TODO: document
+    :param feats: the type of features, one of 'words', 'chars', 'affixes', 'lemma', 'pos', 'met_line' and 'met_syll'.
     """
 
     if samples_random and step is not None:
@@ -213,127 +361,26 @@ def get_samples(path, size, step=None, samples_random=False, max_samples=10,
     if units == "words" and format == "txt":
         my_doc = TXT_to_text(path)
         text = normalise(my_doc[1], keep_punct=keep_punct, keep_sym=keep_sym, no_ascii=no_ascii)
-        units = nltk.tokenize.wordpunct_tokenize(text)
+        units_tokens = nltk.tokenize.wordpunct_tokenize(text)
 
-    #TODO: DOCUMENT this format as TXM, and keep it only for retrocompatibility
+    #Kept only for retrocompatibility with Psysché
     if units == "verses" and format == "txm":
-        myxsl = etree.XML('''<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
-        xmlns:tei="http://www.tei-c.org/ns/1.0" xmlns:txm="http://textometrie.org/1.0" 
-        version="1.0">
-
-        <xsl:output method="text"/>
-
-        <xsl:template match="/">
-            <xsl:apply-templates select="descendant::tei:l"/>
-        </xsl:template>
-
-        <xsl:template match="tei:l">
-            <xsl:apply-templates select="descendant::tei:w[
-                not(txm:ana[@type='#frpos'] = 'NOMpro')
-                ]"/>
-            <xsl:text>&#xA;</xsl:text>
-        </xsl:template>
-
-        <xsl:template match="tei:w">
-            <xsl:text> </xsl:text>
-            <xsl:apply-templates select="txm:form"/>
-        </xsl:template>
-
-    </xsl:stylesheet>''')
-        myxsl = etree.XSLT(myxsl)
-
-        with open(path, 'r') as f:
-            my_doc = etree.parse(f)
-
-        units = str(myxsl(my_doc)).splitlines()
-
-    # and now generating output
-    samples = []
+        units_tokens = txm_to_units(path, units=units)
 
     if format == "tei":
-        if feats in ["met_syll", "met_line"]:
-            feats = "met"
-        myxsl = etree.XML('''<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
-    xmlns:tei="http://www.tei-c.org/ns/1.0"  
-    version="1.0">
-    
-    <xsl:output method="text"/>
-    
-    <xsl:param name="units"></xsl:param>
-    <xsl:param name="feats"></xsl:param>
-    <xsl:param name="keep_punct"></xsl:param>
-    
-    <xsl:template match="/">
-        <xsl:choose>
-            <xsl:when test="$units = 'verses'">
-                <xsl:apply-templates select="descendant::tei:l"/>
-            </xsl:when>
-            <xsl:when test="$units = 'words'">
-                <xsl:apply-templates select="descendant::tei:w"/>
-            </xsl:when>
-        </xsl:choose>
-    </xsl:template>
-    
-    <xsl:template match="tei:l">
-        <xsl:choose>
-            <xsl:when test="$feats = 'met'">
-                    <xsl:choose>
-                        <xsl:when test="$keep_punct = 'true'">
-                            <xsl:value-of select="@met"/>
-                        </xsl:when>
-                        <xsl:otherwise>
-                            <xsl:value-of select="translate(@met, '.', '')"/>
-                        </xsl:otherwise>
-                    </xsl:choose>
-            </xsl:when>
-            <xsl:otherwise>
-                <xsl:apply-templates select="descendant::tei:w"/>
-            </xsl:otherwise>
-        </xsl:choose>
-        <xsl:text>&#xA;</xsl:text>
-    </xsl:template>
-    
-    <xsl:template match="tei:w">
-        <xsl:text> </xsl:text>
-        <xsl:choose>
-            <xsl:when test="$feats = 'met'">
-                <xsl:value-of select="@met"/>
-            </xsl:when>
-            <xsl:when test="$feats = 'lemma'">
-                <xsl:value-of select="@lemma"/>
-            </xsl:when>
-            <xsl:when test="$feats = 'pos'">
-                <xsl:value-of select="@pos"/>
-            </xsl:when>
-            <xsl:otherwise>
-                <xsl:apply-templates/>
-            </xsl:otherwise>
-        </xsl:choose>
-        <xsl:if test="$units = 'words'">
-            <!-- Then one word per line -->
-            <xsl:text>&#xA;</xsl:text>
-        </xsl:if>
-    </xsl:template>
-    
-</xsl:stylesheet>''')
-        myxsl = etree.XSLT(myxsl)
-
-        with open(path, 'r') as f:
-            my_doc = etree.parse(f)
-
-        units = str(myxsl(my_doc, units=etree.XSLT.strparam(units), feats=etree.XSLT.strparam(feats))).splitlines()
+        units_tokens = tei_to_units(path, units=units, feats=feats)
 
     # and now generating output
     samples = []
 
     if samples_random:
         for k in range(max_samples):
-            samples.append({"start": str(k)+'s', "end": str(k)+'e', "text": list(random.choices(units, k=size))})
+            samples.append({"start": str(k)+'s', "end": str(k)+'e', "text": list(random.choices(units_tokens, k=size))})
 
     else:
         current = 0
-        while current + size <= len(units):
-            samples.append({"start": current, "end": current + size, "text": list(units[current:(current + size)])})
+        while current + size <= len(units_tokens):
+            samples.append({"start": current, "end": current + size, "text": list(units_tokens[current:(current + size)])})
             current = current + step
 
     return samples
@@ -353,7 +400,7 @@ def docs_to_samples(paths, size, step=None, units="words", samples_random=False,
     :param keep_punct: whether to keep punctuation and caps.
     :param max_samples: maximum number of samples per author/class.
     :param identify_lang: whether to try to identify lang (default: False)
-    :param feats: TODO
+    :param feats: the type of features, one of 'words', 'chars', 'affixes', 'lemma', 'pos', 'met_line' and 'met_syll'.
     :return: a myTexts object
     """
     myTexts = []
diff --git a/superstyl/svm.py b/superstyl/svm.py
index 204987a5..abceb823 100755
--- a/superstyl/svm.py
+++ b/superstyl/svm.py
@@ -288,13 +288,13 @@ def plot_coefficients(coefs, feature_names, current_class, top_features=10):
 
 
 
-def plot_rolling(final_predictions, smoothing=3):
+def plot_rolling(final_predictions, smoothing=3, xlab = "Index (segment center)"):
     """
     Plots the rolling stylometry results as lines of decision function values over the text.
     
     Parameters:
-    final_predictions_path : str
-        Path to the CSV file containing final predictions generated by the SVM pipeline.
+    final_predictions : pandas.DataFrame
+        The final predictions produced by train_svm.
     
     smoothing : int or None
         The window size for smoothing the curves.
@@ -304,24 +304,25 @@ def plot_rolling(final_predictions, smoothing=3):
     """
 
     # Extract the segment center from the filename
+    my_final_predictions = final_predictions.copy() # to avoid modifying in place
     segment_centers = []
-    for fname in final_predictions['filename']:
+    for fname in my_final_predictions['filename']:
         parts = fname.split('_')[-1].split('-')
         start = int(parts[0])
         end = int(parts[1])
         center = (start + end) / 2.0
         segment_centers.append(center)
 
-    final_predictions['segment_center'] = segment_centers
+    my_final_predictions['segment_center'] = segment_centers
 
-    final_predictions['filename'] = [fname.split('_')[1] for fname in final_predictions['filename']]
+    my_final_predictions['filename'] = [fname.split('_')[1] for fname in my_final_predictions['filename']]
     
     # Identify candidate columns
     known_cols = {'filename', 'author', 'segment_center'}
-    candidate_cols = [c for c in final_predictions.columns if c not in known_cols]
+    candidate_cols = [c for c in my_final_predictions.columns if c not in known_cols]
 
-    for work in final_predictions['filename'].unique():
-        fpreds_work = final_predictions[final_predictions['filename'] == work]
+    for work in my_final_predictions['filename'].unique():
+        fpreds_work = my_final_predictions[my_final_predictions['filename'] == work]
         # Sort by segment center to ensure chronological order
         fpreds_work = fpreds_work.sort_values('segment_center')
 
@@ -336,7 +337,7 @@ def plot_rolling(final_predictions, smoothing=3):
             plt.plot(fpreds_work['segment_center'], fpreds_work[col], label=col, linewidth=2)
 
         plt.title('Rolling Stylometry Decision Functions Over ' + work)
-        plt.xlabel('Word index (segment center)')
+        plt.xlabel(xlab)
         plt.ylabel('Decision Function Value')
         plt.ylim(min(-2, min(fpreds_work[candidate_cols].min()) - 0.2),
                  max(1, max(fpreds_work[candidate_cols].max())) + 0.2)
diff --git a/tests/test_error_handling.py b/tests/test_error_handling.py
new file mode 100644
index 00000000..cc37116a
--- /dev/null
+++ b/tests/test_error_handling.py
@@ -0,0 +1,320 @@
+import unittest
+import superstyl.load
+import superstyl.preproc.features_extract
+from superstyl.load_from_config import load_corpus_from_config
+import os
+import tempfile
+import json
+import glob
+
+THIS_DIR = os.path.dirname(os.path.abspath(__file__))
+
+
+class ErrorHandlingTests(unittest.TestCase):
+    """Tests for error handling and ValueError raising"""
+    
+    def setUp(self):
+        """Set up test files paths"""
+        self.test_paths = sorted(glob.glob(os.path.join(THIS_DIR, "testdata/*.txt")))
+        self.temp_dir = tempfile.TemporaryDirectory()
+    
+    def tearDown(self):
+        """Clean up temporary directory"""
+        self.temp_dir.cleanup()
+    
+    # =========================================================================
+    # Tests for load.py - ValueError for incompatible formats
+    # =========================================================================
+    
+    def test_load_corpus_lemma_requires_tei(self):
+        # SCENARIO: lemma features require TEI format
+        # GIVEN: Attempting to use lemma with non-TEI format
+        
+        # WHEN/THEN: Should raise ValueError
+        with self.assertRaises(ValueError) as context:
+            superstyl.load.load_corpus(
+                self.test_paths,
+                feats="lemma",
+                format="txt"
+            )
+        
+        self.assertIn("lemma", str(context.exception))
+        self.assertIn("tei", str(context.exception).lower())
+    
+    def test_load_corpus_pos_requires_tei(self):
+        # SCENARIO: pos features require TEI format
+        # GIVEN: Attempting to use pos with non-TEI format
+        
+        # WHEN/THEN: Should raise ValueError
+        with self.assertRaises(ValueError) as context:
+            superstyl.load.load_corpus(
+                self.test_paths,
+                feats="pos",
+                format="txt"
+            )
+        
+        self.assertIn("pos", str(context.exception))
+        self.assertIn("tei", str(context.exception).lower())
+    
+    def test_load_corpus_met_line_requires_tei(self):
+        # SCENARIO: met_line features require TEI format
+        # GIVEN: Attempting to use met_line with non-TEI format
+        
+        # WHEN/THEN: Should raise ValueError
+        with self.assertRaises(ValueError) as context:
+            superstyl.load.load_corpus(
+                self.test_paths,
+                feats="met_line",
+                format="txt"
+            )
+        
+        self.assertIn("met_line", str(context.exception))
+        self.assertIn("tei", str(context.exception).lower())
+    
+    def test_load_corpus_met_syll_requires_tei(self):
+        # SCENARIO: met_syll features require TEI format
+        # GIVEN: Attempting to use met_syll with non-TEI format
+        
+        # WHEN/THEN: Should raise ValueError
+        with self.assertRaises(ValueError) as context:
+            superstyl.load.load_corpus(
+                self.test_paths,
+                feats="met_syll",
+                format="txt"
+            )
+        
+        self.assertIn("met_syll", str(context.exception))
+        self.assertIn("tei", str(context.exception).lower())
+    
+    def test_load_corpus_met_line_requires_lines_unit(self):
+        # SCENARIO: met_line requires units='lines'
+        # GIVEN: Attempting to use met_line with units='words'
+        
+        # Create a dummy TEI file for this test
+        tei_path = os.path.join(self.temp_dir.name, "test_met.xml")
+        with open(tei_path, 'w') as f:
+            f.write('<?xml version="1.0"?><TEI xmlns="http://www.tei-c.org/ns/1.0"><text><body><l met="01">test</l></body></text></TEI>')
+        
+        # WHEN/THEN: Should raise ValueError
+        with self.assertRaises(ValueError) as context:
+            superstyl.load.load_corpus(
+                [tei_path],
+                feats="met_line",
+                format="tei",
+                units="words"  # Wrong unit type
+            )
+        
+        self.assertIn("met_line", str(context.exception))
+        self.assertIn("lines", str(context.exception))
+    
+    def test_load_corpus_met_syll_requires_lines_unit(self):
+        # SCENARIO: met_syll requires units='lines'
+        # GIVEN: Attempting to use met_syll with units='words'
+        
+        # Create a dummy TEI file for this test
+        tei_path = os.path.join(self.temp_dir.name, "test_met2.xml")
+        with open(tei_path, 'w') as f:
+            f.write('<?xml version="1.0"?><TEI xmlns="http://www.tei-c.org/ns/1.0"><text><body><l met="01">test</l></body></text></TEI>')
+        
+        # WHEN/THEN: Should raise ValueError
+        with self.assertRaises(ValueError) as context:
+            superstyl.load.load_corpus(
+                [tei_path],
+                feats="met_syll",
+                format="tei",
+                units="words"  # Wrong unit type
+            )
+        
+        self.assertIn("met_syll", str(context.exception))
+        self.assertIn("lines", str(context.exception))
+    
+    # =========================================================================
+    # Tests for features_extract.py - ValueError for invalid parameters
+    # =========================================================================
+    
+    def test_count_features_empty_text(self):
+        # SCENARIO: Empty text should raise ValueError
+        # GIVEN: An empty string as text
+        
+        # WHEN/THEN: Should raise ValueError
+        with self.assertRaises(ValueError) as context:
+            superstyl.preproc.features_extract.count_features(
+                "",  # Empty text
+                feats="words",
+                n=1
+            )
+        
+        self.assertIn("empty", str(context.exception).lower())
+    
+    def test_count_features_invalid_n_zero(self):
+        # SCENARIO: n must be positive
+        # GIVEN: n=0
+        
+        # WHEN/THEN: Should raise ValueError
+        with self.assertRaises(ValueError) as context:
+            superstyl.preproc.features_extract.count_features(
+                "test text",
+                feats="words",
+                n=0  # Invalid n
+            )
+        
+        self.assertIn("positive", str(context.exception).lower())
+    
+    def test_count_features_invalid_n_negative(self):
+        # SCENARIO: n must be positive
+        # GIVEN: n=-1
+        
+        # WHEN/THEN: Should raise ValueError
+        with self.assertRaises(ValueError) as context:
+            superstyl.preproc.features_extract.count_features(
+                "test text",
+                feats="words",
+                n=-1  # Invalid n
+            )
+        
+        self.assertIn("positive", str(context.exception).lower())
+    
+    def test_count_features_invalid_n_not_integer(self):
+        # SCENARIO: n must be an integer
+        # GIVEN: n=1.5 (float)
+        
+        # WHEN/THEN: Should raise ValueError
+        with self.assertRaises(ValueError) as context:
+            superstyl.preproc.features_extract.count_features(
+                "test text",
+                feats="words",
+                n=1.5  # Not an integer
+            )
+        
+        self.assertIn("integer", str(context.exception).lower())
+    
+    def test_count_features_invalid_not_string(self):
+        # SCENARIO: text must be a string
+        # GIVEN: text is not a string (e.g., None)
+        
+        # WHEN/THEN: Should raise ValueError
+        with self.assertRaises(ValueError) as context:
+            superstyl.preproc.features_extract.count_features(
+                None,  # Not a string
+                feats="words",
+                n=1
+            )
+        
+        self.assertIn("string", str(context.exception).lower())
+    
+    def test_count_features_unsupported_feats_type(self):
+        # SCENARIO: feats must be a supported type
+        # GIVEN: An unsupported feats type
+        
+        # WHEN/THEN: Should raise ValueError
+        with self.assertRaises(ValueError) as context:
+            superstyl.preproc.features_extract.count_features(
+                "test text",
+                feats="unsupported_type",  # Invalid feats type
+                n=1
+            )
+        
+        self.assertIn("Unsupported", str(context.exception))
+    
+    def test_get_counts_invalid_frequency_type(self):
+        # SCENARIO: freqsType must be valid
+        # GIVEN: An unsupported frequency type
+        
+        myTexts = [{"name": "test", "text": "test text"}]
+        
+        # WHEN/THEN: Should raise ValueError
+        with self.assertRaises(ValueError) as context:
+            superstyl.preproc.features_extract.get_counts(
+                myTexts,
+                feats="words",
+                freqsType="invalid_type"  # Invalid frequency type
+            )
+        
+        self.assertIn("Unsupported frequency type", str(context.exception))
+    
+    # =========================================================================
+    # Tests for load_from_config.py - previously uncovered branches
+    # =========================================================================
+    
+    def test_load_from_config_with_json_feature_list(self):
+        # SCENARIO: Load corpus with JSON feature list (line 119)
+        # GIVEN: A config with a JSON feature list
+        
+        # Create a JSON feature list
+        feature_list = [["the", 0], ["is", 0]]
+        feature_list_path = os.path.join(self.temp_dir.name, "features.json")
+        with open(feature_list_path, 'w') as f:
+            json.dump(feature_list, f)
+        
+        # Create config
+        config = {
+            "paths": self.test_paths,
+            "format": "txt",
+            "features": [
+                {
+                    "name": "test_feature",
+                    "type": "words",
+                    "n": 1,
+                    "feat_list": feature_list_path  # JSON feature list
+                }
+            ]
+        }
+        
+        config_path = os.path.join(self.temp_dir.name, "config.json")
+        with open(config_path, 'w') as f:
+            json.dump(config, f)
+        
+        # WHEN: Loading corpus from config
+        corpus, features = load_corpus_from_config(config_path)
+        
+        # THEN: Should load successfully with JSON feature list
+        self.assertIsNotNone(corpus)
+        self.assertIsNotNone(features)
+    
+    def test_load_from_config_test_mode_uses_feat_list(self):
+        # SCENARIO: In test mode, use provided feat_list (line 156)
+        # GIVEN: A config with feat_list in test mode
+        
+        # Create a JSON feature list
+        feature_list = [["the", 0], ["is", 0], ["text", 0]]
+        feature_list_path = os.path.join(self.temp_dir.name, "test_features.json")
+        with open(feature_list_path, 'w') as f:
+            json.dump(feature_list, f)
+        
+        # Create config with multiple features (triggers is_test logic)
+        config = {
+            "paths": self.test_paths,
+            "format": "txt",
+            "features": [
+                {
+                    "name": "feat1",
+                    "type": "words",
+                    "n": 1,
+                    "feat_list": feature_list_path
+                },
+                {
+                    "name": "feat2",
+                    "type": "chars",
+                    "n": 2,
+                    "feat_list": feature_list_path
+                }
+            ]
+        }
+        
+        config_path = os.path.join(self.temp_dir.name, "multi_config.json")
+        with open(config_path, 'w') as f:
+            json.dump(config, f)
+        
+        # WHEN: Loading corpus from config
+        corpus, features = load_corpus_from_config(config_path, is_test=True)
+        
+        # THEN: Should use the provided feature list
+        self.assertIsNotNone(corpus)
+        self.assertIsNotNone(features)
+        # features should be a list of feature lists
+        self.assertIsInstance(features, list)
+        self.assertEqual(len(features), 2)  # Two feature sets
+
+
+if __name__ == '__main__':
+    unittest.main()
\ No newline at end of file
diff --git a/tests/test_load_corpus.py b/tests/test_load_corpus.py
index 5de65d26..076aa199 100644
--- a/tests/test_load_corpus.py
+++ b/tests/test_load_corpus.py
@@ -286,22 +286,7 @@ def test_load_corpus(self):
         self.assertEqual(sorted(feats), sorted(expected_feats))
         self.assertEqual(corpus.to_dict(), expected_corpus)
 
-        # WHEN
-        corpus, feats = superstyl.load.load_corpus(sorted(self.paths[1:]), feats="pos", n=1, format="txt", freqsType="absolute")
-
-        # THEN
-        expected_feats = [('DT', 4), ('NN', 2), ('VBZ', 2), ('RB', 1)]
-        expected_corpus = {
-        'author': {'Smith_Letter1.txt': 'Smith', 'Smith_Letter2.txt': 'Smith'},
-        'lang': {'Smith_Letter1.txt': 'NA', 'Smith_Letter2.txt': 'NA'},
-        'DT': {'Smith_Letter1.txt': 2 , 'Smith_Letter2.txt': 2},
-        'NN': {'Smith_Letter1.txt': 1 , 'Smith_Letter2.txt': 1},  
-        'VBZ': {'Smith_Letter1.txt': 1, 'Smith_Letter2.txt': 1},
-        'RB': {'Smith_Letter1.txt': 0, 'Smith_Letter2.txt': 1}
-        }
-
-        self.assertEqual(sorted(feats), sorted(expected_feats))
-        self.assertEqual(corpus.to_dict(), expected_corpus)
+        # TODO: add tests for lemma, pos, met_line, met_syll, and loading from tei, and from txm
 
         # Now, test embedding
         # WHEN
diff --git a/tests/test_xml_loading.py b/tests/test_xml_loading.py
new file mode 100644
index 00000000..c874bbb8
--- /dev/null
+++ b/tests/test_xml_loading.py
@@ -0,0 +1,304 @@
+import unittest
+import superstyl.preproc.pipe
+import os
+import glob
+
+THIS_DIR = os.path.dirname(os.path.abspath(__file__))
+
+
+class XMLLoadingTests(unittest.TestCase):
+    """Tests for XML, TEI, and TXM file loading functions"""
+    
+    def setUp(self):
+        """Set up test files paths"""
+        self.xml_path = os.path.join(THIS_DIR, "testdata", "Smith_Song1.xml")
+        self.tei_path = os.path.join(THIS_DIR, "testdata", "Dupont_TEIPoem1.xml")
+        self.txm_path = os.path.join(THIS_DIR, "testdata", "Smith_TXM1.xml")
+    
+    def test_XML_to_text(self):
+        # SCENARIO: Load text from a simple XML file
+        # GIVEN: An XML file with author and text elements
+        
+        # WHEN: Loading the XML file
+        aut, text = superstyl.preproc.pipe.XML_to_text(self.xml_path)
+        
+        # THEN: Author and text are correctly extracted
+        self.assertEqual(aut, "Smith")
+        self.assertIn("test song", text)
+        self.assertIn("lyrics", text)
+        # Check that whitespace is normalized
+        self.assertNotIn("  ", text)
+    
+    def test_tei_to_units_words(self):
+        # SCENARIO: Extract words from a TEI file
+        # GIVEN: A TEI file with annotated words
+        
+        # WHEN: Extracting words as units
+        units_tokens = superstyl.preproc.pipe.tei_to_units(
+            self.tei_path, 
+            feats="words", 
+            units="words"
+        )
+        
+        # THEN: Words are extracted, one per line
+        self.assertIsInstance(units_tokens, list)
+        self.assertGreater(len(units_tokens), 0)
+        # Each word should be on a separate line
+        self.assertIn("This", [u.strip() for u in units_tokens])
+        self.assertIn("is", [u.strip() for u in units_tokens])
+    
+    def test_tei_to_units_verses(self):
+        # SCENARIO: Extract verses (lines) from a TEI file
+        # GIVEN: A TEI file with verse lines
+        
+        # WHEN: Extracting verses as units
+        units_tokens = superstyl.preproc.pipe.tei_to_units(
+            self.tei_path, 
+            feats="words", 
+            units="verses"
+        )
+        
+        # THEN: Each verse is on a separate line
+        self.assertIsInstance(units_tokens, list)
+        # We should have 2 lines in our test file
+        self.assertEqual(len(units_tokens), 2)
+    
+    def test_tei_to_units_lemma(self):
+        # SCENARIO: Extract lemmas from a TEI file
+        # GIVEN: A TEI file with lemma annotations
+        
+        # WHEN: Extracting lemmas
+        units_tokens = superstyl.preproc.pipe.tei_to_units(
+            self.tei_path, 
+            feats="lemma", 
+            units="words"
+        )
+        
+        # THEN: Lemmas are extracted
+        self.assertIsInstance(units_tokens, list)
+        self.assertIn("this", [u.strip() for u in units_tokens])
+        self.assertIn("be", [u.strip() for u in units_tokens])
+    
+    def test_tei_to_units_pos(self):
+        # SCENARIO: Extract POS tags from a TEI file
+        # GIVEN: A TEI file with POS annotations
+        
+        # WHEN: Extracting POS tags
+        units_tokens = superstyl.preproc.pipe.tei_to_units(
+            self.tei_path, 
+            feats="pos", 
+            units="words"
+        )
+        
+        # THEN: POS tags are extracted
+        self.assertIsInstance(units_tokens, list)
+        self.assertIn("DET", [u.strip() for u in units_tokens])
+        self.assertIn("VERB", [u.strip() for u in units_tokens])
+    
+    def test_tei_to_units_met_syll(self):
+        # SCENARIO: Extract metrical syllables from a TEI file
+        # GIVEN: A TEI file with metrical annotations
+        
+        # WHEN: Extracting metrical syllables with met_syll feature
+        units_tokens = superstyl.preproc.pipe.tei_to_units(
+            self.tei_path, 
+            feats="met_syll", 
+            units="verses"
+        )
+        
+        # THEN: Metrical annotations are extracted
+        self.assertIsInstance(units_tokens, list)
+        # The @met attributes should be present
+        self.assertGreater(len(units_tokens), 0)
+    
+    def test_tei_to_units_met_line(self):
+        # SCENARIO: Extract metrical lines from a TEI file
+        # GIVEN: A TEI file with metrical annotations on lines
+        
+        # WHEN: Extracting metrical patterns at line level
+        units_tokens = superstyl.preproc.pipe.tei_to_units(
+            self.tei_path, 
+            feats="met_line", 
+            units="verses"
+        )
+        
+        # THEN: Metrical patterns for each line are extracted
+        self.assertIsInstance(units_tokens, list)
+        self.assertEqual(len(units_tokens), 2)
+        # Should contain the metrical patterns
+        self.assertIn("01010101", units_tokens[0])
+        self.assertIn("10101010", units_tokens[1])
+    
+    def test_txm_to_units_words(self):
+        # SCENARIO: Extract words from a TXM file
+        # GIVEN: A TXM file with annotated words
+        
+        # WHEN: Extracting words as units
+        units_tokens = superstyl.preproc.pipe.txm_to_units(
+            self.txm_path, 
+            units="words"
+        )
+        
+        # THEN: Words are extracted
+        # Note: When extracting individual words (units='words'), 
+        # the NOMpro filter is not applied
+        self.assertIsInstance(units_tokens, list)
+        self.assertGreater(len(units_tokens), 0)
+        text_content = ' '.join(units_tokens)
+        # All words should be present including those with NOMpro
+        self.assertIn("This", text_content)
+        self.assertIn("test", text_content)
+    
+    def test_txm_to_units_verses(self):
+        # SCENARIO: Extract verses from a TXM file
+        # GIVEN: A TXM file with verse lines
+        
+        # WHEN: Extracting verses as units
+        units_tokens = superstyl.preproc.pipe.txm_to_units(
+            self.txm_path, 
+            units="verses"
+        )
+        
+        # THEN: Each verse is extracted and NOMpro words are filtered out
+        self.assertIsInstance(units_tokens, list)
+        self.assertEqual(len(units_tokens), 2)
+        # Check that NOMpro words are excluded in verse mode
+        text_content = ' '.join(units_tokens)
+        self.assertNotIn("here", text_content)  # "here" has NOMpro tag and should be filtered
+        self.assertIn("This", text_content)  # Regular words should be present
+    
+    def test_txm_to_units_lemma(self):
+        # SCENARIO: Extract lemmas from a TXM file
+        # GIVEN: A TXM file with lemma annotations
+        
+        # WHEN: Extracting lemmas
+        units_tokens = superstyl.preproc.pipe.txm_to_units(
+            self.txm_path,
+            units="words",
+            feats="lemma"
+        )
+        
+        # THEN: Lemmas are extracted
+        self.assertIsInstance(units_tokens, list)
+        self.assertIn("be", [u.strip() for u in units_tokens])  # lemma of "is"
+        self.assertIn("this", [u.strip() for u in units_tokens])
+    
+    def test_txm_to_units_pos(self):
+        # SCENARIO: Extract POS tags from a TXM file
+        # GIVEN: A TXM file with POS annotations
+        
+        # WHEN: Extracting POS tags
+        units_tokens = superstyl.preproc.pipe.txm_to_units(
+            self.txm_path,
+            units="words",
+            feats="pos"
+        )
+        
+        # THEN: POS tags are extracted
+        self.assertIsInstance(units_tokens, list)
+        self.assertIn("DET", [u.strip() for u in units_tokens])
+        self.assertIn("VERB", [u.strip() for u in units_tokens])
+    
+    def test_specialXML_to_text_tei(self):
+        # SCENARIO: Load text from a TEI file using specialXML_to_text
+        # GIVEN: A TEI format file
+        
+        # WHEN: Loading with format="tei"
+        aut, text = superstyl.preproc.pipe.specialXML_to_text(
+            self.tei_path, 
+            format="tei", 
+            feats="words"
+        )
+        
+        # THEN: Author is extracted from filename and text is normalized
+        self.assertEqual(aut, "Dupont")
+        self.assertIsInstance(text, str)
+        self.assertGreater(len(text), 0)
+        # Check that whitespace is normalized (single spaces)
+        self.assertNotIn("  ", text)
+    
+    def test_specialXML_to_text_txm(self):
+        # SCENARIO: Load text from a TXM file using specialXML_to_text
+        # GIVEN: A TXM format file
+        
+        # WHEN: Loading with format="txm"
+        aut, text = superstyl.preproc.pipe.specialXML_to_text(
+            self.txm_path, 
+            format="txm", 
+            feats="words"
+        )
+        
+        # THEN: Author is extracted from filename and text is normalized
+        self.assertEqual(aut, "Smith")
+        self.assertIsInstance(text, str)
+        self.assertGreater(len(text), 0)
+        # Text should contain words from the TXM file
+        self.assertIn("test", text.lower())
+    
+    def test_specialXML_to_text_with_lemma(self):
+        # SCENARIO: Load lemmas from a TEI file
+        # GIVEN: A TEI file with lemma annotations
+        
+        # WHEN: Loading with feats="lemma"
+        aut, text = superstyl.preproc.pipe.specialXML_to_text(
+            self.tei_path, 
+            format="tei", 
+            feats="lemma"
+        )
+        
+        # THEN: Lemmas are in the text
+        self.assertEqual(aut, "Dupont")
+        self.assertIn("be", text)  # lemma of "is"
+        self.assertIn("this", text)
+    
+    def test_specialXML_to_text_with_pos(self):
+        # SCENARIO: Load POS tags from a TEI file
+        # GIVEN: A TEI file with POS annotations
+        
+        # WHEN: Loading with feats="pos"
+        aut, text = superstyl.preproc.pipe.specialXML_to_text(
+            self.tei_path, 
+            format="tei", 
+            feats="pos"
+        )
+        
+        # THEN: POS tags are in the text
+        self.assertEqual(aut, "Dupont")
+        self.assertIn("DET", text)
+        self.assertIn("VERB", text)
+    
+    def test_specialXML_to_text_txm_with_lemma(self):
+        # SCENARIO: Load lemmas from a TXM file
+        # GIVEN: A TXM file with lemma annotations
+        
+        # WHEN: Loading with feats="lemma"
+        aut, text = superstyl.preproc.pipe.specialXML_to_text(
+            self.txm_path,
+            format="txm",
+            feats="lemma"
+        )
+        
+        # THEN: Lemmas are in the text
+        self.assertEqual(aut, "Smith")
+        self.assertIn("be", text)  # lemma of "is"
+        self.assertIn("this", text)
+    
+    def test_specialXML_to_text_txm_with_pos(self):
+        # SCENARIO: Load POS tags from a TXM file
+        # GIVEN: A TXM file with POS annotations
+        
+        # WHEN: Loading with feats="pos"
+        aut, text = superstyl.preproc.pipe.specialXML_to_text(
+            self.txm_path,
+            format="txm",
+            feats="pos"
+        )
+        
+        # THEN: POS tags are in the text
+        self.assertEqual(aut, "Smith")
+        self.assertIn("DET", text)
+        self.assertIn("VERB", text)
+
+
+if __name__ == '__main__':
+    unittest.main()
\ No newline at end of file
diff --git a/tests/testdata/Dupont_TEIPoem1.xml b/tests/testdata/Dupont_TEIPoem1.xml
new file mode 100644
index 00000000..a737babd
--- /dev/null
+++ b/tests/testdata/Dupont_TEIPoem1.xml
@@ -0,0 +1,30 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<TEI xmlns="http://www.tei-c.org/ns/1.0">
+    <teiHeader>
+        <fileDesc>
+            <titleStmt>
+                <title>Test Poem</title>
+            </titleStmt>
+        </fileDesc>
+    </teiHeader>
+    <text>
+        <body>
+            <lg>
+                <l met="01010101">
+                    <w lemma="this" pos="DET">This</w>
+                    <w lemma="be" pos="VERB">is</w>
+                    <w lemma="the" pos="DET">the</w>
+                    <w lemma="first" pos="ADJ">first</w>
+                    <w lemma="line" pos="NOUN">line</w>
+                </l>
+                <l met="10101010">
+                    <w lemma="and" pos="CONJ">And</w>
+                    <w lemma="this" pos="DET">this</w>
+                    <w lemma="be" pos="VERB">is</w>
+                    <w lemma="the" pos="DET">the</w>
+                    <w lemma="second" pos="ADJ">second</w>
+                </l>
+            </lg>
+        </body>
+    </text>
+</TEI>
\ No newline at end of file
diff --git a/tests/testdata/Smith_Song1.xml b/tests/testdata/Smith_Song1.xml
new file mode 100644
index 00000000..d713cc96
--- /dev/null
+++ b/tests/testdata/Smith_Song1.xml
@@ -0,0 +1,5 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<song>
+    <author>Smith</author>
+    <text>This is a test song with some lyrics</text>
+</song>
\ No newline at end of file
diff --git a/tests/testdata/Smith_TXM1.xml b/tests/testdata/Smith_TXM1.xml
new file mode 100644
index 00000000..0411bf57
--- /dev/null
+++ b/tests/testdata/Smith_TXM1.xml
@@ -0,0 +1,55 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<TEI xmlns="http://www.tei-c.org/ns/1.0" xmlns:txm="http://textometrie.org/1.0">
+    <teiHeader>
+        <fileDesc>
+            <titleStmt>
+                <title>Test TXM Text</title>
+            </titleStmt>
+        </fileDesc>
+    </teiHeader>
+    <text>
+        <body>
+            <lg>
+                <l>
+                    <w>
+                        <txm:form>This</txm:form>
+                        <txm:lemma>this</txm:lemma>
+                        <txm:ana type="#frpos">DET</txm:ana>
+                    </w>
+                    <w>
+                        <txm:form>is</txm:form>
+                        <txm:lemma>be</txm:lemma>
+                        <txm:ana type="#frpos">VERB</txm:ana>
+                    </w>
+                    <w>
+                        <txm:form>a</txm:form>
+                        <txm:lemma>a</txm:lemma>
+                        <txm:ana type="#frpos">DET</txm:ana>
+                    </w>
+                    <w>
+                        <txm:form>test</txm:form>
+                        <txm:lemma>test</txm:lemma>
+                        <txm:ana type="#frpos">NOUN</txm:ana>
+                    </w>
+                </l>
+                <l>
+                    <w>
+                        <txm:form>Second</txm:form>
+                        <txm:lemma>second</txm:lemma>
+                        <txm:ana type="#frpos">ADJ</txm:ana>
+                    </w>
+                    <w>
+                        <txm:form>line</txm:form>
+                        <txm:lemma>line</txm:lemma>
+                        <txm:ana type="#frpos">NOUN</txm:ana>
+                    </w>
+                    <w>
+                        <txm:form>here</txm:form>
+                        <txm:lemma>here</txm:lemma>
+                        <txm:ana type="#frpos">NOMpro</txm:ana>
+                    </w>
+                </l>
+            </lg>
+        </body>
+    </text>
+</TEI>
\ No newline at end of file