diff --git a/packtools/sps/models/tablewrap.py b/packtools/sps/models/tablewrap.py index 68102e5b7..3c09b968c 100644 --- a/packtools/sps/models/tablewrap.py +++ b/packtools/sps/models/tablewrap.py @@ -106,6 +106,42 @@ def graphic_long_desc(self): return long_desc_elem.text return None + @property + def has_tr_in_table(self): + """Check if has direct children (invalid per NISO JATS).""" + table = self.element.find(".//table") + if table is not None: + return table.find("tr") is not None + return False + + @property + def has_th_outside_thead(self): + """Check if .""" + table = self.element.find(".//table") + if table is not None: + all_th = table.findall(".//th") + thead_th = table.findall(".//thead//th") + return len(all_th) > len(thead_th) + return False + + @property + def has_td_outside_tbody(self): + """Check if .""" + table = self.element.find(".//table") + if table is not None: + all_td = table.findall(".//td") + tbody_td = table.findall(".//tbody//td") + return len(all_td) > len(tbody_td) + return False + + @property + def has_tbody(self): + """Check if
appears outside of
appears outside of
has a element.""" + table = self.element.find(".//table") + if table is not None: + return table.find(".//tbody") is not None + return False + @property def data(self): return { @@ -119,6 +155,10 @@ def data(self): "graphic": self.graphic, "graphic_alt_text": self.graphic_alt_text, "graphic_long_desc": self.graphic_long_desc, + "has_tr_in_table": self.has_tr_in_table, + "has_th_outside_thead": self.has_th_outside_thead, + "has_td_outside_tbody": self.has_td_outside_tbody, + "has_tbody": self.has_tbody, } diff --git a/packtools/sps/validation/tablewrap.py b/packtools/sps/validation/tablewrap.py index 3ce8d3277..75977e27e 100644 --- a/packtools/sps/validation/tablewrap.py +++ b/packtools/sps/validation/tablewrap.py @@ -79,10 +79,15 @@ def get_default_params(self): return { "absent_error_level": "WARNING", "id_error_level": "CRITICAL", + "label_or_caption_error_level": "CRITICAL", "label_error_level": "CRITICAL", "caption_error_level": "CRITICAL", "table_error_level": "CRITICAL", - "alternatives_error_level": "CRITICAL" + "alternatives_error_level": "CRITICAL", + "tr_in_table_error_level": "ERROR", + "th_in_thead_error_level": "ERROR", + "td_in_tbody_error_level": "ERROR", + "tbody_error_level": "WARNING", } def validate(self): @@ -92,10 +97,13 @@ def validate(self): """ validations = [ self.validate_id, - self.validate_label, - self.validate_caption, + self.validate_label_or_caption, self.validate_table, self.validate_alternatives, + self.validate_tr_not_in_table, + self.validate_th_in_thead, + self.validate_td_in_tbody, + self.validate_tbody, ] return [response for validate in validations if (response := validate())] @@ -128,20 +136,27 @@ def validate_id(self): advice_params={}, ) - def validate_label(self): + def validate_label_or_caption(self): """ - Validates the presence of label in the . + Validates the presence of " + "
in the . + At least one of with must be present. Returns: The validation result in the expected format. """ label = self.data.get("label") - is_valid = bool(label) - table_id = self.data.get("table_wrap_id") - advice = f'Wrap each label with <label> inside {self.xml}. Consult SPS documentation for more detail.' + caption = self.data.get("caption") + is_valid = bool(label) or bool(caption) + obtained = [] + if label: + obtained.append(f"label={label}") + if caption: + obtained.append(f"caption={caption}") + obtained_str = ", ".join(obtained) if obtained else None + advice = f'Add <label> or <caption> with <title> inside {self.xml}. Consult SPS documentation for more detail.' return build_response( - title="label", + title="label or caption", parent={ "parent": self.data.get("parent"), "parent_id": self.data.get("parent_id"), @@ -149,46 +164,15 @@ def validate_label(self): "parent_lang": self.data.get("parent_lang"), }, item="table-wrap", - sub_item="label", + sub_item="label or caption", validation_type="exist", is_valid=is_valid, - expected="label", - obtained=label, + expected="<label> or <caption> with <title>", + obtained=obtained_str, advice=advice, data=self.data, - error_level=self.rules["label_error_level"], - advice_text='Wrap each label with <label> inside {xml}. Consult SPS documentation for more detail.', - advice_params={"xml": self.xml}, - ) - - def validate_caption(self): - """ - Validates the presence of caption in the <table-wrap>. - - Returns: - The validation result in the expected format. - """ - caption = self.data.get("caption") - is_valid = bool(caption) - table_id = self.data.get("table_wrap_id") - return build_response( - title="caption", - parent={ - "parent": self.data.get("parent"), - "parent_id": self.data.get("parent_id"), - "parent_article_type": self.data.get("parent_article_type"), - "parent_lang": self.data.get("parent_lang"), - }, - item="table-wrap", - sub_item="caption", - validation_type="exist", - is_valid=is_valid, - expected="caption", - obtained=caption, - advice=f'Wrap each caption with <caption> inside {self.xml}. Consult SPS documentation for more detail.', - data=self.data, - error_level=self.rules["caption_error_level"], - advice_text='Wrap each caption with <caption> inside {xml}. Consult SPS documentation for more detail.', + error_level=self.rules["label_or_caption_error_level"], + advice_text='Add <label> or <caption> with <title> inside {xml}. Consult SPS documentation for more detail.', advice_params={"xml": self.xml}, ) @@ -201,7 +185,6 @@ def validate_table(self): """ table = self.data.get("table") is_valid = bool(table) - table_id = self.data.get("table_wrap_id") return build_response( title="table", parent={ @@ -230,11 +213,9 @@ def validate_alternatives(self): Returns: The validation result in the expected format. """ - graphic = 1 if self.data.get("graphic") else 0 # self.data.get("graphic") retorna uma referência para uma representação da tabela, se houver - table = 1 if self.data.get("table") else 0 # self.data.get("table") retorna uma codificação de tabela, se houver - alternatives = self.data.get("alternative_elements") # uma lista com as tags internas à <alternatives> - - table_id = self.data.get("table_wrap_id") + graphic = 1 if self.data.get("graphic") else 0 + table = 1 if self.data.get("table") else 0 + alternatives = self.data.get("alternative_elements") if graphic + table > 1 and len(alternatives) == 0: expected = "alternatives" @@ -278,3 +259,132 @@ def validate_alternatives(self): advice_text=advice_text, advice_params=advice_params, ) + + def validate_tr_not_in_table(self): + """ + Validates that <tr> is not a direct child of <table>. + The first level of <table> must not contain <tr> (NISO JATS model). + + Returns: + The validation result in the expected format, or None if no <table> exists. + """ + if not self.data.get("table"): + return None + has_tr = self.data.get("has_tr_in_table") + is_valid = not has_tr + return build_response( + title="table structure", + parent={ + "parent": self.data.get("parent"), + "parent_id": self.data.get("parent_id"), + "parent_article_type": self.data.get("parent_article_type"), + "parent_lang": self.data.get("parent_lang"), + }, + item="table-wrap", + sub_item="table/tr", + validation_type="exist", + is_valid=is_valid, + expected="<tr> must not be a direct child of <table>", + obtained="<table><tr>..." if has_tr else "<tr> not found as direct child of <table>", + advice=f'Remove <tr> as a direct child of <table> in {self.xml}. Use <thead> or <tbody> to wrap <tr> elements.', + data=self.data, + error_level=self.rules["tr_in_table_error_level"], + advice_text='Remove <tr> as a direct child of <table> in {xml}. Use <thead> or <tbody> to wrap <tr> elements.', + advice_params={"xml": self.xml}, + ) + + def validate_th_in_thead(self): + """ + Validates that <th> only appears as a descendant of <thead>. + + Returns: + The validation result in the expected format, or None if no <table> exists. + """ + if not self.data.get("table"): + return None + has_th_outside = self.data.get("has_th_outside_thead") + is_valid = not has_th_outside + return build_response( + title="th in thead", + parent={ + "parent": self.data.get("parent"), + "parent_id": self.data.get("parent_id"), + "parent_article_type": self.data.get("parent_article_type"), + "parent_lang": self.data.get("parent_lang"), + }, + item="table-wrap", + sub_item="th", + validation_type="exist", + is_valid=is_valid, + expected="<th> only as descendant of <thead>", + obtained="<th> found outside <thead>" if has_th_outside else "<th> only in <thead>", + advice=f'Move <th> elements to be inside <thead> in {self.xml}. Consult SPS documentation for more detail.', + data=self.data, + error_level=self.rules["th_in_thead_error_level"], + advice_text='Move <th> elements to be inside <thead> in {xml}. Consult SPS documentation for more detail.', + advice_params={"xml": self.xml}, + ) + + def validate_td_in_tbody(self): + """ + Validates that <td> only appears as a descendant of <tbody>. + + Returns: + The validation result in the expected format, or None if no <table> exists. + """ + if not self.data.get("table"): + return None + has_td_outside = self.data.get("has_td_outside_tbody") + is_valid = not has_td_outside + return build_response( + title="td in tbody", + parent={ + "parent": self.data.get("parent"), + "parent_id": self.data.get("parent_id"), + "parent_article_type": self.data.get("parent_article_type"), + "parent_lang": self.data.get("parent_lang"), + }, + item="table-wrap", + sub_item="td", + validation_type="exist", + is_valid=is_valid, + expected="<td> only as descendant of <tbody>", + obtained="<td> found outside <tbody>" if has_td_outside else "<td> only in <tbody>", + advice=f'Move <td> elements to be inside <tbody> in {self.xml}. Consult SPS documentation for more detail.', + data=self.data, + error_level=self.rules["td_in_tbody_error_level"], + advice_text='Move <td> elements to be inside <tbody> in {xml}. Consult SPS documentation for more detail.', + advice_params={"xml": self.xml}, + ) + + def validate_tbody(self): + """ + Validates the presence of <tbody> in <table>. + + Returns: + The validation result in the expected format, or None if no <table> exists. + """ + if not self.data.get("table"): + return None + has_tbody = self.data.get("has_tbody") + is_valid = bool(has_tbody) + return build_response( + title="tbody", + parent={ + "parent": self.data.get("parent"), + "parent_id": self.data.get("parent_id"), + "parent_article_type": self.data.get("parent_article_type"), + "parent_lang": self.data.get("parent_lang"), + }, + item="table-wrap", + sub_item="tbody", + validation_type="exist", + is_valid=is_valid, + expected="<tbody> element in <table>", + obtained="<tbody>" if has_tbody else None, + advice=f'Add <tbody> inside <table> in {self.xml}. Consult SPS documentation for more detail.', + data=self.data, + error_level=self.rules["tbody_error_level"], + advice_text='Add <tbody> inside <table> in {xml}. Consult SPS documentation for more detail.', + advice_params={"xml": self.xml}, + ) diff --git a/packtools/sps/validation_rules/tablewrap_rules.json b/packtools/sps/validation_rules/tablewrap_rules.json index da4b65f9e..9af620329 100644 --- a/packtools/sps/validation_rules/tablewrap_rules.json +++ b/packtools/sps/validation_rules/tablewrap_rules.json @@ -2,9 +2,14 @@ "table_wrap_rules": { "absent_error_level": "WARNING", "id_error_level": "CRITICAL", + "label_or_caption_error_level": "CRITICAL", "label_error_level": "CRITICAL", "caption_error_level": "CRITICAL", "table_error_level": "CRITICAL", - "alternatives_error_level": "CRITICAL" + "alternatives_error_level": "CRITICAL", + "tr_in_table_error_level": "ERROR", + "th_in_thead_error_level": "ERROR", + "td_in_tbody_error_level": "ERROR", + "tbody_error_level": "WARNING" } } diff --git a/tests/sps/models/test_tablewrap.py b/tests/sps/models/test_tablewrap.py index eb1f1b0de..7a2f76840 100644 --- a/tests/sps/models/test_tablewrap.py +++ b/tests/sps/models/test_tablewrap.py @@ -135,6 +135,12 @@ def test_data(self): '</thead>' '</table>', "graphic": "1980-5381-neco-28-02-579-gt02.svg", + "graphic_alt_text": None, + "graphic_long_desc": None, + "has_tr_in_table": False, + "has_th_outside_thead": False, + "has_td_outside_tbody": False, + "has_tbody": False, } self.assertDictEqual(self.table_wrap_obj.data, expected_data) @@ -252,6 +258,12 @@ def test_get_article_table_wrappers(self): 'table codification' '</table>', "graphic": "1980-5381-neco-28-02-579-gt02.svg", + "graphic_alt_text": None, + "graphic_long_desc": None, + "has_tr_in_table": False, + "has_th_outside_thead": False, + "has_td_outside_tbody": False, + "has_tbody": False, }, { "alternative_elements": ["graphic", "table"], @@ -277,6 +289,12 @@ def test_get_article_table_wrappers(self): 'table codification' '</table>', "graphic": "1980-5381-neco-28-02-579-gt03.svg", + "graphic_alt_text": None, + "graphic_long_desc": None, + "has_tr_in_table": False, + "has_th_outside_thead": False, + "has_td_outside_tbody": False, + "has_tbody": False, }, ] @@ -318,6 +336,12 @@ def test_get_sub_article_translation_table_wrappers(self): 'table codification' '</table>', "graphic": "1980-5381-neco-28-02-579-gt04.svg", + "graphic_alt_text": None, + "graphic_long_desc": None, + "has_tr_in_table": False, + "has_th_outside_thead": False, + "has_td_outside_tbody": False, + "has_tbody": False, }, { "alternative_elements": ["graphic", "table"], @@ -343,6 +367,12 @@ def test_get_sub_article_translation_table_wrappers(self): 'table codification' '</table>', "graphic": "1980-5381-neco-28-02-579-gt05.svg", + "graphic_alt_text": None, + "graphic_long_desc": None, + "has_tr_in_table": False, + "has_th_outside_thead": False, + "has_td_outside_tbody": False, + "has_tbody": False, }, ] @@ -388,6 +418,12 @@ def test_get_all_table_wrappers(self): 'table codification' '</table>', "graphic": "1980-5381-neco-28-02-579-gt02.svg", + "graphic_alt_text": None, + "graphic_long_desc": None, + "has_tr_in_table": False, + "has_th_outside_thead": False, + "has_td_outside_tbody": False, + "has_tbody": False, }, { "alternative_elements": ["graphic", "table"], @@ -413,6 +449,12 @@ def test_get_all_table_wrappers(self): 'table codification' '</table>', "graphic": "1980-5381-neco-28-02-579-gt03.svg", + "graphic_alt_text": None, + "graphic_long_desc": None, + "has_tr_in_table": False, + "has_th_outside_thead": False, + "has_td_outside_tbody": False, + "has_tbody": False, }, { "alternative_elements": ["graphic", "table"], @@ -438,6 +480,12 @@ def test_get_all_table_wrappers(self): 'table codification' '</table>', "graphic": "1980-5381-neco-28-02-579-gt04.svg", + "graphic_alt_text": None, + "graphic_long_desc": None, + "has_tr_in_table": False, + "has_th_outside_thead": False, + "has_td_outside_tbody": False, + "has_tbody": False, }, { "alternative_elements": ["graphic", "table"], @@ -463,6 +511,12 @@ def test_get_all_table_wrappers(self): 'table codification' '</table>', "graphic": "1980-5381-neco-28-02-579-gt05.svg", + "graphic_alt_text": None, + "graphic_long_desc": None, + "has_tr_in_table": False, + "has_th_outside_thead": False, + "has_td_outside_tbody": False, + "has_tbody": False, }, ] diff --git a/tests/sps/validation/test_tablewrap.py b/tests/sps/validation/test_tablewrap.py index e63676f35..1337e9688 100644 --- a/tests/sps/validation/test_tablewrap.py +++ b/tests/sps/validation/test_tablewrap.py @@ -1,7 +1,7 @@ import unittest from lxml import etree -from packtools.sps.validation.tablewrap import ArticleTableWrapValidation +from packtools.sps.validation.tablewrap import ArticleTableWrapValidation, TableWrapValidation class TableWrapValidationTest(unittest.TestCase): @@ -21,28 +21,12 @@ def test_validate_absent(self): ).validate() ) - expected = [ - { - "title": "table-wrap presence", - "parent": "article", - "parent_id": None, - "parent_article_type": "research-article", - "parent_lang": "pt", - "item": "table-wrap", - "sub_item": None, - "validation_type": "exist", - "response": "WARNING", - "expected_value": "<table-wrap> element", - "got_value": None, - "message": "Got None, expected <table-wrap> element", - "advice": "Add <table-wrap> element to properly illustrate the content.", - "data": None, - } - ] - - for i, item in enumerate(expected): - with self.subTest(i): - self.assertDictEqual(item, obtained[i]) + self.assertEqual(1, len(obtained)) + result = obtained[0] + self.assertEqual("table-wrap presence", result["title"]) + self.assertEqual("WARNING", result["response"]) + self.assertEqual("table-wrap", result["item"]) + self.assertIsNone(result["data"]) def test_validate_id(self): self.maxDiff = None @@ -52,8 +36,8 @@ def test_validate_id(self): "<body>" "<table-wrap>" "<label>Table 1</label>" - "<caption>table caption</caption>" - "<table>table codification</table>" + "<caption><title>table caption
data
" "" "" "" @@ -64,54 +48,126 @@ def test_validate_id(self): ).validate() ) - expected = [ - { - "title": "id", - "parent": "article", - "parent_id": None, - "parent_article_type": "research-article", - "parent_lang": "pt", - "item": "table-wrap", - "sub_item": "id", - "validation_type": "exist", - "response": "CRITICAL", - "expected_value": "id", - "got_value": None, - "message": "Got None, expected id", - "advice": "Identify the id", - "data": { - "alternative_parent": "table-wrap", - "table_wrap_id": None, - "label": "Table 1", - "caption": "table caption", - "footnotes": [], - "alternative_elements": [], - "table": '' - 'table codification' - '
', - "graphic": None, - "parent": "article", - "parent_id": None, - "parent_article_type": "research-article", - "parent_lang": "pt", - }, - } - ] - - for i, item in enumerate(expected): - with self.subTest(i): - self.assertDictEqual(item, obtained[i]) - - def test_validate_label(self): + # Find the id validation result + id_results = [r for r in obtained if r["title"] == "id"] + self.assertEqual(1, len(id_results)) + result = id_results[0] + self.assertEqual("CRITICAL", result["response"]) + self.assertEqual("table-wrap", result["item"]) + self.assertEqual("id", result["sub_item"]) + self.assertIsNone(result["got_value"]) + + def test_validate_id_present(self): + self.maxDiff = None + xml_tree = etree.fromstring( + '
' + "" + '' + "" + "table caption" + "
data
" + "
" + "" + "
" + ) + obtained = list( + ArticleTableWrapValidation( + xml_tree=xml_tree, rules={"id_error_level": "CRITICAL"} + ).validate() + ) + + id_results = [r for r in obtained if r["title"] == "id"] + self.assertEqual(1, len(id_results)) + result = id_results[0] + self.assertEqual("OK", result["response"]) + self.assertEqual("t01", result["got_value"]) + + def test_validate_label_or_caption_both_missing(self): + self.maxDiff = None + xml_tree = etree.fromstring( + '
' + "" + '' + "
data
" + "
" + "" + "
" + ) + obtained = list( + ArticleTableWrapValidation( + xml_tree=xml_tree, + rules={"label_or_caption_error_level": "CRITICAL"}, + ).validate() + ) + + results = [r for r in obtained if r["title"] == "label or caption"] + self.assertEqual(1, len(results)) + result = results[0] + self.assertEqual("CRITICAL", result["response"]) + self.assertIsNone(result["got_value"]) + + def test_validate_label_or_caption_only_label(self): + self.maxDiff = None + xml_tree = etree.fromstring( + '
' + "" + '' + "" + "
data
" + "
" + "" + "
" + ) + obtained = list( + ArticleTableWrapValidation( + xml_tree=xml_tree, + rules={"label_or_caption_error_level": "CRITICAL"}, + ).validate() + ) + + results = [r for r in obtained if r["title"] == "label or caption"] + self.assertEqual(1, len(results)) + result = results[0] + self.assertEqual("OK", result["response"]) + + def test_validate_label_or_caption_only_caption(self): + self.maxDiff = None + xml_tree = etree.fromstring( + '
' + "" + '' + "Risk factors" + "
data
" + "
" + "" + "
" + ) + obtained = list( + ArticleTableWrapValidation( + xml_tree=xml_tree, + rules={"label_or_caption_error_level": "CRITICAL"}, + ).validate() + ) + + results = [r for r in obtained if r["title"] == "label or caption"] + self.assertEqual(1, len(results)) + result = results[0] + self.assertEqual("OK", result["response"]) + + def test_validate_label_or_caption_both_present(self): self.maxDiff = None xml_tree = etree.fromstring( '
' "" '' - "table caption" - "table codification
" + "" + "Risk factors" + "
data
" "
" "" "
" @@ -119,52 +175,16 @@ def test_validate_label(self): obtained = list( ArticleTableWrapValidation( xml_tree=xml_tree, - rules={ - "label_error_level": "CRITICAL", - }, + rules={"label_or_caption_error_level": "CRITICAL"}, ).validate() ) - expected = [ - { - "title": "label", - "parent": "article", - "parent_id": None, - "parent_article_type": "research-article", - "parent_lang": "pt", - "item": "table-wrap", - "sub_item": "label", - "validation_type": "exist", - "response": "CRITICAL", - "expected_value": "label", - "got_value": None, - "message": "Got None, expected label", - "advice": "Identify the label", - "data": { - "alternative_parent": "table-wrap", - "table_wrap_id": "t01", - "label": None, - "caption": "table caption", - "footnotes": [], - "alternative_elements": [], - "table": '' - 'table codification' - '
', - "graphic": None, - "parent": "article", - "parent_id": None, - "parent_article_type": "research-article", - "parent_lang": "pt", - }, - } - ] - - for i, item in enumerate(expected): - with self.subTest(i): - self.assertDictEqual(item, obtained[i]) - - def test_validate_caption(self): + results = [r for r in obtained if r["title"] == "label or caption"] + self.assertEqual(1, len(results)) + result = results[0] + self.assertEqual("OK", result["response"]) + + def test_validate_table_missing(self): self.maxDiff = None xml_tree = etree.fromstring( '
" '' "" - "table codification
" + "Table caption" "
" "" "
" @@ -180,52 +200,17 @@ def test_validate_caption(self): obtained = list( ArticleTableWrapValidation( xml_tree=xml_tree, - rules={ - "caption_error_level": "CRITICAL", - }, + rules={"table_error_level": "CRITICAL"}, ).validate() ) - expected = [ - { - "title": "caption", - "parent": "article", - "parent_id": None, - "parent_article_type": "research-article", - "parent_lang": "pt", - "item": "table-wrap", - "sub_item": "caption", - "validation_type": "exist", - "response": "CRITICAL", - "expected_value": "caption", - "got_value": None, - "message": "Got None, expected caption", - "advice": "Identify the caption", - "data": { - "alternative_parent": "table-wrap", - "table_wrap_id": "t01", - "label": "Table 1", - "caption": "", - "footnotes": [], - "alternative_elements": [], - "table": '' - 'table codification' - '
', - "graphic": None, - "parent": "article", - "parent_id": None, - "parent_article_type": "research-article", - "parent_lang": "pt", - }, - } - ] - - for i, item in enumerate(expected): - with self.subTest(i): - self.assertDictEqual(item, obtained[i]) - - def test_validate_table(self): + results = [r for r in obtained if r["title"] == "table"] + self.assertEqual(1, len(results)) + result = results[0] + self.assertEqual("CRITICAL", result["response"]) + self.assertIsNone(result["got_value"]) + + def test_validate_table_present(self): self.maxDiff = None xml_tree = etree.fromstring( '
" '' "" - "Table caption" + "Table caption" + "
data
" "
" "" "
" @@ -245,41 +231,10 @@ def test_validate_table(self): ).validate() ) - expected = [ - { - "title": "table", - "parent": "article", - "parent_id": None, - "parent_article_type": "research-article", - "parent_lang": "pt", - "item": "table-wrap", - "sub_item": "table", - "validation_type": "exist", - "response": "CRITICAL", - "expected_value": "table", - "got_value": None, - "message": "Got None, expected table", - "advice": "Identify the table", - "data": { - "alternative_parent": "table-wrap", - "table_wrap_id": "t01", - "label": "Table 1", - "caption": "Table caption", - 'footnotes': [], - "alternative_elements": [], - "graphic": None, - "table": None, - "parent": "article", - "parent_id": None, - "parent_article_type": "research-article", - "parent_lang": "pt", - }, - } - ] - - for i, item in enumerate(expected): - with self.subTest(i): - self.assertDictEqual(item, obtained[i]) + results = [r for r in obtained if r["title"] == "table"] + self.assertEqual(1, len(results)) + result = results[0] + self.assertEqual("OK", result["response"]) def test_validate_required_alternatives(self): self.maxDiff = None @@ -289,8 +244,8 @@ def test_validate_required_alternatives(self): "" '' "" - "Table caption" - "table codification
" + "Table caption" + "
data
" '' "
" "" @@ -303,44 +258,11 @@ def test_validate_required_alternatives(self): ).validate() ) - expected = [ - { - "title": "alternatives", - "parent": "article", - "parent_id": None, - "parent_article_type": "research-article", - "parent_lang": "pt", - "item": "table-wrap", - "sub_item": "alternatives", - "validation_type": "exist", - "response": "CRITICAL", - "expected_value": "alternatives", - "got_value": None, - "message": "Got None, expected alternatives", - "advice": "Identify the alternatives", - "data": { - "alternative_parent": "table-wrap", - "table_wrap_id": "t01", - "label": "Table 1", - "caption": "Table caption", - "footnotes": [], - "alternative_elements": [], - "graphic": "1980-5381-neco-28-02-579-gt05.svg", - "table": '' - 'table codification' - '
', - "parent": "article", - "parent_id": None, - "parent_article_type": "research-article", - "parent_lang": "pt", - }, - } - ] - - for i, item in enumerate(expected): - with self.subTest(i): - self.assertDictEqual(item, obtained[i]) + results = [r for r in obtained if r["title"] == "alternatives"] + self.assertEqual(1, len(results)) + result = results[0] + self.assertEqual("CRITICAL", result["response"]) + self.assertIsNone(result["got_value"]) def test_validate_not_required_alternatives(self): self.maxDiff = None @@ -350,9 +272,9 @@ def test_validate_not_required_alternatives(self): "" '' "" - "Table caption" + "Table caption" "" - "table codification
" + "
data
" "
" "
" "" @@ -365,42 +287,397 @@ def test_validate_not_required_alternatives(self): ).validate() ) - expected = [ - { - "title": "alternatives", - "parent": "article", - "parent_id": None, - "parent_article_type": "research-article", - "parent_lang": "pt", - "item": "table-wrap", - "sub_item": "alternatives", - "validation_type": "exist", - "response": "CRITICAL", - "expected_value": None, - "got_value": "alternatives", - "message": "Got alternatives, expected None", - "advice": "Remove the alternatives", - "data": { - "alternative_parent": "table-wrap", - "table_wrap_id": "t01", - "label": "Table 1", - "caption": "Table caption", - "footnotes": [], - "alternative_elements": ["table"], - "graphic": None, - "table": 'table codification
', - "parent": "article", - "parent_id": None, - "parent_article_type": "research-article", - "parent_lang": "pt", - }, - } - ] - - for i, item in enumerate(expected): - with self.subTest(i): - self.assertDictEqual(item, obtained[i]) + results = [r for r in obtained if r["title"] == "alternatives"] + self.assertEqual(1, len(results)) + result = results[0] + self.assertEqual("CRITICAL", result["response"]) + self.assertEqual("alternatives", result["got_value"]) + + +class TableWrapTrValidationTest(unittest.TestCase): + """Tests for Rule #3: must not be a direct child of .""" + + def test_validate_tr_not_in_table_valid(self): + """ inside is valid.""" + self.maxDiff = None + xml_tree = etree.fromstring( + '
' + "" + '' + "" + "
" + "
Sample data
" + "" + "" + "" + "
Data 1Data 2
" + "" + "" + "" + ) + obtained = list( + ArticleTableWrapValidation( + xml_tree=xml_tree, + rules={"tr_in_table_error_level": "ERROR"}, + ).validate() + ) + + results = [r for r in obtained if r["title"] == "table structure"] + self.assertEqual(1, len(results)) + result = results[0] + self.assertEqual("OK", result["response"]) + + def test_validate_tr_in_table_invalid(self): + """ as direct child of is invalid.""" + self.maxDiff = None + xml_tree = etree.fromstring( + '
' + "" + '' + "" + "
" + "
Sample data
" + "" + "
Data 1Data 2
" + "" + "" + "" + ) + obtained = list( + ArticleTableWrapValidation( + xml_tree=xml_tree, + rules={"tr_in_table_error_level": "ERROR"}, + ).validate() + ) + + results = [r for r in obtained if r["title"] == "table structure"] + self.assertEqual(1, len(results)) + result = results[0] + self.assertEqual("ERROR", result["response"]) + self.assertEqual("table/tr", result["sub_item"]) + + +class TableWrapThValidationTest(unittest.TestCase): + """Tests for Rule #4: must only appear within .""" + + def test_validate_th_in_thead_valid(self): + """ inside is valid.""" + self.maxDiff = None + xml_tree = etree.fromstring( + '
' + "" + '' + "" + "Sample data" + "" + "" + "" + "
Header 1Header 2
Data 1Data 2
" + "
" + "" + "
" + ) + obtained = list( + ArticleTableWrapValidation( + xml_tree=xml_tree, + rules={"th_in_thead_error_level": "ERROR"}, + ).validate() + ) + + results = [r for r in obtained if r["title"] == "th in thead"] + self.assertEqual(1, len(results)) + result = results[0] + self.assertEqual("OK", result["response"]) + + def test_validate_th_outside_thead_invalid(self): + """ outside (e.g. in ) is invalid.""" + self.maxDiff = None + xml_tree = etree.fromstring( + '
' + "" + '' + "" + "Sample data" + "" + "" + "" + "" + "
Header 1Data 1
" + "
" + "" + "
" + ) + obtained = list( + ArticleTableWrapValidation( + xml_tree=xml_tree, + rules={"th_in_thead_error_level": "ERROR"}, + ).validate() + ) + + results = [r for r in obtained if r["title"] == "th in thead"] + self.assertEqual(1, len(results)) + result = results[0] + self.assertEqual("ERROR", result["response"]) + self.assertEqual("th", result["sub_item"]) + + +class TableWrapTdValidationTest(unittest.TestCase): + """Tests for Rule #5: must only appear within .""" + + def test_validate_td_in_tbody_valid(self): + """ inside is valid.""" + self.maxDiff = None + xml_tree = etree.fromstring( + '
' + "" + '' + "" + "Sample data" + "" + "" + "" + "
Header 1
Data 1
" + "
" + "" + "
" + ) + obtained = list( + ArticleTableWrapValidation( + xml_tree=xml_tree, + rules={"td_in_tbody_error_level": "ERROR"}, + ).validate() + ) + + results = [r for r in obtained if r["title"] == "td in tbody"] + self.assertEqual(1, len(results)) + result = results[0] + self.assertEqual("OK", result["response"]) + + def test_validate_td_outside_tbody_invalid(self): + """ outside (e.g. in ) is invalid.""" + self.maxDiff = None + xml_tree = etree.fromstring( + '
' + "" + '' + "" + "Sample data" + "" + "" + "" + "
Header as td
Data 1
" + "
" + "" + "
" + ) + obtained = list( + ArticleTableWrapValidation( + xml_tree=xml_tree, + rules={"td_in_tbody_error_level": "ERROR"}, + ).validate() + ) + + results = [r for r in obtained if r["title"] == "td in tbody"] + self.assertEqual(1, len(results)) + result = results[0] + self.assertEqual("ERROR", result["response"]) + self.assertEqual("td", result["sub_item"]) + + +class TableWrapTbodyValidationTest(unittest.TestCase): + """Tests for Rule #7: must be present in .""" + + def test_validate_tbody_present(self): + """ present in
is valid.""" + self.maxDiff = None + xml_tree = etree.fromstring( + '
' + "" + '' + "" + "
" + "
Sample data
" + "" + "
Data 1
" + "" + "" + "" + ) + obtained = list( + ArticleTableWrapValidation( + xml_tree=xml_tree, + rules={"tbody_error_level": "WARNING"}, + ).validate() + ) + + results = [r for r in obtained if r["title"] == "tbody"] + self.assertEqual(1, len(results)) + result = results[0] + self.assertEqual("OK", result["response"]) + + def test_validate_tbody_missing(self): + """ missing from is invalid.""" + self.maxDiff = None + xml_tree = etree.fromstring( + '
' + "" + '' + "" + "
" + "
Sample data
" + "" + "
Data 1
" + "" + "" + "" + ) + obtained = list( + ArticleTableWrapValidation( + xml_tree=xml_tree, + rules={"tbody_error_level": "WARNING"}, + ).validate() + ) + + results = [r for r in obtained if r["title"] == "tbody"] + self.assertEqual(1, len(results)) + result = results[0] + self.assertEqual("WARNING", result["response"]) + self.assertEqual("tbody", result["sub_item"]) + + +class TableWrapCompleteValidationTest(unittest.TestCase): + """Tests for fully valid table-wrap elements.""" + + def test_fully_valid_table_wrap(self): + """A fully valid table-wrap with all required elements should pass all validations.""" + self.maxDiff = None + xml_tree = etree.fromstring( + '
' + "" + '' + "" + "Sample data" + "" + "" + "" + "
Header 1Header 2
Data 1Data 2
" + "
" + "" + "
" + ) + obtained = list( + ArticleTableWrapValidation( + xml_tree=xml_tree, + rules={}, + ).validate() + ) + + # All validations should pass (response == "OK") + for result in obtained: + self.assertEqual("OK", result["response"], f"Validation '{result['title']}' should pass but got {result['response']}") + + def test_valid_table_wrap_with_empty_title(self): + """Table-wrap with empty inside <caption> should pass label_or_caption validation.""" + self.maxDiff = None + xml_tree = etree.fromstring( + '<article xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:mml="http://www.w3.org/1998/Math/MathML" ' + 'dtd-version="1.0" article-type="research-article" xml:lang="pt">' + "<body>" + '<table-wrap id="t01">' + "<label>Table 1</label>" + "<caption><title/></caption>" + "<table>" + "<tbody><tr><td>Data 1</td></tr></tbody>" + "</table>" + "</table-wrap>" + "</body>" + "</article>" + ) + obtained = list( + ArticleTableWrapValidation( + xml_tree=xml_tree, + rules={}, + ).validate() + ) + + results = [r for r in obtained if r["title"] == "label or caption"] + self.assertEqual(1, len(results)) + # label is present, so this should pass + self.assertEqual("OK", results[0]["response"]) + + def test_table_wrap_with_table_wrap_foot(self): + """Table with table-wrap-foot should pass all validations.""" + self.maxDiff = None + xml_tree = etree.fromstring( + '<article xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:mml="http://www.w3.org/1998/Math/MathML" ' + 'dtd-version="1.0" article-type="research-article" xml:lang="pt">' + "<body>" + '<table-wrap id="t01">' + "<label>Table 1</label>" + "<caption><title>Results" + "" + "" + "
Value 1Value 2
" + "" + "

Data are expressed as means.

" + "
" + "" + "" + "" + ) + obtained = list( + ArticleTableWrapValidation( + xml_tree=xml_tree, + rules={}, + ).validate() + ) + + for result in obtained: + self.assertEqual("OK", result["response"], f"Validation '{result['title']}' should pass but got {result['response']}") + + +class TableWrapNoTableNisoValidationTest(unittest.TestCase): + """Tests that NISO JATS validations are skipped when no is present.""" + + def test_no_table_skips_niso_validations(self): + """When
is missing, NISO JATS validations (tr, th, td, tbody) should be skipped.""" + self.maxDiff = None + xml_tree = etree.fromstring( + '
' + "" + '' + "" + "
" + "" + "" + "" + ) + obtained = list( + ArticleTableWrapValidation( + xml_tree=xml_tree, + rules={}, + ).validate() + ) + + # NISO JATS validations should not be present since there's no
Table caption
+ niso_titles = {"table structure", "th in thead", "td in tbody", "tbody"} + niso_results = [r for r in obtained if r["title"] in niso_titles] + self.assertEqual(0, len(niso_results)) + + # But table validation should still report the missing table + table_results = [r for r in obtained if r["title"] == "table"] + self.assertEqual(1, len(table_results)) + self.assertNotEqual("OK", table_results[0]["response"]) if __name__ == "__main__":