Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions packtools/sps/models/tablewrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,42 @@ def graphic_long_desc(self):
return long_desc_elem.text
return None

@property
def has_tr_in_table(self):
    """Tell whether the first <table> found has a <tr> as a direct child.

    Returns:
        bool: True when a <tr> sits directly under the <table>; False when
        it does not, or when ``self.element`` contains no <table> at all.
    """
    table_node = self.element.find(".//table")
    if table_node is None:
        return False
    # A bare "tr" path only inspects the immediate children of the table.
    direct_row = table_node.find("tr")
    return direct_row is not None

@property
def has_th_outside_thead(self):
    """Tell whether the first <table> found holds a <th> outside <thead>.

    The check compares how many <th> descendants the table has with how
    many of them are located under a <thead>; a surplus means at least one
    <th> lives elsewhere.

    Returns:
        bool: True when more <th> elements exist than those under <thead>;
        False otherwise, or when there is no <table>.
    """
    table_node = self.element.find(".//table")
    if table_node is None:
        return False
    th_total = len(table_node.findall(".//th"))
    th_under_thead = len(table_node.findall(".//thead//th"))
    return th_total > th_under_thead

@property
def has_td_outside_tbody(self):
    """Tell whether the first <table> found holds a <td> outside <tbody>.

    The check compares how many <td> descendants the table has with how
    many of them are located under a <tbody>; a surplus means at least one
    <td> lives elsewhere.

    Returns:
        bool: True when more <td> elements exist than those under <tbody>;
        False otherwise, or when there is no <table>.
    """
    table_node = self.element.find(".//table")
    if table_node is None:
        return False
    td_total = len(table_node.findall(".//td"))
    td_under_tbody = len(table_node.findall(".//tbody//td"))
    return td_total > td_under_tbody

@property
def has_tbody(self):
    """Check if the first <table> found has a <tbody> of its own.

    Only direct children of the <table> are inspected. A descendant search
    would also match a <tbody> nested deeper (for example one that belongs
    to markup placed inside a cell) and wrongly report that the table
    itself has a body. This also mirrors ``has_tr_in_table``, which looks
    at direct children only.

    Returns:
        bool: True when <tbody> is a direct child of the <table>; False
        when it is not, or when ``self.element`` contains no <table>.
    """
    table = self.element.find(".//table")
    if table is None:
        return False
    # Bare "tbody" path: immediate children only, never nested content.
    return table.find("tbody") is not None

@property
def data(self):
return {
Expand All @@ -119,6 +155,10 @@ def data(self):
"graphic": self.graphic,
"graphic_alt_text": self.graphic_alt_text,
"graphic_long_desc": self.graphic_long_desc,
"has_tr_in_table": self.has_tr_in_table,
"has_th_outside_thead": self.has_th_outside_thead,
"has_td_outside_tbody": self.has_td_outside_tbody,
"has_tbody": self.has_tbody,
}


Expand Down
212 changes: 161 additions & 51 deletions packtools/sps/validation/tablewrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,10 +79,15 @@ def get_default_params(self):
return {
"absent_error_level": "WARNING",
"id_error_level": "CRITICAL",
"label_or_caption_error_level": "CRITICAL",
"label_error_level": "CRITICAL",
"caption_error_level": "CRITICAL",
"table_error_level": "CRITICAL",
"alternatives_error_level": "CRITICAL"
"alternatives_error_level": "CRITICAL",
"tr_in_table_error_level": "ERROR",
"th_in_thead_error_level": "ERROR",
"td_in_tbody_error_level": "ERROR",
"tbody_error_level": "WARNING",
}

def validate(self):
Expand All @@ -92,10 +97,13 @@ def validate(self):
"""
validations = [
self.validate_id,
self.validate_label,
self.validate_caption,
self.validate_label_or_caption,
self.validate_table,
self.validate_alternatives,
self.validate_tr_not_in_table,
self.validate_th_in_thead,
self.validate_td_in_tbody,
self.validate_tbody,
]
return [response for validate in validations if (response := validate())]

Expand Down Expand Up @@ -128,67 +136,43 @@ def validate_id(self):
advice_params={},
)

def validate_label(self):
def validate_label_or_caption(self):
"""
Validates the presence of label in the <table-wrap>.
Validates the presence of <label> or <caption> in the <table-wrap>.
At least one of <label> or <caption> with <title> must be present.

Returns:
The validation result in the expected format.
"""
label = self.data.get("label")
is_valid = bool(label)
table_id = self.data.get("table_wrap_id")
advice = f'Wrap each label with <label> inside {self.xml}. Consult SPS documentation for more detail.'
caption = self.data.get("caption")
is_valid = bool(label) or bool(caption)
obtained = []
if label:
obtained.append(f"label={label}")
if caption:
obtained.append(f"caption={caption}")
obtained_str = ", ".join(obtained) if obtained else None
advice = f'Add <label> or <caption> with <title> inside {self.xml}. Consult SPS documentation for more detail.'

return build_response(
title="label",
title="label or caption",
parent={
"parent": self.data.get("parent"),
"parent_id": self.data.get("parent_id"),
"parent_article_type": self.data.get("parent_article_type"),
"parent_lang": self.data.get("parent_lang"),
},
item="table-wrap",
sub_item="label",
sub_item="label or caption",
validation_type="exist",
is_valid=is_valid,
expected="label",
obtained=label,
expected="<label> or <caption> with <title>",
obtained=obtained_str,
advice=advice,
data=self.data,
error_level=self.rules["label_error_level"],
advice_text='Wrap each label with <label> inside {xml}. Consult SPS documentation for more detail.',
advice_params={"xml": self.xml},
)

def validate_caption(self):
"""
Validates the presence of caption in the <table-wrap>.

Returns:
The validation result in the expected format.
"""
caption = self.data.get("caption")
is_valid = bool(caption)
table_id = self.data.get("table_wrap_id")
return build_response(
title="caption",
parent={
"parent": self.data.get("parent"),
"parent_id": self.data.get("parent_id"),
"parent_article_type": self.data.get("parent_article_type"),
"parent_lang": self.data.get("parent_lang"),
},
item="table-wrap",
sub_item="caption",
validation_type="exist",
is_valid=is_valid,
expected="caption",
obtained=caption,
advice=f'Wrap each caption with <caption> inside {self.xml}. Consult SPS documentation for more detail.',
data=self.data,
error_level=self.rules["caption_error_level"],
advice_text='Wrap each caption with <caption> inside {xml}. Consult SPS documentation for more detail.',
error_level=self.rules["label_or_caption_error_level"],
advice_text='Add <label> or <caption> with <title> inside {xml}. Consult SPS documentation for more detail.',
advice_params={"xml": self.xml},
)

Expand All @@ -201,7 +185,6 @@ def validate_table(self):
"""
table = self.data.get("table")
is_valid = bool(table)
table_id = self.data.get("table_wrap_id")
return build_response(
title="table",
parent={
Expand Down Expand Up @@ -230,11 +213,9 @@ def validate_alternatives(self):
Returns:
The validation result in the expected format.
"""
graphic = 1 if self.data.get("graphic") else 0 # self.data.get("graphic") retorna uma referência para uma representação da tabela, se houver
table = 1 if self.data.get("table") else 0 # self.data.get("table") retorna uma codificação de tabela, se houver
alternatives = self.data.get("alternative_elements") # uma lista com as tags internas à <alternatives>

table_id = self.data.get("table_wrap_id")
graphic = 1 if self.data.get("graphic") else 0
table = 1 if self.data.get("table") else 0
alternatives = self.data.get("alternative_elements")

if graphic + table > 1 and len(alternatives) == 0:
expected = "alternatives"
Expand Down Expand Up @@ -278,3 +259,132 @@ def validate_alternatives(self):
advice_text=advice_text,
advice_params=advice_params,
)

def validate_tr_not_in_table(self):
    """
    Report whether <table> has a <tr> placed directly under it.

    The result is valid when the "has_tr_in_table" entry of the data is
    falsy, i.e. no <tr> was detected at the first level of <table>.

    Returns:
        The response produced by build_response, or None when the data has
        no "table" value (there is nothing to check).
    """
    record = self.data
    if not record.get("table"):
        return None

    direct_tr = record.get("has_tr_in_table")
    if direct_tr:
        found = "<table><tr>..."
    else:
        found = "<tr> not found as direct child of <table>"

    parent_info = {
        key: record.get(key)
        for key in ("parent", "parent_id", "parent_article_type", "parent_lang")
    }
    xml = self.xml
    advice_template = 'Remove <tr> as a direct child of <table> in {xml}. Use <thead> or <tbody> to wrap <tr> elements.'

    return build_response(
        title="table structure",
        parent=parent_info,
        item="table-wrap",
        sub_item="table/tr",
        validation_type="exist",
        is_valid=not direct_tr,
        expected="<tr> must not be a direct child of <table>",
        obtained=found,
        advice=advice_template.format(xml=xml),
        data=record,
        error_level=self.rules["tr_in_table_error_level"],
        advice_text=advice_template,
        advice_params={"xml": xml},
    )

def validate_th_in_thead(self):
    """
    Report whether every <th> of the table lies under <thead>.

    The result is valid when the "has_th_outside_thead" entry of the data
    is falsy.

    Returns:
        The response produced by build_response, or None when the data has
        no "table" value (there is nothing to check).
    """
    record = self.data
    if not record.get("table"):
        return None

    stray_th = record.get("has_th_outside_thead")
    if stray_th:
        found = "<th> found outside <thead>"
    else:
        found = "<th> only in <thead>"

    parent_info = {
        key: record.get(key)
        for key in ("parent", "parent_id", "parent_article_type", "parent_lang")
    }
    xml = self.xml
    advice_template = 'Move <th> elements to be inside <thead> in {xml}. Consult SPS documentation for more detail.'

    return build_response(
        title="th in thead",
        parent=parent_info,
        item="table-wrap",
        sub_item="th",
        validation_type="exist",
        is_valid=not stray_th,
        expected="<th> only as descendant of <thead>",
        obtained=found,
        advice=advice_template.format(xml=xml),
        data=record,
        error_level=self.rules["th_in_thead_error_level"],
        advice_text=advice_template,
        advice_params={"xml": xml},
    )

def validate_td_in_tbody(self):
    """
    Report whether every <td> of the table lies under <tbody>.

    The result is valid when the "has_td_outside_tbody" entry of the data
    is falsy.

    Returns:
        The response produced by build_response, or None when the data has
        no "table" value (there is nothing to check).
    """
    record = self.data
    if not record.get("table"):
        return None

    stray_td = record.get("has_td_outside_tbody")
    if stray_td:
        found = "<td> found outside <tbody>"
    else:
        found = "<td> only in <tbody>"

    parent_info = {
        key: record.get(key)
        for key in ("parent", "parent_id", "parent_article_type", "parent_lang")
    }
    xml = self.xml
    advice_template = 'Move <td> elements to be inside <tbody> in {xml}. Consult SPS documentation for more detail.'

    return build_response(
        title="td in tbody",
        parent=parent_info,
        item="table-wrap",
        sub_item="td",
        validation_type="exist",
        is_valid=not stray_td,
        expected="<td> only as descendant of <tbody>",
        obtained=found,
        advice=advice_template.format(xml=xml),
        data=record,
        error_level=self.rules["td_in_tbody_error_level"],
        advice_text=advice_template,
        advice_params={"xml": xml},
    )

def validate_tbody(self):
    """
    Report whether the table contains a <tbody>.

    The result is valid when the "has_tbody" entry of the data is truthy;
    when it is not, the obtained value reported is None.

    Returns:
        The response produced by build_response, or None when the data has
        no "table" value (there is nothing to check).
    """
    record = self.data
    if not record.get("table"):
        return None

    tbody_found = record.get("has_tbody")
    found = "<tbody>" if tbody_found else None

    parent_info = {
        key: record.get(key)
        for key in ("parent", "parent_id", "parent_article_type", "parent_lang")
    }
    xml = self.xml
    advice_template = 'Add <tbody> inside <table> in {xml}. Consult SPS documentation for more detail.'

    return build_response(
        title="tbody",
        parent=parent_info,
        item="table-wrap",
        sub_item="tbody",
        validation_type="exist",
        is_valid=bool(tbody_found),
        expected="<tbody> element in <table>",
        obtained=found,
        advice=advice_template.format(xml=xml),
        data=record,
        error_level=self.rules["tbody_error_level"],
        advice_text=advice_template,
        advice_params={"xml": xml},
    )
7 changes: 6 additions & 1 deletion packtools/sps/validation_rules/tablewrap_rules.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,14 @@
"table_wrap_rules": {
"absent_error_level": "WARNING",
"id_error_level": "CRITICAL",
"label_or_caption_error_level": "CRITICAL",
"label_error_level": "CRITICAL",
"caption_error_level": "CRITICAL",
"table_error_level": "CRITICAL",
"alternatives_error_level": "CRITICAL"
"alternatives_error_level": "CRITICAL",
"tr_in_table_error_level": "ERROR",
"th_in_thead_error_level": "ERROR",
"td_in_tbody_error_level": "ERROR",
"tbody_error_level": "WARNING"
}
}
Loading