Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions articlemeta/export_crossref.py
Original file line number Diff line number Diff line change
Expand Up @@ -1134,6 +1134,36 @@ def transform(self, data):
data = self._transform_translations(data)
return data

@staticmethod
def _get_preprint_relations(raw):
    """Return the identifiers of related articles marked as preprint.

    SciELO stores related-article info in ISIS field ``v241``, with
    subfields ``i`` (identifier/href), ``t`` (related-article-type) and
    ``n`` (ext-link-type). Only entries whose type is ``preprint`` and
    whose link type is ``doi`` (or unspecified) carry a usable DOI for
    the Crossref ``hasPreprint`` relation.

    The identifier is read from subfield ``i``, falling back to ``_``.
    Values are whitespace-stripped, the type and link-type comparisons
    are case-insensitive, blank identifiers are dropped and repeated
    identifiers are returned only once (first occurrence wins).

    :param raw: object exposing ``raw.data['article']`` as a mapping.
    :returns: list of identifier strings, in source order; an empty list
        when the field is missing, empty or not readable.
    """
    try:
        related = raw.data['article'].get('v241') or []
    except (AttributeError, KeyError, TypeError):
        return []

    if isinstance(related, dict):
        # A lone record given as a bare mapping is treated as one entry
        # instead of being iterated key by key.
        related = [related]
    elif not isinstance(related, (list, tuple)):
        # Anything else (scalar, plain text, ...) holds no usable records.
        return []

    def _text(value):
        # Duck-typed on ``strip`` so every text type is accepted;
        # non-text values (None, numbers, ...) collapse to ''.
        strip = getattr(value, 'strip', None)
        return strip() if callable(strip) else ''

    preprints = []
    for item in related:
        if not isinstance(item, dict):
            continue
        if _text(item.get('t')).lower() != 'preprint':
            continue
        ext_link_type = _text(item.get('n')).lower()
        if ext_link_type and ext_link_type != 'doi':
            continue
        identifier = _text(item.get('i')) or _text(item.get('_'))
        # Skip blanks and avoid emitting the same relation twice.
        if identifier and identifier not in preprints:
            preprints.append(identifier)
    return preprints

def _transform_original(self, data):
raw, xml = data

Expand Down Expand Up @@ -1168,6 +1198,19 @@ def _transform_original(self, data):

program_node.append(related_item_node)

# program/related_item (hasPreprint)
for preprint_doi in self._get_preprint_relations(raw):
related_item_node = ET.Element('related_item')

intra_work_relation_node = ET.Element('intra_work_relation')
intra_work_relation_node.set(
'relationship-type', 'hasPreprint')
intra_work_relation_node.set('identifier-type', 'doi')
intra_work_relation_node.text = preprint_doi
related_item_node.append(intra_work_relation_node)

program_node.append(related_item_node)

journal_article_node.append(program_node)

return data
Expand Down
77 changes: 77 additions & 0 deletions tests/test_export_crossref.py
Original file line number Diff line number Diff line change
Expand Up @@ -1342,6 +1342,83 @@ def test_related_item_for_multilingue_document(self):
content[3],
intra_work_relation.attrib.get('relationship-type'))

def test_related_item_includes_has_preprint_relation(self):
    """A preprint entry in ``v241`` yields a ``hasPreprint`` relation.

    The relation must show up, typed as a DOI and carrying the preprint
    identifier, in the first ``journal_article`` element; none of the
    remaining ``journal_article`` elements may carry it.
    """
    self._article.data['article']['v241'] = [
        {
            'i': '10.1590/SciELOPreprints.9348',
            't': 'preprint',
            'n': 'doi',
        }
    ]
    skeleton = create_xmlcrossref_with_n_journal_article_element(
        ['pt', 'en', 'es'])

    pipe = export_crossref.XMLProgramRelatedItemPipe()
    _, xml = pipe.transform([self._article, skeleton])

    articles = xml.findall('.//journal_article')

    # First journal_article: the main document gains the preprint link.
    main_relations = articles[0].find('program').findall(
        'related_item/intra_work_relation')
    self.assertIn(
        'hasPreprint',
        [node.attrib.get('relationship-type') for node in main_relations])

    has_preprint = [
        node for node in main_relations
        if node.attrib.get('relationship-type') == 'hasPreprint'
    ][0]
    self.assertEqual('doi', has_preprint.attrib.get('identifier-type'))
    self.assertEqual('10.1590/SciELOPreprints.9348', has_preprint.text)

    # Every other journal_article (the translations) must stay clean.
    for translation in articles[1:]:
        kinds = [
            node.attrib.get('relationship-type')
            for node in translation.findall(
                'program/related_item/intra_work_relation')
        ]
        self.assertNotIn('hasPreprint', kinds)

def test_related_item_without_preprint_does_not_emit_has_preprint(self):
    """With no ``v241`` field, no ``hasPreprint`` relation is produced."""
    # Drop the field if the fixture happens to have one.
    self._article.data['article'].pop('v241', None)
    skeleton = create_xmlcrossref_with_n_journal_article_element(
        ['pt', 'en', 'es'])

    pipe = export_crossref.XMLProgramRelatedItemPipe()
    _, xml = pipe.transform([self._article, skeleton])

    found = xml.findall('.//program/related_item/intra_work_relation')
    kinds = [node.attrib.get('relationship-type') for node in found]
    self.assertNotIn('hasPreprint', kinds)

def test_related_item_ignores_non_preprint_related_articles(self):
    """A ``v241`` entry typed ``commentary`` adds no ``hasPreprint``."""
    commentary = {
        'i': '10.1590/some-other.1234',
        't': 'commentary',
        'n': 'doi',
    }
    self._article.data['article']['v241'] = [commentary]
    skeleton = create_xmlcrossref_with_n_journal_article_element(
        ['pt', 'en', 'es'])

    pipe = export_crossref.XMLProgramRelatedItemPipe()
    _, xml = pipe.transform([self._article, skeleton])

    found = xml.findall('.//program/related_item/intra_work_relation')
    kinds = [node.attrib.get('relationship-type') for node in found]
    self.assertNotIn('hasPreprint', kinds)

def test_collection_for_multilingue_document(self):
xmlcrossref = create_xmlcrossref_with_n_journal_article_element(
['pt', 'en', 'es'], 'doi_data')
Expand Down