@staticmethod
def _get_preprint_relations(raw):
    """Return the identifiers of related articles marked as preprints.

    Related-article entries are read from ``raw.data['article']['v241']``,
    expected to be a list of dicts with subfields ``i`` (identifier/href,
    falling back to ``_``), ``t`` (related-article-type) and ``n``
    (ext-link-type). An entry is kept only when its type is ``preprint``,
    its link type is ``doi`` or unspecified, and its identifier is a
    non-blank string.

    Args:
        raw: Object exposing a ``data`` mapping that holds an ``article``
            mapping.

    Returns:
        list: Identifiers stripped of surrounding whitespace, in
        first-seen order and without duplicates. Empty when the field is
        missing, is not iterable, or holds no qualifying entry.
    """
    try:
        related = raw.data['article'].get('v241') or []
    except (AttributeError, KeyError, TypeError):
        return []

    # A truthy non-iterable value (e.g. a number) would otherwise raise
    # TypeError from the loop below, outside the guarded lookup above.
    try:
        entries = iter(related)
    except TypeError:
        return []

    preprints = []
    for item in entries:
        if not isinstance(item, dict) or item.get('t') != 'preprint':
            continue
        ext_link_type = item.get('n')
        if ext_link_type and ext_link_type != 'doi':
            continue
        identifier = item.get('i') or item.get('_')
        try:
            identifier = identifier.strip()
        except AttributeError:
            # None or a non-string value cannot be used as element text.
            continue
        # Skip blank identifiers and repeats so that each preprint is
        # reported exactly once.
        if identifier and identifier not in preprints:
            preprints.append(identifier)
    return preprints
def test_related_item_includes_has_preprint_relation(self):
    """A ``v241`` preprint entry adds ``hasPreprint`` to the first article only."""
    preprint_doi = '10.1590/SciELOPreprints.9348'
    self._article.data['article']['v241'] = [
        {'i': preprint_doi, 't': 'preprint', 'n': 'doi'},
    ]
    document = create_xmlcrossref_with_n_journal_article_element(
        ['pt', 'en', 'es'])

    pipe = export_crossref.XMLProgramRelatedItemPipe()
    raw, xml = pipe.transform([self._article, document])

    # The first journal_article must carry the preprint relation.
    first_program = xml.findall('.//journal_article')[0].find('program')
    relations = first_program.findall('related_item/intra_work_relation')
    kinds = [node.attrib.get('relationship-type') for node in relations]
    self.assertIn('hasPreprint', kinds)

    preprint_nodes = [
        node for node in relations
        if node.attrib.get('relationship-type') == 'hasPreprint'
    ]
    preprint_node = preprint_nodes[0]
    self.assertEqual('doi', preprint_node.attrib.get('identifier-type'))
    self.assertEqual(preprint_doi, preprint_node.text)

    # Every following journal_article must be free of hasPreprint.
    for translated in xml.findall('.//journal_article')[1:]:
        translated_kinds = [
            node.attrib.get('relationship-type')
            for node in translated.findall(
                'program/related_item/intra_work_relation')
        ]
        self.assertNotIn('hasPreprint', translated_kinds)


def test_related_item_without_preprint_does_not_emit_has_preprint(self):
    """With no ``v241`` field, no ``hasPreprint`` relation is produced."""
    self._article.data['article'].pop('v241', None)
    document = create_xmlcrossref_with_n_journal_article_element(
        ['pt', 'en', 'es'])

    pipe = export_crossref.XMLProgramRelatedItemPipe()
    raw, xml = pipe.transform([self._article, document])

    found = xml.findall('.//program/related_item/intra_work_relation')
    kinds = [node.attrib.get('relationship-type') for node in found]
    self.assertNotIn('hasPreprint', kinds)


def test_related_item_ignores_non_preprint_related_articles(self):
    """A ``v241`` entry typed ``commentary`` yields no ``hasPreprint``."""
    self._article.data['article']['v241'] = [
        {'i': '10.1590/some-other.1234', 't': 'commentary', 'n': 'doi'},
    ]
    document = create_xmlcrossref_with_n_journal_article_element(
        ['pt', 'en', 'es'])

    pipe = export_crossref.XMLProgramRelatedItemPipe()
    raw, xml = pipe.transform([self._article, document])

    found = xml.findall('.//program/related_item/intra_work_relation')
    kinds = [node.attrib.get('relationship-type') for node in found]
    self.assertNotIn('hasPreprint', kinds)