diff --git a/MSMetaEnhancer/app.py b/MSMetaEnhancer/app.py
index d8e0a47..db2c226 100644
--- a/MSMetaEnhancer/app.py
+++ b/MSMetaEnhancer/app.py
@@ -14,7 +14,7 @@
class Application:
- def __init__(self, log_level='info', log_file=None):
+ def __init__(self, log_level="info", log_file=None):
self.data = None
logger.setup(log_level, log_file)
@@ -25,12 +25,12 @@ def load_data(self, filename, file_format):
:param filename: path to source spectra file
:param file_format: format of spectra
"""
- if file_format in ['msp', 'mgf', 'json']:
+ if file_format in ["msp", "mgf", "json"]:
self.data = Spectra()
- elif file_format in ['csv', 'tsv', 'tabular', 'xlsx']:
+ elif file_format in ["csv", "tsv", "tabular", "xlsx"]:
self.data = DataFrame()
else:
- raise UnknownFileFormat(f'Format {file_format} not supported.')
+ raise UnknownFileFormat(f"Format {file_format} not supported.")
self.data.load_data(filename, file_format)
def save_data(self, filename, file_format):
@@ -51,12 +51,14 @@ def curate_metadata(self):
curated_metadata = Curator().curate_metadata(self.data.get_metadata())
self.data.fuse_metadata(curated_metadata)
- async def annotate_spectra(self,
- converters,
- jobs=None,
- repeat: bool = False,
- monitor: Monitor = Monitor(),
- annotator: Annotator = Annotator()):
+ async def annotate_spectra(
+ self,
+ converters,
+ jobs=None,
+ repeat: bool = False,
+ monitor: Monitor = Monitor(),
+ annotator: Annotator = Annotator(),
+ ):
"""
Annotates current Spectra data by specified jobs.
@@ -72,9 +74,11 @@ async def annotate_spectra(self,
async with aiohttp.ClientSession() as session:
builder = ConverterBuilder()
builder.validate_converters(converters)
- converters, web_converters = builder.build_converters(session, converters)
+ compute_converters, web_converters = builder.build_converters(
+ session, converters
+ )
- annotator.set_converters(converters)
+ annotator.set_converters(compute_converters | web_converters)
monitor.set_converters(web_converters)
# start converters status checker and wait for first status
@@ -86,7 +90,7 @@ async def annotate_spectra(self,
if not jobs:
jobs = []
converter: Converter
- for converter in converters.values():
+ for converter in annotator.converters.values():
jobs += converter.get_conversion_functions()
jobs = convert_to_jobs(jobs)
@@ -94,8 +98,12 @@ async def annotate_spectra(self,
logger.set_target_attributes(jobs, len(metadata_list))
- results = await asyncio.gather(*[annotator.annotate(metadata, jobs, repeat)
- for metadata in metadata_list])
+ results = await asyncio.gather(
+ *[
+ annotator.annotate(metadata, jobs, repeat)
+ for metadata in metadata_list
+ ]
+ )
finally:
monitor.join()
diff --git a/MSMetaEnhancer/libs/Annotator.py b/MSMetaEnhancer/libs/Annotator.py
index b1541ef..6d6cf83 100644
--- a/MSMetaEnhancer/libs/Annotator.py
+++ b/MSMetaEnhancer/libs/Annotator.py
@@ -2,8 +2,13 @@
from MSMetaEnhancer.libs.Curator import Curator
from MSMetaEnhancer.libs.utils import logger
-from MSMetaEnhancer.libs.utils.Errors import TargetAttributeNotRetrieved, SourceAttributeNotAvailable, \
- ServiceNotAvailable, UnknownResponse, DataAlreadyPresent
+from MSMetaEnhancer.libs.utils.Errors import (
+ TargetAttributeNotRetrieved,
+ SourceAttributeNotAvailable,
+ ServiceNotAvailable,
+ UnknownResponse,
+ DataAlreadyPresent,
+)
from MSMetaEnhancer.libs.utils.Logger import LogRecord
@@ -11,6 +16,7 @@ class Annotator:
"""
Annotator is responsible for annotation process of single spectra.
"""
+
def __init__(self):
self.converters = dict()
self.curator = Curator()
@@ -41,17 +47,28 @@ async def annotate(self, metadata, jobs, repeat=False):
for job in jobs:
if job.target not in metadata:
try:
- metadata, cache = await self.execute_job_with_cache(job, metadata, cache, log)
+ metadata, cache = await self.execute_job_with_cache(
+ job, metadata, cache, log
+ )
if repeat:
added_metadata = True
- except (SourceAttributeNotAvailable, TargetAttributeNotRetrieved) as exc:
+ except (
+ SourceAttributeNotAvailable,
+ TargetAttributeNotRetrieved,
+ ) as exc:
log.update(exc, job, level=3)
except (ServiceNotAvailable, UnknownResponse) as exc:
log.update(exc, job, level=2)
except Exception:
log.update(Exception(traceback.format_exc()), job, level=1)
else:
- log.update(DataAlreadyPresent(f'Requested attribute {job.target} already present.'), job, level=2)
+ log.update(
+ DataAlreadyPresent(
+ f"Requested attribute {job.target} already present."
+ ),
+ job,
+ level=2,
+ )
logger.add_logs(log)
logger.add_coverage_after(metadata.keys())
@@ -85,7 +102,7 @@ async def execute_job_with_cache(self, job, metadata, cache, warning):
if job.target in cache[job.converter]:
metadata[job.target] = cache[job.converter][job.target]
else:
- raise TargetAttributeNotRetrieved('No data retrieved.')
+ raise TargetAttributeNotRetrieved("No data retrieved.")
else:
- raise ServiceNotAvailable(f'Service {job.converter} not available.')
+ raise ServiceNotAvailable(f"Service {job.converter} not available.")
return metadata, cache
diff --git a/MSMetaEnhancer/libs/Converter.py b/MSMetaEnhancer/libs/Converter.py
index 4e70cb9..961271f 100644
--- a/MSMetaEnhancer/libs/Converter.py
+++ b/MSMetaEnhancer/libs/Converter.py
@@ -5,6 +5,7 @@ class Converter(ABC):
"""
General class for conversions.
"""
+
def __init__(self):
self.is_available = True
@@ -46,13 +47,15 @@ def get_conversion_functions(self) -> list:
:return: a list of available conversion functions
"""
available_conversions = []
- methods = [method_name for method_name in dir(self) if '_to_' in method_name]
+ methods = [method_name for method_name in dir(self) if "_to_" in method_name]
for method in methods:
- available_conversions.append((*method.split('_to_'), self.converter_name))
+ available_conversions.append((*method.split("_to_"), self.converter_name))
return available_conversions
-def create_top_level_method(obj: Converter, source: str, target: str, method: str, asynch: bool = True):
+def create_top_level_method(
+ obj: Converter, source: str, target: str, method: str, asynch: bool = True
+):
"""
Assign a new method to {obj} called {source}_to_{target} which calls {method}.
@@ -62,14 +65,15 @@ def create_top_level_method(obj: Converter, source: str, target: str, method: st
:param method: method which is called in the object with single argument
:param asynch: whether to create asynchronous methods
"""
+
async def async_conversion(key):
return await getattr(obj, str(method))(key)
def sync_conversion(key):
return getattr(obj, str(method))(key)
- doc = f'Convert {source} to {target} using {obj.__class__.__name__} converter'
- name = f'{source}_to_{target}'
+ doc = f"Convert {source} to {target} using {obj.__class__.__name__} converter"
+ name = f"{source}_to_{target}"
if asynch:
async_conversion.__doc__ = doc
diff --git a/MSMetaEnhancer/libs/Curator.py b/MSMetaEnhancer/libs/Curator.py
index 7c833f8..358f1ac 100644
--- a/MSMetaEnhancer/libs/Curator.py
+++ b/MSMetaEnhancer/libs/Curator.py
@@ -1,5 +1,7 @@
from matchms.filtering.filter_utils.smile_inchi_inchikey_conversions import (
- is_valid_smiles, is_valid_inchi, is_valid_inchikey
+ is_valid_smiles,
+ is_valid_inchi,
+ is_valid_inchikey,
)
from MSMetaEnhancer.libs.utils.Errors import InvalidAttributeFormat
@@ -10,7 +12,7 @@
inchikey = "VNWKTOKETHGBQD-UHFFFAOYSA-N"
print(is_valid_smiles(smiles)) # True if valid SMILES
-print(is_valid_inchi(inchi)) # True if valid InChI
+print(is_valid_inchi(inchi)) # True if valid InChI
print(is_valid_inchikey(inchikey)) # True if valid InChIKey
@@ -21,6 +23,7 @@ class Curator:
Additionally, it supports metadata validation to make sure the produced data are correct.
"""
+
def curate_metadata(self, metadata_list):
"""
Iterates over given metadata and curates individual entries.
@@ -40,8 +43,8 @@ def curate_casno(self, metadata):
:param metadata: given metadata
:return: curated metadata
"""
- if 'casno' in metadata:
- metadata['casno'] = self.fix_cas_number(metadata['casno'])
+ if "casno" in metadata:
+ metadata["casno"] = self.fix_cas_number(metadata["casno"])
return metadata
@staticmethod
@@ -54,7 +57,7 @@ def fix_cas_number(cas_number):
"""
if isinstance(cas_number, str):
if "-" not in cas_number:
- return f'{cas_number[:-3]}-{cas_number[-3:-1]}-{cas_number[-1]}'
+ return f"{cas_number[:-3]}-{cas_number[-3:-1]}-{cas_number[-1]}"
return cas_number
@staticmethod
@@ -68,20 +71,26 @@ def filter_invalid_metadata(metadata, log, job):
:return: only valid metadata
"""
filters = {
- 'smiles': is_valid_smiles,
- 'canonical_smiles': is_valid_smiles,
- 'isomeric_smiles': is_valid_smiles,
- 'inchi': is_valid_inchi,
- 'inchikey': is_valid_inchikey
+ "smiles": is_valid_smiles,
+ "canonical_smiles": is_valid_smiles,
+ "isomeric_smiles": is_valid_smiles,
+ "inchi": is_valid_inchi,
+ "inchikey": is_valid_inchikey,
}
valid_metadata = {}
- for (attribute, value) in metadata.items():
+ for attribute, value in metadata.items():
if attribute in filters.keys():
if filters[attribute](value):
valid_metadata[attribute] = value
else:
- log.update(InvalidAttributeFormat(f'Obtained {attribute} in invalid format: {value}'), job, level=2)
+ log.update(
+ InvalidAttributeFormat(
+ f"Obtained {attribute} in invalid format: {value}"
+ ),
+ job,
+ level=2,
+ )
else:
valid_metadata[attribute] = value
return valid_metadata
diff --git a/MSMetaEnhancer/libs/converters/compute/ComputeConverter.py b/MSMetaEnhancer/libs/converters/compute/ComputeConverter.py
index fb15c22..9df0c2e 100644
--- a/MSMetaEnhancer/libs/converters/compute/ComputeConverter.py
+++ b/MSMetaEnhancer/libs/converters/compute/ComputeConverter.py
@@ -5,5 +5,6 @@ class ComputeConverter(Converter):
"""
General class for computation conversion.
"""
+
async def convert(self, source, target, data):
- return getattr(self, f'{source}_to_{target}')(data)
+ return getattr(self, f"{source}_to_{target}")(data)
diff --git a/MSMetaEnhancer/libs/converters/compute/RDKit.py b/MSMetaEnhancer/libs/converters/compute/RDKit.py
index 1762a02..e91bd50 100644
--- a/MSMetaEnhancer/libs/converters/compute/RDKit.py
+++ b/MSMetaEnhancer/libs/converters/compute/RDKit.py
@@ -13,12 +13,15 @@ class RDKit(ComputeConverter):
"""
RDKit is a collection of chemo-informatics and machine-learning software.
"""
+
def __init__(self):
super().__init__()
# generate top level methods defining allowed conversions
- conversions = [('smiles', 'mw', 'from_smiles'),
- ('canonical_smiles', 'mw', 'from_smiles'),
- ('isomeric_smiles', 'mw', 'from_smiles')]
+ conversions = [
+ ("smiles", "mw", "from_smiles"),
+ ("canonical_smiles", "mw", "from_smiles"),
+ ("isomeric_smiles", "mw", "from_smiles"),
+ ]
self.create_top_level_conversion_methods(conversions, asynch=False)
def from_smiles(self, smiles):
@@ -29,7 +32,7 @@ def from_smiles(self, smiles):
:return: computed molecular weight
"""
weight = ExactMolWt(MolFromSmiles(smiles))
- return {'mw': weight}
+ return {"mw": weight}
def inchi_to_canonical_smiles(self, inchi):
"""
@@ -39,7 +42,7 @@ def inchi_to_canonical_smiles(self, inchi):
:return: computed canonical SMILES
"""
smiles = MolToSmiles(MolFromInchi(inchi), isomericSmiles=False)
- return {'canonical_smiles': smiles}
+ return {"canonical_smiles": smiles}
def inchi_to_isomeric_smiles(self, inchi):
"""
@@ -49,7 +52,7 @@ def inchi_to_isomeric_smiles(self, inchi):
:return: computed isomeric SMILES
"""
smiles = MolToSmiles(MolFromInchi(inchi))
- return {'isomeric_smiles': smiles}
+ return {"isomeric_smiles": smiles}
def formula_to_mw(self, formula):
"""
@@ -66,9 +69,13 @@ def formula_to_mw(self, formula):
continue
atom = Atom(parts[index])
- multiplier = int(parts[index + 1]) if len(parts) > index + 1 and parts[index + 1].isnumeric() else 1
+ multiplier = (
+ int(parts[index + 1])
+ if len(parts) > index + 1 and parts[index + 1].isnumeric()
+ else 1
+ )
mass += atom.GetMass() * multiplier
- return {'mw': mass}
+ return {"mw": mass}
def smiles_to_formula(self, smiles: str) -> dict:
"""
@@ -79,11 +86,11 @@ def smiles_to_formula(self, smiles: str) -> dict:
"""
mol = MolFromSmiles(smiles)
if mol is None:
- return {'formula': ''}
+ return {"formula": ""}
formula = CalcMolFormula(mol)
- return {'formula': formula}
+ return {"formula": formula}
def inchi_to_formula(self, inchi: str) -> dict:
"""
@@ -94,6 +101,6 @@ def inchi_to_formula(self, inchi: str) -> dict:
"""
mol = MolFromInchi(inchi)
if mol is None:
- return {'formula': ''}
+ return {"formula": ""}
formula = CalcMolFormula(mol)
- return {'formula': formula}
+ return {"formula": formula}
diff --git a/MSMetaEnhancer/libs/converters/compute/__init__.py b/MSMetaEnhancer/libs/converters/compute/__init__.py
index defa371..acc60d5 100644
--- a/MSMetaEnhancer/libs/converters/compute/__init__.py
+++ b/MSMetaEnhancer/libs/converters/compute/__init__.py
@@ -1,3 +1,3 @@
from MSMetaEnhancer.libs.converters.compute.RDKit import RDKit
-__all__ = ['RDKit']
+__all__ = ["RDKit"]
diff --git a/MSMetaEnhancer/libs/converters/web/BridgeDb.py b/MSMetaEnhancer/libs/converters/web/BridgeDb.py
index 6126917..e0e9b68 100644
--- a/MSMetaEnhancer/libs/converters/web/BridgeDb.py
+++ b/MSMetaEnhancer/libs/converters/web/BridgeDb.py
@@ -8,48 +8,62 @@ class BridgeDb(WebConverter):
More info about the available conversions: https://bridgedb.github.io/
"""
+
def __init__(self, session):
super().__init__(session)
# service URLs
- self.endpoints = {'BridgeDb': 'https://webservice.bridgedb.org/Human/xrefs/'}
-
- self.codes = {'hmdbid': 'Ch', 'pubchemid': 'Cpc', 'chemspiderid': 'Cs', 'wikidataid': 'Wd', 'chebiid': 'Ce',
- 'keggid': 'Ck'}
- self.identifiers = {'PubChem-compound': 'pubchemid', 'Chemspider': 'chemspiderid', 'ChEBI': 'chebiid',
- 'HMDB': 'hmdbid', 'Wikidata': 'wikidataid', 'KEGG Compound': 'keggid'}
+ self.endpoints = {"BridgeDb": "https://webservice.bridgedb.org/Human/xrefs/"}
+
+ self.codes = {
+ "hmdbid": "Ch",
+ "pubchemid": "Cpc",
+ "chemspiderid": "Cs",
+ "wikidataid": "Wd",
+ "chebiid": "Ce",
+ "keggid": "Ck",
+ }
+ self.identifiers = {
+ "PubChem-compound": "pubchemid",
+ "Chemspider": "chemspiderid",
+ "ChEBI": "chebiid",
+ "HMDB": "hmdbid",
+ "Wikidata": "wikidataid",
+ "KEGG Compound": "keggid",
+ }
# generate top level methods defining allowed conversions
- conversions = [('hmdbid', 'pubchemid', 'from_hmdbid'),
- ('hmdbid', 'chemspiderid', 'from_hmdbid'),
- ('hmdbid', 'wikidataid', 'from_hmdbid'),
- ('hmdbid', 'chebiid', 'from_hmdbid'),
- ('hmdbid', 'keggid', 'from_hmdbid'),
- ('pubchemid', 'hmdbid', 'from_pubchemid'),
- ('pubchemid', 'chemspiderid', 'from_pubchemid'),
- ('pubchemid', 'wikidataid', 'from_pubchemid'),
- ('pubchemid', 'chebiid', 'from_pubchemid'),
- ('pubchemid', 'keggid', 'from_pubchemid'),
- ('chemspiderid', 'hmdbid', 'from_chemspiderid'),
- ('chemspiderid', 'pubchemid', 'from_chemspiderid'),
- ('chemspiderid', 'wikidataid', 'from_chemspiderid'),
- ('chemspiderid', 'chebiid', 'from_chemspiderid'),
- ('chemspiderid', 'keggid', 'from_chemspiderid'),
- ('wikidataid', 'hmdbid', 'from_wikidataid'),
- ('wikidataid', 'pubchemid', 'from_wikidataid'),
- ('wikidataid', 'chemspiderid', 'from_wikidataid'),
- ('wikidataid', 'chebiid', 'from_wikidataid'),
- ('wikidataid', 'keggid', 'from_wikidataid'),
- ('chebiid', 'hmdbid', 'from_chebiid'),
- ('chebiid', 'pubchemid', 'from_chebiid'),
- ('chebiid', 'chemspiderid', 'from_chebiid'),
- ('chebiid', 'wikidataid', 'from_chebiid'),
- ('chebiid', 'keggid', 'from_chebiid'),
- ('keggid', 'hmdbid', 'from_keggid'),
- ('keggid', 'pubchemid', 'from_keggid'),
- ('keggid', 'chemspiderid', 'from_keggid'),
- ('keggid', 'wikidataid', 'from_keggid'),
- ('keggid', 'chebiid', 'from_keggid'),
- ]
+ conversions = [
+ ("hmdbid", "pubchemid", "from_hmdbid"),
+ ("hmdbid", "chemspiderid", "from_hmdbid"),
+ ("hmdbid", "wikidataid", "from_hmdbid"),
+ ("hmdbid", "chebiid", "from_hmdbid"),
+ ("hmdbid", "keggid", "from_hmdbid"),
+ ("pubchemid", "hmdbid", "from_pubchemid"),
+ ("pubchemid", "chemspiderid", "from_pubchemid"),
+ ("pubchemid", "wikidataid", "from_pubchemid"),
+ ("pubchemid", "chebiid", "from_pubchemid"),
+ ("pubchemid", "keggid", "from_pubchemid"),
+ ("chemspiderid", "hmdbid", "from_chemspiderid"),
+ ("chemspiderid", "pubchemid", "from_chemspiderid"),
+ ("chemspiderid", "wikidataid", "from_chemspiderid"),
+ ("chemspiderid", "chebiid", "from_chemspiderid"),
+ ("chemspiderid", "keggid", "from_chemspiderid"),
+ ("wikidataid", "hmdbid", "from_wikidataid"),
+ ("wikidataid", "pubchemid", "from_wikidataid"),
+ ("wikidataid", "chemspiderid", "from_wikidataid"),
+ ("wikidataid", "chebiid", "from_wikidataid"),
+ ("wikidataid", "keggid", "from_wikidataid"),
+ ("chebiid", "hmdbid", "from_chebiid"),
+ ("chebiid", "pubchemid", "from_chebiid"),
+ ("chebiid", "chemspiderid", "from_chebiid"),
+ ("chebiid", "wikidataid", "from_chebiid"),
+ ("chebiid", "keggid", "from_chebiid"),
+ ("keggid", "hmdbid", "from_keggid"),
+ ("keggid", "pubchemid", "from_keggid"),
+ ("keggid", "chemspiderid", "from_keggid"),
+ ("keggid", "wikidataid", "from_keggid"),
+ ("keggid", "chebiid", "from_keggid"),
+ ]
self.create_top_level_conversion_methods(conversions)
async def from_hmdbid(self, hmdbid):
@@ -59,7 +73,7 @@ async def from_hmdbid(self, hmdbid):
:param hmdbid: given HMDB ID number
:return: obtained IDs
"""
- args = f'{self.codes["hmdbid"]}/{hmdbid}'
+ args = f"{self.codes['hmdbid']}/{hmdbid}"
return await self.call_service(args)
async def from_pubchemid(self, pubchemid):
@@ -69,7 +83,7 @@ async def from_pubchemid(self, pubchemid):
:param pubchemid: given PubChem ID number
:return: obtained IDs
"""
- args = f'{self.codes["pubchemid"]}/{pubchemid}'
+ args = f"{self.codes['pubchemid']}/{pubchemid}"
return await self.call_service(args)
async def from_chemspiderid(self, chemspiderid):
@@ -79,7 +93,7 @@ async def from_chemspiderid(self, chemspiderid):
:param chemspiderid: given ChemSpider ID number
:return: obtained IDs
"""
- args = f'{self.codes["chemspiderid"]}/{chemspiderid}'
+ args = f"{self.codes['chemspiderid']}/{chemspiderid}"
return await self.call_service(args)
async def from_wikidataid(self, wikidataid):
@@ -89,7 +103,7 @@ async def from_wikidataid(self, wikidataid):
:param wikidataid: given WikiData ID number
:return: obtained IDs
"""
- args = f'{self.codes["wikidataid"]}/{wikidataid}'
+ args = f"{self.codes['wikidataid']}/{wikidataid}"
return await self.call_service(args)
async def from_chebiid(self, chebiid):
@@ -99,7 +113,7 @@ async def from_chebiid(self, chebiid):
:param chebiid: given ChEBI ID number
:return: obtained IDs
"""
- args = f'{self.codes["chebiid"]}/{chebiid}'
+ args = f"{self.codes['chebiid']}/{chebiid}"
return await self.call_service(args)
async def from_keggid(self, keggid):
@@ -109,11 +123,11 @@ async def from_keggid(self, keggid):
:param keggid: given KEGG ID number
:return: obtained IDs
"""
- args = f'{self.codes["keggid"]}/{keggid}'
+ args = f"{self.codes['keggid']}/{keggid}"
return await self.call_service(args)
async def call_service(self, args):
- response = await self.query_the_service('BridgeDb', args)
+ response = await self.query_the_service("BridgeDb", args)
if response:
return self.parse_attributes(response)
@@ -126,10 +140,10 @@ def parse_attributes(self, response):
"""
result = dict()
- lines = response.split('\n')
+ lines = response.split("\n")
for line in lines:
if line:
- value, identifier = line.split('\t')
+ value, identifier = line.split("\t")
if identifier in self.identifiers.keys():
result[self.identifiers[identifier]] = value
return result
diff --git a/MSMetaEnhancer/libs/converters/web/CIR.py b/MSMetaEnhancer/libs/converters/web/CIR.py
index b898217..414dd02 100644
--- a/MSMetaEnhancer/libs/converters/web/CIR.py
+++ b/MSMetaEnhancer/libs/converters/web/CIR.py
@@ -10,10 +10,11 @@ class CIR(WebConverter):
More info about the available conversions: https://cactus.nci.nih.gov/chemical/structure_documentation
"""
+
def __init__(self, session):
super().__init__(session)
# service URLs
- self.endpoints = {'CIR': 'https://cactus.nci.nih.gov/chemical/structure/'}
+ self.endpoints = {"CIR": "https://cactus.nci.nih.gov/chemical/structure/"}
async def casno_to_smiles(self, cas_number):
"""
@@ -22,10 +23,10 @@ async def casno_to_smiles(self, cas_number):
:param cas_number: given CAS number
:return: obtained SMILES
"""
- args = f'{cas_number}/smiles?resolver=cas_number'
- response = await self.query_the_service('CIR', args)
+ args = f"{cas_number}/smiles?resolver=cas_number"
+ response = await self.query_the_service("CIR", args)
if response:
- return {'smiles': self.retrieve_first(response)}
+ return {"smiles": self.retrieve_first(response)}
async def inchikey_to_smiles(self, inchikey):
"""
@@ -34,10 +35,10 @@ async def inchikey_to_smiles(self, inchikey):
:param inchikey: given InChiKey
:return: obtained SMILES
"""
- args = f'{inchikey}/smiles'
- response = await self.query_the_service('CIR', args)
+ args = f"{inchikey}/smiles"
+ response = await self.query_the_service("CIR", args)
if response:
- return {'smiles': self.retrieve_first(response)}
+ return {"smiles": self.retrieve_first(response)}
async def inchikey_to_inchi(self, inchikey):
"""
@@ -46,10 +47,10 @@ async def inchikey_to_inchi(self, inchikey):
:param inchikey: given InChiKey
:return: obtained InCHi
"""
- args = f'{inchikey}/stdinchi'
- response = await self.query_the_service('CIR', args)
+ args = f"{inchikey}/stdinchi"
+ response = await self.query_the_service("CIR", args)
if response:
- return {'inchi': self.retrieve_first(response)}
+ return {"inchi": self.retrieve_first(response)}
async def inchikey_to_casno(self, inchikey):
"""
@@ -58,10 +59,10 @@ async def inchikey_to_casno(self, inchikey):
:param inchikey: given InChiKey
:return: obtained CAS number
"""
- args = f'{inchikey}/cas'
- response = await self.query_the_service('CIR', args)
+ args = f"{inchikey}/cas"
+ response = await self.query_the_service("CIR", args)
if response:
- return {'casno': self.retrieve_first(response)}
+ return {"casno": self.retrieve_first(response)}
async def inchikey_to_formula(self, inchikey):
"""
@@ -70,10 +71,10 @@ async def inchikey_to_formula(self, inchikey):
:param inchikey: given InChiKey
:return: obtained chemical formula
"""
- args = f'{inchikey}/formula'
- response = await self.query_the_service('CIR', args)
+ args = f"{inchikey}/formula"
+ response = await self.query_the_service("CIR", args)
if response:
- return {'formula': self.retrieve_first(response)}
+ return {"formula": self.retrieve_first(response)}
async def smiles_to_inchikey(self, smiles):
"""
@@ -82,10 +83,10 @@ async def smiles_to_inchikey(self, smiles):
:param smiles: given SMILES
:return: obtained InChiKey
"""
- args = f'{smiles}/stdinchikey'
- response = await self.query_the_service('CIR', args)
+ args = f"{smiles}/stdinchikey"
+ response = await self.query_the_service("CIR", args)
if response:
- return {'inchikey': self.retrieve_first(response)[9:]}
+ return {"inchikey": self.retrieve_first(response)[9:]}
async def inchi_to_smiles(self, inchi):
"""
@@ -94,10 +95,10 @@ async def inchi_to_smiles(self, inchi):
:param inchi: given InChi
:return: obtained SMILES
"""
- args = f'{inchi}/smiles'
- response = await self.query_the_service('CIR', args)
+ args = f"{inchi}/smiles"
+ response = await self.query_the_service("CIR", args)
if response:
- return {'smiles': self.retrieve_first(response)}
+ return {"smiles": self.retrieve_first(response)}
@staticmethod
def retrieve_first(response):
@@ -108,4 +109,4 @@ def retrieve_first(response):
:param response: given response from CIR
:return: only first hit
"""
- return response.split('\n')[0]
+ return response.split("\n")[0]
diff --git a/MSMetaEnhancer/libs/converters/web/CTS.py b/MSMetaEnhancer/libs/converters/web/CTS.py
index a83b167..f691db5 100644
--- a/MSMetaEnhancer/libs/converters/web/CTS.py
+++ b/MSMetaEnhancer/libs/converters/web/CTS.py
@@ -11,17 +11,21 @@ class CTS(WebConverter):
More info about the available conversions: http://cts.fiehnlab.ucdavis.edu/services
"""
+
def __init__(self, session):
super().__init__(session)
# service URLs
- self.endpoints = {'CTS': 'https://cts.fiehnlab.ucdavis.edu/rest/convert/',
- 'CTS_compound': 'http://cts.fiehnlab.ucdavis.edu/service/compound/'
- }
+ self.endpoints = {
+ "CTS": "https://cts.fiehnlab.ucdavis.edu/rest/convert/",
+ "CTS_compound": "http://cts.fiehnlab.ucdavis.edu/service/compound/",
+ }
# generate top level methods defining allowed conversions
- conversions = [('inchikey', 'inchi', 'from_inchikey'),
- ('inchikey', 'compound_name', 'from_inchikey'),
- ('inchikey', 'iupac_name', 'from_inchikey')]
+ conversions = [
+ ("inchikey", "inchi", "from_inchikey"),
+ ("inchikey", "compound_name", "from_inchikey"),
+ ("inchikey", "iupac_name", "from_inchikey"),
+ ]
self.create_top_level_conversion_methods(conversions)
# top level methods defining allowed conversions
@@ -33,10 +37,10 @@ async def hmdbid_to_inchi(self, hmdbid):
:param hmdbid: given HMDB ID
:return: obtained InChi
"""
- args = f'Human%20Metabolome%20Database/InChI%20Code/{hmdbid}'
- response = await self.query_the_service('CTS', args)
+ args = f"Human%20Metabolome%20Database/InChI%20Code/{hmdbid}"
+ response = await self.query_the_service("CTS", args)
if response:
- return self.parse_single_response(response, 'inchi')
+ return self.parse_single_response(response, "inchi")
async def casno_to_inchi(self, cas_number):
"""
@@ -45,10 +49,10 @@ async def casno_to_inchi(self, cas_number):
:param cas_number: given CAS number
:return: obtained InChi
"""
- args = f'CAS/InChI%20Code/{cas_number}'
- response = await self.query_the_service('CTS', args)
+ args = f"CAS/InChI%20Code/{cas_number}"
+ response = await self.query_the_service("CTS", args)
if response:
- return self.parse_single_response(response, 'inchi')
+ return self.parse_single_response(response, "inchi")
async def casno_to_inchikey(self, cas_number):
"""
@@ -59,10 +63,10 @@ async def casno_to_inchikey(self, cas_number):
:param cas_number: given CAS number
:return: obtained InChiKey
"""
- args = f'CAS/InChIKey/{cas_number}'
- response = await self.query_the_service('CTS', args)
+ args = f"CAS/InChIKey/{cas_number}"
+ response = await self.query_the_service("CTS", args)
if response:
- return self.parse_single_response(response, 'inchikey')
+ return self.parse_single_response(response, "inchikey")
async def compound_name_to_inchikey(self, name):
"""
@@ -71,10 +75,10 @@ async def compound_name_to_inchikey(self, name):
:param name: given Chemical name
:return: obtained InChiKey
"""
- args = f'Chemical%20Name/InChIKey/{name}'
- response = await self.query_the_service('CTS', args)
+ args = f"Chemical%20Name/InChIKey/{name}"
+ response = await self.query_the_service("CTS", args)
if response:
- return self.parse_single_response(response, 'inchikey')
+ return self.parse_single_response(response, "inchikey")
async def from_inchikey(self, inchikey):
"""
@@ -84,7 +88,7 @@ async def from_inchikey(self, inchikey):
:return: all found data
"""
args = inchikey
- response = await self.query_the_service('CTS_compound', args)
+ response = await self.query_the_service("CTS_compound", args)
if response:
return self.parse_attributes(response)
@@ -97,8 +101,8 @@ def parse_single_response(self, response, attribute):
:return: parsed InChiKey
"""
response_json = json.loads(response)
- if len(response_json[0]['results']) != 0:
- return {attribute: response_json[0]['results'][0]}
+ if len(response_json[0]["results"]) != 0:
+ return {attribute: response_json[0]["results"][0]}
def parse_attributes(self, response):
"""
@@ -110,20 +114,24 @@ def parse_attributes(self, response):
response_json = json.loads(response)
result = dict()
- if 'inchicode' in response_json:
- result['inchi'] = response_json['inchicode']
+ if "inchicode" in response_json:
+ result["inchi"] = response_json["inchicode"]
- if 'formula' in response_json:
- result['formula'] = response_json['formula']
+ if "formula" in response_json:
+ result["formula"] = response_json["formula"]
- if 'synonyms' in response_json:
- synonyms = response_json['synonyms']
+ if "synonyms" in response_json:
+ synonyms = response_json["synonyms"]
- names = [item['name'] for item in synonyms if item['type'] == 'Synonym']
+ names = [item["name"] for item in synonyms if item["type"] == "Synonym"]
if names:
- result['compound_name'] = names[0]
+ result["compound_name"] = names[0]
- names = [item['name'] for item in synonyms if item['type'] == 'IUPAC Name (Preferred)']
+ names = [
+ item["name"]
+ for item in synonyms
+ if item["type"] == "IUPAC Name (Preferred)"
+ ]
if names:
- result['iupac_name'] = names[0]
+ result["iupac_name"] = names[0]
return result
diff --git a/MSMetaEnhancer/libs/converters/web/IDSM.py b/MSMetaEnhancer/libs/converters/web/IDSM.py
index 950608b..90d471d 100644
--- a/MSMetaEnhancer/libs/converters/web/IDSM.py
+++ b/MSMetaEnhancer/libs/converters/web/IDSM.py
@@ -14,31 +14,36 @@ class IDSM(WebConverter):
IDSM service: https://idsm.elixir-czech.cz/
"""
+
def __init__(self, session):
super().__init__(session)
# service URLs
- self.endpoints = {'IDSM': 'https://idsm.elixir-czech.cz/sparql/endpoint/idsm'}
+ self.endpoints = {"IDSM": "https://idsm.elixir-czech.cz/sparql/endpoint/idsm"}
self.header = frozendict({"Accept": "application/sparql-results+json"})
- self.attributes = [{'code': 'inchi', 'label': 'CHEMINF_000396'},
- {'code': 'iupac_name', 'label': 'CHEMINF_000382'},
- {'code': 'inchikey', 'label': 'CHEMINF_000399'},
- {'code': 'formula', 'label': 'CHEMINF_000335'},
- {'code': 'canonical_smiles', 'label': 'CHEMINF_000376'},
- {'code': 'isomeric_smiles', 'label': 'CHEMINF_000379'}]
+ self.attributes = [
+ {"code": "inchi", "label": "CHEMINF_000396"},
+ {"code": "iupac_name", "label": "CHEMINF_000382"},
+ {"code": "inchikey", "label": "CHEMINF_000399"},
+ {"code": "formula", "label": "CHEMINF_000335"},
+ {"code": "canonical_smiles", "label": "CHEMINF_000376"},
+ {"code": "isomeric_smiles", "label": "CHEMINF_000379"},
+ ]
# generate top level methods defining allowed conversions
- conversions = [('compound_name', 'inchi', 'from_name'),
- ('compound_name', 'iupac_name', 'from_name'),
- ('compound_name', 'inchikey', 'from_name'),
- ('compound_name', 'formula', 'from_name'),
- ('compound_name', 'canonical_smiles', 'from_name'),
- ('compound_name', 'isomeric_smiles', 'from_name'),
- ('inchi', 'iupac_name', 'from_inchi'),
- ('inchi', 'inchikey', 'from_inchi'),
- ('inchi', 'formula', 'from_inchi'),
- ('inchi', 'canonical_smiles', 'from_inchi'),
- ('inchi', 'isomeric_smiles', 'from_inchi')]
+ conversions = [
+ ("compound_name", "inchi", "from_name"),
+ ("compound_name", "iupac_name", "from_name"),
+ ("compound_name", "inchikey", "from_name"),
+ ("compound_name", "formula", "from_name"),
+ ("compound_name", "canonical_smiles", "from_name"),
+ ("compound_name", "isomeric_smiles", "from_name"),
+ ("inchi", "iupac_name", "from_inchi"),
+ ("inchi", "inchikey", "from_inchi"),
+ ("inchi", "formula", "from_inchi"),
+ ("inchi", "canonical_smiles", "from_inchi"),
+ ("inchi", "isomeric_smiles", "from_inchi"),
+ ]
self.create_top_level_conversion_methods(conversions)
# used to limit the maximal number of simultaneous requests being processed
@@ -137,7 +142,9 @@ async def call_service(self, query):
"""
data = frozendict({"query": query})
async with self.semaphore:
- response = await self.query_the_service('IDSM', '', method='POST', data=data, headers=self.header)
+ response = await self.query_the_service(
+ "IDSM", "", method="POST", data=data, headers=self.header
+ )
if response:
return self.parse_attributes(response)
@@ -153,10 +160,10 @@ def parse_attributes(self, response):
response_json = eval(response)
result = dict()
- for prop in response_json['results']['bindings']:
- identifier = prop['type']['value'].rsplit('/', 1)[-1]
- value = prop['value']['value']
+ for prop in response_json["results"]["bindings"]:
+ identifier = prop["type"]["value"].rsplit("/", 1)[-1]
+ value = prop["value"]["value"]
for att in self.attributes:
- if identifier == att['label']:
- result[att['code']] = value
+ if identifier == att["label"]:
+ result[att["code"]] = value
return result
diff --git a/MSMetaEnhancer/libs/converters/web/PubChem.py b/MSMetaEnhancer/libs/converters/web/PubChem.py
index 52371ff..9af2016 100644
--- a/MSMetaEnhancer/libs/converters/web/PubChem.py
+++ b/MSMetaEnhancer/libs/converters/web/PubChem.py
@@ -14,44 +14,49 @@ class PubChem(WebConverter):
PubChem service: https://pubchem.ncbi.nlm.nih.gov/
"""
+
def __init__(self, session):
super().__init__(session)
# service URLs
- self.endpoints = {'PubChem': 'https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/'}
-
- self.attributes = [{'code': 'inchi', 'label': 'InChI', 'extra': None},
- {'code': 'inchikey', 'label': 'InChIKey', 'extra': None},
- {'code': 'iupac_name', 'label': 'IUPAC Name', 'extra': 'Preferred'},
- {'code': 'formula', 'label': 'Molecular Formula', 'extra': None},
- {'code': 'canonical_smiles', 'label': 'SMILES', 'extra': 'Canonical'},
- {'code': 'isomeric_smiles', 'label': 'SMILES', 'extra': 'Isomeric'}]
+ self.endpoints = {
+ "PubChem": "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/"
+ }
+
+ self.attributes = [
+ {"code": "inchi", "label": "InChI", "extra": None},
+ {"code": "inchikey", "label": "InChIKey", "extra": None},
+ {"code": "iupac_name", "label": "IUPAC Name", "extra": "Preferred"},
+ {"code": "formula", "label": "Molecular Formula", "extra": None},
+ {"code": "canonical_smiles", "label": "SMILES", "extra": "Canonical"},
+ {"code": "isomeric_smiles", "label": "SMILES", "extra": "Isomeric"},
+ ]
# generate top level methods defining allowed conversions
conversions = [
- ('compound_name', 'inchi', 'from_name'),
- ('compound_name', 'inchikey', 'from_name'),
- ('compound_name', 'iupac_name', 'from_name'),
- ('compound_name', 'formula', 'from_name'),
- ('compound_name', 'canonical_smiles', 'from_name'),
- ('compound_name', 'isomeric_smiles', 'from_name'),
- ('inchi', 'inchikey', 'from_inchi'),
- ('inchi', 'iupac_name', 'from_inchi'),
- ('inchi', 'formula', 'from_inchi'),
- ('inchi', 'canonical_smiles', 'from_inchi'),
- ('inchi', 'isomeric_smiles', 'from_inchi'),
- ('inchi', 'pubchemid', 'from_inchi'),
- ('inchikey', 'inchi', 'from_inchikey'),
- ('inchikey', 'iupac_name', 'from_inchikey'),
- ('inchikey', 'formula', 'from_inchikey'),
- ('inchikey', 'canonical_smiles', 'from_inchikey'),
- ('inchikey', 'isomeric_smiles', 'from_inchikey'),
- ('inchikey', 'pubchemid', 'from_inchikey'),
- ('pubchemid', 'inchi', 'from_pubchemid'),
- ('pubchemid', 'iupac_name', 'from_pubchemid'),
- ('pubchemid', 'formula', 'from_pubchemid'),
- ('pubchemid', 'canonical_smiles', 'from_pubchemid'),
- ('pubchemid', 'isomeric_smiles', 'from_pubchemid'),
- ('pubchemid', 'inchikey', 'from_pubchemid')
+ ("compound_name", "inchi", "from_name"),
+ ("compound_name", "inchikey", "from_name"),
+ ("compound_name", "iupac_name", "from_name"),
+ ("compound_name", "formula", "from_name"),
+ ("compound_name", "canonical_smiles", "from_name"),
+ ("compound_name", "isomeric_smiles", "from_name"),
+ ("inchi", "inchikey", "from_inchi"),
+ ("inchi", "iupac_name", "from_inchi"),
+ ("inchi", "formula", "from_inchi"),
+ ("inchi", "canonical_smiles", "from_inchi"),
+ ("inchi", "isomeric_smiles", "from_inchi"),
+ ("inchi", "pubchemid", "from_inchi"),
+ ("inchikey", "inchi", "from_inchikey"),
+ ("inchikey", "iupac_name", "from_inchikey"),
+ ("inchikey", "formula", "from_inchikey"),
+ ("inchikey", "canonical_smiles", "from_inchikey"),
+ ("inchikey", "isomeric_smiles", "from_inchikey"),
+ ("inchikey", "pubchemid", "from_inchikey"),
+ ("pubchemid", "inchi", "from_pubchemid"),
+ ("pubchemid", "iupac_name", "from_pubchemid"),
+ ("pubchemid", "formula", "from_pubchemid"),
+ ("pubchemid", "canonical_smiles", "from_pubchemid"),
+ ("pubchemid", "isomeric_smiles", "from_pubchemid"),
+ ("pubchemid", "inchikey", "from_pubchemid"),
]
self.create_top_level_conversion_methods(conversions)
@@ -65,16 +70,16 @@ async def pubchemid_to_hmdbid(self, pubchemid):
:param pubchemid: given Chemical name
:return: all found data
"""
- args = f'cid/{pubchemid}/xrefs/RegistryID/JSON'
+ args = f"cid/{pubchemid}/xrefs/RegistryID/JSON"
async with self.throttler:
- response = await self.query_the_service('PubChem', args)
+ response = await self.query_the_service("PubChem", args)
response_json = json.loads(response)
- registry_ids = response_json['InformationList']['Information'][0]['RegistryID']
- hmdbids = [item for item in registry_ids if item.startswith('HMDB')]
+ registry_ids = response_json["InformationList"]["Information"][0]["RegistryID"]
+ hmdbids = [item for item in registry_ids if item.startswith("HMDB")]
if len(hmdbids) != 0:
- return {'hmdbid': hmdbids[0]}
+ return {"hmdbid": hmdbids[0]}
return dict()
async def from_pubchemid(self, pubchemid):
@@ -85,8 +90,8 @@ async def from_pubchemid(self, pubchemid):
:param pubchemid: given Chemical name
:return: all found data
"""
- args = f'cid/{pubchemid}/JSON'
- return await self.call_service(args, 'GET', None)
+ args = f"cid/{pubchemid}/JSON"
+ return await self.call_service(args, "GET", None)
async def from_name(self, name):
"""
@@ -96,8 +101,8 @@ async def from_name(self, name):
:param name: given Chemical name
:return: all found data
"""
- args = f'name/{name}/JSON'
- return await self.call_service(args, 'GET', None)
+ args = f"name/{name}/JSON"
+ return await self.call_service(args, "GET", None)
async def from_inchi(self, inchi):
"""
@@ -108,7 +113,7 @@ async def from_inchi(self, inchi):
:return: all found data
"""
args = "inchi/JSON"
- return await self.call_service(args, 'POST', frozendict({'inchi': inchi}))
+ return await self.call_service(args, "POST", frozendict({"inchi": inchi}))
async def from_inchikey(self, inchikey):
"""
@@ -119,7 +124,7 @@ async def from_inchikey(self, inchikey):
:return: all found data
"""
args = "inchikey/JSON"
- return await self.call_service(args, 'POST', frozendict({'inchikey': inchikey}))
+ return await self.call_service(args, "POST", frozendict({"inchikey": inchikey}))
async def call_service(self, args, method, data):
"""
@@ -136,7 +141,9 @@ async def call_service(self, args, method, data):
:return: obtained attributes
"""
async with self.throttler:
- response = await self.query_the_service('PubChem', args, method=method, data=data)
+ response = await self.query_the_service(
+ "PubChem", args, method=method, data=data
+ )
if response:
return self.parse_attributes(response)
@@ -150,14 +157,18 @@ async def process_request(self, response, url, method):
:return: processed response
"""
result = await response.text()
- if 'X-Throttling-Control' in response.headers:
- sleep_time = self.adjust_throttling(response.headers['X-Throttling-Control'])
+ if "X-Throttling-Control" in response.headers:
+ sleep_time = self.adjust_throttling(
+ response.headers["X-Throttling-Control"]
+ )
if sleep_time:
await asyncio.sleep(sleep_time)
if response.ok:
return result
else:
- raise UnknownResponse(f'Unknown response {response.status}:{result} for {method} request on {url}.')
+ raise UnknownResponse(
+ f"Unknown response {response.status}:{result} for {method} request on {url}."
+ )
def adjust_throttling(self, throttling_header):
"""
@@ -167,9 +178,10 @@ def adjust_throttling(self, throttling_header):
:param throttling_header: header containing current service load info
"""
+
def parse_status(part):
- value = part.split(': ')[1]
- return int(value.split(' (')[1][:-2])
+ value = part.split(": ")[1]
+ return int(value.split(" (")[1][:-2])
def parse_pubchem_info(header):
"""
@@ -181,24 +193,26 @@ def parse_pubchem_info(header):
:param header: given PubChem header with Throttling info
:return: most critical indicator value (maximum of three) with possible complete blacklist indicator
"""
- indicators = header.split(',')
+ indicators = header.split(",")
blocked = False
sleep_time = 0
- if 'too many requests per second or blacklisted' in indicators[-1]:
+ if "too many requests per second or blacklisted" in indicators[-1]:
blocked = True
- if 'Remaining blocking time' in indicators[-1]:
- sleep_time = string_to_seconds(indicators[-1].split(': ')[1])
+ if "Remaining blocking time" in indicators[-1]:
+ sleep_time = string_to_seconds(indicators[-1].split(": ")[1])
blocked = True
- return {'load': max([parse_status(indicator) for indicator in indicators[:3]]),
- 'blocked': blocked,
- 'sleep_time': sleep_time}
+ return {
+ "load": max([parse_status(indicator) for indicator in indicators[:3]]),
+ "blocked": blocked,
+ "sleep_time": sleep_time,
+ }
status = parse_pubchem_info(throttling_header)
- if status['blocked'] or status['load'] > 75:
+ if status["blocked"] or status["load"] > 75:
self.throttler.decrease_limit()
- elif status['load'] < 25:
+ elif status["load"] < 25:
self.throttler.increase_limit()
- return status['sleep_time']
+ return status["sleep_time"]
def parse_attributes(self, response):
"""
@@ -212,21 +226,21 @@ def parse_attributes(self, response):
response_json = json.loads(response)
result = dict()
- if 'PC_Compounds' in response_json:
- if len(response_json['PC_Compounds']) > 0:
- first_hit = response_json['PC_Compounds'][0]
+ if "PC_Compounds" in response_json:
+ if len(response_json["PC_Compounds"]) > 0:
+ first_hit = response_json["PC_Compounds"][0]
- pubchemid = first_hit.get('id', {}).get('id', {}).get('cid', None)
+ pubchemid = first_hit.get("id", {}).get("id", {}).get("cid", None)
if pubchemid:
- result['pubchemid'] = pubchemid
+ result["pubchemid"] = pubchemid
- for prop in first_hit.get('props', {}):
- label = prop['urn']['label']
+ for prop in first_hit.get("props", {}):
+ label = prop["urn"]["label"]
for att in self.attributes:
- if label == att['label']:
- if att['extra']:
- if prop['urn']['name'] == att['extra']:
- result[att['code']] = prop['value']['sval']
+ if label == att["label"]:
+ if att["extra"]:
+ if prop["urn"]["name"] == att["extra"]:
+ result[att["code"]] = prop["value"]["sval"]
else:
- result[att['code']] = prop['value']['sval']
+ result[att["code"]] = prop["value"]["sval"]
return result
diff --git a/MSMetaEnhancer/libs/converters/web/WebConverter.py b/MSMetaEnhancer/libs/converters/web/WebConverter.py
index d5736e2..5e04eb0 100644
--- a/MSMetaEnhancer/libs/converters/web/WebConverter.py
+++ b/MSMetaEnhancer/libs/converters/web/WebConverter.py
@@ -8,13 +8,18 @@
from aiocircuitbreaker import circuit
from MSMetaEnhancer.libs.Converter import Converter
-from MSMetaEnhancer.libs.utils.Errors import ServiceNotAvailable, UnknownResponse, TargetAttributeNotRetrieved
+from MSMetaEnhancer.libs.utils.Errors import (
+ ServiceNotAvailable,
+ UnknownResponse,
+ TargetAttributeNotRetrieved,
+)
class WebConverter(Converter):
"""
General class for web conversions.
"""
+
FAILURE_THRESHOLD: int = 10
"""Number of consecutive failures before circuit breaker is opened."""
@@ -42,14 +47,16 @@ async def convert(self, source: str, target: str, data: Union[str, int, float]):
Returns:
_type_: Data retrieved from the service.
"""
- result = await getattr(self, f'{source}_to_{target}')(data)
+ result = await getattr(self, f"{source}_to_{target}")(data)
if result:
return result
else:
- raise TargetAttributeNotRetrieved('No data retrieved.')
+ raise TargetAttributeNotRetrieved("No data retrieved.")
@lru_cache
- async def query_the_service(self, service: str, args: str, method: str = 'GET', data=None, headers=None) -> str:
+ async def query_the_service(
+ self, service: str, args: str, method: str = "GET", data=None, headers=None
+ ) -> str:
"""
Make get request to given converter with arguments.
Raises ConnectionError if converter is not available.
@@ -62,14 +69,20 @@ async def query_the_service(self, service: str, args: str, method: str = 'GET',
:return: obtained response
"""
try:
- result = await self.loop_request(self.endpoints[service] + args, method, data, headers)
+ result = await self.loop_request(
+ self.endpoints[service] + args, method, data, headers
+ )
return result
except TypeError:
- raise TypeError(f'Incorrect argument {args} for converter {service}.')
-
- @circuit(failure_threshold=FAILURE_THRESHOLD,
- expected_exception=Union[TimeoutError, ServerDisconnectedError, ClientConnectorError].__args__,
- fallback_function=ServiceNotAvailable.raise_circuitbreaker)
+ raise TypeError(f"Incorrect argument {args} for converter {service}.")
+
+ @circuit(
+ failure_threshold=FAILURE_THRESHOLD,
+ expected_exception=Union[
+ TimeoutError, ServerDisconnectedError, ClientConnectorError
+ ].__args__,
+ fallback_function=ServiceNotAvailable.raise_circuitbreaker,
+ )
async def make_request(self, url, method, data, headers):
"""
Enter a circuit breaker loop and execute request with type depending on specified method.
@@ -82,7 +95,7 @@ async def make_request(self, url, method, data, headers):
"""
if headers is None:
headers = {}
- if method == 'GET':
+ if method == "GET":
async with self.session.get(url, headers=headers) as response:
return await self.process_request(response, url, method)
else:
@@ -90,7 +103,9 @@ async def make_request(self, url, method, data, headers):
async with self.session.post(url, data=data, headers=headers) as response:
return await self.process_request(response, url, method)
- async def loop_request(self, url: str, method: str, data: Any, headers: dict) -> str:
+ async def loop_request(
+ self, url: str, method: str, data: Any, headers: dict
+ ) -> str:
"""
Execute request in a circuit breaker loop. If the request fails multiple times in a row,
the circuit breaker is opened and ServiceNotAvailable exception is raised.
@@ -106,7 +121,9 @@ async def loop_request(self, url: str, method: str, data: Any, headers: dict) ->
except (ServerDisconnectedError, ClientConnectorError, TimeoutError):
return await self.loop_request(url, method, data, headers)
- async def process_request(self, response: aiohttp.ClientResponse, url: str, method: str) -> str:
+ async def process_request(
+ self, response: aiohttp.ClientResponse, url: str, method: str
+ ) -> str:
"""
Method to wrap response handling (same for POST and GET requests).
@@ -119,4 +136,6 @@ async def process_request(self, response: aiohttp.ClientResponse, url: str, meth
if response.ok:
return result
else:
- raise UnknownResponse(f'Unknown response {response.status}:{result} for {method} request on {url}.')
+ raise UnknownResponse(
+ f"Unknown response {response.status}:{result} for {method} request on {url}."
+ )
diff --git a/MSMetaEnhancer/libs/converters/web/__init__.py b/MSMetaEnhancer/libs/converters/web/__init__.py
index 30ea43a..76e1e31 100644
--- a/MSMetaEnhancer/libs/converters/web/__init__.py
+++ b/MSMetaEnhancer/libs/converters/web/__init__.py
@@ -4,4 +4,4 @@
from MSMetaEnhancer.libs.converters.web.PubChem import PubChem
from MSMetaEnhancer.libs.converters.web.BridgeDb import BridgeDb
-__all__ = ['IDSM', 'CTS', 'CIR', 'PubChem', 'BridgeDb']
+__all__ = ["IDSM", "CTS", "CIR", "PubChem", "BridgeDb"]
diff --git a/MSMetaEnhancer/libs/data/Data.py b/MSMetaEnhancer/libs/data/Data.py
index eff3671..8983287 100644
--- a/MSMetaEnhancer/libs/data/Data.py
+++ b/MSMetaEnhancer/libs/data/Data.py
@@ -6,6 +6,7 @@ class Data(ABC):
"""
General class for data.
"""
+
@abstractmethod
def get_metadata(self) -> List[Dict]:
"""
diff --git a/MSMetaEnhancer/libs/data/DataFrame.py b/MSMetaEnhancer/libs/data/DataFrame.py
index a6c06aa..998e06f 100644
--- a/MSMetaEnhancer/libs/data/DataFrame.py
+++ b/MSMetaEnhancer/libs/data/DataFrame.py
@@ -2,6 +2,7 @@
from MSMetaEnhancer.libs.data.Data import Data
from MSMetaEnhancer.libs.utils.Errors import UnknownFileFormat
+from MSMetaEnhancer.libs.utils.Generic import is_na_value
class DataFrame(Data):
@@ -17,14 +18,14 @@ def load_data(self, filename: str, file_format: str):
:param filename: given file
:param file_format: format of the input file
"""
- if file_format == 'csv':
+ if file_format == "csv":
self.df = pandas.read_csv(filename, dtype=str)
- elif file_format in ['tsv', 'tabular']:
- self.df = pandas.read_csv(filename, dtype=str, sep='\t')
- elif file_format == 'xlsx':
+ elif file_format in ["tsv", "tabular"]:
+ self.df = pandas.read_csv(filename, dtype=str, sep="\t")
+ elif file_format == "xlsx":
self.df = pandas.read_excel(filename, dtype=str)
else:
- raise UnknownFileFormat(f'Format {file_format} not supported.')
+ raise UnknownFileFormat(f"Format {file_format} not supported.")
def save_data(self, filename: str, file_format: str):
"""
@@ -35,17 +36,21 @@ def save_data(self, filename: str, file_format: str):
:param filename: target file
:param file_format: format of the output file
"""
- if file_format == 'csv':
+ if file_format == "csv":
self.df.to_csv(filename, index=False)
- elif file_format in ['tsv', 'tabular']:
- self.df.to_csv(filename, index=False, sep='\t')
- elif file_format == 'xlsx':
+ elif file_format in ["tsv", "tabular"]:
+ self.df.to_csv(filename, index=False, sep="\t")
+ elif file_format == "xlsx":
self.df.to_excel(filename)
else:
- raise UnknownFileFormat(f'Format {file_format} not supported.')
+ raise UnknownFileFormat(f"Format {file_format} not supported.")
def get_metadata(self):
- return self.df.to_dict('records')
+ records = self.df.to_dict("records")
+ return [
+ {k: v for k, v in record.items() if not is_na_value(v)}
+ for record in records
+ ]
def fuse_metadata(self, metadata_list):
self.df = pandas.DataFrame.from_dict(metadata_list)
diff --git a/MSMetaEnhancer/libs/data/Spectra.py b/MSMetaEnhancer/libs/data/Spectra.py
index 94bf1e4..856ce64 100644
--- a/MSMetaEnhancer/libs/data/Spectra.py
+++ b/MSMetaEnhancer/libs/data/Spectra.py
@@ -5,6 +5,7 @@
from MSMetaEnhancer.libs.data.Data import Data
from MSMetaEnhancer.libs.utils.Errors import UnknownFileFormat
+from MSMetaEnhancer.libs.utils.Generic import is_na_value
class Spectra(Data):
@@ -12,12 +13,18 @@ class Spectra(Data):
Spectra class represents a single spectra dataset as a list.
It is using `matchms` package to load and save MSP files.
"""
+
def __init__(self):
self.spectrums: List[Spectrum] = []
def __eq__(self, other):
if len(self.spectrums) == len(other.spectrums):
- return all([spectra_eq(self.spectrums[i], other.spectrums[i]) for i in range(len(self.spectrums))])
+ return all(
+ [
+ spectra_eq(self.spectrums[i], other.spectrums[i])
+ for i in range(len(self.spectrums))
+ ]
+ )
else:
return False
@@ -30,7 +37,9 @@ def load_data(self, filename: str, file_format: str):
:param filename: given file
:param file_format: format of the input file
"""
- self.spectrums = list(getattr(matchms.importing, f'load_from_{file_format}')(filename))
+ self.spectrums = list(
+ getattr(matchms.importing, f"load_from_{file_format}")(filename)
+ )
def save_data(self, filename: str, file_format: str):
"""
@@ -43,12 +52,17 @@ def save_data(self, filename: str, file_format: str):
:param file_format: format of the output file
"""
try:
- getattr(matchms.exporting, f'save_as_{file_format}')(self.spectrums, filename)
+ getattr(matchms.exporting, f"save_as_{file_format}")(
+ self.spectrums, filename
+ )
except Exception:
- raise UnknownFileFormat(f'Format {file_format} not supported.')
+ raise UnknownFileFormat(f"Format {file_format} not supported.")
def get_metadata(self):
- return [spectra.metadata for spectra in self.spectrums]
+ return [
+ {k: v for k, v in spectra.metadata.items() if not is_na_value(v)}
+ for spectra in self.spectrums
+ ]
def fuse_metadata(self, metadata):
for i in range(len(metadata)):
@@ -63,4 +77,8 @@ def spectra_eq(first: Spectrum, second: Spectrum):
:param first: spectra object
:param second: spectra object
"""
- return first.peaks == second.peaks and first.losses == second.losses and first.metadata == second.metadata
+ return (
+ first.peaks == second.peaks
+ and first.losses == second.losses
+ and first.metadata == second.metadata
+ )
diff --git a/MSMetaEnhancer/libs/data/__init__.py b/MSMetaEnhancer/libs/data/__init__.py
index 185a037..7dc89b8 100644
--- a/MSMetaEnhancer/libs/data/__init__.py
+++ b/MSMetaEnhancer/libs/data/__init__.py
@@ -1,4 +1,4 @@
from MSMetaEnhancer.libs.data.Spectra import Spectra
from MSMetaEnhancer.libs.data.DataFrame import DataFrame
-__all__ = ['Spectra', 'DataFrame']
+__all__ = ["Spectra", "DataFrame"]
diff --git a/MSMetaEnhancer/libs/utils/ConverterBuilder.py b/MSMetaEnhancer/libs/utils/ConverterBuilder.py
index 9f28d5f..9871ed2 100644
--- a/MSMetaEnhancer/libs/utils/ConverterBuilder.py
+++ b/MSMetaEnhancer/libs/utils/ConverterBuilder.py
@@ -1,9 +1,16 @@
-from MSMetaEnhancer.libs.converters.web import __all__ as web_converters
-from MSMetaEnhancer.libs.converters.compute import __all__ as compute_converters
+from MSMetaEnhancer.libs.converters.web.WebConverter import WebConverter
+from MSMetaEnhancer.libs.converters.compute.ComputeConverter import ComputeConverter
from MSMetaEnhancer.libs.utils.Errors import UnknownConverter
class ConverterBuilder:
+ converters: dict[str, type] = {}
+
+ @staticmethod
+ def register(converters: list[type]):
+ for converter in converters:
+ ConverterBuilder.converters[converter.__name__] = converter
+
@staticmethod
def validate_converters(converters):
"""
@@ -13,14 +20,11 @@ def validate_converters(converters):
:param converters: given list of converters names
"""
for converter in converters:
- try:
- eval(converter)
-
- except NameError:
- raise UnknownConverter(f'Converter {converter} unknown.')
+ if ConverterBuilder.converters.get(converter) is None:
+ raise UnknownConverter(f"Converter {converter} unknown.")
@staticmethod
- def build_converters(session, converters: list):
+ def build_converters(session, converters: list[str]):
"""
Create provided converters.
@@ -28,11 +32,12 @@ def build_converters(session, converters: list):
:param converters: list of converters to be built
:return: built converters
"""
- built_web_converters, built_converters = {}, {}
+ web_converters, compute_converters = {}, {}
for converter in converters:
- if converter in web_converters:
- built_web_converters[converter] = eval(converter)(session)
- elif converter in compute_converters:
- built_converters[converter] = eval(converter)()
- built_converters.update(built_web_converters)
- return built_converters, built_web_converters
+ if issubclass(ConverterBuilder.converters[converter], WebConverter):
+ web_converters[converter] = ConverterBuilder.converters[converter](
+ session
+ )
+ elif issubclass(ConverterBuilder.converters[converter], ComputeConverter):
+ compute_converters[converter] = ConverterBuilder.converters[converter]()
+ return compute_converters, web_converters
diff --git a/MSMetaEnhancer/libs/utils/Errors.py b/MSMetaEnhancer/libs/utils/Errors.py
index ba18d79..57672fe 100644
--- a/MSMetaEnhancer/libs/utils/Errors.py
+++ b/MSMetaEnhancer/libs/utils/Errors.py
@@ -12,6 +12,7 @@ class UnknownConverter(Exception):
class UnknownFileFormat(Exception):
"""Format not supported."""
+
pass
@@ -23,7 +24,7 @@ class ServiceNotAvailable(Exception):
@staticmethod
async def raise_circuitbreaker(*args):
converter_name = args[0].converter_name
- raise ServiceNotAvailable(f'Service {converter_name} not available.')
+ raise ServiceNotAvailable(f"Service {converter_name} not available.")
class UnknownResponse(Exception):
diff --git a/MSMetaEnhancer/libs/utils/Generic.py b/MSMetaEnhancer/libs/utils/Generic.py
index f59e4bf..0b177bf 100644
--- a/MSMetaEnhancer/libs/utils/Generic.py
+++ b/MSMetaEnhancer/libs/utils/Generic.py
@@ -1,6 +1,24 @@
+import math
+
+
+NA_STRING_VALUES = {"na", "n/a", "nan", "none", ""}
+
+
+def is_na_value(value) -> bool:
+ """Check if a value should be treated as NA/missing (e.g. empty, None, NaN, 'NA')."""
+ if value is None:
+ return True
+ if isinstance(value, float) and math.isnan(value):
+ return True
+ if isinstance(value, str) and value.strip().lower() in NA_STRING_VALUES:
+ return True
+ return False
+
+
def escape_single_quotes(f):
async def wrapper(self, arg):
return await f(self, arg.replace("'", "\\'"))
+
return wrapper
diff --git a/MSMetaEnhancer/libs/utils/Job.py b/MSMetaEnhancer/libs/utils/Job.py
index fc96b5d..3838b87 100644
--- a/MSMetaEnhancer/libs/utils/Job.py
+++ b/MSMetaEnhancer/libs/utils/Job.py
@@ -2,8 +2,10 @@
from matchms import Metadata
from MSMetaEnhancer.libs.Converter import Converter
-from MSMetaEnhancer.libs.utils.Errors import (ConversionNotSupported,
- SourceAttributeNotAvailable)
+from MSMetaEnhancer.libs.utils.Errors import (
+ ConversionNotSupported,
+ SourceAttributeNotAvailable,
+)
class Job:
@@ -11,10 +13,10 @@ def __init__(self, data: Tuple[str, str, str]):
self.source, self.target, self.converter = data
def __str__(self):
- return f'{self.converter}: {self.source} -> {self.target}'
+ return f"{self.converter}: {self.source} -> {self.target}"
def __repr__(self):
- return f'Job(({self.source}, {self.target}, {self.converter}))'
+ return f"Job(({self.source}, {self.target}, {self.converter}))"
def validate(self, converters: dict, metadata: Metadata) -> Tuple[Converter, Any]:
"""
@@ -28,10 +30,14 @@ def validate(self, converters: dict, metadata: Metadata) -> Tuple[Converter, Any
data = metadata.get(self.source, None)
if converter is None:
- raise ConversionNotSupported(f'Conversion ({self.converter}) {self.source} -> {self.target}: '
- f'is not supported')
+ raise ConversionNotSupported(
+ f"Conversion ({self.converter}) {self.source} -> {self.target}: "
+ f"is not supported"
+ )
elif data is None:
- raise SourceAttributeNotAvailable(f'{self}:\n Attribute {self.source} missing in given metadata.')
+ raise SourceAttributeNotAvailable(
+ f"{self}:\n Attribute {self.source} missing in given metadata."
+ )
else:
return converter, data
diff --git a/MSMetaEnhancer/libs/utils/LogRecord.py b/MSMetaEnhancer/libs/utils/LogRecord.py
index dcecd55..1808e9a 100644
--- a/MSMetaEnhancer/libs/utils/LogRecord.py
+++ b/MSMetaEnhancer/libs/utils/LogRecord.py
@@ -15,14 +15,14 @@ def format_log(self, level: str) -> str:
Returns:
str: Formatted log message
"""
- message = f'Issues related to metadata:\n\n{self.metadata}\n\n'
- filtered_logs = [log['msg'] for log in self.logs if level >= log['level']]
+ message = f"Issues related to metadata:\n\n{self.metadata}\n\n"
+ filtered_logs = [log["msg"] for log in self.logs if level >= log["level"]]
if filtered_logs:
for log in filtered_logs:
- message += f'{log}\n'
+ message += f"{log}\n"
else:
return None
- return f'{message}\n'
+ return f"{message}\n"
def update(self, exc: Exception, job: Job, level: str):
"""
@@ -32,4 +32,6 @@ def update(self, exc: Exception, job: Job, level: str):
:param job: related job
:param level: log level
"""
- self.logs.append({'level': level, 'msg': f'-> {type(exc).__name__} - {job}:\n{exc}'})
+ self.logs.append(
+ {"level": level, "msg": f"-> {type(exc).__name__} - {job}:\n{exc}"}
+ )
diff --git a/MSMetaEnhancer/libs/utils/Logger.py b/MSMetaEnhancer/libs/utils/Logger.py
index d34004d..00928bb 100644
--- a/MSMetaEnhancer/libs/utils/Logger.py
+++ b/MSMetaEnhancer/libs/utils/Logger.py
@@ -9,13 +9,13 @@
class Logger:
def __init__(self):
- self.logger = logging.getLogger('log')
- self.logger.setLevel('INFO')
+ self.logger = logging.getLogger("log")
+ self.logger.setLevel("INFO")
# statistical values
self.metrics = Metrics()
- self.LEVELS = {'error': 1, 'warning': 2, 'info': 3}
+ self.LEVELS = {"error": 1, "warning": 2, "info": 3}
self.log_level = 3
@@ -36,12 +36,12 @@ def add_filehandler(self, file_name: str = None):
file_name (str, optional): Log filename. Defaults to None.
"""
if file_name is None:
- file_name = datetime.now().strftime('MSMetaEnhancer_%Y%m%d%H%M%S.log')
+ file_name = datetime.now().strftime("MSMetaEnhancer_%Y%m%d%H%M%S.log")
- filehandler_dbg = logging.FileHandler(file_name, mode='w')
- filehandler_dbg.setLevel('DEBUG')
+ filehandler_dbg = logging.FileHandler(file_name, mode="w")
+ filehandler_dbg.setLevel("DEBUG")
- streamformatter = logging.Formatter(fmt='%(levelname)s: %(message)s')
+ streamformatter = logging.Formatter(fmt="%(levelname)s: %(message)s")
# Apply formatters to handlers
filehandler_dbg.setFormatter(streamformatter)
diff --git a/MSMetaEnhancer/libs/utils/Metrics.py b/MSMetaEnhancer/libs/utils/Metrics.py
index e5b4714..45b2bed 100644
--- a/MSMetaEnhancer/libs/utils/Metrics.py
+++ b/MSMetaEnhancer/libs/utils/Metrics.py
@@ -39,10 +39,16 @@ def update_after_annotation(self, metadata_keys):
self.coverage_after_annotation[key] += 1
def __str__(self):
- table = tabulate([[key,
- f'{(self.coverage_before_annotation[key]/self.max_spectra)*100:.2f}%',
- f'{(self.coverage_after_annotation[key]/self.max_spectra)*100:.2f}%']
- for key in self.coverage_before_annotation],
- headers=['Target\nattribute', 'Coverage\nbefore', 'Coverage\nafter'])
-
- return f'\nAttribute discovery rates:\n\n{table}\n' + '='*50 + '\n'
+ table = tabulate(
+ [
+ [
+ key,
+ f"{(self.coverage_before_annotation[key] / self.max_spectra) * 100:.2f}%",
+ f"{(self.coverage_after_annotation[key] / self.max_spectra) * 100:.2f}%",
+ ]
+ for key in self.coverage_before_annotation
+ ],
+ headers=["Target\nattribute", "Coverage\nbefore", "Coverage\nafter"],
+ )
+
+ return f"\nAttribute discovery rates:\n\n{table}\n" + "=" * 50 + "\n"
diff --git a/MSMetaEnhancer/libs/utils/Monitor.py b/MSMetaEnhancer/libs/utils/Monitor.py
index 9431ca8..d4c5ec0 100644
--- a/MSMetaEnhancer/libs/utils/Monitor.py
+++ b/MSMetaEnhancer/libs/utils/Monitor.py
@@ -8,6 +8,7 @@ class Monitor(Thread):
"""
Class to periodically monitor status of used web.
"""
+
def __init__(self):
super(Monitor, self).__init__()
self.converters = dict()
@@ -39,7 +40,11 @@ def check_service(url):
try:
result = requests.get(url, timeout=5)
return result.status_code == 200
- except (requests.exceptions.ConnectionError, TimeoutError, requests.exceptions.ReadTimeout):
+ except (
+ requests.exceptions.ConnectionError,
+ TimeoutError,
+ requests.exceptions.ReadTimeout,
+ ):
return False
def run(self):
diff --git a/MSMetaEnhancer/libs/utils/Throttler.py b/MSMetaEnhancer/libs/utils/Throttler.py
index a611285..3b06bf6 100644
--- a/MSMetaEnhancer/libs/utils/Throttler.py
+++ b/MSMetaEnhancer/libs/utils/Throttler.py
@@ -8,6 +8,7 @@ class Throttler:
"""
Class to limit number of parallel requests by a rate (number per period of time).
"""
+
def __init__(self, rate_limit=10, period=1, retry_interval=0.01):
self.rate = rate_limit
self.rate_limit = rate_limit
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 8014eb3..3c53040 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -13,31 +13,32 @@
import os
import sys
-sys.path.insert(0, os.path.abspath('../../'))
+sys.path.insert(0, os.path.abspath("../../"))
from shutil import copyfile
-copyfile('../../README.md', 'readme.md')
-copyfile('../../CHANGELOG.md', 'CHANGELOG.md')
-copyfile('../../CONTRIBUTING.md', 'CONTRIBUTING.md')
+
+copyfile("../../README.md", "readme.md")
+copyfile("../../CHANGELOG.md", "CHANGELOG.md")
+copyfile("../../CONTRIBUTING.md", "CONTRIBUTING.md")
# -- Project information -----------------------------------------------------
-project = 'MSMetaEnhancer'
-copyright = '2021, RECETOX'
-author = 'RECETOX'
+project = "MSMetaEnhancer"
+copyright = "2021, RECETOX"
+author = "RECETOX"
# -- General configuration ---------------------------------------------------
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
-extensions = ['sphinx.ext.autodoc', 'myst_parser']
+extensions = ["sphinx.ext.autodoc", "myst_parser"]
# Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
+templates_path = ["_templates"]
# The name of the Pygments (syntax highlighting) style to use.
-pygments_style = 'sphinx'
+pygments_style = "sphinx"
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
@@ -49,9 +50,9 @@
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
-html_theme = 'sphinx_rtd_theme'
+html_theme = "sphinx_rtd_theme"
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
+html_static_path = ["_static"]
diff --git a/galaxy/generate_options.py b/galaxy/generate_options.py
index d9553a9..fe8d2ca 100644
--- a/galaxy/generate_options.py
+++ b/galaxy/generate_options.py
@@ -12,14 +12,18 @@
def generate_options():
jobs = []
converters = web_converters + compute_converters
- built_converters, built_web_converters = ConverterBuilder().build_converters(None, converters)
+ built_converters, built_web_converters = ConverterBuilder().build_converters(
+ None, converters
+ )
for converter in built_converters:
- jobs += (built_converters[converter].get_conversion_functions())
+ jobs += built_converters[converter].get_conversion_functions()
for job in jobs:
- print(f'')
+ print(
+ f''
+ )
-if __name__ == '__main__':
+if __name__ == "__main__":
generate_options()
diff --git a/pyproject.toml b/pyproject.toml
index 12b00bf..51d6d29 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -13,7 +13,7 @@ packages = [
]
[tool.poetry.dependencies]
-python = ">=3.10,<3.13"
+python = ">=3.10,<3.14"
matchms = ">=0.30.0"
pandas = "^2.2.1"
scipy = "^1.12.0"
diff --git a/tests/__init__.py b/tests/__init__.py
index ac023ca..d4b517c 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -1,7 +1,7 @@
import asyncio
import sys
-if sys.platform == 'win32':
+if sys.platform == "win32":
# Set the policy to prevent "Event loop is closed" error on Windows - https://github.com/encode/httpx/issues/914
# See https://stackoverflow.com/questions/63860576/asyncio-event-loop-is-closed-when-using-asyncio-run
asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
diff --git a/tests/test_BridgeDB.py b/tests/test_BridgeDB.py
index 565ddae..a6c2c69 100644
--- a/tests/test_BridgeDB.py
+++ b/tests/test_BridgeDB.py
@@ -5,25 +5,27 @@
from tests.utils import wrap_with_session
-HMDBID = 'HMDB0000001'
+HMDBID = "HMDB0000001"
@pytest.mark.dependency()
def test_service_available():
- asyncio.run(wrap_with_session(BridgeDb, 'hmdbid_to_pubchemid', ['HMDB0000001']))
+ asyncio.run(wrap_with_session(BridgeDb, "hmdbid_to_pubchemid", ["HMDB0000001"]))
@pytest.mark.dependency(depends=["test_service_available"])
def test_format():
- args = f'Ch/{HMDBID}'
- response = asyncio.run(wrap_with_session(BridgeDb, 'query_the_service', ['BridgeDb', args]))
+ args = f"Ch/{HMDBID}"
+ response = asyncio.run(
+ wrap_with_session(BridgeDb, "query_the_service", ["BridgeDb", args])
+ )
assert isinstance(response, str)
- lines = response.split('\n')
+ lines = response.split("\n")
assert len(lines) != 0
- assert '\t' in response
+ assert "\t" in response
def test_get_conversions():
jobs = BridgeDb(None).get_conversion_functions()
- assert ('wikidataid', 'pubchemid', 'BridgeDb') in jobs
+ assert ("wikidataid", "pubchemid", "BridgeDb") in jobs
diff --git a/tests/test_CIR.py b/tests/test_CIR.py
index 564769d..63138bc 100644
--- a/tests/test_CIR.py
+++ b/tests/test_CIR.py
@@ -7,14 +7,14 @@
@pytest.mark.dependency()
def test_service_available():
- asyncio.run(wrap_with_session(CIR, 'casno_to_smiles', ['7783-89-3']))
+ asyncio.run(wrap_with_session(CIR, "casno_to_smiles", ["7783-89-3"]))
@pytest.mark.dependency(depends=["test_service_available"])
def test_format():
- casno = '7783-89-3'
- args = '{}/smiles?resolver=cas_number'.format(casno)
- response = asyncio.run(wrap_with_session(CIR, 'query_the_service', ['CIR', args]))
+ casno = "7783-89-3"
+ args = "{}/smiles?resolver=cas_number".format(casno)
+ response = asyncio.run(wrap_with_session(CIR, "query_the_service", ["CIR", args]))
assert isinstance(response, str)
diff --git a/tests/test_CTS.py b/tests/test_CTS.py
index ceada45..d4d4e07 100644
--- a/tests/test_CTS.py
+++ b/tests/test_CTS.py
@@ -8,22 +8,19 @@
@pytest.mark.dependency()
def test_service_available():
- asyncio.run(wrap_with_session(CTS, 'casno_to_inchikey', ['7783-89-3']))
+ asyncio.run(wrap_with_session(CTS, "casno_to_inchikey", ["7783-89-3"]))
@pytest.mark.dependency(depends=["test_service_available"])
-@pytest.mark.parametrize('value, size', [
- ['7783-89-3', 1],
- ['7783893', 0]
-])
+@pytest.mark.parametrize("value, size", [["7783-89-3", 1], ["7783893", 0]])
def test_format(value, size):
- args = 'CAS/InChIKey/{}'.format(value)
- response = asyncio.run(wrap_with_session(CTS, 'query_the_service', ['CTS', args]))
+ args = "CAS/InChIKey/{}".format(value)
+ response = asyncio.run(wrap_with_session(CTS, "query_the_service", ["CTS", args]))
response_json = json.loads(response)
assert isinstance(response_json, list)
assert len(response_json) == 1
- assert 'results' in response_json[0]
- assert len(response_json[0]['results']) == size
+ assert "results" in response_json[0]
+ assert len(response_json[0]["results"]) == size
def test_get_conversions():
diff --git a/tests/test_IDSM.py b/tests/test_IDSM.py
index 25c6d95..fab8db3 100644
--- a/tests/test_IDSM.py
+++ b/tests/test_IDSM.py
@@ -8,12 +8,12 @@
from tests.utils import wrap_with_session
-INCHI = 'InChI=1S/C11H8FNO3/c1-13-6-9(10(14)16-11(13)15)7-2-4-8(12)5-3-7/h2-6H,1H3'
+INCHI = "InChI=1S/C11H8FNO3/c1-13-6-9(10(14)16-11(13)15)7-2-4-8(12)5-3-7/h2-6H,1H3"
@pytest.mark.dependency()
def test_service_available():
- asyncio.run(wrap_with_session(IDSM, 'inchi_to_inchikey', [INCHI]))
+ asyncio.run(wrap_with_session(IDSM, "inchi_to_inchikey", [INCHI]))
@pytest.mark.dependency(depends=["test_service_available"])
@@ -33,17 +33,29 @@ def test_format():
"""
data = frozendict({"query": query})
- response = asyncio.run(wrap_with_session(IDSM, 'query_the_service',
- ['IDSM', '', 'POST', frozendict(data),
- frozendict({"Accept": "application/sparql-results+json"})]))
+ response = asyncio.run(
+ wrap_with_session(
+ IDSM,
+ "query_the_service",
+ [
+ "IDSM",
+ "",
+ "POST",
+ frozendict(data),
+ frozendict({"Accept": "application/sparql-results+json"}),
+ ],
+ )
+ )
try:
response_json = json.loads(response) # Safely parse JSON
except json.JSONDecodeError as e:
pytest.fail(f"Failed to decode JSON response: {e}")
- assert 'results' in response_json, "Key 'results' not found in response"
- assert 'bindings' in response_json['results'], "Key 'bindings' not found in 'results'"
- assert len(response_json['results']['bindings']) > 1
+ assert "results" in response_json, "Key 'results' not found in response"
+ assert "bindings" in response_json["results"], (
+ "Key 'bindings' not found in 'results'"
+ )
+ assert len(response_json["results"]["bindings"]) > 1
def test_get_conversions():
diff --git a/tests/test_PubChem.py b/tests/test_PubChem.py
index f362328..bb9bbd5 100644
--- a/tests/test_PubChem.py
+++ b/tests/test_PubChem.py
@@ -8,25 +8,32 @@
from tests.utils import wrap_with_session
-INCHI = 'InChI=1S/C11H8FNO3/c1-13-6-9(10(14)16-11(13)15)7-2-4-8(12)5-3-7/h2-6H,1H3'
+INCHI = "InChI=1S/C11H8FNO3/c1-13-6-9(10(14)16-11(13)15)7-2-4-8(12)5-3-7/h2-6H,1H3"
@pytest.mark.dependency()
def test_service_available():
- asyncio.run(wrap_with_session(PubChem, 'inchi_to_inchikey', [INCHI]))
+ asyncio.run(wrap_with_session(PubChem, "inchi_to_inchikey", [INCHI]))
@pytest.mark.dependency(depends=["test_service_available"])
def test_format():
- inchi = 'InChI=1S/C9H10O4/c10-7-3-1-6(2-4-7)5-8(11)9(12)13/h1-4,8,10-11H,5H2,(H,12,13)'
- data = frozendict({'inchi': inchi})
-
- response = asyncio.run(wrap_with_session(PubChem, 'query_the_service',
- ['PubChem', 'inchi/JSON', 'POST', frozendict(data)]))
+ inchi = (
+ "InChI=1S/C9H10O4/c10-7-3-1-6(2-4-7)5-8(11)9(12)13/h1-4,8,10-11H,5H2,(H,12,13)"
+ )
+ data = frozendict({"inchi": inchi})
+
+ response = asyncio.run(
+ wrap_with_session(
+ PubChem,
+ "query_the_service",
+ ["PubChem", "inchi/JSON", "POST", frozendict(data)],
+ )
+ )
response_json = json.loads(response)
- assert 'PC_Compounds' in response_json
- assert len(response_json['PC_Compounds']) > 0
- assert 'props' in response_json['PC_Compounds'][0]
+ assert "PC_Compounds" in response_json
+ assert len(response_json["PC_Compounds"]) > 0
+ assert "props" in response_json["PC_Compounds"][0]
def test_get_conversions():
@@ -35,15 +42,31 @@ def test_get_conversions():
jobs = PubChem(None).get_conversion_functions()
loop.close()
- assert ('inchi', 'iupac_name', 'PubChem') in jobs
-
-
-@pytest.mark.parametrize('response, expected', [
- [{"PC_Compounds": [{"id": {"id": {"cid": "123"}},
- "props": [{"urn": {"label": "InChI"}, "value": {"sval": "random_inchi"}}]}]},
- {"pubchemid": "123", "inchi": "random_inchi"}],
- [{"PC_Compounds": [{"id": {}, "props": []}]}, dict()]
-])
+ assert ("inchi", "iupac_name", "PubChem") in jobs
+
+
+@pytest.mark.parametrize(
+ "response, expected",
+ [
+ [
+ {
+ "PC_Compounds": [
+ {
+ "id": {"id": {"cid": "123"}},
+ "props": [
+ {
+ "urn": {"label": "InChI"},
+ "value": {"sval": "random_inchi"},
+ }
+ ],
+ }
+ ]
+ },
+ {"pubchemid": "123", "inchi": "random_inchi"},
+ ],
+ [{"PC_Compounds": [{"id": {}, "props": []}]}, dict()],
+ ],
+)
def test_parse_attributes(response, expected):
actual = PubChem(None).parse_attributes(json.dumps(response))
assert actual == expected
@@ -53,5 +76,5 @@ def test_convert_inchikey_to_inchi():
inchikey = "OHCNQFYTLLGNOE-UHFFFAOYSA-N"
expected = "InChI=1S/C5H13NSi/c1-7(2,3)6-4-5-6/h4-5H2,1-3H3"
- actual = asyncio.run(wrap_with_session(PubChem, 'inchikey_to_inchi', [inchikey]))
- assert actual['inchi'] == expected
+ actual = asyncio.run(wrap_with_session(PubChem, "inchikey_to_inchi", [inchikey]))
+ assert actual["inchi"] == expected
diff --git a/tests/test_annotator.py b/tests/test_annotator.py
index 059ba10..1803627 100644
--- a/tests/test_annotator.py
+++ b/tests/test_annotator.py
@@ -7,16 +7,31 @@
from MSMetaEnhancer.libs.utils.Job import Job
-@pytest.mark.parametrize('metadata, expected, repeat, mocked', [
- [{'compound_name': '$NAME'}, {'compound_name': '$NAME', 'inchi': '$InChi'}, False,
- [({'compound_name': '$NAME', 'inchi': '$InChi'}, None)]],
- [{'compound_name': '$NAME'}, {'compound_name': '$NAME', 'inchi': '$InChi', 'smiles': '$SMILES'}, True,
- [({'compound_name': '$NAME', 'inchi': '$InChi'}, None),
- ({'compound_name': '$NAME', 'inchi': '$InChi', 'smiles': '$SMILES'}, None)]]
-])
+@pytest.mark.parametrize(
+ "metadata, expected, repeat, mocked",
+ [
+ [
+ {"compound_name": "$NAME"},
+ {"compound_name": "$NAME", "inchi": "$InChi"},
+ False,
+ [({"compound_name": "$NAME", "inchi": "$InChi"}, None)],
+ ],
+ [
+ {"compound_name": "$NAME"},
+ {"compound_name": "$NAME", "inchi": "$InChi", "smiles": "$SMILES"},
+ True,
+ [
+ ({"compound_name": "$NAME", "inchi": "$InChi"}, None),
+ (
+ {"compound_name": "$NAME", "inchi": "$InChi", "smiles": "$SMILES"},
+ None,
+ ),
+ ],
+ ],
+ ],
+)
def test_annotate(metadata, expected, repeat, mocked):
- jobs = [Job(('inchi', 'smiles', 'IDSM')),
- Job(('name', 'inchi', 'IDSM'))]
+ jobs = [Job(("inchi", "smiles", "IDSM")), Job(("name", "inchi", "IDSM"))]
annotator = Annotator()
annotator.set_converters(dict())
@@ -34,32 +49,36 @@ def test_execute_job_with_cache():
curator.filter_invalid_metadata = mock.MagicMock(side_effect=lambda a, b, c: a)
idsm = mock.Mock()
- idsm.convert = mock.AsyncMock(return_value={'smiles': '$SMILES'})
+ idsm.convert = mock.AsyncMock(return_value={"smiles": "$SMILES"})
- job = Job(('inchi', 'smiles', 'IDSM'))
+ job = Job(("inchi", "smiles", "IDSM"))
job.validate = mock.Mock(return_value=(idsm, None))
annotator = Annotator()
- annotator.set_converters({'IDSM': idsm})
+ annotator.set_converters({"IDSM": idsm})
annotator.curator = curator
- metadata, cache = asyncio.run(annotator.execute_job_with_cache(job, {'inchi': '$InChi'}, dict(), warning))
- assert metadata == {'inchi': '$InChi', 'smiles': '$SMILES'}
+ metadata, cache = asyncio.run(
+ annotator.execute_job_with_cache(job, {"inchi": "$InChi"}, dict(), warning)
+ )
+ assert metadata == {"inchi": "$InChi", "smiles": "$SMILES"}
# already cached
cts = mock.Mock()
cts.convert = mock.AsyncMock(return_value=dict())
- job = Job(('smiles', 'formula', 'CTS'))
+ job = Job(("smiles", "formula", "CTS"))
job.validate = mock.Mock(return_value=(cts, None))
- cache = {job.converter: {'formula': '$FORMULA'}}
+ cache = {job.converter: {"formula": "$FORMULA"}}
annotator = Annotator()
- annotator.set_converters({'CTS': cts})
+ annotator.set_converters({"CTS": cts})
annotator.curator = curator
- metadata, cache = asyncio.run(annotator.execute_job_with_cache(job, {'smiles': '$SMILES'}, cache, warning))
- assert metadata == {'smiles': '$SMILES', 'formula': '$FORMULA'}
+ metadata, cache = asyncio.run(
+ annotator.execute_job_with_cache(job, {"smiles": "$SMILES"}, cache, warning)
+ )
+ assert metadata == {"smiles": "$SMILES", "formula": "$FORMULA"}
# no data retrieved
@@ -67,22 +86,33 @@ def test_execute_job_with_cache():
cir.convert = mock.AsyncMock(return_value=dict())
annotator = Annotator()
- annotator.set_converters({'CIR': cir})
+ annotator.set_converters({"CIR": cir})
annotator.curator = curator
with pytest.raises(TargetAttributeNotRetrieved):
- metadata, cache = asyncio.run(annotator.execute_job_with_cache(job, {'smiles': '$SMILES'}, dict(), warning))
+ metadata, cache = asyncio.run(
+ annotator.execute_job_with_cache(
+ job, {"smiles": "$SMILES"}, dict(), warning
+ )
+ )
def test_catch_exception():
- metadata = {'inchi': 'a value', 'compound_name': 'a molecule'}
- result_metadata = {'inchi': 'a value', 'compound_name': 'a molecule', 'atr1': 'val1', 'atr2': 'val2'}
- jobs = [mock.Mock(target='a target')] * 3
+ metadata = {"inchi": "a value", "compound_name": "a molecule"}
+ result_metadata = {
+ "inchi": "a value",
+ "compound_name": "a molecule",
+ "atr1": "val1",
+ "atr2": "val2",
+ }
+ jobs = [mock.Mock(target="a target")] * 3
annotator = Annotator()
annotator.set_converters(dict())
- mocked = [({'inchi': 'a value', 'compound_name': 'a molecule', 'atr1': 'val1'}, dict()),
- Exception(),
- (result_metadata, dict())]
+ mocked = [
+ ({"inchi": "a value", "compound_name": "a molecule", "atr1": "val1"}, dict()),
+ Exception(),
+ (result_metadata, dict()),
+ ]
annotator.execute_job_with_cache = mock.AsyncMock()
annotator.execute_job_with_cache.side_effect = mocked
diff --git a/tests/test_application.py b/tests/test_application.py
index e044467..16bb78a 100644
--- a/tests/test_application.py
+++ b/tests/test_application.py
@@ -2,7 +2,10 @@
import pytest
from MSMetaEnhancer import Application
+from MSMetaEnhancer.libs.converters.web import IDSM, PubChem
+from MSMetaEnhancer.libs.utils.ConverterBuilder import ConverterBuilder
from tests.utils import FakeMonitor, FakeAnnotator
+from MSMetaEnhancer.libs.utils.Generic import is_na_value
def test_annotate_spectra_monitor_stops():
@@ -10,7 +13,7 @@ def test_annotate_spectra_monitor_stops():
monitor = FakeMonitor()
annotator = FakeAnnotator()
- app.load_data('tests/test_data/sample.msp', file_format='msp')
+ app.load_data("tests/test_data/sample.msp", file_format="msp")
asyncio.run(app.annotate_spectra([], monitor=monitor, annotator=annotator))
assert monitor.stop_request.is_set()
@@ -21,9 +24,19 @@ def test_annotate_spectra_monitor_stops_after_exception():
monitor = FakeMonitor()
annotator = FakeAnnotator(True)
- app.load_data('tests/test_data/sample.msp', file_format='msp')
+ app.load_data("tests/test_data/sample.msp", file_format="msp")
with pytest.raises(Exception):
- asyncio.run(app.annotate_spectra([], monitor=monitor, annotator=annotator))
+ asyncio.run(app.annotate_spectra({}, monitor=monitor, annotator=annotator))
assert monitor.stop_request.is_set()
+
+
+def test_application_sparse():
+ ConverterBuilder.register([PubChem, IDSM])
+ app = Application()
+ app.load_data("tests/test_data/sparse.tsv", file_format="tabular")
+ asyncio.run(app.annotate_spectra(["PubChem", "IDSM"]))
+
+ actual = [x.get("canonical_smiles") for x in app.data.get_metadata()]
+ assert not any([is_na_value(x) for x in actual])
diff --git a/tests/test_converter.py b/tests/test_converter.py
index a6a46e7..1f6c581 100644
--- a/tests/test_converter.py
+++ b/tests/test_converter.py
@@ -8,45 +8,49 @@
from asyncio.exceptions import TimeoutError
from MSMetaEnhancer.libs.converters.web.WebConverter import WebConverter
-from MSMetaEnhancer.libs.utils.Errors import TargetAttributeNotRetrieved, UnknownResponse, ServiceNotAvailable
+from MSMetaEnhancer.libs.utils.Errors import (
+ TargetAttributeNotRetrieved,
+ UnknownResponse,
+ ServiceNotAvailable,
+)
def test_query_the_service():
converter = WebConverter(mock.Mock())
- converter.endpoints = {'CTS': 'what a converter'}
- converter.loop_request = mock.AsyncMock(return_value={'smiles': '$SMILES'})
+ converter.endpoints = {"CTS": "what a converter"}
+ converter.loop_request = mock.AsyncMock(return_value={"smiles": "$SMILES"})
- result = asyncio.run(converter.query_the_service('CTS', 'arg'))
- assert result == {'smiles': '$SMILES'}
+ result = asyncio.run(converter.query_the_service("CTS", "arg"))
+ assert result == {"smiles": "$SMILES"}
converter.loop_request.assert_called()
# test wrong arg type
with pytest.raises(TypeError):
- _ = asyncio.run(converter.query_the_service('CTS', 10))
+ _ = asyncio.run(converter.query_the_service("CTS", 10))
# test lru_cache
converter.executed = False
converter.loop_request = mock.AsyncMock()
- result = asyncio.run(converter.query_the_service('CTS', 'arg'))
- assert result == {'smiles': '$SMILES'}
+ result = asyncio.run(converter.query_the_service("CTS", "arg"))
+ assert result == {"smiles": "$SMILES"}
converter.loop_request.assert_not_called()
async def test_loop_request(aiohttp_client):
- response = {'smiles': '$SMILES'}
+ response = {"smiles": "$SMILES"}
async def fake_request():
return web.Response(body=response)
app = web.Application()
- app.router.add_route('GET', '/', fake_request)
+ app.router.add_route("GET", "/", fake_request)
session = await aiohttp_client(app)
converter = WebConverter(session)
converter.process_request = mock.AsyncMock(return_value=response)
- result = await converter.loop_request('/', 'GET', None, None)
+ result = await converter.loop_request("/", "GET", None, None)
assert result == response
@@ -55,16 +59,22 @@ async def fake_request():
raise ServerDisconnectedError()
app = web.Application()
- app.router.add_route('GET', '/', fake_request)
+ app.router.add_route("GET", "/", fake_request)
session = await aiohttp_client(app)
converter = WebConverter(session)
with pytest.raises(UnknownResponse):
- await converter.loop_request('/', 'GET', None, None)
+ await converter.loop_request("/", "GET", None, None)
-@pytest.fixture(params=[TimeoutError, ServerDisconnectedError, ClientConnectorError(None, OSError())])
+@pytest.fixture(
+ params=[
+ TimeoutError,
+ ServerDisconnectedError,
+ ClientConnectorError(None, OSError()),
+ ]
+)
def failing_session_mock(request):
session = mock.AsyncMock()
session.get = mock.Mock(side_effect=request.param)
@@ -76,15 +86,15 @@ async def test_loop_request_circuit_breaker_get(failing_session_mock):
converter = WebConverter(failing_session_mock)
with pytest.raises(ServiceNotAvailable):
- await converter.loop_request('/', 'GET', None, None)
+ await converter.loop_request("/", "GET", None, None)
async def test_loop_request_circuit_breaker_post(failing_session_mock):
converter = WebConverter(failing_session_mock)
- data = {'inchi': 'inchi'}
+ data = {"inchi": "inchi"}
with pytest.raises(ServiceNotAvailable):
- await converter.loop_request('/', 'POST', data, None)
+ await converter.loop_request("/", "POST", data, None)
def test_process_request():
@@ -93,44 +103,41 @@ def test_process_request():
response = mock.AsyncMock()
response.status = 200
- response.text = mock.AsyncMock(return_value='this is response')
+ response.text = mock.AsyncMock(return_value="this is response")
response.ok = True
- result = asyncio.run(converter.process_request(response, '/', 'GET'))
- assert result == 'this is response'
+ result = asyncio.run(converter.process_request(response, "/", "GET"))
+ assert result == "this is response"
-@pytest.mark.parametrize('ok, status', [
- [False, 500],
- [False, 503]
-])
+@pytest.mark.parametrize("ok, status", [[False, 500], [False, 503]])
def test_process_request_exception(ok, status):
converter = WebConverter(mock.Mock())
converter.loop_request = mock.AsyncMock(return_value=None)
response = mock.AsyncMock()
response.status = status
- response.text = mock.AsyncMock(return_value='this is response')
+ response.text = mock.AsyncMock(return_value="this is response")
response.ok = ok
with pytest.raises(UnknownResponse):
- asyncio.run(converter.process_request(response, '/', 'GET'))
+ asyncio.run(converter.process_request(response, "/", "GET"))
def test_convert():
converter = WebConverter(mock.Mock())
converter.A_to_B = mock.AsyncMock()
- converter.A_to_B.side_effect = ['value']
+ converter.A_to_B.side_effect = ["value"]
- result = asyncio.run(converter.convert('A', 'B', None))
- assert result == 'value'
+ result = asyncio.run(converter.convert("A", "B", None))
+ assert result == "value"
converter.A_to_B.side_effect = [None]
with pytest.raises(TargetAttributeNotRetrieved):
- _ = asyncio.run(converter.convert('A', 'B', None))
+ _ = asyncio.run(converter.convert("A", "B", None))
with pytest.raises(AttributeError):
- _ = asyncio.run(converter.convert('B', 'C', None))
+ _ = asyncio.run(converter.convert("B", "C", None))
async def test_lru_cache(aiohttp_client):
@@ -138,16 +145,16 @@ async def test_lru_cache(aiohttp_client):
session = await aiohttp_client(app)
converter = WebConverter(session)
- converter.endpoints = {'/': '/'}
+ converter.endpoints = {"/": "/"}
converter.loop_request = mock.AsyncMock(return_value=(1, 2, 3))
converter.query_the_service.cache_clear()
- _ = await converter.query_the_service('/', '')
+ _ = await converter.query_the_service("/", "")
assert converter.query_the_service.cache_info().hits == 0
- _ = await converter.query_the_service('/', '')
+ _ = await converter.query_the_service("/", "")
assert converter.query_the_service.cache_info().hits == 1
- _ = await converter.query_the_service('/', '')
+ _ = await converter.query_the_service("/", "")
assert converter.query_the_service.cache_info().hits == 2
diff --git a/tests/test_curator.py b/tests/test_curator.py
index 898dfe5..311f80f 100644
--- a/tests/test_curator.py
+++ b/tests/test_curator.py
@@ -7,17 +7,20 @@
def test_fix_cas_number():
curator = Curator()
- assert curator.fix_cas_number('7783893') == '7783-89-3'
- assert curator.fix_cas_number('7783-89-3') == '7783-89-3'
+ assert curator.fix_cas_number("7783893") == "7783-89-3"
+ assert curator.fix_cas_number("7783-89-3") == "7783-89-3"
-@pytest.mark.parametrize('metadata, validated_metadata, logs_size', [
- [{'inchikey': 'random content'}, {}, 1],
- [{'smiles': 'CC(NC(C)=O)C#N'}, {'smiles': 'CC(NC(C)=O)C#N'}, 0]
-])
+@pytest.mark.parametrize(
+ "metadata, validated_metadata, logs_size",
+ [
+ [{"inchikey": "random content"}, {}, 1],
+ [{"smiles": "CC(NC(C)=O)C#N"}, {"smiles": "CC(NC(C)=O)C#N"}, 0],
+ ],
+)
def test_filter_invalid_metadata(metadata, validated_metadata, logs_size):
warning = LogRecord(dict())
- job = Job(('smiles', 'inchi', 'converter'))
+ job = Job(("smiles", "inchi", "converter"))
curator = Curator()
assert curator.filter_invalid_metadata(metadata, warning, job) == validated_metadata
assert len(warning.logs) == logs_size
diff --git a/tests/test_data/sample_metadata_with_na.csv b/tests/test_data/sample_metadata_with_na.csv
new file mode 100644
index 0000000..41d4e3a
--- /dev/null
+++ b/tests/test_data/sample_metadata_with_na.csv
@@ -0,0 +1,4 @@
+formula,mw,casno,inchikey,smiles
+H2,2,1333740,NA,
+D2,4,7782390,nan,None
+CH4,16,74828,N/A,n/a
diff --git a/tests/test_data/sample_with_na.msp b/tests/test_data/sample_with_na.msp
new file mode 100644
index 0000000..9f3d6a3
--- /dev/null
+++ b/tests/test_data/sample_with_na.msp
@@ -0,0 +1,30 @@
+NAME: Hydrogen
+FORMULA: H2
+MW: 2
+INCHIKEY: NA
+SMILES: n/a
+NUM PEAKS: 2
+1.0 20.98
+2.0 999.0
+
+NAME: Deuterium
+FORMULA: D2
+MW: 4
+INCHIKEY: nan
+SMILES: None
+NUM PEAKS: 2
+2.0 14.99
+4.0 999.0
+
+NAME: Methane
+FORMULA: CH4
+MW: 16
+INCHIKEY: N/A
+SMILES:
+NUM PEAKS: 6
+12.0 37.97
+13.0 105.9
+14.0 203.82
+15.0 886.2
+16.0 999.0
+17.0 15.99
diff --git a/tests/test_data/sparse.tsv b/tests/test_data/sparse.tsv
new file mode 100644
index 0000000..31f3239
--- /dev/null
+++ b/tests/test_data/sparse.tsv
@@ -0,0 +1,4 @@
+compound_name rel.effect rel.relationship publication.id chemical_normalized heading canonical_smiles inchi inchikey iupac_name
+(-)-epicatechin gap junction intercellular communication no inhibition 18828601 (-)-epicatechin epicatechin n/a InChI=1S/C15H14O6/c16-8-4-11(18)9-6-13(20)15(21-14(9)5-8)7-1-2-10(17)12(19)3-7/h1-5,13,15-20H,6H2/t13-,15-/m1/s1 PFTAWBLQPZVEMU-UKRRQHHQSA-N (2R,3R)-2-(3,4-dihydroxyphenyl)-3,4-dihydro-2H-chromene-3,5,7-triol
+1-chloroanthracene gap junction intercellular communication inhibition 10416268 1-chloroanthracene 1-chloroanthracene NA InChI=1S/C14H9Cl/c15-14-7-3-6-12-8-10-4-1-2-5-11(10)9-13(12)14/h1-9H SRIHSAFSOOUEGL-UHFFFAOYSA-N 1-chloroanthracene
+1-methyl-fluorene gap junction intercellular communication inhibition 7835547 1-methyl-fluorene 1-methylfluorene CC1=C2CC3=CC=CC=C3C2=CC=C1 InChI=1S/C14H12/c1-10-5-4-8-13-12-7-3-2-6-11(12)9-14(10)13/h2-8H,9H2,1H3 GKEUODMJRFDLJY-UHFFFAOYSA-N 1-methyl-9H-fluorene
\ No newline at end of file
diff --git a/tests/test_io.py b/tests/test_io.py
index a2a1063..db0194f 100644
--- a/tests/test_io.py
+++ b/tests/test_io.py
@@ -4,25 +4,53 @@
from MSMetaEnhancer.libs.data import Spectra, DataFrame
-DATA = [{'formula': 'H2', 'mw': '2', 'casno': '1333740', 'id': '1', 'num_peaks': '2', 'compound_name': 'Hydrogen'},
- {'formula': 'D2', 'mw': '4', 'casno': '7782390', 'id': '2', 'num_peaks': '2', 'compound_name': 'Deuterium'},
- {'formula': 'CH4', 'mw': '16', 'casno': '74828', 'id': '3', 'num_peaks': '6', 'compound_name': 'Methane'}]
-
-
-@pytest.mark.parametrize('backend, file_type, filename', [
- [Spectra(), 'msp', 'tests/test_data/sample.msp'],
- [Spectra(), 'mgf', 'tests/test_data/sample.mgf'],
- [Spectra(), 'json', 'tests/test_data/sample.json'],
- [DataFrame(), 'csv', 'tests/test_data/sample_metadata.csv'],
- [DataFrame(), 'tsv', 'tests/test_data/sample_metadata.tsv'],
- [DataFrame(), 'xlsx', 'tests/test_data/sample_metadata.xlsx']
-])
+DATA = [
+ {
+ "formula": "H2",
+ "mw": "2",
+ "casno": "1333740",
+ "id": "1",
+ "num_peaks": "2",
+ "compound_name": "Hydrogen",
+ },
+ {
+ "formula": "D2",
+ "mw": "4",
+ "casno": "7782390",
+ "id": "2",
+ "num_peaks": "2",
+ "compound_name": "Deuterium",
+ },
+ {
+ "formula": "CH4",
+ "mw": "16",
+ "casno": "74828",
+ "id": "3",
+ "num_peaks": "6",
+ "compound_name": "Methane",
+ },
+]
+
+
+@pytest.mark.parametrize(
+ "backend, file_type, filename",
+ [
+ [Spectra(), "msp", "tests/test_data/sample.msp"],
+ [Spectra(), "mgf", "tests/test_data/sample.mgf"],
+ [Spectra(), "json", "tests/test_data/sample.json"],
+ [DataFrame(), "csv", "tests/test_data/sample_metadata.csv"],
+ [DataFrame(), "tsv", "tests/test_data/sample_metadata.tsv"],
+ [DataFrame(), "xlsx", "tests/test_data/sample_metadata.xlsx"],
+ ],
+)
def test_get_metadata(backend, file_type, filename):
backend.load_data(filename, file_type)
metadata = backend.get_metadata()
# Compare lengths
- assert len(metadata) == len(DATA), f"Metadata length mismatch: {len(metadata)} != {len(DATA)}"
+ assert len(metadata) == len(DATA), (
+ f"Metadata length mismatch: {len(metadata)} != {len(DATA)}"
+ )
# Compare values of matching keys
for i, (meta_item, data_item) in enumerate(zip(metadata, DATA)):
@@ -32,11 +60,14 @@ def test_get_metadata(backend, file_type, filename):
f"Value mismatch for key '{key}' at index {i}: {meta_item[key]} != {data_item[key]}"
)
+
def test_fuse_metadata_dataframe():
df = DataFrame()
df.fuse_metadata(DATA)
# Compare row by row, ignoring mismatched keys
- for i, (fused_row, original_row) in enumerate(zip(df.df.to_dict(orient='records'), DATA)):
+ for i, (fused_row, original_row) in enumerate(
+ zip(df.df.to_dict(orient="records"), DATA)
+ ):
for key in original_row.keys():
if key in fused_row:
assert fused_row[key] == original_row[key], (
@@ -46,11 +77,15 @@ def test_fuse_metadata_dataframe():
def test_fuse_metadata_spectra():
spectra_fused = Spectra()
- spectra_fused.spectrums = [mock.Mock(metadata=dict()), mock.Mock(metadata=dict()), mock.Mock(metadata=dict())]
+ spectra_fused.spectrums = [
+ mock.Mock(metadata=dict()),
+ mock.Mock(metadata=dict()),
+ mock.Mock(metadata=dict()),
+ ]
spectra_fused.fuse_metadata(DATA)
spectra_loaded = Spectra()
- spectra_loaded.load_data('tests/test_data/sample.msp', 'msp')
+ spectra_loaded.load_data("tests/test_data/sample.msp", "msp")
# Compare metadata row by row, ignoring mismatched keys
fused_metadata = spectra_fused.get_metadata()
@@ -69,13 +104,15 @@ def test_tabular_data():
Test loading and comparing tabular (TSV) data using the DataFrame backend.
"""
df = DataFrame()
- filename = 'tests/test_data/sample_metadata.tsv'
- file_type = 'tabular'
+ filename = "tests/test_data/sample_metadata.tsv"
+ file_type = "tabular"
df.load_data(filename, file_type)
metadata = df.get_metadata()
# Compare lengths
- assert len(metadata) == len(DATA), f"Metadata length mismatch: {len(metadata)} != {len(DATA)}"
+ assert len(metadata) == len(DATA), (
+ f"Metadata length mismatch: {len(metadata)} != {len(DATA)}"
+ )
# Compare values of matching keys
for i, (meta_item, data_item) in enumerate(zip(metadata, DATA)):
for key in meta_item.keys():
@@ -83,3 +120,35 @@ def test_tabular_data():
assert meta_item[key] == data_item[key], (
f"Value mismatch for key '{key}' at index {i}: {meta_item[key]} != {data_item[key]}"
)
+
+
+@pytest.mark.parametrize(
+ "backend, file_type, filename, absent_keys",
+ [
+ [
+ DataFrame(),
+ "csv",
+ "tests/test_data/sample_metadata_with_na.csv",
+ ["inchikey", "smiles"],
+ ],
+ [
+ Spectra(),
+ "msp",
+ "tests/test_data/sample_with_na.msp",
+ ["inchikey", "smiles"],
+ ],
+ ],
+)
+def test_na_values_filtered_from_metadata(backend, file_type, filename, absent_keys):
+ """NA and empty values in data files should be excluded from metadata dicts."""
+ backend.load_data(filename, file_type)
+ metadata = backend.get_metadata()
+
+ assert len(metadata) == 3
+
+ for i, meta_item in enumerate(metadata):
+ # Keys that had NA values must be absent
+ for key in absent_keys:
+ assert key not in meta_item, (
+ f"NA key '{key}' should not be present at index {i}, got {meta_item.get(key)}"
+ )
diff --git a/tests/test_rdkit.py b/tests/test_rdkit.py
index fdc2ffb..c6562b5 100644
--- a/tests/test_rdkit.py
+++ b/tests/test_rdkit.py
@@ -3,20 +3,27 @@
from MSMetaEnhancer.libs.converters.compute import RDKit
-INCHI = 'InChI=1S/C19H28O2/c1-18-9-7-13(20)11-12(18)3-4-14-15-5-6-17(21)19(15,2)10-8-16(14)18/h11,14-17,21H,3-10H2,1-2H3/t14-,15-,16-,17-,18-,19-/m0/s1'
-CANONICAL_SMILES = 'CC12CCC(=O)C=C1CCC1C2CCC2(C)C(O)CCC12'
+INCHI = "InChI=1S/C19H28O2/c1-18-9-7-13(20)11-12(18)3-4-14-15-5-6-17(21)19(15,2)10-8-16(14)18/h11,14-17,21H,3-10H2,1-2H3/t14-,15-,16-,17-,18-,19-/m0/s1"
+CANONICAL_SMILES = "CC12CCC(=O)C=C1CCC1C2CCC2(C)C(O)CCC12"
-@pytest.mark.parametrize('method, input, expected', [
- ['inchi_to_canonical_smiles', INCHI, {'canonical_smiles': CANONICAL_SMILES}],
- ['inchi_to_isomeric_smiles', INCHI, {
- 'isomeric_smiles': 'C[C@]12CC[C@H]3[C@@H](CCC4=CC(=O)CC[C@@]43C)[C@@H]1CC[C@@H]2O'
- }],
- ['from_smiles', CANONICAL_SMILES, {'mw': 288.208930136}],
- ["formula_to_mw", "C9H15N4O8P", {'mw': 338.21299999999997}],
- ['smiles_to_formula', CANONICAL_SMILES, {'formula': 'C19H28O2'}],
- ['inchi_to_formula', INCHI, {'formula': 'C19H28O2'}],
-])
+@pytest.mark.parametrize(
+ "method, input, expected",
+ [
+ ["inchi_to_canonical_smiles", INCHI, {"canonical_smiles": CANONICAL_SMILES}],
+ [
+ "inchi_to_isomeric_smiles",
+ INCHI,
+ {
+ "isomeric_smiles": "C[C@]12CC[C@H]3[C@@H](CCC4=CC(=O)CC[C@@]43C)[C@@H]1CC[C@@H]2O"
+ },
+ ],
+ ["from_smiles", CANONICAL_SMILES, {"mw": 288.208930136}],
+ ["formula_to_mw", "C9H15N4O8P", {"mw": 338.21299999999997}],
+ ["smiles_to_formula", CANONICAL_SMILES, {"formula": "C19H28O2"}],
+ ["inchi_to_formula", INCHI, {"formula": "C19H28O2"}],
+ ],
+)
def test_convert_methods(method, input, expected):
func = getattr(RDKit(), method)
actual = func(input)
diff --git a/tests/utils.py b/tests/utils.py
index 4452392..726bfe6 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -13,6 +13,7 @@ class FakeMonitor(Thread):
"""
Fake Monitor to test basic functionality.
"""
+
def __init__(self):
super(FakeMonitor, self).__init__()
self.converters = None
@@ -35,6 +36,7 @@ class FakeAnnotator:
"""
Fake Annotator to test basic functionality.
"""
+
def __init__(self, raise_exception=False):
self.converters = None
self.raise_exception = raise_exception