From 74e3080e6752cdb316d0c5d460e584b9da081de0 Mon Sep 17 00:00:00 2001 From: "Tristan F." Date: Thu, 21 Aug 2025 19:55:58 +0000 Subject: [PATCH 1/2] fix: never parse index_col from node data --- spras/dataset.py | 2 +- .../fixtures/toy-372/input-interactome.txt | 2 ++ test/dataset/fixtures/toy-372/input-nodes.txt | 3 +++ test/dataset/test_dataset.py | 22 +++++++++++++++++++ 4 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 test/dataset/fixtures/toy-372/input-interactome.txt create mode 100644 test/dataset/fixtures/toy-372/input-nodes.txt diff --git a/spras/dataset.py b/spras/dataset.py index 891f4d6f9..c2271235c 100644 --- a/spras/dataset.py +++ b/spras/dataset.py @@ -121,7 +121,7 @@ def load_files_from_dict(self, dataset_dict: DatasetDict): # Load generic node tables self.node_table = pd.DataFrame(node_set, columns=[self.NODE_ID]) for node_file in node_data_files: - single_node_table = pd.read_table(os.path.join(data_loc, node_file)) + single_node_table = pd.read_table(os.path.join(data_loc, node_file), index_col=False) # If we have only 1 column, assume this is an indicator variable if len(single_node_table.columns) == 1: single_node_table = pd.read_table( diff --git a/test/dataset/fixtures/toy-372/input-interactome.txt b/test/dataset/fixtures/toy-372/input-interactome.txt new file mode 100644 index 000000000..f252ca4ca --- /dev/null +++ b/test/dataset/fixtures/toy-372/input-interactome.txt @@ -0,0 +1,2 @@ +C D 0.77 U +N O 0.66 U \ No newline at end of file diff --git a/test/dataset/fixtures/toy-372/input-nodes.txt b/test/dataset/fixtures/toy-372/input-nodes.txt new file mode 100644 index 000000000..2efa6f320 --- /dev/null +++ b/test/dataset/fixtures/toy-372/input-nodes.txt @@ -0,0 +1,3 @@ +NODEID prize active dummy sources targets +N +C 5.7 True True diff --git a/test/dataset/test_dataset.py b/test/dataset/test_dataset.py index 4cb988632..52333ca91 100644 --- a/test/dataset/test_dataset.py +++ b/test/dataset/test_dataset.py @@ -2,6 +2,7 @@ import pandas import pytest +import numpy as np from spras.dataset import Dataset @@ -58,3 +59,24 @@ def test_standard(self): }) assert len(dataset.get_interactome()) == 2 + + # 372 is a PR, but for the relevant comment, see + # https://github.com/Reed-CompBio/spras/pull/372/files#r2291953612. + # Note that the input-nodes file has more tabs than the original fixture. + def test_372(self): + dataset = Dataset({ + 'label': 'toy-372', + 'edge_files': ['input-interactome.txt'], + 'node_files': ['input-nodes.txt'], + 'data_dir': FIXTURES_PATH / 'toy-372', + 'other_files': [] + }) + + node_table = dataset.node_table + assert node_table is not None + + assert node_table[node_table[Dataset.NODE_ID] == 'C'].iloc[0]['prize'] == 5.7 + assert node_table[node_table[Dataset.NODE_ID] == 'C'].iloc[0]['active'] == True + + assert np.isnan(node_table[node_table[Dataset.NODE_ID] == 'C'].iloc[0]['sources']) + assert node_table[node_table[Dataset.NODE_ID] == 'C'].iloc[0]['targets'] == True From 4da14efc2221b28b0dda493e4f2af09aecd03fa4 Mon Sep 17 00:00:00 2001 From: "Tristan F." Date: Sat, 25 Apr 2026 09:02:54 +0000 Subject: [PATCH 2/2] test(dataset): use new schema --- test/dataset/test_dataset.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/test/dataset/test_dataset.py b/test/dataset/test_dataset.py index 626374353..aecbd68e5 100644 --- a/test/dataset/test_dataset.py +++ b/test/dataset/test_dataset.py @@ -1,8 +1,8 @@ from pathlib import Path +import numpy as np import pandas import pytest -import numpy as np from spras.config.dataset import DatasetSchema from spras.dataset import Dataset @@ -59,19 +59,21 @@ def test_standard(self): data_dir=FIXTURES_PATH / 'standard' )) - assert len(dataset.get_interactome()) == 2 - + interactome = dataset.get_interactome() + assert interactome is not None + assert len(interactome) == 2 + # 372 is a PR, but for the relevant comment, see # https://github.com/Reed-CompBio/spras/pull/372/files#r2291953612. # Note that the input-nodes file has more tabs than the original fixture. def test_372(self): - dataset = Dataset({ - 'label': 'toy-372', - 'edge_files': ['input-interactome.txt'], - 'node_files': ['input-nodes.txt'], - 'data_dir': FIXTURES_PATH / 'toy-372', - 'other_files': [] - }) + dataset = Dataset(DatasetSchema( + label='toy-372', + edge_files=['input-interactome.txt'], + node_files=['input-nodes.txt'], + data_dir=FIXTURES_PATH / 'toy-372', + other_files=[] + )) node_table = dataset.node_table assert node_table is not None