diff --git a/spras/dataset.py b/spras/dataset.py index ddf74736f..cb694e44b 100644 --- a/spras/dataset.py +++ b/spras/dataset.py @@ -136,7 +136,7 @@ def __init__(self, dataset_params: DatasetSchema): # Load generic node tables self.node_table = pd.DataFrame(node_set, columns=[self.NODE_ID]) for node_file in node_data_files: - single_node_table = pd.read_table(os.path.join(data_loc, node_file)) + single_node_table = pd.read_table(os.path.join(data_loc, node_file), index_col=False) # If we have only 1 column, assume this is an indicator variable if len(single_node_table.columns) == 1: single_node_table = pd.read_table( diff --git a/test/dataset/fixtures/toy-372/input-interactome.txt b/test/dataset/fixtures/toy-372/input-interactome.txt new file mode 100644 index 000000000..f252ca4ca --- /dev/null +++ b/test/dataset/fixtures/toy-372/input-interactome.txt @@ -0,0 +1,2 @@ +C D 0.77 U +N O 0.66 U \ No newline at end of file diff --git a/test/dataset/fixtures/toy-372/input-nodes.txt b/test/dataset/fixtures/toy-372/input-nodes.txt new file mode 100644 index 000000000..2efa6f320 --- /dev/null +++ b/test/dataset/fixtures/toy-372/input-nodes.txt @@ -0,0 +1,3 @@ +NODEID prize active dummy sources targets +N +C 5.7 True True diff --git a/test/dataset/test_dataset.py b/test/dataset/test_dataset.py index 8781f25d4..aecbd68e5 100644 --- a/test/dataset/test_dataset.py +++ b/test/dataset/test_dataset.py @@ -1,5 +1,6 @@ from pathlib import Path +import numpy as np import pandas import pytest @@ -58,4 +59,27 @@ def test_standard(self): data_dir=FIXTURES_PATH / 'standard' )) - assert len(dataset.get_interactome()) == 2 + interactome = dataset.get_interactome() + assert interactome is not None + assert len(interactome) == 2 + + # 372 is a PR, but for the relevant comment, see + # https://github.com/Reed-CompBio/spras/pull/372/files#r2291953612. + # Note that the input-nodes file has more tabs than the original fixture. 
def test_372(self):
    """Load the ``toy-372`` fixture and check the attribute values on node C.

    The fixture's node file is expected to yield, for the row whose node ID
    is ``'C'``: a prize of 5.7, ``active`` and ``targets`` equal to True,
    and a missing (NaN) ``sources`` value.
    """
    # NOTE(review): presumably this guards against node-file columns being
    # misaligned when the file is parsed - confirm against the PR discussion.
    schema = DatasetSchema(
        label='toy-372',
        edge_files=['input-interactome.txt'],
        node_files=['input-nodes.txt'],
        data_dir=FIXTURES_PATH / 'toy-372',
        other_files=[]
    )
    dataset = Dataset(schema)

    node_table = dataset.node_table
    assert node_table is not None

    # Select node C's row once; every assertion below inspects this row.
    is_node_c = node_table[Dataset.NODE_ID] == 'C'
    node_c = node_table[is_node_c].iloc[0]

    assert node_c['prize'] == 5.7
    # Equality (not identity) is kept on purpose: the parsed value may not be
    # the ``True`` singleton itself - TODO confirm.
    assert node_c['active'] == True  # noqa: E712

    assert np.isnan(node_c['sources'])
    assert node_c['targets'] == True  # noqa: E712