# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License

"""Tests for _filter_under_community_level logic."""

import pandas as pd


# NOTE(review): this is a local copy of the filter expression that the
# accompanying change introduces in
# packages/graphrag/graphrag/query/indexer_adapters.py, where
# `df[df.level <= community_level]` becomes
# `df[(df.level <= community_level) | df.level.isna()]`.
# The tests below exercise this copy, not the packaged helper; consider
# importing the real function so the two cannot drift apart.
def _filter_under_community_level(
    df: pd.DataFrame, community_level: int
) -> pd.DataFrame:
    """Return the rows at or below ``community_level`` plus rows with no level.

    Args:
        df: Frame that has a ``level`` column.
        community_level: Inclusive upper bound for ``level``.

    Returns:
        The rows of ``df`` whose ``level`` is ``<= community_level`` or is
        missing (NaN/None), in their original order.
    """
    # Bracket access cannot be shadowed by a DataFrame attribute or method.
    level = df["level"]
    # A missing level never satisfies ``<=``, so those rows must be OR-ed back
    # in explicitly or they would be dropped.
    return df[(level <= community_level) | level.isna()]


def test_filter_under_community_level_preserves_nan():
    """Rows without a level are kept next to rows within the cutoff."""
    df = pd.DataFrame(
        {
            "id": [1, 2, 3, 4],
            "level": [0, 1, None, 2],
            "title": ["a", "b", "c", "d"],
        }
    )
    result = _filter_under_community_level(df, 1)
    assert len(result) == 3
    assert result["id"].tolist() == [1, 2, 3]


def test_filter_under_community_level_all_assigned():
    """With every level present, only rows above the cutoff are removed."""
    df = pd.DataFrame(
        {
            "id": [1, 2, 3],
            "level": [0, 1, 2],
            "title": ["a", "b", "c"],
        }
    )
    result = _filter_under_community_level(df, 1)
    assert len(result) == 2
    assert result["id"].tolist() == [1, 2]


def test_filter_under_community_level_all_nan():
    """When no row has a level, every row is returned unchanged."""
    df = pd.DataFrame(
        {
            "id": [1, 2, 3],
            "level": [None, None, None],
            "title": ["a", "b", "c"],
        }
    )
    result = _filter_under_community_level(df, 1)
    assert len(result) == 3
    # Pin which rows survive, not just how many.
    assert result["id"].tolist() == [1, 2, 3]