From e354bdbcfae40e3b7cafb5071631c30c217ec528 Mon Sep 17 00:00:00 2001 From: Anthony Diaz Date: Mon, 3 Feb 2025 15:41:18 -0500 Subject: [PATCH 1/8] Subject: CCDB - Update API to return Matched Company aggregation (CABL-433) Body: Adding the matched_company to _AGG_FIELDS and matched_company AGG_MATCHED_COMPANY_DEFAULT to _AGG_SIZE_MAP, updating build_one function size to pull from field_name instead of es_field_name Footer: --- complaint_search/defaults.py | 1 + complaint_search/es_builders.py | 16 +++++++++------- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/complaint_search/defaults.py b/complaint_search/defaults.py index 3a985759..06be090d 100644 --- a/complaint_search/defaults.py +++ b/complaint_search/defaults.py @@ -7,6 +7,7 @@ # Counts for ZIP Codes and states are unlikely to change much, but others may. # Companies, products, and issues could grow over time and need adjustment. AGG_COMPANY_DEFAULT = 6500 +AGG_MATCHED_COMPANY_DEFAULT = 50 AGG_ZIPCODE_DEFAULT = 26000 AGG_STATE_DEFAULT = 100 AGG_STATE_PRODUCT_DEFAULT = 5 diff --git a/complaint_search/es_builders.py b/complaint_search/es_builders.py index f13368e3..69484d53 100644 --- a/complaint_search/es_builders.py +++ b/complaint_search/es_builders.py @@ -19,7 +19,7 @@ EXPORT_FORMATS, PARAMS, SOURCE_FIELDS, - TREND_DEPTH_DEFAULT, + TREND_DEPTH_DEFAULT, AGG_MATCHED_COMPANY_DEFAULT, ) @@ -326,6 +326,7 @@ class AggregationBuilder(BaseBuilder): "company", "company_public_response", "company_response", + "matched_company", "consumer_consent_provided", "consumer_disputed", "has_narrative", @@ -339,13 +340,14 @@ class AggregationBuilder(BaseBuilder): ) _AGG_SIZE_MAP = { - "company.raw": AGG_COMPANY_DEFAULT, # 6500 + "company": AGG_COMPANY_DEFAULT, # 6500 + "matched_company": AGG_MATCHED_COMPANY_DEFAULT, # 50 "state": AGG_STATE_DEFAULT, # 100 "zip_code": AGG_ZIPCODE_DEFAULT, # 26000 - "issue.raw": AGG_ISSUE_DEFAULT, # 200 - "sub_issue.raw": AGG_SUBISSUE_DEFAULT, # 250 - "product.raw": 
AGG_PRODUCT_DEFAULT, # 30 - "sub_product.raw": AGG_SUBPRODUCT_DEFAULT, # 90 + "issue": AGG_ISSUE_DEFAULT, # 200 + "sub_issue": AGG_SUBISSUE_DEFAULT, # 250 + "product": AGG_PRODUCT_DEFAULT, # 30 + "sub_product": AGG_SUBPRODUCT_DEFAULT, # 90 } def __init__(self): @@ -405,7 +407,7 @@ def build_one(self, field_name): field_aggs["aggs"] = { field_name: { "terms": { - "size": self._AGG_SIZE_MAP.get(es_field_name, 10), + "size": self._AGG_SIZE_MAP.get(field_name, 10), "field": es_field_name, } } From cc82bab66467e5255126865fff565b35508413bc Mon Sep 17 00:00:00 2001 From: Anthony Diaz Date: Wed, 5 Feb 2025 12:26:13 -0500 Subject: [PATCH 2/8] Revert the _AGG_SIZE_MAP size lookup to use es_field_name --- complaint_search/es_builders.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/complaint_search/es_builders.py b/complaint_search/es_builders.py index 69484d53..fa2a5f8c 100644 --- a/complaint_search/es_builders.py +++ b/complaint_search/es_builders.py @@ -340,14 +340,14 @@ class AggregationBuilder(BaseBuilder): ) _AGG_SIZE_MAP = { - "company": AGG_COMPANY_DEFAULT, # 6500 + "company.raw": AGG_COMPANY_DEFAULT, # 6500 "matched_company": AGG_MATCHED_COMPANY_DEFAULT, # 50 "state": AGG_STATE_DEFAULT, # 100 "zip_code": AGG_ZIPCODE_DEFAULT, # 26000 - "issue": AGG_ISSUE_DEFAULT, # 200 - "sub_issue": AGG_SUBISSUE_DEFAULT, # 250 - "product": AGG_PRODUCT_DEFAULT, # 30 - "sub_product": AGG_SUBPRODUCT_DEFAULT, # 90 + "issue.raw": AGG_ISSUE_DEFAULT, # 200 + "sub_issue.raw": AGG_SUBISSUE_DEFAULT, # 250 + "product.raw": AGG_PRODUCT_DEFAULT, # 30 + "sub_product.raw": AGG_SUBPRODUCT_DEFAULT, # 90 } def __init__(self): @@ -407,7 +407,7 @@ def build_one(self, field_name): field_aggs["aggs"] = { field_name: { "terms": { - "size": self._AGG_SIZE_MAP.get(field_name, 10), + "size": self._AGG_SIZE_MAP.get(es_field_name, 10), "field": es_field_name, } } From 4958bd0e5ea3e712c226bfb51bb01389052ea600 Mon Sep 17 00:00:00 2001 From: Anthony Diaz Date: Wed, 5 Feb 2025 
12:40:23 -0500 Subject: [PATCH 3/8] Updating the docs_requirements.txt file with needed libraries --- docs_requirements.txt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs_requirements.txt b/docs_requirements.txt index 2a51df0e..a800f20d 100644 --- a/docs_requirements.txt +++ b/docs_requirements.txt @@ -3,11 +3,14 @@ click==8.1.3 Django==3.2.17 django-rest-swagger==2.2.0 djangorestframework==3.14.0 +django-flags==5.0.13 +django-localflavor==4.0 elasticsearch==7.10.1 Jinja2==2.11.3 Markdown==3.3.6 MarkupSafe==2.0.1 mkdocs==1.2.3 mkDOCter==1.0.5 -PyYAML==6.0 +PyYAML==6.0.2 tornado==4.5.3 +requests-aws4auth==1.3.1 From 7605497eea2bf1b81b8474c9b5e83547f7c15220 Mon Sep 17 00:00:00 2001 From: Anthony Diaz Date: Wed, 5 Feb 2025 18:30:48 -0500 Subject: [PATCH 4/8] Correcting field size --- complaint_search/es_builders.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/complaint_search/es_builders.py b/complaint_search/es_builders.py index fa2a5f8c..00cebb1a 100644 --- a/complaint_search/es_builders.py +++ b/complaint_search/es_builders.py @@ -340,14 +340,14 @@ class AggregationBuilder(BaseBuilder): ) _AGG_SIZE_MAP = { - "company.raw": AGG_COMPANY_DEFAULT, # 6500 + "company": AGG_COMPANY_DEFAULT, # 6500 "matched_company": AGG_MATCHED_COMPANY_DEFAULT, # 50 "state": AGG_STATE_DEFAULT, # 100 "zip_code": AGG_ZIPCODE_DEFAULT, # 26000 - "issue.raw": AGG_ISSUE_DEFAULT, # 200 - "sub_issue.raw": AGG_SUBISSUE_DEFAULT, # 250 - "product.raw": AGG_PRODUCT_DEFAULT, # 30 - "sub_product.raw": AGG_SUBPRODUCT_DEFAULT, # 90 + "issue": AGG_ISSUE_DEFAULT, # 200 + "sub_issue": AGG_SUBISSUE_DEFAULT, # 250 + "product": AGG_PRODUCT_DEFAULT, # 30 + "sub_product": AGG_SUBPRODUCT_DEFAULT, # 90 } def __init__(self): @@ -360,19 +360,19 @@ def add_exclude(self, field_name_list): self.exclude += field_name_list def build_parent_child_field_agg( - self, agg_heading_name, es_parent_name, es_child_name + self, agg_heading_name, 
es_parent_name, es_child_name, parent_size, child_size ): field_agg = { agg_heading_name: { "terms": { - "size": self._AGG_SIZE_MAP.get(es_parent_name, 10), + "size": parent_size, "field": es_parent_name, }, "aggs": { es_child_name: { "terms": { - "size": self._AGG_SIZE_MAP.get(es_child_name, 10), + "size": child_size, "field": es_child_name, } } @@ -400,19 +400,24 @@ def build_one(self, field_name): es_child_name = self._OPTIONAL_FILTERS_PARAM_TO_ES_MAP.get( self._OPTIONAL_FILTERS_CHILD_MAP.get(field_name) ) + parent_field_size = self._AGG_SIZE_MAP.get(field_name, 10) + child_field_size = self._AGG_SIZE_MAP.get(self._OPTIONAL_FILTERS_CHILD_MAP.get(field_name), 10) + field_aggs["aggs"] = self.build_parent_child_field_agg( - field_name, es_field_name, es_child_name + field_name, es_field_name, es_child_name, parent_field_size, child_field_size ) else: field_aggs["aggs"] = { field_name: { "terms": { - "size": self._AGG_SIZE_MAP.get(es_field_name, 10), + "size": self._AGG_SIZE_MAP.get(field_name, 10), "field": es_field_name, } } } + print(f"field_aggs: {field_aggs['aggs']}") + # Create a subset of the filters incl_subset = { k: v for k, v in self.include_clauses.items() if k != field_name From cc5ffb3d4157cb26f1ea287b22dd2f6026ad0c34 Mon Sep 17 00:00:00 2001 From: Richard Dinh Date: Thu, 6 Feb 2025 19:42:37 -0800 Subject: [PATCH 5/8] remove matched company field in favor of default field. 
update agg count returned to 50, update api root path to match what ccdb5-ui is expecting --- ccdb5_api/urls.py | 2 +- complaint_search/defaults.py | 5 ++--- complaint_search/es_builders.py | 4 +--- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/ccdb5_api/urls.py b/ccdb5_api/urls.py index ae418d32..e569eb55 100644 --- a/ccdb5_api/urls.py +++ b/ccdb5_api/urls.py @@ -4,5 +4,5 @@ urlpatterns = [ re_path(r"^admin/", admin.site.urls), - re_path(r"^", include("complaint_search.urls")), + re_path(r"data-research/consumer-complaints/search/api/v1/", include("complaint_search.urls")), ] diff --git a/complaint_search/defaults.py b/complaint_search/defaults.py index 06be090d..4fa90590 100644 --- a/complaint_search/defaults.py +++ b/complaint_search/defaults.py @@ -6,8 +6,7 @@ # Defaults were adjusted in 2021 to be higher than object counts at the time. # Counts for ZIP Codes and states are unlikely to change much, but others may. # Companies, products, and issues could grow over time and need adjustment. 
-AGG_COMPANY_DEFAULT = 6500 -AGG_MATCHED_COMPANY_DEFAULT = 50 +AGG_COMPANY_DEFAULT = 50 AGG_ZIPCODE_DEFAULT = 26000 AGG_STATE_DEFAULT = 100 AGG_STATE_PRODUCT_DEFAULT = 5 @@ -94,7 +93,7 @@ ] ) -AGG_EXCLUDE_FIELDS = ["company", "zip_code"] +AGG_EXCLUDE_FIELDS = ["zip_code"] CHUNK_SIZE = 512 diff --git a/complaint_search/es_builders.py b/complaint_search/es_builders.py index 00cebb1a..0c62bd09 100644 --- a/complaint_search/es_builders.py +++ b/complaint_search/es_builders.py @@ -19,7 +19,7 @@ EXPORT_FORMATS, PARAMS, SOURCE_FIELDS, - TREND_DEPTH_DEFAULT, AGG_MATCHED_COMPANY_DEFAULT, + TREND_DEPTH_DEFAULT, ) @@ -326,7 +326,6 @@ class AggregationBuilder(BaseBuilder): "company", "company_public_response", "company_response", - "matched_company", "consumer_consent_provided", "consumer_disputed", "has_narrative", @@ -341,7 +340,6 @@ class AggregationBuilder(BaseBuilder): _AGG_SIZE_MAP = { "company": AGG_COMPANY_DEFAULT, # 6500 - "matched_company": AGG_MATCHED_COMPANY_DEFAULT, # 50 "state": AGG_STATE_DEFAULT, # 100 "zip_code": AGG_ZIPCODE_DEFAULT, # 26000 "issue": AGG_ISSUE_DEFAULT, # 200 From f38224f54307a3dfe37493da8e2f6883215030be Mon Sep 17 00:00:00 2001 From: Richard Dinh Date: Tue, 24 Jun 2025 14:39:59 -0700 Subject: [PATCH 6/8] restore to 26000 companies --- complaint_search/defaults.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/complaint_search/defaults.py b/complaint_search/defaults.py index 4fa90590..4106c69e 100644 --- a/complaint_search/defaults.py +++ b/complaint_search/defaults.py @@ -6,7 +6,7 @@ # Defaults were adjusted in 2021 to be higher than object counts at the time. # Counts for ZIP Codes and states are unlikely to change much, but others may. # Companies, products, and issues could grow over time and need adjustment. 
-AGG_COMPANY_DEFAULT = 50 +AGG_COMPANY_DEFAULT = 26000 AGG_ZIPCODE_DEFAULT = 26000 AGG_STATE_DEFAULT = 100 AGG_STATE_PRODUCT_DEFAULT = 5 From 60785a5c7b73e914af71b988b0e591a4bc1541df Mon Sep 17 00:00:00 2001 From: Richard Dinh Date: Tue, 24 Jun 2025 14:43:38 -0700 Subject: [PATCH 7/8] revert url for other environments --- ccdb5_api/urls.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ccdb5_api/urls.py b/ccdb5_api/urls.py index e569eb55..ae418d32 100644 --- a/ccdb5_api/urls.py +++ b/ccdb5_api/urls.py @@ -4,5 +4,5 @@ urlpatterns = [ re_path(r"^admin/", admin.site.urls), - re_path(r"data-research/consumer-complaints/search/api/v1/", include("complaint_search.urls")), + re_path(r"^", include("complaint_search.urls")), ] From 26f15a24f4574483175cf36bb256d55183b0f2ac Mon Sep 17 00:00:00 2001 From: Richard Dinh Date: Wed, 25 Jun 2025 08:19:06 -0700 Subject: [PATCH 8/8] revert defaults.py company count to 6500 max --- complaint_search/defaults.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/complaint_search/defaults.py b/complaint_search/defaults.py index 4106c69e..adf12385 100644 --- a/complaint_search/defaults.py +++ b/complaint_search/defaults.py @@ -6,7 +6,7 @@ # Defaults were adjusted in 2021 to be higher than object counts at the time. # Counts for ZIP Codes and states are unlikely to change much, but others may. # Companies, products, and issues could grow over time and need adjustment. -AGG_COMPANY_DEFAULT = 26000 +AGG_COMPANY_DEFAULT = 6500 AGG_ZIPCODE_DEFAULT = 26000 AGG_STATE_DEFAULT = 100 AGG_STATE_PRODUCT_DEFAULT = 5