From 463bb124ecfbcc6ef450bbd2f43bfe167a7509bb Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Tue, 17 Mar 2026 18:05:22 +0000 Subject: [PATCH 1/3] feat(helm): Cogstack CE - auto provision sample data --- .../provisioning/document_bulk.ndjson | 120 ++++++++++++++++++ .../provisioning/opensearch-provisioning.sh | 32 ++--- 2 files changed, 128 insertions(+), 24 deletions(-) create mode 100644 helm-charts/cogstack-helm-ce/provisioning/document_bulk.ndjson diff --git a/helm-charts/cogstack-helm-ce/provisioning/document_bulk.ndjson b/helm-charts/cogstack-helm-ce/provisioning/document_bulk.ndjson new file mode 100644 index 0000000..329b401 --- /dev/null +++ b/helm-charts/cogstack-helm-ce/provisioning/document_bulk.ndjson @@ -0,0 +1,120 @@ +{"index":{"_index":"admissions","_id":"1"}} +{"subject_id":10000032,"hadm_id":22595853,"admittime":"2180-05-06 22:23:00","dischtime":"2180-05-07 17:15:00","admission_type":"URGENT","admit_provider_id":"P49AFC","admission_location":"TRANSFER FROM HOSPITAL","discharge_location":"HOME","insurance":"Medicaid","language":"English","marital_status":"WIDOWED","race":"WHITE","edregtime":"2180-05-06 19:17:00","edouttime":"2180-05-06 23:30:00","hospital_expire_flag":0} +{"index":{"_index":"admissions","_id":"2"}} +{"subject_id":10000032,"hadm_id":22841357,"admittime":"2180-06-26 18:27:00","dischtime":"2180-06-27 18:49:00","admission_type":"EW EMER.","admit_provider_id":"P784FA","admission_location":"EMERGENCY ROOM","discharge_location":"HOME","insurance":"Medicaid","language":"English","marital_status":"WIDOWED","race":"WHITE","edregtime":"2180-06-26 15:54:00","edouttime":"2180-06-26 21:31:00","hospital_expire_flag":0} +{"index":{"_index":"admissions","_id":"3"}} +{"subject_id":10001843,"hadm_id":26133978,"admittime":"2134-12-05 00:10:00","dischtime":"2134-12-06 12:54:00","deathtime":"2134-12-06 12:54:00","admission_type":"URGENT","admit_provider_id":"P67ATB","admission_location":"TRANSFER 
FROM HOSPITAL","discharge_location":"DIED","insurance":"Medicare","language":"English","marital_status":"SINGLE","race":"WHITE","hospital_expire_flag":1} +{"index":{"_index":"admissions","_id":"4"}} +{"subject_id":10001884,"hadm_id":26184834,"admittime":"2131-01-07 20:39:00","dischtime":"2131-01-20 05:15:00","deathtime":"2131-01-20 05:15:00","admission_type":"OBSERVATION ADMIT","admit_provider_id":"P49AFC","admission_location":"EMERGENCY ROOM","discharge_location":"DIED","insurance":"Medicare","language":"English","marital_status":"MARRIED","race":"BLACK/AFRICAN AMERICAN","edregtime":"2131-01-07 13:36:00","edouttime":"2131-01-07 22:13:00","hospital_expire_flag":1} +{"index":{"_index":"admissions","_id":"5"}} +{"subject_id":10002155,"hadm_id":20345487,"admittime":"2131-03-09 20:33:00","dischtime":"2131-03-10 01:55:00","deathtime":"2131-03-10 21:53:00","admission_type":"EW EMER.","admit_provider_id":"P579JR","admission_location":"EMERGENCY ROOM","discharge_location":"DIED","insurance":"Medicare","language":"English","marital_status":"MARRIED","race":"WHITE","edregtime":"2131-03-09 19:14:00","edouttime":"2131-03-09 21:33:00","hospital_expire_flag":1} +{"index":{"_index":"admissions","_id":"6"}} +{"subject_id":10003400,"hadm_id":23559586,"admittime":"2137-08-04 00:07:00","dischtime":"2137-09-02 17:05:00","deathtime":"2137-09-02 17:05:00","admission_type":"URGENT","admit_provider_id":"P32CSX","admission_location":"TRANSFER FROM HOSPITAL","discharge_location":"DIED","insurance":"Medicare","language":"English","marital_status":"MARRIED","race":"BLACK/AFRICAN AMERICAN","hospital_expire_flag":1} +{"index":{"_index":"admissions","_id":"7"}} +{"subject_id":10003637,"hadm_id":28317408,"admittime":"2150-05-14 19:51:00","dischtime":"2150-05-22 16:25:00","deathtime":"2150-05-22 16:25:00","admission_type":"EW EMER.","admit_provider_id":"P46834","admission_location":"WALK-IN/SELF 
REFERRAL","discharge_location":"DIED","insurance":"Medicare","language":"English","marital_status":"DIVORCED","race":"PORTUGUESE","edregtime":"2150-05-14 18:07:00","edouttime":"2150-05-14 21:59:00","hospital_expire_flag":1} +{"index":{"_index":"admissions","_id":"8"}} +{"subject_id":10004401,"hadm_id":25777141,"admittime":"2144-06-05 19:45:00","dischtime":"2144-06-18 21:30:00","deathtime":"2144-06-18 21:30:00","admission_type":"EW EMER.","admit_provider_id":"P43MIV","admission_location":"EMERGENCY ROOM","discharge_location":"DIED","insurance":"Medicare","language":"English","marital_status":"MARRIED","race":"WHITE","edregtime":"2144-06-05 16:08:00","edouttime":"2144-06-05 20:36:00","hospital_expire_flag":1} +{"index":{"_index":"admissions","_id":"9"}} +{"subject_id":10004720,"hadm_id":22081550,"admittime":"2186-11-12 18:01:00","dischtime":"2186-11-17 18:30:00","deathtime":"2186-11-17 18:30:00","admission_type":"EW EMER.","admit_provider_id":"P595QV","admission_location":"INFORMATION NOT AVAILABLE","discharge_location":"DIED","insurance":"Medicare","language":"English","marital_status":"SINGLE","race":"WHITE","edregtime":"2186-11-12 16:09:00","edouttime":"2186-11-12 19:55:00","hospital_expire_flag":1} +{"index":{"_index":"admissions","_id":"10"}} +{"subject_id":10005024,"hadm_id":25023471,"admittime":"2138-03-29 01:17:00","dischtime":"2138-04-19 11:30:00","deathtime":"2138-04-19 11:30:00","admission_type":"EW EMER.","admit_provider_id":"P94MJY","admission_location":"EMERGENCY ROOM","discharge_location":"DIED","insurance":"Medicaid","language":"English","marital_status":"SINGLE","race":"UNKNOWN","edregtime":"2138-03-28 23:30:00","edouttime":"2138-03-29 03:08:00","hospital_expire_flag":1} +{"index":{"_index":"drgcodes","_id":"1"}} +{"subject_id":10023075,"hadm_id":27859968,"drg_type":"APR","drg_code":304,"description":"DORSAL AND LUMBAR FUSION PROCEDURE EXCEPT FOR CURVATURE OF BACK","drg_severity":1,"drg_mortality":2} +{"index":{"_index":"drgcodes","_id":"2"}} 
+{"subject_id":10023075,"hadm_id":27859968,"drg_type":"HCFA","drg_code":460,"description":"SPINAL FUSION EXCEPT CERVICAL WITHOUT MCC"} +{"index":{"_index":"drgcodes","_id":"3"}} +{"subject_id":10023075,"hadm_id":29919442,"drg_type":"APR","drg_code":320,"description":"OTHER MUSCULOSKELETAL SYSTEM AND CONNECTIVE TISSUE PROCEDURES","drg_severity":4,"drg_mortality":4} +{"index":{"_index":"drgcodes","_id":"4"}} +{"subject_id":10023075,"hadm_id":29919442,"drg_type":"HCFA","drg_code":515,"description":"OTHER MUSCULOSKELETAL SYSTEM AND CONNECTIVE TISSUE O.R. PROCEDURES WITH"} +{"index":{"_index":"drgcodes","_id":"5"}} +{"subject_id":10023117,"hadm_id":21133938,"drg_type":"APR","drg_code":196,"description":"CARDIAC ARREST AND SHOCK","drg_severity":4,"drg_mortality":4} +{"index":{"_index":"drgcodes","_id":"6"}} +{"subject_id":10023117,"hadm_id":21133938,"drg_type":"HCFA","drg_code":291,"description":"HEART FAILURE & SHOCK W MCC"} +{"index":{"_index":"drgcodes","_id":"7"}} +{"subject_id":10023117,"hadm_id":21607814,"drg_type":"APR","drg_code":194,"description":"HEART FAILURE","drg_severity":4,"drg_mortality":3} +{"index":{"_index":"drgcodes","_id":"8"}} +{"subject_id":10023117,"hadm_id":21607814,"drg_type":"HCFA","drg_code":292,"description":"HEART FAILURE & SHOCK W CC"} +{"index":{"_index":"drgcodes","_id":"9"}} +{"subject_id":10023117,"hadm_id":24244087,"drg_type":"HCFA","drg_code":682,"description":"RENAL FAILURE W MCC"} +{"index":{"_index":"drgcodes","_id":"10"}} +{"subject_id":10023117,"hadm_id":28872262,"drg_type":"APR","drg_code":177,"description":"CARDIAC PACEMAKER AND DEFIBRILLATOR REVISION EXCEPT DEVICE REPLACEMENT","drg_severity":3,"drg_mortality":3} +{"index":{"_index":"emar","_id":"1"}} +{"subject_id":10011352,"hadm_id":29927609,"emar_id":"10011352-421","emar_seq":421,"poe_id":"10011352-501","pharmacy_id":16237767,"enter_provider_id":"P62G22","charttime":"2133-03-17 08:25:00","medication":"Furosemide","event_txt":"Administered","scheduletime":"2133-03-17 
08:26:00","storetime":"2133-03-17 08:32:00"} +{"index":{"_index":"emar","_id":"2"}} +{"subject_id":10011352,"hadm_id":29927609,"emar_id":"10011352-422","emar_seq":422,"poe_id":"10011352-369","pharmacy_id":36268547,"enter_provider_id":"P62G22","charttime":"2133-03-17 08:32:00","medication":"Sodium Chloride 0.9% Flush","event_txt":"Flushed","scheduletime":"2133-03-17 08:32:00","storetime":"2133-03-17 08:32:00"} +{"index":{"_index":"emar","_id":"3"}} +{"subject_id":10011352,"hadm_id":29927609,"emar_id":"10011352-423","emar_seq":423,"poe_id":"10011352-357","pharmacy_id":55273397,"enter_provider_id":"P62G22","charttime":"2133-03-17 15:49:00","medication":"Sodium Chloride 0.9% Flush","event_txt":"Flushed","scheduletime":"2133-03-17 15:49:00","storetime":"2133-03-17 15:50:00"} +{"index":{"_index":"emar","_id":"4"}} +{"subject_id":10011352,"hadm_id":29927609,"emar_id":"10011352-424","emar_seq":424,"poe_id":"10011352-366","pharmacy_id":82384346,"enter_provider_id":"P62G22","charttime":"2133-03-17 15:49:00","medication":"Sodium Chloride 0.9% Flush","event_txt":"Flushed","scheduletime":"2133-03-17 15:49:00","storetime":"2133-03-17 15:50:00"} +{"index":{"_index":"emar","_id":"5"}} +{"subject_id":10011352,"hadm_id":29927609,"emar_id":"10011352-425","emar_seq":425,"poe_id":"10011352-382","pharmacy_id":42180932,"enter_provider_id":"P62G22","charttime":"2133-03-17 15:49:00","medication":"Sodium Chloride 0.9% Flush","event_txt":"Flushed","scheduletime":"2133-03-17 15:49:00","storetime":"2133-03-17 15:50:00"} +{"index":{"_index":"emar","_id":"6"}} +{"subject_id":10011352,"hadm_id":29927609,"emar_id":"10011352-426","emar_seq":426,"poe_id":"10011352-365","enter_provider_id":"P62G22","charttime":"2133-03-17 15:54:00","medication":"Influenza Vaccine Quadrivalent","event_txt":"Not Given","scheduletime":"2133-03-17 15:54:00","storetime":"2133-03-17 15:54:00"} +{"index":{"_index":"emar","_id":"7"}} 
+{"subject_id":10011352,"hadm_id":29927609,"emar_id":"10011352-43","emar_seq":43,"poe_id":"10011352-43","pharmacy_id":54731632,"enter_provider_id":"P78BJU","charttime":"2133-03-01 08:35:00","medication":"FoLIC Acid","event_txt":"Administered","scheduletime":"2133-03-01 08:00:00","storetime":"2133-03-01 08:37:00"} +{"index":{"_index":"emar","_id":"8"}} +{"subject_id":10011352,"hadm_id":29927609,"emar_id":"10011352-44","emar_seq":44,"poe_id":"10011352-86","pharmacy_id":78272665,"enter_provider_id":"P78BJU","charttime":"2133-03-01 08:35:00","medication":"Midodrine","event_txt":"Administered","scheduletime":"2133-03-01 08:00:00","storetime":"2133-03-01 08:37:00"} +{"index":{"_index":"emar","_id":"9"}} +{"subject_id":10011352,"hadm_id":29927609,"emar_id":"10011352-45","emar_seq":45,"poe_id":"10011352-46","pharmacy_id":84497985,"enter_provider_id":"P78BJU","charttime":"2133-03-01 08:35:00","medication":"Multivitamins","event_txt":"Administered","scheduletime":"2133-03-01 08:00:00","storetime":"2133-03-01 08:37:00"} +{"index":{"_index":"emar","_id":"10"}} +{"subject_id":10011352,"hadm_id":29927609,"emar_id":"10011352-46","emar_seq":46,"poe_id":"10011352-48","pharmacy_id":47739534,"enter_provider_id":"P78BJU","charttime":"2133-03-01 08:35:00","medication":"Pantoprazole","event_txt":"Administered","scheduletime":"2133-03-01 08:00:00","storetime":"2133-03-01 08:37:00"} +{"index":{"_index":"icustays","_id":"1"}} +{"subject_id":10000032,"hadm_id":29079034,"stay_id":39553978,"first_careunit":"Medical Intensive Care Unit (MICU)","last_careunit":"Medical Intensive Care Unit (MICU)","intime":"2180-07-23 14:00:00","outtime":"2180-07-23 23:50:47","los":0.4102662} +{"index":{"_index":"icustays","_id":"2"}} +{"subject_id":10000690,"hadm_id":25860671,"stay_id":37081114,"first_careunit":"Medical Intensive Care Unit (MICU)","last_careunit":"Medical Intensive Care Unit (MICU)","intime":"2150-11-02 19:37:00","outtime":"2150-11-06 17:03:17","los":3.8932524} 
+{"index":{"_index":"icustays","_id":"3"}} +{"subject_id":10000980,"hadm_id":26913865,"stay_id":39765666,"first_careunit":"Medical Intensive Care Unit (MICU)","last_careunit":"Medical Intensive Care Unit (MICU)","intime":"2189-06-27 08:42:00","outtime":"2189-06-27 20:38:27","los":0.49753472} +{"index":{"_index":"icustays","_id":"4"}} +{"subject_id":10001217,"hadm_id":24597018,"stay_id":37067082,"first_careunit":"Surgical Intensive Care Unit (SICU)","last_careunit":"Surgical Intensive Care Unit (SICU)","intime":"2157-11-20 19:18:02","outtime":"2157-11-21 22:08:00","los":1.1180325} +{"index":{"_index":"icustays","_id":"5"}} +{"subject_id":10001217,"hadm_id":27703517,"stay_id":34592300,"first_careunit":"Surgical Intensive Care Unit (SICU)","last_careunit":"Surgical Intensive Care Unit (SICU)","intime":"2157-12-19 15:42:24","outtime":"2157-12-20 14:27:41","los":0.94811344} +{"index":{"_index":"icustays","_id":"6"}} +{"subject_id":10001725,"hadm_id":25563031,"stay_id":31205490,"first_careunit":"Medical/Surgical Intensive Care Unit (MICU/SICU)","last_careunit":"Medical/Surgical Intensive Care Unit (MICU/SICU)","intime":"2110-04-11 15:52:22","outtime":"2110-04-12 23:59:56","los":1.338588} +{"index":{"_index":"icustays","_id":"7"}} +{"subject_id":10001843,"hadm_id":26133978,"stay_id":39698942,"first_careunit":"Medical/Surgical Intensive Care Unit (MICU/SICU)","last_careunit":"Medical/Surgical Intensive Care Unit (MICU/SICU)","intime":"2134-12-05 18:50:03","outtime":"2134-12-06 14:38:26","los":0.8252662} +{"index":{"_index":"icustays","_id":"8"}} +{"subject_id":10001884,"hadm_id":26184834,"stay_id":37510196,"first_careunit":"Medical Intensive Care Unit (MICU)","last_careunit":"Medical Intensive Care Unit (MICU)","intime":"2131-01-11 04:20:05","outtime":"2131-01-20 08:27:30","los":9.171817} +{"index":{"_index":"icustays","_id":"9"}} +{"subject_id":10002013,"hadm_id":23581541,"stay_id":39060235,"first_careunit":"Cardiac Vascular Intensive Care Unit 
(CVICU)","last_careunit":"Cardiac Vascular Intensive Care Unit (CVICU)","intime":"2160-05-18 10:00:53","outtime":"2160-05-19 17:33:33","los":1.3143518} +{"index":{"_index":"icustays","_id":"10"}} +{"subject_id":10002114,"hadm_id":27793700,"stay_id":34672098,"first_careunit":"Coronary Care Unit (CCU)","last_careunit":"Coronary Care Unit (CCU)","intime":"2162-02-17 23:30:00","outtime":"2162-02-20 21:16:27","los":2.9072568} +{"index":{"_index":"patients","_id":"1"}} +{"subject_id":10000032,"gender":"F","anchor_age":52,"anchor_year":2180,"anchor_year_group":"2014 - 2016","dod":"2180-09-09"} +{"index":{"_index":"patients","_id":"2"}} +{"subject_id":10000048,"gender":"F","anchor_age":23,"anchor_year":2126,"anchor_year_group":"2008 - 2010"} +{"index":{"_index":"patients","_id":"3"}} +{"subject_id":10000058,"gender":"F","anchor_age":33,"anchor_year":2168,"anchor_year_group":"2020 - 2022"} +{"index":{"_index":"patients","_id":"4"}} +{"subject_id":10000068,"gender":"F","anchor_age":19,"anchor_year":2160,"anchor_year_group":"2008 - 2010"} +{"index":{"_index":"patients","_id":"5"}} +{"subject_id":10000084,"gender":"M","anchor_age":72,"anchor_year":2160,"anchor_year_group":"2017 - 2019","dod":"2161-02-13"} +{"index":{"_index":"patients","_id":"6"}} +{"subject_id":10000102,"gender":"F","anchor_age":27,"anchor_year":2136,"anchor_year_group":"2008 - 2010"} +{"index":{"_index":"patients","_id":"7"}} +{"subject_id":10000108,"gender":"M","anchor_age":25,"anchor_year":2163,"anchor_year_group":"2014 - 2016"} +{"index":{"_index":"patients","_id":"8"}} +{"subject_id":10000115,"gender":"M","anchor_age":24,"anchor_year":2154,"anchor_year_group":"2017 - 2019"} +{"index":{"_index":"patients","_id":"9"}} +{"subject_id":10000117,"gender":"F","anchor_age":48,"anchor_year":2174,"anchor_year_group":"2008 - 2010"} +{"index":{"_index":"patients","_id":"10"}} +{"subject_id":10000161,"gender":"M","anchor_age":60,"anchor_year":2163,"anchor_year_group":"2020 - 2022"} 
+{"index":{"_index":"poe","_id":"1"}} +{"poe_id":"13055950-257","poe_seq":257,"subject_id":13055950,"hadm_id":21079497,"ordertime":"2145-07-04 11:23:28","order_type":"General Care","order_subtype":"Other","transaction_type":"New","order_provider_id":"P64302","order_status":"Inactive"} +{"index":{"_index":"poe","_id":"2"}} +{"poe_id":"13055950-258","poe_seq":258,"subject_id":13055950,"hadm_id":21079497,"ordertime":"2145-07-05 06:09:50","order_type":"General Care","order_subtype":"Tubes/Drains","transaction_type":"D/C","discontinue_of_poe_id":"13055950-246","order_provider_id":"P64302","order_status":"Inactive"} +{"index":{"_index":"poe","_id":"3"}} +{"poe_id":"13055950-259","poe_seq":259,"subject_id":13055950,"hadm_id":21079497,"ordertime":"2145-07-05 06:09:50","order_type":"IV therapy","order_subtype":"IV fluids","transaction_type":"D/C","discontinue_of_poe_id":"13055950-240","order_provider_id":"P64302","order_status":"Inactive"} +{"index":{"_index":"poe","_id":"4"}} +{"poe_id":"13055950-268","poe_seq":268,"subject_id":13055950,"hadm_id":21079497,"ordertime":"2145-07-05 11:22:31","order_type":"Medications","transaction_type":"New","discontinued_by_poe_id":"13055950-269","order_provider_id":"P64302","order_status":"Inactive"} +{"index":{"_index":"poe","_id":"5"}} +{"poe_id":"13055950-308","poe_seq":308,"subject_id":13055950,"hadm_id":21079497,"ordertime":"2145-07-09 06:54:02","order_type":"Radiology","order_subtype":"General Xray","transaction_type":"New","order_provider_id":"P64302","order_status":"Inactive"} +{"index":{"_index":"poe","_id":"6"}} +{"poe_id":"13055950-360","poe_seq":360,"subject_id":13055950,"hadm_id":28507903,"ordertime":"2149-09-27 03:32:00","order_type":"Medications","transaction_type":"New","order_provider_id":"P758TM","order_status":"Inactive"} +{"index":{"_index":"poe","_id":"7"}} +{"poe_id":"13055950-372","poe_seq":372,"subject_id":13055950,"hadm_id":28507903,"ordertime":"2149-09-27 10:06:59","order_type":"Consults","order_subtype":"Physical 
Therapy","transaction_type":"New","order_provider_id":"P97CU2","order_status":"Inactive"} +{"index":{"_index":"poe","_id":"8"}} +{"poe_id":"13055950-387","poe_seq":387,"subject_id":13055950,"hadm_id":28507903,"ordertime":"2149-09-28 10:14:15","order_type":"IV therapy","order_subtype":"IV fluids","transaction_type":"Change","discontinue_of_poe_id":"13055950-386","discontinued_by_poe_id":"13055950-388","order_provider_id":"P85LAY","order_status":"Inactive"} +{"index":{"_index":"poe","_id":"9"}} +{"poe_id":"13055950-388","poe_seq":388,"subject_id":13055950,"hadm_id":28507903,"ordertime":"2149-09-28 10:15:49","order_type":"IV therapy","order_subtype":"IV fluids","transaction_type":"D/C","discontinue_of_poe_id":"13055950-387","order_provider_id":"P85LAY","order_status":"Inactive"} +{"index":{"_index":"poe","_id":"10"}} +{"poe_id":"13055950-397","poe_seq":397,"subject_id":13055950,"hadm_id":28507903,"ordertime":"2149-09-28 13:40:53","order_type":"Medications","transaction_type":"New","order_provider_id":"P78AKX","order_status":"Inactive"} diff --git a/helm-charts/cogstack-helm-ce/provisioning/opensearch-provisioning.sh b/helm-charts/cogstack-helm-ce/provisioning/opensearch-provisioning.sh index 94c566a..8333e51 100644 --- a/helm-charts/cogstack-helm-ce/provisioning/opensearch-provisioning.sh +++ b/helm-charts/cogstack-helm-ce/provisioning/opensearch-provisioning.sh @@ -11,12 +11,12 @@ log() { : "${OPENSEARCH_USERNAME:?OPENSEARCH_USERNAME is required. }" : "${OPENSEARCH_PASSWORD:?OPENSEARCH_PASSWORD is required. }" + : "${PROVISION_OPENSEARCH_INDEX_TEMPLATE_ENABLED:?PROVISION_OPENSEARCH_INDEX_TEMPLATE_ENABLED is required. }" + : "${PROVISION_OPENSEARCH_EXAMPLE_DOCUMENTS_ENABLED:?PROVISION_OPENSEARCH_EXAMPLE_DOCUMENTS_ENABLED is required. }" + : "${PROVISION_OPENSEARCH_DASHBOARDS_ENABLED:?PROVISION_OPENSEARCH_DASHBOARDS_ENABLED is required. }" + : "${CONFIG_DIR:?CONFIG_DIR is required. 
}" : "${CURL_BODY_FILE:=/tmp/curl_body.$$}" -# OPENSEARCH_URL=https://localhost:9200 -# OPENSEARCH_DASHBOARD_URL=http://localhost:5601 -# OPENSEARCH_USERNAME=admin -# OPENSEARCH_PASSWORD=opensearch-312$A wait_for_service() { local service_name="$1" @@ -73,31 +73,15 @@ fi if [ "$PROVISION_OPENSEARCH_EXAMPLE_DOCUMENTS_ENABLED" = "true" ]; then wait_for_service "OpenSearch" "$OPENSEARCH_URL" "-u $OPENSEARCH_AUTH" || exit 1 - log "Creating example admissions document - POST $OPENSEARCH_URL/admissions/_doc" + log "Creating example admissions document (bulk) - POST $OPENSEARCH_URL/_bulk" os_status="$(curl -sS \ -o "$CURL_BODY_FILE" \ -w "%{http_code}" \ - -X POST "$OPENSEARCH_URL/admissions/_doc" \ - -H "Content-Type: application/json" \ + -X POST "$OPENSEARCH_URL/_bulk" \ + -H "Content-Type: application/x-ndjson" \ -u "$OPENSEARCH_AUTH" \ -k \ - -d '{ - "subject_id": 10000032, - "hadm_id": 22595853, - "admittime": "2180-05-06 22:23:00", - "dischtime": "2180-05-07 17:15:00", - "admission_type": "URGENT", - "admit_provider_id": "P49AFC", - "admission_location": "TRANSFER FROM HOSPITAL", - "discharge_location": "HOME", - "insurance": "Medicaid", - "language": "English", - "marital_status": "WIDOWED", - "race": "WHITE", - "edregtime": "2180-05-06 19:17:00", - "edouttime": "2180-05-06 23:30:00", - "hospital_expire_flag": 0 -}')" + --data-binary @"${CONFIG_DIR}/document_bulk.ndjson")" if [ "$os_status" != "200" ] && [ "$os_status" != "201" ]; then log "Failed to create example admissions document (http_status=$os_status)" if [ -s "$CURL_BODY_FILE" ]; then From 7e1bffa7568063df3bc91a2702d0c3e5d9197bc8 Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Wed, 18 Mar 2026 11:54:13 +0000 Subject: [PATCH 2/3] feat(helm): Generate synthetic data for dashboard demo usage --- .../provisioning/document_bulk.ndjson | 120 --- .../generate_synthetic_bulk_ndjson.py | 744 ++++++++++++++++++ .../provisioning/opensearch-provisioning.sh | 20 
+- .../opensearch-provisioning-post-install.yaml | 2 +- 4 files changed, 762 insertions(+), 124 deletions(-) delete mode 100644 helm-charts/cogstack-helm-ce/provisioning/document_bulk.ndjson create mode 100644 helm-charts/cogstack-helm-ce/provisioning/generate_synthetic_bulk_ndjson.py diff --git a/helm-charts/cogstack-helm-ce/provisioning/document_bulk.ndjson b/helm-charts/cogstack-helm-ce/provisioning/document_bulk.ndjson deleted file mode 100644 index 329b401..0000000 --- a/helm-charts/cogstack-helm-ce/provisioning/document_bulk.ndjson +++ /dev/null @@ -1,120 +0,0 @@ -{"index":{"_index":"admissions","_id":"1"}} -{"subject_id":10000032,"hadm_id":22595853,"admittime":"2180-05-06 22:23:00","dischtime":"2180-05-07 17:15:00","admission_type":"URGENT","admit_provider_id":"P49AFC","admission_location":"TRANSFER FROM HOSPITAL","discharge_location":"HOME","insurance":"Medicaid","language":"English","marital_status":"WIDOWED","race":"WHITE","edregtime":"2180-05-06 19:17:00","edouttime":"2180-05-06 23:30:00","hospital_expire_flag":0} -{"index":{"_index":"admissions","_id":"2"}} -{"subject_id":10000032,"hadm_id":22841357,"admittime":"2180-06-26 18:27:00","dischtime":"2180-06-27 18:49:00","admission_type":"EW EMER.","admit_provider_id":"P784FA","admission_location":"EMERGENCY ROOM","discharge_location":"HOME","insurance":"Medicaid","language":"English","marital_status":"WIDOWED","race":"WHITE","edregtime":"2180-06-26 15:54:00","edouttime":"2180-06-26 21:31:00","hospital_expire_flag":0} -{"index":{"_index":"admissions","_id":"3"}} -{"subject_id":10001843,"hadm_id":26133978,"admittime":"2134-12-05 00:10:00","dischtime":"2134-12-06 12:54:00","deathtime":"2134-12-06 12:54:00","admission_type":"URGENT","admit_provider_id":"P67ATB","admission_location":"TRANSFER FROM HOSPITAL","discharge_location":"DIED","insurance":"Medicare","language":"English","marital_status":"SINGLE","race":"WHITE","hospital_expire_flag":1} -{"index":{"_index":"admissions","_id":"4"}} 
-{"subject_id":10001884,"hadm_id":26184834,"admittime":"2131-01-07 20:39:00","dischtime":"2131-01-20 05:15:00","deathtime":"2131-01-20 05:15:00","admission_type":"OBSERVATION ADMIT","admit_provider_id":"P49AFC","admission_location":"EMERGENCY ROOM","discharge_location":"DIED","insurance":"Medicare","language":"English","marital_status":"MARRIED","race":"BLACK/AFRICAN AMERICAN","edregtime":"2131-01-07 13:36:00","edouttime":"2131-01-07 22:13:00","hospital_expire_flag":1} -{"index":{"_index":"admissions","_id":"5"}} -{"subject_id":10002155,"hadm_id":20345487,"admittime":"2131-03-09 20:33:00","dischtime":"2131-03-10 01:55:00","deathtime":"2131-03-10 21:53:00","admission_type":"EW EMER.","admit_provider_id":"P579JR","admission_location":"EMERGENCY ROOM","discharge_location":"DIED","insurance":"Medicare","language":"English","marital_status":"MARRIED","race":"WHITE","edregtime":"2131-03-09 19:14:00","edouttime":"2131-03-09 21:33:00","hospital_expire_flag":1} -{"index":{"_index":"admissions","_id":"6"}} -{"subject_id":10003400,"hadm_id":23559586,"admittime":"2137-08-04 00:07:00","dischtime":"2137-09-02 17:05:00","deathtime":"2137-09-02 17:05:00","admission_type":"URGENT","admit_provider_id":"P32CSX","admission_location":"TRANSFER FROM HOSPITAL","discharge_location":"DIED","insurance":"Medicare","language":"English","marital_status":"MARRIED","race":"BLACK/AFRICAN AMERICAN","hospital_expire_flag":1} -{"index":{"_index":"admissions","_id":"7"}} -{"subject_id":10003637,"hadm_id":28317408,"admittime":"2150-05-14 19:51:00","dischtime":"2150-05-22 16:25:00","deathtime":"2150-05-22 16:25:00","admission_type":"EW EMER.","admit_provider_id":"P46834","admission_location":"WALK-IN/SELF REFERRAL","discharge_location":"DIED","insurance":"Medicare","language":"English","marital_status":"DIVORCED","race":"PORTUGUESE","edregtime":"2150-05-14 18:07:00","edouttime":"2150-05-14 21:59:00","hospital_expire_flag":1} -{"index":{"_index":"admissions","_id":"8"}} 
-{"subject_id":10004401,"hadm_id":25777141,"admittime":"2144-06-05 19:45:00","dischtime":"2144-06-18 21:30:00","deathtime":"2144-06-18 21:30:00","admission_type":"EW EMER.","admit_provider_id":"P43MIV","admission_location":"EMERGENCY ROOM","discharge_location":"DIED","insurance":"Medicare","language":"English","marital_status":"MARRIED","race":"WHITE","edregtime":"2144-06-05 16:08:00","edouttime":"2144-06-05 20:36:00","hospital_expire_flag":1} -{"index":{"_index":"admissions","_id":"9"}} -{"subject_id":10004720,"hadm_id":22081550,"admittime":"2186-11-12 18:01:00","dischtime":"2186-11-17 18:30:00","deathtime":"2186-11-17 18:30:00","admission_type":"EW EMER.","admit_provider_id":"P595QV","admission_location":"INFORMATION NOT AVAILABLE","discharge_location":"DIED","insurance":"Medicare","language":"English","marital_status":"SINGLE","race":"WHITE","edregtime":"2186-11-12 16:09:00","edouttime":"2186-11-12 19:55:00","hospital_expire_flag":1} -{"index":{"_index":"admissions","_id":"10"}} -{"subject_id":10005024,"hadm_id":25023471,"admittime":"2138-03-29 01:17:00","dischtime":"2138-04-19 11:30:00","deathtime":"2138-04-19 11:30:00","admission_type":"EW EMER.","admit_provider_id":"P94MJY","admission_location":"EMERGENCY ROOM","discharge_location":"DIED","insurance":"Medicaid","language":"English","marital_status":"SINGLE","race":"UNKNOWN","edregtime":"2138-03-28 23:30:00","edouttime":"2138-03-29 03:08:00","hospital_expire_flag":1} -{"index":{"_index":"drgcodes","_id":"1"}} -{"subject_id":10023075,"hadm_id":27859968,"drg_type":"APR","drg_code":304,"description":"DORSAL AND LUMBAR FUSION PROCEDURE EXCEPT FOR CURVATURE OF BACK","drg_severity":1,"drg_mortality":2} -{"index":{"_index":"drgcodes","_id":"2"}} -{"subject_id":10023075,"hadm_id":27859968,"drg_type":"HCFA","drg_code":460,"description":"SPINAL FUSION EXCEPT CERVICAL WITHOUT MCC"} -{"index":{"_index":"drgcodes","_id":"3"}} -{"subject_id":10023075,"hadm_id":29919442,"drg_type":"APR","drg_code":320,"description":"OTHER 
MUSCULOSKELETAL SYSTEM AND CONNECTIVE TISSUE PROCEDURES","drg_severity":4,"drg_mortality":4} -{"index":{"_index":"drgcodes","_id":"4"}} -{"subject_id":10023075,"hadm_id":29919442,"drg_type":"HCFA","drg_code":515,"description":"OTHER MUSCULOSKELETAL SYSTEM AND CONNECTIVE TISSUE O.R. PROCEDURES WITH"} -{"index":{"_index":"drgcodes","_id":"5"}} -{"subject_id":10023117,"hadm_id":21133938,"drg_type":"APR","drg_code":196,"description":"CARDIAC ARREST AND SHOCK","drg_severity":4,"drg_mortality":4} -{"index":{"_index":"drgcodes","_id":"6"}} -{"subject_id":10023117,"hadm_id":21133938,"drg_type":"HCFA","drg_code":291,"description":"HEART FAILURE & SHOCK W MCC"} -{"index":{"_index":"drgcodes","_id":"7"}} -{"subject_id":10023117,"hadm_id":21607814,"drg_type":"APR","drg_code":194,"description":"HEART FAILURE","drg_severity":4,"drg_mortality":3} -{"index":{"_index":"drgcodes","_id":"8"}} -{"subject_id":10023117,"hadm_id":21607814,"drg_type":"HCFA","drg_code":292,"description":"HEART FAILURE & SHOCK W CC"} -{"index":{"_index":"drgcodes","_id":"9"}} -{"subject_id":10023117,"hadm_id":24244087,"drg_type":"HCFA","drg_code":682,"description":"RENAL FAILURE W MCC"} -{"index":{"_index":"drgcodes","_id":"10"}} -{"subject_id":10023117,"hadm_id":28872262,"drg_type":"APR","drg_code":177,"description":"CARDIAC PACEMAKER AND DEFIBRILLATOR REVISION EXCEPT DEVICE REPLACEMENT","drg_severity":3,"drg_mortality":3} -{"index":{"_index":"emar","_id":"1"}} -{"subject_id":10011352,"hadm_id":29927609,"emar_id":"10011352-421","emar_seq":421,"poe_id":"10011352-501","pharmacy_id":16237767,"enter_provider_id":"P62G22","charttime":"2133-03-17 08:25:00","medication":"Furosemide","event_txt":"Administered","scheduletime":"2133-03-17 08:26:00","storetime":"2133-03-17 08:32:00"} -{"index":{"_index":"emar","_id":"2"}} -{"subject_id":10011352,"hadm_id":29927609,"emar_id":"10011352-422","emar_seq":422,"poe_id":"10011352-369","pharmacy_id":36268547,"enter_provider_id":"P62G22","charttime":"2133-03-17 
08:32:00","medication":"Sodium Chloride 0.9% Flush","event_txt":"Flushed","scheduletime":"2133-03-17 08:32:00","storetime":"2133-03-17 08:32:00"} -{"index":{"_index":"emar","_id":"3"}} -{"subject_id":10011352,"hadm_id":29927609,"emar_id":"10011352-423","emar_seq":423,"poe_id":"10011352-357","pharmacy_id":55273397,"enter_provider_id":"P62G22","charttime":"2133-03-17 15:49:00","medication":"Sodium Chloride 0.9% Flush","event_txt":"Flushed","scheduletime":"2133-03-17 15:49:00","storetime":"2133-03-17 15:50:00"} -{"index":{"_index":"emar","_id":"4"}} -{"subject_id":10011352,"hadm_id":29927609,"emar_id":"10011352-424","emar_seq":424,"poe_id":"10011352-366","pharmacy_id":82384346,"enter_provider_id":"P62G22","charttime":"2133-03-17 15:49:00","medication":"Sodium Chloride 0.9% Flush","event_txt":"Flushed","scheduletime":"2133-03-17 15:49:00","storetime":"2133-03-17 15:50:00"} -{"index":{"_index":"emar","_id":"5"}} -{"subject_id":10011352,"hadm_id":29927609,"emar_id":"10011352-425","emar_seq":425,"poe_id":"10011352-382","pharmacy_id":42180932,"enter_provider_id":"P62G22","charttime":"2133-03-17 15:49:00","medication":"Sodium Chloride 0.9% Flush","event_txt":"Flushed","scheduletime":"2133-03-17 15:49:00","storetime":"2133-03-17 15:50:00"} -{"index":{"_index":"emar","_id":"6"}} -{"subject_id":10011352,"hadm_id":29927609,"emar_id":"10011352-426","emar_seq":426,"poe_id":"10011352-365","enter_provider_id":"P62G22","charttime":"2133-03-17 15:54:00","medication":"Influenza Vaccine Quadrivalent","event_txt":"Not Given","scheduletime":"2133-03-17 15:54:00","storetime":"2133-03-17 15:54:00"} -{"index":{"_index":"emar","_id":"7"}} -{"subject_id":10011352,"hadm_id":29927609,"emar_id":"10011352-43","emar_seq":43,"poe_id":"10011352-43","pharmacy_id":54731632,"enter_provider_id":"P78BJU","charttime":"2133-03-01 08:35:00","medication":"FoLIC Acid","event_txt":"Administered","scheduletime":"2133-03-01 08:00:00","storetime":"2133-03-01 08:37:00"} -{"index":{"_index":"emar","_id":"8"}} 
-{"subject_id":10011352,"hadm_id":29927609,"emar_id":"10011352-44","emar_seq":44,"poe_id":"10011352-86","pharmacy_id":78272665,"enter_provider_id":"P78BJU","charttime":"2133-03-01 08:35:00","medication":"Midodrine","event_txt":"Administered","scheduletime":"2133-03-01 08:00:00","storetime":"2133-03-01 08:37:00"} -{"index":{"_index":"emar","_id":"9"}} -{"subject_id":10011352,"hadm_id":29927609,"emar_id":"10011352-45","emar_seq":45,"poe_id":"10011352-46","pharmacy_id":84497985,"enter_provider_id":"P78BJU","charttime":"2133-03-01 08:35:00","medication":"Multivitamins","event_txt":"Administered","scheduletime":"2133-03-01 08:00:00","storetime":"2133-03-01 08:37:00"} -{"index":{"_index":"emar","_id":"10"}} -{"subject_id":10011352,"hadm_id":29927609,"emar_id":"10011352-46","emar_seq":46,"poe_id":"10011352-48","pharmacy_id":47739534,"enter_provider_id":"P78BJU","charttime":"2133-03-01 08:35:00","medication":"Pantoprazole","event_txt":"Administered","scheduletime":"2133-03-01 08:00:00","storetime":"2133-03-01 08:37:00"} -{"index":{"_index":"icustays","_id":"1"}} -{"subject_id":10000032,"hadm_id":29079034,"stay_id":39553978,"first_careunit":"Medical Intensive Care Unit (MICU)","last_careunit":"Medical Intensive Care Unit (MICU)","intime":"2180-07-23 14:00:00","outtime":"2180-07-23 23:50:47","los":0.4102662} -{"index":{"_index":"icustays","_id":"2"}} -{"subject_id":10000690,"hadm_id":25860671,"stay_id":37081114,"first_careunit":"Medical Intensive Care Unit (MICU)","last_careunit":"Medical Intensive Care Unit (MICU)","intime":"2150-11-02 19:37:00","outtime":"2150-11-06 17:03:17","los":3.8932524} -{"index":{"_index":"icustays","_id":"3"}} -{"subject_id":10000980,"hadm_id":26913865,"stay_id":39765666,"first_careunit":"Medical Intensive Care Unit (MICU)","last_careunit":"Medical Intensive Care Unit (MICU)","intime":"2189-06-27 08:42:00","outtime":"2189-06-27 20:38:27","los":0.49753472} -{"index":{"_index":"icustays","_id":"4"}} 
-{"subject_id":10001217,"hadm_id":24597018,"stay_id":37067082,"first_careunit":"Surgical Intensive Care Unit (SICU)","last_careunit":"Surgical Intensive Care Unit (SICU)","intime":"2157-11-20 19:18:02","outtime":"2157-11-21 22:08:00","los":1.1180325} -{"index":{"_index":"icustays","_id":"5"}} -{"subject_id":10001217,"hadm_id":27703517,"stay_id":34592300,"first_careunit":"Surgical Intensive Care Unit (SICU)","last_careunit":"Surgical Intensive Care Unit (SICU)","intime":"2157-12-19 15:42:24","outtime":"2157-12-20 14:27:41","los":0.94811344} -{"index":{"_index":"icustays","_id":"6"}} -{"subject_id":10001725,"hadm_id":25563031,"stay_id":31205490,"first_careunit":"Medical/Surgical Intensive Care Unit (MICU/SICU)","last_careunit":"Medical/Surgical Intensive Care Unit (MICU/SICU)","intime":"2110-04-11 15:52:22","outtime":"2110-04-12 23:59:56","los":1.338588} -{"index":{"_index":"icustays","_id":"7"}} -{"subject_id":10001843,"hadm_id":26133978,"stay_id":39698942,"first_careunit":"Medical/Surgical Intensive Care Unit (MICU/SICU)","last_careunit":"Medical/Surgical Intensive Care Unit (MICU/SICU)","intime":"2134-12-05 18:50:03","outtime":"2134-12-06 14:38:26","los":0.8252662} -{"index":{"_index":"icustays","_id":"8"}} -{"subject_id":10001884,"hadm_id":26184834,"stay_id":37510196,"first_careunit":"Medical Intensive Care Unit (MICU)","last_careunit":"Medical Intensive Care Unit (MICU)","intime":"2131-01-11 04:20:05","outtime":"2131-01-20 08:27:30","los":9.171817} -{"index":{"_index":"icustays","_id":"9"}} -{"subject_id":10002013,"hadm_id":23581541,"stay_id":39060235,"first_careunit":"Cardiac Vascular Intensive Care Unit (CVICU)","last_careunit":"Cardiac Vascular Intensive Care Unit (CVICU)","intime":"2160-05-18 10:00:53","outtime":"2160-05-19 17:33:33","los":1.3143518} -{"index":{"_index":"icustays","_id":"10"}} -{"subject_id":10002114,"hadm_id":27793700,"stay_id":34672098,"first_careunit":"Coronary Care Unit (CCU)","last_careunit":"Coronary Care Unit 
(CCU)","intime":"2162-02-17 23:30:00","outtime":"2162-02-20 21:16:27","los":2.9072568} -{"index":{"_index":"patients","_id":"1"}} -{"subject_id":10000032,"gender":"F","anchor_age":52,"anchor_year":2180,"anchor_year_group":"2014 - 2016","dod":"2180-09-09"} -{"index":{"_index":"patients","_id":"2"}} -{"subject_id":10000048,"gender":"F","anchor_age":23,"anchor_year":2126,"anchor_year_group":"2008 - 2010"} -{"index":{"_index":"patients","_id":"3"}} -{"subject_id":10000058,"gender":"F","anchor_age":33,"anchor_year":2168,"anchor_year_group":"2020 - 2022"} -{"index":{"_index":"patients","_id":"4"}} -{"subject_id":10000068,"gender":"F","anchor_age":19,"anchor_year":2160,"anchor_year_group":"2008 - 2010"} -{"index":{"_index":"patients","_id":"5"}} -{"subject_id":10000084,"gender":"M","anchor_age":72,"anchor_year":2160,"anchor_year_group":"2017 - 2019","dod":"2161-02-13"} -{"index":{"_index":"patients","_id":"6"}} -{"subject_id":10000102,"gender":"F","anchor_age":27,"anchor_year":2136,"anchor_year_group":"2008 - 2010"} -{"index":{"_index":"patients","_id":"7"}} -{"subject_id":10000108,"gender":"M","anchor_age":25,"anchor_year":2163,"anchor_year_group":"2014 - 2016"} -{"index":{"_index":"patients","_id":"8"}} -{"subject_id":10000115,"gender":"M","anchor_age":24,"anchor_year":2154,"anchor_year_group":"2017 - 2019"} -{"index":{"_index":"patients","_id":"9"}} -{"subject_id":10000117,"gender":"F","anchor_age":48,"anchor_year":2174,"anchor_year_group":"2008 - 2010"} -{"index":{"_index":"patients","_id":"10"}} -{"subject_id":10000161,"gender":"M","anchor_age":60,"anchor_year":2163,"anchor_year_group":"2020 - 2022"} -{"index":{"_index":"poe","_id":"1"}} -{"poe_id":"13055950-257","poe_seq":257,"subject_id":13055950,"hadm_id":21079497,"ordertime":"2145-07-04 11:23:28","order_type":"General Care","order_subtype":"Other","transaction_type":"New","order_provider_id":"P64302","order_status":"Inactive"} -{"index":{"_index":"poe","_id":"2"}} 
-{"poe_id":"13055950-258","poe_seq":258,"subject_id":13055950,"hadm_id":21079497,"ordertime":"2145-07-05 06:09:50","order_type":"General Care","order_subtype":"Tubes/Drains","transaction_type":"D/C","discontinue_of_poe_id":"13055950-246","order_provider_id":"P64302","order_status":"Inactive"} -{"index":{"_index":"poe","_id":"3"}} -{"poe_id":"13055950-259","poe_seq":259,"subject_id":13055950,"hadm_id":21079497,"ordertime":"2145-07-05 06:09:50","order_type":"IV therapy","order_subtype":"IV fluids","transaction_type":"D/C","discontinue_of_poe_id":"13055950-240","order_provider_id":"P64302","order_status":"Inactive"} -{"index":{"_index":"poe","_id":"4"}} -{"poe_id":"13055950-268","poe_seq":268,"subject_id":13055950,"hadm_id":21079497,"ordertime":"2145-07-05 11:22:31","order_type":"Medications","transaction_type":"New","discontinued_by_poe_id":"13055950-269","order_provider_id":"P64302","order_status":"Inactive"} -{"index":{"_index":"poe","_id":"5"}} -{"poe_id":"13055950-308","poe_seq":308,"subject_id":13055950,"hadm_id":21079497,"ordertime":"2145-07-09 06:54:02","order_type":"Radiology","order_subtype":"General Xray","transaction_type":"New","order_provider_id":"P64302","order_status":"Inactive"} -{"index":{"_index":"poe","_id":"6"}} -{"poe_id":"13055950-360","poe_seq":360,"subject_id":13055950,"hadm_id":28507903,"ordertime":"2149-09-27 03:32:00","order_type":"Medications","transaction_type":"New","order_provider_id":"P758TM","order_status":"Inactive"} -{"index":{"_index":"poe","_id":"7"}} -{"poe_id":"13055950-372","poe_seq":372,"subject_id":13055950,"hadm_id":28507903,"ordertime":"2149-09-27 10:06:59","order_type":"Consults","order_subtype":"Physical Therapy","transaction_type":"New","order_provider_id":"P97CU2","order_status":"Inactive"} -{"index":{"_index":"poe","_id":"8"}} -{"poe_id":"13055950-387","poe_seq":387,"subject_id":13055950,"hadm_id":28507903,"ordertime":"2149-09-28 10:14:15","order_type":"IV therapy","order_subtype":"IV 
fluids","transaction_type":"Change","discontinue_of_poe_id":"13055950-386","discontinued_by_poe_id":"13055950-388","order_provider_id":"P85LAY","order_status":"Inactive"} -{"index":{"_index":"poe","_id":"9"}} -{"poe_id":"13055950-388","poe_seq":388,"subject_id":13055950,"hadm_id":28507903,"ordertime":"2149-09-28 10:15:49","order_type":"IV therapy","order_subtype":"IV fluids","transaction_type":"D/C","discontinue_of_poe_id":"13055950-387","order_provider_id":"P85LAY","order_status":"Inactive"} -{"index":{"_index":"poe","_id":"10"}} -{"poe_id":"13055950-397","poe_seq":397,"subject_id":13055950,"hadm_id":28507903,"ordertime":"2149-09-28 13:40:53","order_type":"Medications","transaction_type":"New","order_provider_id":"P78AKX","order_status":"Inactive"} diff --git a/helm-charts/cogstack-helm-ce/provisioning/generate_synthetic_bulk_ndjson.py b/helm-charts/cogstack-helm-ce/provisioning/generate_synthetic_bulk_ndjson.py new file mode 100644 index 0000000..46c924b --- /dev/null +++ b/helm-charts/cogstack-helm-ce/provisioning/generate_synthetic_bulk_ndjson.py @@ -0,0 +1,744 @@ +#!/usr/bin/env python3 +""" +Generate synthetic OpenSearch/Elasticsearch bulk NDJSON for CogStack demos. + +This generator produces synthetic data shaped like a small subset of the +MIMIC-IV dataset schema (MIMIC-IV Clinical Database Demo v2.2): +https://physionet.org/content/mimic-iv-demo/2.2/ + +The *schemas/field shapes* are retained for realism, but the generated content +is synthetic: it does not include any MIMIC-IV data, and it does not embed +dataset-derived enumerations/value sets. + +This writes a single .ndjson file in the bulk API format: + {"index":{"_index":"","_id":""}} + {"field": "...", ...} + +It generates N documents for each of 6 indices: + admissions, drgcodes, emar, icustays, patients, poe + +No third-party dependencies (built-in Python only). 
+""" + +from __future__ import annotations + +import argparse +import json +import random +import sys +from dataclasses import dataclass +from datetime import datetime, timedelta +from pathlib import Path +from typing import Dict, Iterable, Iterator, List, Optional, Sequence, Tuple + + +INDEX_ORDER: Tuple[str, ...] = ( + "admissions", + "drgcodes", + "emar", + "icustays", + "patients", + "poe", +) + + +# Note on value lists: +# Keep these lists generic and non-derivative. They are intended as plausible *synthetic* +# categories, not as extracted value sets from any dataset. + +# Use OMOP-like visit concepts (high level) for admissions. +ADMISSION_TYPES: Tuple[str, ...] = ( + "Inpatient Visit", + "Emergency Room Visit", + "Outpatient Visit", + "Observation Visit", +) + +# Generic sources / admitting contexts (avoid hospital-specific phrasing). +ADMISSION_LOCATIONS: Tuple[str, ...] = ( + "Home", + "Clinic", + "Emergency Department", + "Another Facility", + "Unknown", +) + +DISCHARGE_LOCATIONS: Tuple[str, ...] = ( + "Home", + "Rehabilitation Facility", + "Long-term Care Facility", + "Died", +) + +# Generic payer categories (avoid copying any particular dataset’s value sets). +INSURANCE: Tuple[str, ...] = ( + "Public", + "Private", + "Self Pay", + "Other", +) + +LANGUAGES: Tuple[str, ...] = ( + "English", + "Spanish", + "Portuguese", + "French", +) + +MARITAL_STATUS: Tuple[str, ...] = ( + "SINGLE", + "MARRIED", + "DIVORCED", + "WIDOWED", +) + +RACE: Tuple[str, ...] = ( + "Race_A", + "Race_B", + "Race_C", + "Race_D", + "Unknown", +) + +CAREUNITS: Tuple[str, ...] = ( + "Intensive Care Unit", + "Surgical ICU", + "Medical ICU", + "Step-down Unit", + "Cardiac Care Unit", +) + +ORDER_TYPES: Tuple[str, ...] 
= ( + "General Care", + "IV therapy", + "Medications", + "Radiology", + "Consults", +) + +ORDER_SUBTYPES: Dict[str, Tuple[str, ...]] = { + "General Care": ("Other", "Tubes/Drains"), + "IV therapy": ("IV fluids",), + "Radiology": ("General Xray",), + "Consults": ("Physical Therapy",), +} + +TRANSACTION_TYPES: Tuple[str, ...] = ( + "New", + "D/C", + "Change", +) + +ORDER_STATUS: Tuple[str, ...] = ( + "Inactive", + "Active", +) + +EMAR_EVENT_TXT: Tuple[str, ...] = ( + "Administered", + "Flushed", + "Not Given", +) + +MEDICATIONS: Tuple[str, ...] = ( + "Furosemide", + "Sodium Chloride 0.9% Flush", + "Influenza Vaccine Quadrivalent", + "Folic Acid", + "Midodrine", + "Multivitamins", + "Pantoprazole", + "Insulin", + "Acetaminophen", + "Heparin", +) + +DRG_TYPES: Tuple[str, ...] = ( + "APR", + "HCFA", +) + +# Avoid long, dataset-specific DRG descriptions; keep neutral synthetic labels. +DRG_DESCRIPTIONS: Tuple[str, ...] = ( + "Cardiology (general)", + "Heart failure (general)", + "Renal care (general)", + "Orthopedics (general)", + "Neurology (general)", + "Respiratory care (general)", + "Gastroenterology (general)", + "Infectious disease (general)", + "General medicine (general)", +) + + +def fmt_dt(dt: datetime) -> str: + return dt.strftime("%Y-%m-%d %H:%M:%S") + + +def fmt_dt_with_seconds(dt: datetime) -> str: + # Matches icustays `outtime` examples that include seconds. + return dt.strftime("%Y-%m-%d %H:%M:%S") + + +def choose(rng: random.Random, items: Sequence[str]) -> str: + return items[rng.randrange(len(items))] + + +def rand_upper_alnum(rng: random.Random, length: int) -> str: + alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" + return "".join(alphabet[rng.randrange(len(alphabet))] for _ in range(length)) + + +def provider_id(idx: int) -> str: + # Deterministic provider identifier (string-typed). 
+ return f"P{idx}" + + +def bounded_int(rng: random.Random, lo: int, hi: int) -> int: + return lo + rng.randrange(hi - lo + 1) + + +def maybe(rng: random.Random, probability: float) -> bool: + return rng.random() < probability + + +def rand_datetime( + rng: random.Random, + start: datetime, + end: datetime, + *, + resolution_seconds: int = 60, +) -> datetime: + if end <= start: + return start + span_seconds = int((end - start).total_seconds()) + steps = max(1, span_seconds // resolution_seconds) + offset_steps = rng.randrange(steps + 1) + return start + timedelta(seconds=offset_steps * resolution_seconds) + + +@dataclass(frozen=True) +class Patient: + subject_id: int + gender: str + anchor_age: int + anchor_year: int + anchor_year_group: str + dod: Optional[str] + + +@dataclass(frozen=True) +class Admission: + subject_id: int + hadm_id: int + admittime: datetime + dischtime: datetime + hospital_expire_flag: int + deathtime: Optional[datetime] + admission_type: str + admit_provider_id: str + admission_location: str + discharge_location: str + insurance: str + language: str + marital_status: str + race: str + edregtime: Optional[datetime] + edouttime: Optional[datetime] + + +@dataclass(frozen=True) +class IcuStay: + subject_id: int + hadm_id: int + stay_id: int + first_careunit: str + last_careunit: str + intime: datetime + outtime: datetime + los: float + + +@dataclass(frozen=True) +class PoeOrder: + poe_seq: int + subject_id: int + hadm_id: int + ordertime: datetime + order_type: str + order_subtype: Optional[str] + transaction_type: str + discontinue_of_poe_seq: Optional[int] + discontinued_by_poe_seq: Optional[int] + order_provider_id: str + order_status: str + + @property + def poe_id(self) -> str: + return f"{self.subject_id}-{self.poe_seq}" + + def poe_id_for_seq(self, seq: int) -> str: + return f"{self.subject_id}-{seq}" + + +@dataclass(frozen=True) +class EmarEvent: + emar_seq: int + subject_id: int + hadm_id: int + poe_id: str + pharmacy_id: Optional[int] + 
enter_provider_id: str + charttime: datetime + medication: str + event_txt: str + scheduletime: datetime + storetime: datetime + + @property + def emar_id(self) -> str: + return f"{self.subject_id}-{self.emar_seq}" + + +def make_patients(rng: random.Random, n: int) -> List[Patient]: + patients: List[Patient] = [] + for subject_id in range(n): + + gender = choose(rng, ("F", "M")) + anchor_age = bounded_int(rng, 18, 90) + anchor_year = bounded_int(rng, 2100, 2190) + anchor_year_group = choose( + rng, + ( + "2008 - 2010", + "2011 - 2013", + "2014 - 2016", + "2017 - 2019", + "2020 - 2022", + ), + ) + + dod: Optional[str] = None + if maybe(rng, 0.15): + dod_year = min(2199, anchor_year + bounded_int(rng, 0, 5)) + dod_dt = datetime(dod_year, bounded_int(rng, 1, 12), bounded_int(rng, 1, 28)) + dod = dod_dt.strftime("%Y-%m-%d") + + patients.append( + Patient( + subject_id=subject_id, + gender=gender, + anchor_age=anchor_age, + anchor_year=anchor_year, + anchor_year_group=anchor_year_group, + dod=dod, + ) + ) + + return patients + + +def make_admissions(rng: random.Random, patients: Sequence[Patient]) -> List[Admission]: + admissions: List[Admission] = [] + start = datetime(2110, 1, 1, 0, 0, 0) + end = datetime(2190, 12, 31, 23, 59, 0) + + for hadm_id, p in enumerate(patients): + + admittime = rand_datetime(rng, start, end, resolution_seconds=60) + stay_hours = bounded_int(rng, 6, 24 * 21) + dischtime = admittime + timedelta(hours=stay_hours) + + admission_type = choose(rng, ADMISSION_TYPES) + admission_location = choose(rng, ADMISSION_LOCATIONS) + insurance = choose(rng, INSURANCE) + language = choose(rng, LANGUAGES) + marital_status = choose(rng, MARITAL_STATUS) + race = choose(rng, RACE) + + expire = 1 if maybe(rng, 0.2) else 0 + if expire: + discharge_location = "DIED" + deathtime = dischtime + else: + discharge_location = choose(rng, tuple(x for x in DISCHARGE_LOCATIONS if x.upper() != "DIED")) + deathtime = None + + # ED times are optional in example. 
+ if maybe(rng, 0.8): + edregtime = admittime - timedelta(hours=bounded_int(rng, 0, 6)) + edouttime = admittime + timedelta(hours=bounded_int(rng, 0, 6)) + else: + edregtime = None + edouttime = None + + admissions.append( + Admission( + subject_id=p.subject_id, + hadm_id=hadm_id, + admittime=admittime, + dischtime=dischtime, + hospital_expire_flag=expire, + deathtime=deathtime, + admission_type=admission_type, + admit_provider_id=provider_id(hadm_id), + admission_location=admission_location, + discharge_location=discharge_location, + insurance=insurance, + language=language, + marital_status=marital_status, + race=race, + edregtime=edregtime, + edouttime=edouttime, + ) + ) + + return admissions + + +def make_icustays(rng: random.Random, admissions: Sequence[Admission]) -> List[IcuStay]: + icustays: List[IcuStay] = [] + for stay_id, adm in enumerate(admissions): + + first_careunit = choose(rng, CAREUNITS) + last_careunit = choose(rng, CAREUNITS) + intime = adm.admittime + timedelta(hours=bounded_int(rng, 0, 36), minutes=bounded_int(rng, 0, 59)) + max_out = min(adm.dischtime, intime + timedelta(days=bounded_int(rng, 0, 10), hours=bounded_int(rng, 1, 20))) + outtime = rand_datetime(rng, intime + timedelta(hours=1), max_out, resolution_seconds=1) + + los_days = (outtime - intime).total_seconds() / 86400.0 + icustays.append( + IcuStay( + subject_id=adm.subject_id, + hadm_id=adm.hadm_id, + stay_id=stay_id, + first_careunit=first_careunit, + last_careunit=last_careunit, + intime=intime, + outtime=outtime, + los=round(los_days, 7), + ) + ) + + return icustays + + +def make_poe_orders(rng: random.Random, admissions: Sequence[Admission]) -> List[PoeOrder]: + orders: List[PoeOrder] = [] + + n = len(admissions) + for poe_seq, adm in enumerate(admissions): + ordertime = rand_datetime(rng, adm.admittime, adm.dischtime, resolution_seconds=1) + order_type = choose(rng, ORDER_TYPES) + order_subtype = None + if order_type in ORDER_SUBTYPES and maybe(rng, 0.85): + order_subtype = 
choose(rng, ORDER_SUBTYPES[order_type]) + + transaction_type = choose(rng, TRANSACTION_TYPES) + + # Optional link fields in the example depend on transaction type. + discontinue_of_seq: Optional[int] = None + discontinued_by_seq: Optional[int] = None + if transaction_type == "D/C": + discontinue_of_seq = max(0, poe_seq - bounded_int(rng, 1, 30)) + elif transaction_type == "Change": + discontinue_of_seq = max(0, poe_seq - bounded_int(rng, 1, 30)) + discontinued_by_seq = poe_seq + 1 if poe_seq + 1 < n else None + elif transaction_type == "New" and maybe(rng, 0.15): + discontinued_by_seq = poe_seq + 1 if poe_seq + 1 < n else None + + orders.append( + PoeOrder( + poe_seq=poe_seq, + subject_id=adm.subject_id, + hadm_id=adm.hadm_id, + ordertime=ordertime, + order_type=order_type, + order_subtype=order_subtype, + transaction_type=transaction_type, + discontinue_of_poe_seq=discontinue_of_seq, + discontinued_by_poe_seq=discontinued_by_seq, + order_provider_id=provider_id(poe_seq), + order_status=choose(rng, ORDER_STATUS), + ) + ) + + return orders + + +def make_emar_events(rng: random.Random, admissions: Sequence[Admission], poe_orders: Sequence[PoeOrder]) -> List[EmarEvent]: + events: List[EmarEvent] = [] + + # For each admission, pick a POE order to reference. + poe_by_hadm: Dict[int, PoeOrder] = {o.hadm_id: o for o in poe_orders} + + for emar_seq, adm in enumerate(admissions): + poe = poe_by_hadm.get(adm.hadm_id) + if poe is None: + # Shouldn't happen with our generation strategy. 
+ poe_id = f"{adm.subject_id}-{bounded_int(rng, 1, 999)}" + else: + poe_id = poe.poe_id + + charttime = rand_datetime(rng, adm.admittime, adm.dischtime, resolution_seconds=60) + scheduletime = charttime + timedelta(minutes=bounded_int(rng, -30, 30)) + storetime = charttime + timedelta(minutes=bounded_int(rng, 0, 10)) + + event_txt = choose(rng, EMAR_EVENT_TXT) + medication = choose(rng, MEDICATIONS) + + pharmacy_id: Optional[int] = None + if maybe(rng, 0.8): + pharmacy_id = 10_000_000 + emar_seq + + events.append( + EmarEvent( + emar_seq=emar_seq, + subject_id=adm.subject_id, + hadm_id=adm.hadm_id, + poe_id=poe_id, + pharmacy_id=pharmacy_id, + enter_provider_id=provider_id(emar_seq), + charttime=charttime, + medication=medication, + event_txt=event_txt, + scheduletime=scheduletime, + storetime=storetime, + ) + ) + + return events + + +def make_drgcodes(rng: random.Random, admissions: Sequence[Admission]) -> List[dict]: + docs: List[dict] = [] + for adm in admissions: + drg_type = choose(rng, DRG_TYPES) + drg_code = bounded_int(rng, 100, 800) + description = choose(rng, DRG_DESCRIPTIONS) + + doc: Dict[str, object] = { + "subject_id": adm.subject_id, + "hadm_id": adm.hadm_id, + "drg_type": drg_type, + "drg_code": drg_code, + "description": description, + } + + # Example shows APR often has severity/mortality; HCFA often omits them. 
+ if drg_type == "APR" and maybe(rng, 0.85): + doc["drg_severity"] = bounded_int(rng, 1, 4) + doc["drg_mortality"] = bounded_int(rng, 1, 4) + + docs.append(doc) + return docs + + +def admission_doc(a: Admission) -> Dict[str, object]: + doc: Dict[str, object] = { + "subject_id": a.subject_id, + "hadm_id": a.hadm_id, + "admittime": fmt_dt(a.admittime), + "dischtime": fmt_dt(a.dischtime), + "admission_type": a.admission_type, + "admit_provider_id": a.admit_provider_id, + "admission_location": a.admission_location, + "discharge_location": a.discharge_location, + "insurance": a.insurance, + "language": a.language, + "marital_status": a.marital_status, + "race": a.race, + "hospital_expire_flag": a.hospital_expire_flag, + } + if a.deathtime is not None: + doc["deathtime"] = fmt_dt(a.deathtime) + if a.edregtime is not None: + doc["edregtime"] = fmt_dt(a.edregtime) + if a.edouttime is not None: + doc["edouttime"] = fmt_dt(a.edouttime) + return doc + + +def patient_doc(p: Patient) -> Dict[str, object]: + doc: Dict[str, object] = { + "subject_id": p.subject_id, + "gender": p.gender, + "anchor_age": p.anchor_age, + "anchor_year": p.anchor_year, + "anchor_year_group": p.anchor_year_group, + } + if p.dod is not None: + doc["dod"] = p.dod + return doc + + +def icustay_doc(s: IcuStay) -> Dict[str, object]: + return { + "subject_id": s.subject_id, + "hadm_id": s.hadm_id, + "stay_id": s.stay_id, + "first_careunit": s.first_careunit, + "last_careunit": s.last_careunit, + "intime": fmt_dt(s.intime), + "outtime": fmt_dt_with_seconds(s.outtime), + "los": s.los, + } + + +def poe_doc(o: PoeOrder) -> Dict[str, object]: + doc: Dict[str, object] = { + "poe_id": o.poe_id, + "poe_seq": o.poe_seq, + "subject_id": o.subject_id, + "hadm_id": o.hadm_id, + "ordertime": o.ordertime.strftime("%Y-%m-%d %H:%M:%S"), + "order_type": o.order_type, + "transaction_type": o.transaction_type, + "order_provider_id": o.order_provider_id, + "order_status": o.order_status, + } + if o.order_subtype is not None: + 
doc["order_subtype"] = o.order_subtype + if o.discontinue_of_poe_seq is not None: + doc["discontinue_of_poe_id"] = o.poe_id_for_seq(o.discontinue_of_poe_seq) + if o.discontinued_by_poe_seq is not None: + doc["discontinued_by_poe_id"] = o.poe_id_for_seq(o.discontinued_by_poe_seq) + return doc + + +def emar_doc(e: EmarEvent) -> Dict[str, object]: + doc: Dict[str, object] = { + "subject_id": e.subject_id, + "hadm_id": e.hadm_id, + "emar_id": e.emar_id, + "emar_seq": e.emar_seq, + "poe_id": e.poe_id, + "enter_provider_id": e.enter_provider_id, + "charttime": fmt_dt(e.charttime), + "medication": e.medication, + "event_txt": e.event_txt, + "scheduletime": fmt_dt(e.scheduletime), + "storetime": fmt_dt(e.storetime), + } + if e.pharmacy_id is not None: + doc["pharmacy_id"] = e.pharmacy_id + return doc + + +Row = Tuple[str, str, Dict[str, object]] + + +def iter_bulk_rows( + *, + admissions: Sequence[Admission], + drgcodes: Sequence[dict], + emar: Sequence[EmarEvent], + icustays: Sequence[IcuStay], + patients: Sequence[Patient], + poe: Sequence[PoeOrder], +) -> Iterator[Row]: + # Deterministic order by index, with _id 1..N per index. 
+ for i, a in enumerate(admissions, start=1): + yield ("admissions", str(i), admission_doc(a)) + for i, d in enumerate(drgcodes, start=1): + yield ("drgcodes", str(i), d) + for i, e in enumerate(emar, start=1): + yield ("emar", str(i), emar_doc(e)) + for i, s in enumerate(icustays, start=1): + yield ("icustays", str(i), icustay_doc(s)) + for i, p in enumerate(patients, start=1): + yield ("patients", str(i), patient_doc(p)) + for i, o in enumerate(poe, start=1): + yield ("poe", str(i), poe_doc(o)) + + +def write_bulk_ndjson(path: Path, rows: Iterable[Row]) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + with path.open("w", encoding="utf-8") as f: + for index_name, doc_id, doc in rows: + meta = {"index": {"_index": index_name, "_id": doc_id}} + f.write(json.dumps(meta, ensure_ascii=False) + "\n") + f.write(json.dumps(doc, ensure_ascii=False) + "\n") + + +def validate_bulk_ndjson(path: Path, expected_n: int) -> None: + # Lightweight structural validation: alternating meta/doc, correct index names, correct counts. 
+ expected_lines = 2 * (len(INDEX_ORDER) * expected_n) + index_counts: Dict[str, int] = {idx: 0 for idx in INDEX_ORDER} + + with path.open("r", encoding="utf-8") as f: + lines = f.readlines() + + if len(lines) != expected_lines: + raise SystemExit(f"Validation failed: expected {expected_lines} lines, got {len(lines)}") + + for i in range(0, len(lines), 2): + meta = json.loads(lines[i]) + doc = json.loads(lines[i + 1]) + if "index" not in meta or "_index" not in meta["index"] or "_id" not in meta["index"]: + raise SystemExit(f"Validation failed: bad meta line at {i+1}") + idx = meta["index"]["_index"] + if idx not in index_counts: + raise SystemExit(f"Validation failed: unexpected index '{idx}' at line {i+1}") + if not isinstance(doc, dict): + raise SystemExit(f"Validation failed: doc is not an object at line {i+2}") + index_counts[idx] += 1 + + for idx, count in index_counts.items(): + if count != expected_n: + raise SystemExit(f"Validation failed: index '{idx}' expected {expected_n} docs, got {count}") + + +def build_dataset(rng: random.Random, n: int) -> Tuple[List[Patient], List[Admission], List[IcuStay], List[PoeOrder], List[EmarEvent], List[dict]]: + patients = make_patients(rng, n) + admissions = make_admissions(rng, patients) + icustays = make_icustays(rng, admissions) + poe_orders = make_poe_orders(rng, admissions) + emar_events = make_emar_events(rng, admissions, poe_orders) + drg_docs = make_drgcodes(rng, admissions) + return patients, admissions, icustays, poe_orders, emar_events, drg_docs + + +def parse_args(argv: Sequence[str]) -> argparse.Namespace: + p = argparse.ArgumentParser(description="Generate synthetic bulk NDJSON for Cogstack Opensearch dashboard.") + p.add_argument("--n", type=int, required=True, help="Number of documents per index.") + p.add_argument("--seed", type=int, default=0, help="Random seed (default: 0).") + p.add_argument( + "--out", + type=Path, + default=Path("synthetic_opensearch_documents_bulk_payload.ndjson"), + help="Output 
NDJSON file path (default: synthetic_opensearch_documents_bulk_payload.ndjson).", + ) + p.add_argument("--validate", action="store_true", help="Validate output structure after writing.") + return p.parse_args(list(argv)) + + +def main(argv: Sequence[str]) -> int: + print(f"Generating synthetic data for Cogstack Opensearch dashboards") + args = parse_args(argv) + if args.n <= 0: + raise SystemExit("--n must be > 0") + + rng = random.Random(args.seed) + patients, admissions, icustays, poe_orders, emar_events, drg_docs = build_dataset(rng, args.n) + + rows = iter_bulk_rows( + admissions=admissions, + drgcodes=drg_docs, + emar=emar_events, + icustays=icustays, + patients=patients, + poe=poe_orders, + ) + write_bulk_ndjson(args.out, rows) + + if args.validate: + validate_bulk_ndjson(args.out, args.n) + + print(f"Completed synthetic data generation. File written to {args.out}") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main(sys.argv[1:])) diff --git a/helm-charts/cogstack-helm-ce/provisioning/opensearch-provisioning.sh b/helm-charts/cogstack-helm-ce/provisioning/opensearch-provisioning.sh index 8333e51..206c7cb 100644 --- a/helm-charts/cogstack-helm-ce/provisioning/opensearch-provisioning.sh +++ b/helm-charts/cogstack-helm-ce/provisioning/opensearch-provisioning.sh @@ -18,6 +18,11 @@ log() { : "${CONFIG_DIR:?CONFIG_DIR is required. }" : "${CURL_BODY_FILE:=/tmp/curl_body.$$}" + +if ! 
command -v curl >/dev/null 2>&1; then + apt-get update && apt-get install -y curl +fi + wait_for_service() { local service_name="$1" local url="$2" @@ -73,7 +78,15 @@ fi if [ "$PROVISION_OPENSEARCH_EXAMPLE_DOCUMENTS_ENABLED" = "true" ]; then wait_for_service "OpenSearch" "$OPENSEARCH_URL" "-u $OPENSEARCH_AUTH" || exit 1 - log "Creating example admissions document (bulk) - POST $OPENSEARCH_URL/_bulk" + BULK_NDJSON_FILE="/tmp/document_bulk_synth.$$_.ndjson" + log "Generating synthetic bulk documents - $BULK_NDJSON_FILE" + python3 "${CONFIG_DIR}/generate_synthetic_bulk_ndjson.py" \ + --n 1000 \ + --seed 0 \ + --out "$BULK_NDJSON_FILE" \ + --validate + + log "Posting synthetic documents (bulk) - POST $OPENSEARCH_URL/_bulk" os_status="$(curl -sS \ -o "$CURL_BODY_FILE" \ -w "%{http_code}" \ @@ -81,15 +94,16 @@ if [ "$PROVISION_OPENSEARCH_EXAMPLE_DOCUMENTS_ENABLED" = "true" ]; then -H "Content-Type: application/x-ndjson" \ -u "$OPENSEARCH_AUTH" \ -k \ - --data-binary @"${CONFIG_DIR}/document_bulk.ndjson")" + --data-binary @"$BULK_NDJSON_FILE")" if [ "$os_status" != "200" ] && [ "$os_status" != "201" ]; then - log "Failed to create example admissions document (http_status=$os_status)" + log "Failed to create synthetic example documents (http_status=$os_status)" if [ -s "$CURL_BODY_FILE" ]; then log "Response body:" sed 's/^/ /' "$CURL_BODY_FILE" fi exit 1 fi + rm -f "$BULK_NDJSON_FILE" fi if [ "$PROVISION_OPENSEARCH_DASHBOARDS_ENABLED" = "true" ]; then diff --git a/helm-charts/cogstack-helm-ce/templates/opensearch-provisioning-post-install.yaml b/helm-charts/cogstack-helm-ce/templates/opensearch-provisioning-post-install.yaml index c70dd14..f28f434 100644 --- a/helm-charts/cogstack-helm-ce/templates/opensearch-provisioning-post-install.yaml +++ b/helm-charts/cogstack-helm-ce/templates/opensearch-provisioning-post-install.yaml @@ -13,7 +13,7 @@ metadata: spec: restartPolicy: OnFailure containers: - - image: curlimages/curl + - image: python:3.14-slim #curlimages/curl name: 
opensearch-provisioning command: ["/bin/sh", "/etc/config/opensearch-provisioning-config/opensearch-provisioning.sh"] env: From e787e6d1c3b6f29033a930f82e7a2a5a0f5cca18 Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Wed, 18 Mar 2026 11:57:02 +0000 Subject: [PATCH 3/3] feat(helm): Generate synthetic data for dashboard demo usage --- .../templates/opensearch-provisioning-post-install.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/helm-charts/cogstack-helm-ce/templates/opensearch-provisioning-post-install.yaml b/helm-charts/cogstack-helm-ce/templates/opensearch-provisioning-post-install.yaml index f28f434..3be6a7e 100644 --- a/helm-charts/cogstack-helm-ce/templates/opensearch-provisioning-post-install.yaml +++ b/helm-charts/cogstack-helm-ce/templates/opensearch-provisioning-post-install.yaml @@ -13,7 +13,7 @@ metadata: spec: restartPolicy: OnFailure containers: - - image: python:3.14-slim #curlimages/curl + - image: python:3.14-slim name: opensearch-provisioning command: ["/bin/sh", "/etc/config/opensearch-provisioning-config/opensearch-provisioning.sh"] env: