Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion cogstack-cohorter/WebAPP/.dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,8 @@ client/node_modules/
client-react/node_modules/
server/node_modules/

# Data files are never baked into the image — supply them via volume mount at runtime
# Runtime-generated patient data and full production SNOMED files — these are
# either generated at startup (random mode) or supplied via a volume mount
# (production mode). They must NOT be baked into the image.
server/data/

28 changes: 11 additions & 17 deletions cogstack-cohorter/WebAPP/entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,26 +3,20 @@ set -e

DATA_DIR=/usr/src/app/server/data

# ── Step 1: extract archive if JSON data isn't already present ────────────────
if [ ! -f "$DATA_DIR/snomed_terms.json" ]; then
if [ -f "$DATA_DIR/snomed_terms_data.tar.gz" ]; then
echo "[webapp] Extracting SNOMED data archive..."
tar xzvf "$DATA_DIR/snomed_terms_data.tar.gz" -C "$DATA_DIR"
else
echo "[webapp] ERROR: No data found at $DATA_DIR." >&2
echo "[webapp] Mount a directory containing snomed_terms.json (and related files)" >&2
echo "[webapp] or snomed_terms_data.tar.gz via a Docker volume:" >&2
echo "[webapp] -v /your/data:/usr/src/app/server/data" >&2
exit 1
fi
fi
# Ensure the writable data directory exists (may be an emptyDir or PVC mount).
mkdir -p "$DATA_DIR"

# ── Step 2 (optional): generate random patient data ───────────────────────────
# Set RANDOM_DATA=true in the container environment to generate synthetic data.
if [ "${RANDOM_DATA}" = "true" ]; then
# ── Generate random patient data on first startup (random/demo mode) ──────────
if [ "${RANDOM_DATA}" = "true" ] && \
{ [ ! -f "$DATA_DIR/ptt2age.json" ] || \
[ ! -f "$DATA_DIR/ptt2sex.json" ] || \
[ ! -f "$DATA_DIR/ptt2eth.json" ] || \
[ ! -f "$DATA_DIR/ptt2dod.json" ] || \
[ ! -f "$DATA_DIR/cui2ptt_pos.jsonl" ] || \
[ ! -f "$DATA_DIR/cui2ptt_tsp.jsonl" ]; }; then
echo "[webapp] Generating random demo patient data..."
node --max-old-space-size=32768 /usr/src/app/server/gen_random_data.js
fi

# ── Step 3: start the server ──────────────────────────────────────────────────
# ── Start the server ──────────────────────────────────────────────────────────
exec node --max-old-space-size=32768 server.js

Large diffs are not rendered by default.

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion cogstack-cohorter/WebAPP/server/gen_random_data.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
// node --max-old-space-size=32768 gen_random_data.js
console.log('Generating random data')
const fs = require('fs');
const snomed_terms = require('./data/snomed_terms.json');
const snomed_terms = require('./data-example/snomed_example.json');

// Returns a random integer between min (inclusive) and max (inclusive).
function random_int(min, max) {
Expand Down
12 changes: 9 additions & 3 deletions cogstack-cohorter/WebAPP/server/server.js
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,15 @@ const NL2DSL_SERVER = process.env.NL2DSL_SERVER || "http://localhost:3002/api/co
let port = process.env.PORT || 3000;
console.log('Loading data...');

// index all the snomed concepts
const snomed_terms = require('./data/snomed_terms.json');
const cui_pt2ch = require('./data/cui_pt2ch.json');
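// In random/demo mode (RANDOM_DATA is "true", or is unset/empty and therefore
// defaults to "true") use the example subset baked into the image.
// In production mode (RANDOM_DATA set to any other value) use the full SNOMED
// files supplied via volume mount.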
const isRandomMode = (process.env.RANDOM_DATA || 'true') === 'true';
const snomed_terms = isRandomMode
? require('./data-example/snomed_example.json')
: require('./data/snomed_terms.json');
const cui_pt2ch = isRandomMode
? require('./data-example/cui_pt2ch_example.json')
: require('./data/cui_pt2ch.json');

// for admin login
const admin_pwd = process.env.PASSWORD || 'admin_pass';
Expand Down
62 changes: 43 additions & 19 deletions helm-charts/cogstack-cohorter-helm/ci/ci-values.yaml
Original file line number Diff line number Diff line change
@@ -1,36 +1,60 @@
# CI smoke-test overrides.
# An init container seeds the empty data volume with minimal stub SNOMED data
# so the webapp entrypoint can proceed and RANDOM_DATA=true can generate
# synthetic patient records without requiring a real data mount.
# Only the webapp is deployed; NL2DSL, MedCAT, and Ollama are disabled to keep
# the smoke test fast and resource-light.
webapp:
env:
RANDOM_DATA: "true"

persistence:
enabled: false

# Seed the data volume before the webapp container starts so that
# gen_random_data.js is skipped (all six generated files are already present).
initContainers:
- name: init-snomed-stub
- name: init-ci-stub
image: busybox
command:
- sh
- -c
- |
mkdir -p /data
# Minimal snomed_terms.json — a few entries covering each clinical
# category that gen_random_data.js filters on.
cat > /data/snomed_terms.json << 'EOF'
[
{"cui":"73211009","str":"Diabetes mellitus (disorder)"},
{"cui":"44054006","str":"Diabetes mellitus type 2 (disorder)"},
{"cui":"38341003","str":"Hypertensive disorder (disorder)"},
{"cui":"195967001","str":"Asthma (disorder)"},
{"cui":"271807003","str":"Eruption of skin (finding)"},
{"cui":"386661006","str":"Fever (finding)"},
{"cui":"80146002","str":"Appendectomy (procedure)"},
{"cui":"387517004","str":"Paracetamol (substance)"}
]
EOF
# cui_pt2ch.json — empty hierarchy is valid; server handles missing keys
# Minimal patient stubs so gen_random_data.js is skipped on startup.
# All six generated files must be present (entrypoint checks every one).
echo '{"0":25,"1":42,"2":67}' > /data/ptt2age.json
echo '{"0":"Male","1":"Female","2":"Male"}' > /data/ptt2sex.json
echo '{"0":"White","1":"Asian","2":"Black"}' > /data/ptt2eth.json
echo '{"0":0,"1":0,"2":0}' > /data/ptt2dod.json
echo '{"73211009":{"0":3},"386661006":{"1":1}}' > /data/cui2ptt_pos.jsonl
echo '{"73211009":{"0":1609459200},"386661006":{"1":1609459200}}' > /data/cui2ptt_tsp.jsonl
# Legacy stubs for old Docker Hub images that expect these files.
echo '[{"cui":"73211009","str":"Diabetes mellitus (disorder)"},{"cui":"386661006","str":"Fever (finding)"},{"cui":"80146002","str":"Appendectomy (procedure)"},{"cui":"387517004","str":"Paracetamol (substance)"}]' > /data/snomed_terms.json
echo '{}' > /data/cui_pt2ch.json
volumeMounts:
- name: data
mountPath: /data

# Relax probes for CI — image pull + npm start can be slow on shared runners.
livenessProbe:
httpGet:
path: /
port: http
initialDelaySeconds: 120
periodSeconds: 15
failureThreshold: 5

readinessProbe:
httpGet:
path: /
port: http
initialDelaySeconds: 60
periodSeconds: 10
failureThreshold: 6

nl2dsl:
enabled: false

medcat:
enabled: false

ollama:
enabled: false
20 changes: 14 additions & 6 deletions helm-charts/cogstack-cohorter-helm/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -114,19 +114,27 @@ webapp:
# Environment variables injected into the webapp container.
# NL2DSL_SERVER is set automatically from nl2dsl.service.port.
env:
# Set to "true" to generate synthetic patient data on first startup (demo mode).
RANDOM_DATA: "false"
# true → demo mode: bundled SNOMED data is used and synthetic patient records are generated on first start-up (only when they are not already present in the data directory).
# false → production mode: supply real MIMIC-IV shaped EHR data via data.downloadUrl or a pre-populated persistence.existingClaim.
RANDOM_DATA: "true"

# EHR data configuration (production / non-random mode).
# Ignored when RANDOM_DATA=true — the image's bundled demo data is used instead.
data:
# URL to download a data archive (tar.gz) at startup.
# The archive must contain snomed_terms.json, cui_pt2ch.json, and MIMIC-IV shaped patient data files.
# Example: "https://your-storage/ehr_data.tar.gz"
downloadUrl: ""

service:
type: ClusterIP
port: 3000

# Persistent volume for SNOMED data directory (/usr/src/app/server/data).
# Populate the PVC with snomed_terms_data.tar.gz (auto-extracted on startup)
# or the pre-extracted files (snomed_terms.json, cui_pt2ch.json, patient data).
# Persistent volume for the data directory (/usr/src/app/server/data).
# In random/demo mode this stores the generated patient records across restarts.
# In production mode this is where real EHR data lives.
persistence:
enabled: true
# Use an existing PVC instead of creating a new one.
existingClaim: ""
storageClass: ""
accessMode: ReadWriteOnce
Expand Down
Loading