Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions services/otel-gateway/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
# we use the contrib image for basicauth and other bits
#
# This image is built FROM scratch, so it doesn't include *any* tools:
#
#
# https://github.com/open-telemetry/opentelemetry-collector-releases/blob/main/distributions/otelcol/Dockerfile
#
FROM otel/opentelemetry-collector-contrib:0.110.0
FROM otel/opentelemetry-collector-contrib:0.146.0 AS otelcol-base

FROM otelcol-base AS otel-gateway

LABEL org.opencontainers.image.authors="tech@opensafely.org" \
org.opencontainers.image.url="opensafely.org" \
Expand All @@ -14,11 +16,18 @@ LABEL org.opencontainers.image.authors="tech@opensafely.org" \
# default config, can be overridden at runtime
ENV HONEYCOMB_ENDPOINT="https://api.honeycomb.io"
ENV METRICS_DATASET="jobrunner-metrics"
# log level for the collector's telemetry
ENV LOG_LEVEL="info"
# verbosity level for the debug exporter
ENV LOG_VERBOSITY="normal"

# these must be provided at runtime
# ENV HONEYCOMB_KEY
# ENV BASIC_AUTH_USER
# ENV BASIC_AUTH_PASSWORD

# app.json must be in the WORKDIR, so make the implicit /app explicit
WORKDIR /app

COPY app.json /app/app.json
COPY config.yaml /etc/otelcol-contrib/config.yaml
13 changes: 13 additions & 0 deletions services/otel-gateway/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,19 @@ dokku git:from-image otel-gateway ghcr.io/opensafely-core/otel-gateway:latest
dokku logs otel-gateway
```

## Health checks

Dokku deploy checks are configured in [`app.json`](app.json) using the collector
health endpoint at `:13133/healthz`. The file is copied into the image during
build so `dokku git:from-image` can use it.

Useful commands on dokku:

```bash
dokku checks:report otel-gateway
dokku checks:run otel-gateway
```


## Dokku app set up

Expand Down
14 changes: 14 additions & 0 deletions services/otel-gateway/app.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{
"healthchecks": {
"web": [
{
"type": "startup",
"path": "/healthz",
"port": 13133,
"wait": 2,
"timeout": 5,
"attempts": 15
}
]
}
}
17 changes: 12 additions & 5 deletions services/otel-gateway/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ extensions:
htpasswd:
inline: |
${BASIC_AUTH_USER}:${BASIC_AUTH_PASSWORD}
health_check:
endpoint: 0.0.0.0:13133
path: /healthz

receivers:
otlp:
Expand All @@ -18,8 +21,12 @@ processors:
batch:

exporters:
logging:
loglevel: "${LOG_LEVEL}"
# The debug exporter replaces the old logging exporter and takes a verbosity
# argument instead of a log level (detailed/normal/basic vs debug/info/warn).
# https://github.com/open-telemetry/opentelemetry-collector/issues/11337
# https://github.com/open-telemetry/opentelemetry-collector/tree/main/exporter/debugexporter
debug:
verbosity: "${LOG_VERBOSITY}"

otlphttp/traces:
endpoint: "${HONEYCOMB_ENDPOINT}"
Expand All @@ -39,13 +46,13 @@ service:
telemetry:
logs:
level: "${LOG_LEVEL}"
extensions: [basicauth/server]
extensions: [basicauth/server, health_check]
pipelines:
traces:
receivers: [otlp]
processors: [batch]
exporters: [otlphttp/traces, logging]
exporters: [otlphttp/traces, debug]
metrics:
receivers: [otlp]
processors: [batch]
exporters: [otlphttp/metrics, logging]
exporters: [otlphttp/metrics, debug]
39 changes: 39 additions & 0 deletions services/otel-gateway/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Compose stack used for local runs and tests: the gateway itself plus a mock
# upstream collector that stands in for Honeycomb.
services:
# the gateway under test, built from the `otel-gateway` stage of the Dockerfile
otel-gateway:
build:
context: .
target: otel-gateway
image: otel-gateway
environment:
# these three are passed through from the invoking environment; no fallback
# value is given here
BASIC_AUTH_USER: ${BASIC_AUTH_USER}
BASIC_AUTH_PASSWORD: ${BASIC_AUTH_PASSWORD}
HONEYCOMB_KEY: ${HONEYCOMB_KEY}
# these use the value after `:-` when the variable is unset or empty
HONEYCOMB_ENDPOINT: ${HONEYCOMB_ENDPOINT:-https://api.honeycomb.io}
LOG_LEVEL: ${LOG_LEVEL:-info}
LOG_VERBOSITY: ${LOG_VERBOSITY:-normal}
ports:
# otlp
- "4318:4318"
# http health check
- "13133:13133"
# make the docker host resolvable as host.docker.internal inside the container
extra_hosts:
- "host.docker.internal:host-gateway"
networks:
- otel-test-net

# stand-in for Honeycomb: built from the `otelcol-base` stage and configured by
# the bind-mounted mock-honeycomb-config.yaml
mock-honeycomb:
build:
context: .
target: otelcol-base
image: otel-gateway-mock
# run as the caller's uid/gid (1000:1000 if not exported) so the files under
# ./exported/honeycomb can be read/written by the local user
user: "${LOCAL_UID:-1000}:${LOCAL_GID:-1000}"
volumes:
- ./mock-honeycomb-config.yaml:/etc/otelcol-contrib/config.yaml:ro
- ./exported/honeycomb:/exported
ports:
# host port 4319 -> the mock's otlp http port, leaving 4318 for the gateway
- "4319:4318"
networks:
- otel-test-net

# shared network: both services join it, so the gateway can address the mock
# by service name (e.g. http://mock-honeycomb:4318)
networks:
otel-test-net:
69 changes: 36 additions & 33 deletions services/otel-gateway/justfile
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
set dotenv-load := true

export IMAGE_NAME := "otel-gateway"
export DOCKER_BUILDKIT := "1"

# when running via just, ensure we have detailed logs for debugging tests
export LOG_LEVEL := "debug"
export LOG_VERBOSITY := "detailed"

# list available commands
default:
@"{{ just_executable() }}" --list
Expand Down Expand Up @@ -94,74 +97,74 @@ fix:

# build the docker image
build: _dotenv
docker build . -t $IMAGE_NAME
docker compose build otel-gateway

# run the gateway, in the foreground by default.
run *args: _checkenv build
docker run --rm --name otel-gateway \
-e BASIC_AUTH_USER=$BASIC_AUTH_USER \
-e BASIC_AUTH_PASSWORD=$BASIC_AUTH_PASSWORD \
-e HONEYCOMB_KEY \
--network=otel-test-net \
{{ args }} {{ IMAGE_NAME }}
#!/bin/bash
set -euo pipefail

docker compose up --force-recreate --no-deps {{ args }} otel-gateway

# run integration test. You will need a HONEYCOMB_KEY set in the environment
test-integration: _checkenv
#!/bin/bash
set -euo pipefail

{{ just_executable() }} run -d -e LOG_LEVEL=debug -p 4318:4318
trap 'docker compose stop otel-gateway >/dev/null 2>&1 || true' EXIT

docker compose up -d --build --force-recreate --no-deps otel-gateway
docker compose ps --status running --services | grep -qx otel-gateway || { docker compose logs otel-gateway; exit 1; }
{{ just_executable() }} _wait_for_health otel-gateway "http://127.0.0.1:13133/healthz"
{{ just_executable() }} run-python tests.py
echo "Data sent to honeycomb"
echo "https://ui.honeycomb.io/bennett-institute-for-applied-data-science/environments/development/datasets/otel-gateway-tests?query=%7B%22time_range%22%3A600%2C%22granularity%22%3A0%2C%22breakdowns%22%3A%5B%5D%2C%22calculations%22%3A%5B%5D%2C%22orders%22%3A%5B%5D%2C%22havings%22%3A%5B%5D%2C%22limit%22%3A100%7D"

# wait for `url` to respond successfully; if it never does, print the compose logs for `service` and fail
_wait_for_health service url:
#!/bin/bash
set -euo pipefail

# Poll the health endpoint with curl's built-in retry: up to 20 retries, 1s
# apart, retrying on connection refused (--retry-connrefused) and on any other
# error (--retry-all-errors). -f makes HTTP error statuses count as failures.
# All output is discarded; only curl's exit status is used.
if ! curl -fsS --retry 20 --retry-delay 1 --retry-connrefused --retry-all-errors "{{ url }}" >/dev/null 2>&1; then
echo "{{ service }} did not become healthy in time: {{ url }}"
# show the service's logs to help diagnose why it never became healthy
docker compose logs "{{ service }}"
exit 1
fi

_mock_honeycomb_start:
#!/bin/bash
set -euo pipefail

mkdir -p exported/honeycomb

# run a different instance of a collector as a test endpoint
docker network create otel-test-net 2>/dev/null || true
docker run --rm -d -p 4319:4318 \
--name mock-honeycomb -u "$(id -u):$(id -g)" \
-v $PWD/mock-honeycomb-config.yaml:/etc/otelcol-contrib/config.yaml \
-v $PWD/exported/honeycomb:/exported \
--network=otel-test-net \
otel/opentelemetry-collector-contrib:0.62.1
test "$(docker inspect mock-honeycomb -f '{{{{.State.Status}}')" == "running" || { docker logs mock-honeycomb; exit 1; }
# we need these so we can read/write the files as the right user
export LOCAL_UID="$(id -u)"
export LOCAL_GID="$(id -g)"

# point otel-gateway at the mock instance.
export HONEYCOMB_ENDPOINT="http://mock-honeycomb:4318"

docker compose up -d --build --force-recreate mock-honeycomb otel-gateway
docker compose ps --status running --services | grep -qx mock-honeycomb || { docker compose logs mock-honeycomb; exit 1; }
docker compose ps --status running --services | grep -qx otel-gateway || { docker compose logs otel-gateway; exit 1; }
{{ just_executable() }} _wait_for_health otel-gateway "http://127.0.0.1:13133/healthz"

_mock_honeycomb_stop:
docker stop mock-honeycomb
docker network remove otel-test-net
docker compose stop otel-gateway mock-honeycomb

# run tests against mock upstream servers
test-ci: _checkenv _mock_honeycomb_start && _mock_honeycomb_stop
#!/bin/bash
set -euo pipefail

export HONEYCOMB_ENDPOINT="http://mock-honeycomb:4318"

# run otel-gateway pointing at the test endpoint
{{ just_executable() }} run -d \
-e HONEYCOMB_ENDPOINT \
-e LOG_LEVEL=debug \
-p 4318:4318 \
--add-host=host.docker.internal:host-gateway

test "$(docker inspect otel-gateway -f '{{{{.State.Status}}')" == "running" || { docker logs otel-gateway; exit 1; }
{{ just_executable() }} run-python -m pytest tests.py

docker stop otel-gateway

# run a python script in the correct environment
run-python *args: _checkenv
#!/bin/bash
set -euo pipefail

TOKEN=$(echo -n "$BASIC_AUTH_USER:$BASIC_AUTH_PASSWORD" | base64)


export OTEL_EXPORTER_OTLP_ENDPOINT="http://127.0.0.1:4318"
export OTEL_EXPORTER_OTLP_HEADERS="Authorization=Basic%20$TOKEN"
export OTEL_SERVICE_NAME="otel-gateway-tests"
Expand Down
9 changes: 5 additions & 4 deletions services/otel-gateway/mock-honeycomb-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,14 @@ receivers:
otlp:
protocols:
http:
endpoint: 0.0.0.0:4318

processors:
batch:

exporters:
logging:
logLevel: debug
debug:
verbosity: detailed
file/traces:
path: /exported/traces.json
file/metrics:
Expand All @@ -22,8 +23,8 @@ service:
traces:
receivers: [otlp]
processors: [batch]
exporters: [file/traces, logging]
exporters: [file/traces, debug]
metrics:
receivers: [otlp]
processors: [batch]
exporters: [file/metrics, logging]
exporters: [file/metrics, debug]
10 changes: 7 additions & 3 deletions services/otel-gateway/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,17 +54,21 @@ def generate_test_metric():


def get_output(path):
# wait for file to be written to, typically a few hundred ms
# Wait for the exporter to create and write the file.
timeout_count = 0
while path.exists() and path.stat().st_size == 0:
while (not path.exists()) or path.stat().st_size == 0:
time.sleep(0.01)
timeout_count = timeout_count + 1
if timeout_count > 500:
raise Exception(
"Test timed out - no output written to file after 5 seconds"
)

return json.loads(path.read_text())
# file exporter writes one JSON object per line (ndjson); read the latest.
lines = [line for line in path.read_text().splitlines() if line.strip()]
if not lines:
raise Exception("Test timed out - output file was empty")
return json.loads(lines[-1])


def service_name_helper(resource_attributes):
Expand Down