diff --git a/components/schemas/common/Capability.yml b/components/schemas/common/Capability.yml
index a2ba9ceb..4b39fd6e 100644
--- a/components/schemas/common/Capability.yml
+++ b/components/schemas/common/Capability.yml
@@ -83,8 +83,6 @@ enum:
   - servers-provision
   - servers-manage
   - servers-view
-  - san-manage
-  - san-view
 
   # Monitor
   - monitor-manage
diff --git a/components/schemas/infrastructure/servers/telemetry/ServerTelemetryDeviceStats.yml b/components/schemas/infrastructure/servers/telemetry/ServerTelemetryDeviceStats.yml
new file mode 100644
index 00000000..dc6f2493
--- /dev/null
+++ b/components/schemas/infrastructure/servers/telemetry/ServerTelemetryDeviceStats.yml
@@ -0,0 +1,105 @@
+type: object
+title: ServerTelemetryDeviceStats
+description: Network interface counters for a single device, as reported by the kernel (/proc/net/dev). All counters are cumulative since boot.
+required:
+  - name
+  - rx_bytes
+  - rx_packets
+  - rx_errors
+  - rx_drop
+  - rx_fifo
+  - rx_frame
+  - rx_compressed
+  - rx_multicast
+  - tx_bytes
+  - tx_packets
+  - tx_errors
+  - tx_drop
+  - tx_fifo
+  - tx_colls
+  - tx_carrier
+  - tx_compressed
+properties:
+  name:
+    type: string
+    description: The name of the network interface (e.g. "eth0").
+  rx_bytes:
+    type: integer
+    format: int64
+    minimum: 0
+    description: Total bytes received.
+  rx_packets:
+    type: integer
+    format: int64
+    minimum: 0
+    description: Total packets received.
+  rx_errors:
+    type: integer
+    format: int64
+    minimum: 0
+    description: Total receive errors.
+  rx_drop:
+    type: integer
+    format: int64
+    minimum: 0
+    description: Total received packets dropped.
+  rx_fifo:
+    type: integer
+    format: int64
+    minimum: 0
+    description: Total receive FIFO buffer errors.
+  rx_frame:
+    type: integer
+    format: int64
+    minimum: 0
+    description: Total receive framing errors.
+  rx_compressed:
+    type: integer
+    format: int64
+    minimum: 0
+    description: Total compressed packets received.
+  rx_multicast:
+    type: integer
+    format: int64
+    minimum: 0
+    description: Total multicast packets received.
+  tx_bytes:
+    type: integer
+    format: int64
+    minimum: 0
+    description: Total bytes transmitted.
+  tx_packets:
+    type: integer
+    format: int64
+    minimum: 0
+    description: Total packets transmitted.
+  tx_errors:
+    type: integer
+    format: int64
+    minimum: 0
+    description: Total transmit errors.
+  tx_drop:
+    type: integer
+    format: int64
+    minimum: 0
+    description: Total transmitted packets dropped.
+  tx_fifo:
+    type: integer
+    format: int64
+    minimum: 0
+    description: Total transmit FIFO buffer errors.
+  tx_colls:
+    type: integer
+    format: int64
+    minimum: 0
+    description: Total collisions detected on the interface.
+  tx_carrier:
+    type: integer
+    format: int64
+    minimum: 0
+    description: Total transmit carrier losses.
+  tx_compressed:
+    type: integer
+    format: int64
+    minimum: 0
+    description: Total compressed packets transmitted.
diff --git a/components/schemas/infrastructure/servers/telemetry/ServerTelemetryDisk.yml b/components/schemas/infrastructure/servers/telemetry/ServerTelemetryDisk.yml
new file mode 100644
index 00000000..5977ac7e
--- /dev/null
+++ b/components/schemas/infrastructure/servers/telemetry/ServerTelemetryDisk.yml
@@ -0,0 +1,13 @@
+type: object
+title: ServerTelemetryDisk
+description: Disk telemetry for the server.
+required:
+  - devices
+properties:
+  devices:
+    type:
+      - object
+      - "null"
+    description: Per-device disk telemetry, keyed by device name.
+    additionalProperties:
+      $ref: ./ServerTelemetryDiskDevice.yml
diff --git a/components/schemas/infrastructure/servers/telemetry/ServerTelemetryDiskDevice.yml b/components/schemas/infrastructure/servers/telemetry/ServerTelemetryDiskDevice.yml
new file mode 100644
index 00000000..3944054c
--- /dev/null
+++ b/components/schemas/infrastructure/servers/telemetry/ServerTelemetryDiskDevice.yml
@@ -0,0 +1,20 @@
+type: object
+title: ServerTelemetryDiskDevice
+description: Telemetry for a single disk device on the server.
+required:
+  - name
+  - io
+  - usage
+properties:
+  name:
+    type: string
+    description: The name of the disk device (e.g. "vda").
+  mount:
+    type: string
+    description: The filesystem mount point for the device.
+  io:
+    $ref: ./ServerTelemetryDiskIo.yml
+    description: Block-device I/O counters for the device.
+  usage:
+    $ref: ./ServerTelemetryDiskUsage.yml
+    description: Filesystem usage for the device.
diff --git a/components/schemas/infrastructure/servers/telemetry/ServerTelemetryDiskIo.yml b/components/schemas/infrastructure/servers/telemetry/ServerTelemetryDiskIo.yml
new file mode 100644
index 00000000..7d70835c
--- /dev/null
+++ b/components/schemas/infrastructure/servers/telemetry/ServerTelemetryDiskIo.yml
@@ -0,0 +1,53 @@
+type: object
+title: ServerTelemetryDiskIo
+description: Block-device I/O counters for a single disk device, as reported by the kernel (/proc/diskstats). Counters are cumulative since boot, except io_in_progress, which is an instantaneous gauge.
+required:
+  - read_requests
+  - read_sectors
+  - read_time_ms
+  - write_requests
+  - write_sectors
+  - write_time_ms
+  - io_in_progress
+  - io_time_ms
+properties:
+  read_requests:
+    type: integer
+    format: int64
+    minimum: 0
+    description: Total number of completed read requests.
+  read_sectors:
+    type: integer
+    format: int64
+    minimum: 0
+    description: Total number of sectors read. Multiply by 512 for bytes.
+  read_time_ms:
+    type: integer
+    format: int64
+    minimum: 0
+    description: Total time spent servicing read requests, in milliseconds.
+  write_requests:
+    type: integer
+    format: int64
+    minimum: 0
+    description: Total number of completed write requests.
+  write_sectors:
+    type: integer
+    format: int64
+    minimum: 0
+    description: Total number of sectors written. Multiply by 512 for bytes.
+  write_time_ms:
+    type: integer
+    format: int64
+    minimum: 0
+    description: Total time spent servicing write requests, in milliseconds.
+  io_in_progress:
+    type: integer
+    format: int64
+    minimum: 0
+    description: Number of I/O requests currently in progress. This is an instantaneous gauge, not a cumulative counter, and may return to zero.
+  io_time_ms:
+    type: integer
+    format: int64
+    minimum: 0
+    description: Total time spent performing I/O, in milliseconds.
diff --git a/components/schemas/infrastructure/servers/telemetry/ServerTelemetryDiskUsage.yml b/components/schemas/infrastructure/servers/telemetry/ServerTelemetryDiskUsage.yml
new file mode 100644
index 00000000..2e4497eb
--- /dev/null
+++ b/components/schemas/infrastructure/servers/telemetry/ServerTelemetryDiskUsage.yml
@@ -0,0 +1,30 @@
+type: object
+title: ServerTelemetryDiskUsage
+description: Filesystem usage for a single disk device. Reported as zero for devices without a mounted filesystem.
+required:
+  - total_bytes
+  - free_bytes
+  - available_bytes
+  - used_percent
+properties:
+  total_bytes:
+    type: integer
+    format: int64
+    minimum: 0
+    description: Total size of the filesystem, in bytes.
+  free_bytes:
+    type: integer
+    format: int64
+    minimum: 0
+    description: Free space on the filesystem, in bytes.
+  available_bytes:
+    type: integer
+    format: int64
+    minimum: 0
+    description: Free space accessible to unprivileged users, in bytes.
+  used_percent:
+    type: number
+    format: double
+    minimum: 0
+    maximum: 100
+    description: Percentage of the filesystem currently in use, from 0 to 100.
diff --git a/components/schemas/infrastructure/servers/telemetry/ServerTelemetryLoadAvg.yml b/components/schemas/infrastructure/servers/telemetry/ServerTelemetryLoadAvg.yml
new file mode 100644
index 00000000..0ec6b850
--- /dev/null
+++ b/components/schemas/infrastructure/servers/telemetry/ServerTelemetryLoadAvg.yml
@@ -0,0 +1,23 @@
+type: object
+title: ServerTelemetryLoadAvg
+description: System load averages over the last 1, 5, and 15 minutes.
+required:
+  - last_1_min
+  - last_5_min
+  - last_15_min
+properties:
+  last_1_min:
+    type: number
+    format: double
+    minimum: 0
+    description: The system load average over the last 1 minute.
+  last_5_min:
+    type: number
+    format: double
+    minimum: 0
+    description: The system load average over the last 5 minutes.
+  last_15_min:
+    type: number
+    format: double
+    minimum: 0
+    description: The system load average over the last 15 minutes.
diff --git a/components/schemas/infrastructure/servers/telemetry/ServerTelemetryNetwork.yml b/components/schemas/infrastructure/servers/telemetry/ServerTelemetryNetwork.yml
new file mode 100644
index 00000000..a85e2f39
--- /dev/null
+++ b/components/schemas/infrastructure/servers/telemetry/ServerTelemetryNetwork.yml
@@ -0,0 +1,13 @@
+type: object
+title: ServerTelemetryNetwork
+description: Network telemetry for the server.
+required:
+  - devices
+properties:
+  devices:
+    type:
+      - object
+      - "null"
+    description: Per-interface network counters, keyed by interface name.
+    additionalProperties:
+      $ref: ./ServerTelemetryDeviceStats.yml
diff --git a/components/schemas/infrastructure/servers/telemetry/ServerTelemetryProcess.yml b/components/schemas/infrastructure/servers/telemetry/ServerTelemetryProcess.yml
new file mode 100644
index 00000000..702c5a57
--- /dev/null
+++ b/components/schemas/infrastructure/servers/telemetry/ServerTelemetryProcess.yml
@@ -0,0 +1,27 @@
+type: object
+title: ServerTelemetryProcess
+description: Telemetry for a single running process on the server.
+required:
+  - pid
+  - cmdline
+  - fds
+  - stat
+  - io
+properties:
+  pid:
+    type: integer
+    format: int64
+    minimum: 0
+    description: The process ID.
+  cmdline:
+    type: string
+    description: The full command line the process was started with.
+  fds:
+    $ref: ./ServerTelemetryProcessFds.yml
+    description: A breakdown of the file descriptors the process holds open.
+  stat:
+    $ref: ./ServerTelemetryProcessStats.yml
+    description: Scheduling and memory statistics for the process.
+  io:
+    $ref: ./ServerTelemetryProcessIo.yml
+    description: I/O accounting for the process.
diff --git a/components/schemas/infrastructure/servers/telemetry/ServerTelemetryProcessFds.yml b/components/schemas/infrastructure/servers/telemetry/ServerTelemetryProcessFds.yml
new file mode 100644
index 00000000..0fd357f1
--- /dev/null
+++ b/components/schemas/infrastructure/servers/telemetry/ServerTelemetryProcessFds.yml
@@ -0,0 +1,35 @@
+type: object
+title: ServerTelemetryProcessFds
+description: A breakdown of the file descriptors held open by a process.
+required:
+  - total
+  - files
+  - sockets
+  - pipes
+  - other
+properties:
+  total:
+    type: integer
+    format: int64
+    minimum: 0
+    description: Total number of open file descriptors.
+  files:
+    type: integer
+    format: int64
+    minimum: 0
+    description: Number of descriptors referring to regular files.
+  sockets:
+    type: integer
+    format: int64
+    minimum: 0
+    description: Number of descriptors referring to sockets.
+  pipes:
+    type: integer
+    format: int64
+    minimum: 0
+    description: Number of descriptors referring to pipes.
+  other:
+    type: integer
+    format: int64
+    minimum: 0
+    description: Number of descriptors of other types (e.g. epoll, eventfd, anonymous inodes).
diff --git a/components/schemas/infrastructure/servers/telemetry/ServerTelemetryProcessIo.yml b/components/schemas/infrastructure/servers/telemetry/ServerTelemetryProcessIo.yml
new file mode 100644
index 00000000..973c44eb
--- /dev/null
+++ b/components/schemas/infrastructure/servers/telemetry/ServerTelemetryProcessIo.yml
@@ -0,0 +1,47 @@
+type: object
+title: ServerTelemetryProcessIo
+description: I/O accounting for a process, as reported by the kernel (/proc/[pid]/io). All counters are cumulative over the life of the process.
+required:
+  - rchar
+  - wchar
+  - syscr
+  - syscw
+  - read_bytes
+  - write_bytes
+  - cancelled_write_bytes
+properties:
+  rchar:
+    type: integer
+    format: int64
+    minimum: 0
+    description: Bytes read by the process via read-like syscalls, whether or not they hit storage.
+  wchar:
+    type: integer
+    format: int64
+    minimum: 0
+    description: Bytes written by the process via write-like syscalls, whether or not they hit storage.
+  syscr:
+    type: integer
+    format: int64
+    minimum: 0
+    description: Number of read syscalls issued.
+  syscw:
+    type: integer
+    format: int64
+    minimum: 0
+    description: Number of write syscalls issued.
+  read_bytes:
+    type: integer
+    format: int64
+    minimum: 0
+    description: Bytes actually fetched from the storage layer.
+  write_bytes:
+    type: integer
+    format: int64
+    minimum: 0
+    description: Bytes actually sent to the storage layer.
+  cancelled_write_bytes:
+    type: integer
+    format: int64
+    minimum: 0
+    description: Bytes written but later truncated or cancelled before reaching storage.
diff --git a/components/schemas/infrastructure/servers/telemetry/ServerTelemetryProcessStats.yml b/components/schemas/infrastructure/servers/telemetry/ServerTelemetryProcessStats.yml
new file mode 100644
index 00000000..ffe97528
--- /dev/null
+++ b/components/schemas/infrastructure/servers/telemetry/ServerTelemetryProcessStats.yml
@@ -0,0 +1,297 @@
+type: object
+title: ServerTelemetryProcessStats
+description: Per-process scheduling and memory statistics, as reported by the kernel (/proc/[pid]/stat). Time fields are expressed in clock ticks unless noted otherwise.
+required:
+  - pid
+  - comm
+  - state
+  - ppid
+  - pgrp
+  - session
+  - tty_nr
+  - tpgid
+  - flags
+  - minflt
+  - cminflt
+  - majflt
+  - cmajflt
+  - utime
+  - stime
+  - cutime
+  - cstime
+  - priority
+  - nice
+  - num_threads
+  - itrealvalue
+  - starttime
+  - vsize
+  - rss
+  - rsslim
+  - startcode
+  - endcode
+  - startstack
+  - kstkesp
+  - kstkeip
+  - signal
+  - blocked
+  - sigignore
+  - sigcatch
+  - wchan
+  - nswap
+  - cnswap
+  - exit_signal
+  - processor
+  - rt_priority
+  - policy
+  - delayacct_blkio_ticks
+  - guest_time
+  - cguest_time
+  - start_data
+  - end_data
+  - start_brk
+  - arg_start
+  - arg_end
+  - env_start
+  - env_end
+  - exit_code
+properties:
+  pid:
+    type: integer
+    format: int64
+    minimum: 0
+    description: The process ID.
+  comm:
+    type: string
+    description: The executable filename, as reported by the kernel.
+  state:
+    type: string
+    description: The process state code (e.g. R running, S sleeping, D uninterruptible sleep, Z zombie, T stopped).
+  ppid:
+    type: integer
+    format: int64
+    description: The parent process ID.
+  pgrp:
+    type: integer
+    format: int64
+    description: The process group ID.
+  session:
+    type: integer
+    format: int64
+    description: The session ID.
+  tty_nr:
+    type: integer
+    format: int64
+    description: The controlling terminal device number.
+  tpgid:
+    type: integer
+    format: int64
+    description: The foreground process group ID of the controlling terminal.
+  flags:
+    type: integer
+    format: int64
+    minimum: 0
+    description: The kernel flags word for the process.
+  minflt:
+    type: integer
+    format: int64
+    minimum: 0
+    description: Minor faults that did not require loading a page from disk.
+  cminflt:
+    type: integer
+    format: int64
+    minimum: 0
+    description: Minor faults of the process's waited-for children.
+  majflt:
+    type: integer
+    format: int64
+    minimum: 0
+    description: Major faults that required loading a page from disk.
+  cmajflt:
+    type: integer
+    format: int64
+    minimum: 0
+    description: Major faults of the process's waited-for children.
+  utime:
+    type: integer
+    format: int64
+    minimum: 0
+    description: Time scheduled in user mode, in clock ticks.
+  stime:
+    type: integer
+    format: int64
+    minimum: 0
+    description: Time scheduled in kernel mode, in clock ticks.
+  cutime:
+    type: integer
+    format: int64
+    description: Children's time scheduled in user mode, in clock ticks.
+  cstime:
+    type: integer
+    format: int64
+    description: Children's time scheduled in kernel mode, in clock ticks.
+  priority:
+    type: integer
+    format: int64
+    description: The scheduling priority.
+  nice:
+    type: integer
+    format: int64
+    description: The nice value, ranging from -20 (high priority) to 19 (low priority).
+  num_threads:
+    type: integer
+    format: int64
+    description: The number of threads in the process.
+  itrealvalue:
+    type: integer
+    format: int64
+    description: Time in jiffies before the next SIGALRM. Obsolete and always 0 on modern kernels.
+  starttime:
+    type: integer
+    format: int64
+    minimum: 0
+    description: The time the process started after boot, in clock ticks.
+  vsize:
+    type: integer
+    format: int64
+    minimum: 0
+    description: The virtual memory size, in bytes.
+  rss:
+    type: integer
+    format: int64
+    description: The resident set size, the number of pages the process has in real memory.
+  rsslim:
+    type: integer
+    format: int64
+    minimum: 0
+    description: The soft limit on the resident set size, in bytes. A very large sentinel value indicates no limit.
+  startcode:
+    type: integer
+    format: int64
+    minimum: 0
+    description: The address above which program text can run.
+  endcode:
+    type: integer
+    format: int64
+    minimum: 0
+    description: The address below which program text can run.
+  startstack:
+    type: integer
+    format: int64
+    minimum: 0
+    description: The address of the start (bottom) of the stack.
+  kstkesp:
+    type: integer
+    format: int64
+    minimum: 0
+    description: The current value of the stack pointer.
+  kstkeip:
+    type: integer
+    format: int64
+    minimum: 0
+    description: The current instruction pointer.
+  signal:
+    type: integer
+    format: int64
+    minimum: 0
+    description: Bitmap of pending signals. Obsolete; per-thread signal fields should be used instead.
+  blocked:
+    type: integer
+    format: int64
+    minimum: 0
+    description: Bitmap of blocked signals.
+  sigignore:
+    type: integer
+    format: int64
+    minimum: 0
+    description: Bitmap of ignored signals.
+  sigcatch:
+    type: integer
+    format: int64
+    minimum: 0
+    description: Bitmap of caught signals.
+  wchan:
+    type: integer
+    format: int64
+    minimum: 0
+    description: The address of the kernel function where the process is sleeping.
+  nswap:
+    type: integer
+    format: int64
+    minimum: 0
+    description: Pages swapped. Not maintained and always 0.
+  cnswap:
+    type: integer
+    format: int64
+    minimum: 0
+    description: Cumulative nswap of children. Not maintained and always 0.
+  exit_signal:
+    type: integer
+    format: int64
+    description: The signal sent to the parent when this process dies.
+  processor:
+    type: integer
+    format: int64
+    description: The CPU number the process last executed on.
+  rt_priority:
+    type: integer
+    format: int64
+    minimum: 0
+    description: The real-time scheduling priority.
+  policy:
+    type: integer
+    format: int64
+    minimum: 0
+    description: The scheduling policy.
+  delayacct_blkio_ticks:
+    type: integer
+    format: int64
+    minimum: 0
+    description: Aggregated block I/O delays, in clock ticks.
+  guest_time:
+    type: integer
+    format: int64
+    minimum: 0
+    description: Time spent running a virtual CPU for a guest, in clock ticks.
+  cguest_time:
+    type: integer
+    format: int64
+    description: Children's guest time, in clock ticks.
+  start_data:
+    type: integer
+    format: int64
+    minimum: 0
+    description: The address above which program initialized and uninitialized (bss) data are placed.
+  end_data:
+    type: integer
+    format: int64
+    minimum: 0
+    description: The address below which program initialized and uninitialized (bss) data are placed.
+  start_brk:
+    type: integer
+    format: int64
+    minimum: 0
+    description: The address above which the program heap can be expanded with brk.
+  arg_start:
+    type: integer
+    format: int64
+    minimum: 0
+    description: The address above which the program command-line arguments are placed.
+  arg_end:
+    type: integer
+    format: int64
+    minimum: 0
+    description: The address below which the program command-line arguments are placed.
+  env_start:
+    type: integer
+    format: int64
+    minimum: 0
+    description: The address above which the program environment is placed.
+  env_end:
+    type: integer
+    format: int64
+    minimum: 0
+    description: The address below which the program environment is placed.
+  exit_code:
+    type: integer
+    format: int64
+    description: The thread's exit status in the form reported by waitpid.
diff --git a/components/schemas/infrastructure/servers/telemetry/ServerTelemetryProcesses.yml b/components/schemas/infrastructure/servers/telemetry/ServerTelemetryProcesses.yml
new file mode 100644
index 00000000..a5f3ce32
--- /dev/null
+++ b/components/schemas/infrastructure/servers/telemetry/ServerTelemetryProcesses.yml
@@ -0,0 +1,13 @@
+type: object
+title: ServerTelemetryProcesses
+description: Telemetry for the processes running on the server.
+required:
+  - list
+properties:
+  list:
+    type:
+      - array
+      - "null"
+    description: The list of processes observed on the server.
+    items:
+      $ref: ./ServerTelemetryProcess.yml
diff --git a/components/schemas/infrastructure/servers/telemetry/ServerTelemetrySnapshot.yml b/components/schemas/infrastructure/servers/telemetry/ServerTelemetrySnapshot.yml
index 1807b8ef..b65d1b94 100644
--- a/components/schemas/infrastructure/servers/telemetry/ServerTelemetrySnapshot.yml
+++ b/components/schemas/infrastructure/servers/telemetry/ServerTelemetrySnapshot.yml
@@ -1,14 +1,23 @@
 type: object
 title: ServerTelemetrySnapshot
-description: A single frame of server telemetry, pushed over the server telemetry websocket.
+description: A single frame of server telemetry.
 required:
   - generated
+  - system
   - cpu
   - memory
+  - network
+  - disk
+  - processes
 properties:
   generated:
     $ref: "../../../DateTime.yml"
     description: The timestamp at which this telemetry snapshot was generated.
+  system:
+    description: General system telemetry. Null when system telemetry is unavailable.
+    oneOf:
+      - $ref: ./ServerTelemetrySystem.yml
+      - type: "null"
   cpu:
     description: CPU telemetry for this snapshot. Null when CPU telemetry is unavailable.
     oneOf:
@@ -19,3 +28,18 @@ properties:
     oneOf:
       - $ref: ./ServerTelemetryMemory.yml
       - type: "null"
+  network:
+    description: Network telemetry for this snapshot. Null when network telemetry is unavailable.
+    oneOf:
+      - $ref: ./ServerTelemetryNetwork.yml
+      - type: "null"
+  disk:
+    description: Disk telemetry for this snapshot. Null when disk telemetry is unavailable.
+    oneOf:
+      - $ref: ./ServerTelemetryDisk.yml
+      - type: "null"
+  processes:
+    description: Process telemetry for this snapshot. Null when process telemetry is unavailable.
+    oneOf:
+      - $ref: ./ServerTelemetryProcesses.yml
+      - type: "null"
diff --git a/components/schemas/infrastructure/servers/telemetry/ServerTelemetrySystem.yml b/components/schemas/infrastructure/servers/telemetry/ServerTelemetrySystem.yml
new file mode 100644
index 00000000..2d2c9f1a
--- /dev/null
+++ b/components/schemas/infrastructure/servers/telemetry/ServerTelemetrySystem.yml
@@ -0,0 +1,21 @@
+type: object
+title: ServerTelemetrySystem
+description: General system telemetry for the server.
+required:
+  - uptime_seconds
+  - idle_seconds
+  - load_average
+properties:
+  uptime_seconds:
+    type: number
+    format: double
+    minimum: 0
+    description: Total time elapsed since the server booted, in seconds.
+  idle_seconds:
+    type: number
+    format: double
+    minimum: 0
+    description: Total time all CPUs have spent idle since boot, in seconds.
+  load_average:
+    $ref: ./ServerTelemetryLoadAvg.yml
+    description: System load averages over the last 1, 5, and 15 minutes.