Skip to content
Merged
14 changes: 7 additions & 7 deletions cmd/flamegraph/flamegraph.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ const (
func init() {
Cmd.Flags().StringVar(&common.FlagInput, common.FlagInputName, "", "")
Cmd.Flags().StringSliceVar(&common.FlagFormat, common.FlagFormatName, []string{report.FormatAll}, "")
Cmd.Flags().IntVar(&flagDuration, flagDurationName, 30, "")
Cmd.Flags().IntVar(&flagDuration, flagDurationName, 0, "")
Cmd.Flags().IntVar(&flagFrequency, flagFrequencyName, 11, "")
Cmd.Flags().IntSliceVar(&flagPids, flagPidsName, nil, "")
Cmd.Flags().BoolVar(&flagNoSystemSummary, flagNoSystemSummaryName, false, "")
Expand Down Expand Up @@ -100,7 +100,7 @@ func getFlagGroups() []common.FlagGroup {
flags := []common.Flag{
{
Name: flagDurationName,
Help: "number of seconds to run the collection",
Help: "number of seconds to run the collection. If 0, the collection will run indefinitely. Ctrl+c to stop.",
},
{
Name: flagFrequencyName,
Expand Down Expand Up @@ -155,19 +155,19 @@ func validateFlags(cmd *cobra.Command, args []string) error {
return common.FlagValidationError(cmd, fmt.Sprintf("input file %s does not exist", common.FlagInput))
}
}
if flagDuration <= 0 {
return common.FlagValidationError(cmd, "duration must be greater than 0")
if flagDuration < 0 {
return common.FlagValidationError(cmd, "duration must be 0 or greater")
}
if flagFrequency <= 0 {
return common.FlagValidationError(cmd, "frequency must be greater than 0")
return common.FlagValidationError(cmd, "frequency must be 1 or greater")
}
for _, pid := range flagPids {
if pid < 0 {
return common.FlagValidationError(cmd, "PID must be greater than or equal to 0")
return common.FlagValidationError(cmd, "PID must be 0 or greater")
}
}
if flagMaxDepth < 0 {
return common.FlagValidationError(cmd, "max depth must be greater than or equal to 0")
return common.FlagValidationError(cmd, "max depth must be 0 or greater")
}
// common target flags
if err := common.ValidateTargetFlags(cmd); err != nil {
Expand Down
4 changes: 2 additions & 2 deletions cmd/metrics/metadata.go
Original file line number Diff line number Diff line change
Expand Up @@ -692,7 +692,7 @@ func getUncoreDeviceIDs(isAMDArchitecture bool, scriptOutputs map[string]script.
// getCPUInfo - reads and returns all data from /proc/cpuinfo
func getCPUInfo(t target.Target) (cpuInfo []map[string]string, err error) {
cmd := exec.Command("cat", "/proc/cpuinfo")
stdout, stderr, exitcode, err := t.RunCommand(cmd, 0, true)
stdout, stderr, exitcode, err := t.RunCommand(cmd)
if err != nil {
err = fmt.Errorf("failed to get cpuinfo: %s, %d, %v", stderr, exitcode, err)
return
Expand All @@ -717,7 +717,7 @@ func getCPUInfo(t target.Target) (cpuInfo []map[string]string, err error) {
// getLscpu - runs lscpu on the target and returns the output
func getLscpu(t target.Target) (output string, err error) {
cmd := exec.Command("lscpu")
output, stderr, exitcode, err := t.RunCommand(cmd, 0, true)
output, stderr, exitcode, err := t.RunCommand(cmd)
if err != nil || exitcode != 0 {
err = fmt.Errorf("failed to run lscpu: %s, %d, %v", stderr, exitcode, err)
return
Expand Down
6 changes: 3 additions & 3 deletions cmd/metrics/nmi_watchdog.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ func getNMIWatchdog(myTarget target.Target) (setting string, err error) {
return
}
cmd := exec.Command(sysctl, "kernel.nmi_watchdog") // #nosec G204 // nosemgrep
stdout, _, _, err := myTarget.RunCommand(cmd, 0, true)
stdout, _, _, err := myTarget.RunCommand(cmd)
if err != nil {
return
}
Expand Down Expand Up @@ -88,7 +88,7 @@ func setNMIWatchdog(myTarget target.Target, setting string, localTempDir string)
// findSysctl - gets a useable path to sysctl or error
func findSysctl(myTarget target.Target) (path string, err error) {
cmd := exec.Command("which", "sysctl")
stdout, _, _, err := myTarget.RunCommand(cmd, 0, true)
stdout, _, _, err := myTarget.RunCommand(cmd)
if err == nil {
//found it
path = strings.TrimSpace(stdout)
Expand All @@ -97,7 +97,7 @@ func findSysctl(myTarget target.Target) (path string, err error) {
// didn't find it on the path, try being specific
sbinPath := "/usr/sbin/sysctl"
cmd = exec.Command("which", sbinPath)
_, _, _, err = myTarget.RunCommand(cmd, 0, true)
_, _, _, err = myTarget.RunCommand(cmd)
if err == nil {
// found it
path = sbinPath
Expand Down
6 changes: 3 additions & 3 deletions cmd/metrics/process.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ func GetHotProcesses(myTarget target.Target, maxProcesses int, filter string) (p
}
// run ps to get list of processes sorted by cpu utilization (descending)
cmd := exec.Command("ps", "-a", "-x", "-h", "-o", "pid,ppid,comm,cmd", "--sort=-%cpu")
stdout, stderr, exitcode, err := myTarget.RunCommand(cmd, 0, true)
stdout, stderr, exitcode, err := myTarget.RunCommand(cmd)
if err != nil {
err = fmt.Errorf("failed to get hot processes: %s, %d, %v", stderr, exitcode, err)
return
Expand Down Expand Up @@ -177,7 +177,7 @@ done | sort -nr | head -n %d

func processExists(myTarget target.Target, pid string) (exists bool) {
cmd := exec.Command("ps", "-p", pid)
_, _, _, err := myTarget.RunCommand(cmd, 0, true)
_, _, _, err := myTarget.RunCommand(cmd)
if err != nil {
exists = false
return
Expand All @@ -188,7 +188,7 @@ func processExists(myTarget target.Target, pid string) (exists bool) {

func getProcess(myTarget target.Target, pid string) (process Process, err error) {
cmd := exec.Command("ps", "-q", pid, "h", "-o", "pid,ppid,comm,cmd", "ww")
stdout, stderr, exitcode, err := myTarget.RunCommand(cmd, 0, true)
stdout, stderr, exitcode, err := myTarget.RunCommand(cmd)
if err != nil {
err = fmt.Errorf("failed to get process: %s, %d, %v", stderr, exitcode, err)
return
Expand Down
5 changes: 1 addition & 4 deletions cmd/telemetry/telemetry.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ func init() {
Cmd.Flags().StringVar(&common.FlagInput, common.FlagInputName, "", "")
Cmd.Flags().BoolVar(&flagAll, flagAllName, true, "")
Cmd.Flags().StringSliceVar(&common.FlagFormat, common.FlagFormatName, []string{report.FormatAll}, "")
Cmd.Flags().IntVar(&flagDuration, flagDurationName, 30, "")
Cmd.Flags().IntVar(&flagDuration, flagDurationName, 0, "")
Cmd.Flags().IntVar(&flagInterval, flagIntervalName, 2, "")
Cmd.Flags().IntVar(&flagInstrMixPid, flagInstrMixPidName, 0, "")
Cmd.Flags().IntVar(&flagInstrMixFrequency, flagInstrMixFrequencyName, instrmixFrequencyDefaultSystemWide, "")
Expand Down Expand Up @@ -244,9 +244,6 @@ func validateFlags(cmd *cobra.Command, args []string) error {
if flagDuration < 0 {
return common.FlagValidationError(cmd, "duration must be 0 or greater")
}
if flagDuration == 0 && (cmd.Flags().Lookup(common.FlagTargetsFileName).Changed || cmd.Flags().Lookup(common.FlagTargetHostName).Changed) {
return common.FlagValidationError(cmd, "duration must be greater than 0 when collecting from a remote target")
}
if flagInstrMixFrequency < 100000 { // 100,000 instructions is the minimum frequency
return common.FlagValidationError(cmd, "instruction mix frequency must be 100,000 or greater to limit overhead")
}
Expand Down
118 changes: 98 additions & 20 deletions internal/common/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,12 @@ package common
// SPDX-License-Identifier: BSD-3-Clause

import (
"context"
"errors"
"fmt"
"log/slog"
"os"
"os/exec"
"os/signal"
"path/filepath"
"perfspect/internal/progress"
Expand All @@ -20,6 +22,7 @@ import (
"perfspect/internal/util"
"strings"
"syscall"
"time"

"slices"

Expand Down Expand Up @@ -118,22 +121,6 @@ func (rc *ReportingCommand) Run() error {
localTempDir := appContext.LocalTempDir
outputDir := appContext.OutputDir
logFilePath := appContext.LogFilePath
// handle signals
// child processes will exit when the signals are received which will
// allow this app to exit normally
sigChannel := make(chan os.Signal, 1)
signal.Notify(sigChannel, syscall.SIGINT, syscall.SIGTERM)
go func() {
sig := <-sigChannel
slog.Info("received signal", slog.String("signal", sig.String()))
// when perfspect receives ctrl-c while in the shell, the shell makes sure to propogate the
// signal to all our children. But when perfspect is run in the background or disowned and
// then receives SIGINT, e.g., from a script, we need to send the signal to our children
err := util.SignalChildren(syscall.SIGINT)
if err != nil {
slog.Error("error sending signal to children", slog.String("error", err.Error()))
}
}()
// create output directory
err := util.CreateDirectoryIfNotExists(outputDir, 0755) // #nosec G301
if err != nil {
Expand All @@ -144,8 +131,8 @@ func (rc *ReportingCommand) Run() error {
return err
}

var orderedTargetScriptOutputs []TargetScriptOutputs
var myTargets []target.Target
var orderedTargetScriptOutputs []TargetScriptOutputs
if FlagInput != "" {
var err error
orderedTargetScriptOutputs, err = outputsFromInput(rc.Tables, rc.SummaryTableName)
Expand Down Expand Up @@ -203,6 +190,8 @@ func (rc *ReportingCommand) Run() error {
for i := len(indicesToRemove) - 1; i >= 0; i-- {
myTargets = slices.Delete(myTargets, indicesToRemove[i], indicesToRemove[i]+1)
}
// set up signal handler to help with cleaning up child processes on ctrl-c/SIGINT or SIGTERM
configureSignalHandler(myTargets, multiSpinner.Status)
// collect data from targets
orderedTargetScriptOutputs, err = outputsFromTargets(rc.Cmd, myTargets, rc.Tables, rc.ScriptParams, multiSpinner.Status, localTempDir)
if err != nil {
Expand Down Expand Up @@ -299,6 +288,94 @@ func (rc *ReportingCommand) Run() error {
return nil
}

const (
	// primaryCollectionScriptPIDFileName is the name of the file, located in a
	// target's temporary directory, that holds the PID of the primary
	// collection script.
	primaryCollectionScriptPIDFileName = "primary_collection_script.pid"
	// signalCleanupCommandTimeout is the second argument passed to
	// RunCommandEx for every cleanup command.
	// NOTE(review): presumably a timeout in seconds — confirm against target.RunCommandEx.
	signalCleanupCommandTimeout = 5
	// signalCleanupTargetTimeout bounds how long the handler waits for the
	// primary collection script on a single target to exit.
	signalCleanupTargetTimeout = 10 * time.Second
	// signalCleanupPollInterval is the delay between checks for script exit.
	signalCleanupPollInterval = 500 * time.Millisecond
)

// configureSignalHandler sets up a signal handler to catch SIGINT and SIGTERM.
//
// When perfspect receives ctrl-c while in the shell, the shell propagates the
// signal to all our children. But when perfspect is run in the background or
// disowned and then receives SIGINT, e.g., from a script, we need to send the
// signal to our children.
//
// Also, when running scripts in parallel using the parallel_master.sh script,
// we need to send the signal to the parallel_master.sh script on each target so
// that it can clean up its child processes. This is because the
// parallel_master.sh script is run in its own process group and does not
// receive the signal when perfspect receives it.
//
// The handler goroutine reacts to the first signal only: it signals the primary
// collection script on every target, waits (up to signalCleanupTargetTimeout
// per target, one target after another) for those scripts to exit, and finally
// sends SIGINT to perfspect's own children. The goroutine then ends; later
// signals are not processed by it.
//
// Parameters:
//   - myTargets: The list of targets to send the signal to.
//   - statusFunc: A function to update the status of the progress indicator.
//     May be nil, in which case no status updates are issued.
func configureSignalHandler(myTargets []target.Target, statusFunc progress.MultiSpinnerUpdateFunc) {
	sigChannel := make(chan os.Signal, 1)
	signal.Notify(sigChannel, syscall.SIGINT, syscall.SIGTERM)
	go func() {
		sig := <-sigChannel
		slog.Debug("received signal", slog.String("signal", sig.String()))
		// Scripts that are run in parallel using the parallel_master.sh script
		// and a few other sequential scripts run in their own process group,
		// so the signal is sent directly to the PID recorded in each target's
		// primary_collection_script PID file.
		for _, t := range myTargets {
			if statusFunc != nil {
				_ = statusFunc(t.GetName(), "Signal received, cleaning up...")
			}
			signalPrimaryCollectionScript(t)
		}
		// now wait until all primary collection scripts have exited
		slog.Debug("waiting for primary_collection_script scripts to exit")
		for _, t := range myTargets {
			if waitForPrimaryCollectionScriptExit(t, signalCleanupTargetTimeout) {
				continue
			}
			if statusFunc != nil {
				_ = statusFunc(t.GetName(), "cleanup timeout exceeded")
			}
			slog.Warn("signal handler cleanup timeout exceeded for target", slog.String("target", t.GetName()))
		}
		// send SIGINT to perfspect's children
		if err := util.SignalChildren(syscall.SIGINT); err != nil {
			slog.Error("error sending signal to children", slog.String("error", err.Error()))
		}
	}()
}

// readPrimaryCollectionScriptPID reads the primary collection script PID file
// from the target's temporary directory.
//
// It returns the whitespace-trimmed file content, whether the read command
// exited with status 0, and any error reported by the target.
func readPrimaryCollectionScriptPID(t target.Target) (pid string, ok bool, err error) {
	pidFilePath := filepath.Join(t.GetTempDirectory(), primaryCollectionScriptPIDFileName)
	stdout, _, exitcode, err := t.RunCommandEx(exec.Command("cat", pidFilePath), signalCleanupCommandTimeout, false, true) // #nosec G204
	return strings.TrimSpace(stdout), exitcode == 0, err
}

// isNumericPID reports whether pid is a non-empty string of ASCII digits.
//
// The PID is read from a file on the target and then handed to privileged
// commands, so anything that is not a plain number is rejected up front.
func isNumericPID(pid string) bool {
	if pid == "" {
		return false
	}
	for _, r := range pid {
		if r < '0' || r > '9' {
			return false
		}
	}
	return true
}

// signalPrimaryCollectionScript sends SIGINT to the primary collection script
// on the target, if a usable PID file is present. Failures are logged and
// otherwise ignored because cleanup is best effort.
func signalPrimaryCollectionScript(t target.Target) {
	pid, ok, err := readPrimaryCollectionScriptPID(t)
	if err != nil {
		slog.Error("error retrieving target primary_collection_script PID", slog.String("target", t.GetName()), slog.String("error", err.Error()))
	}
	if !ok {
		// the PID file could not be read; there is nothing to signal
		return
	}
	if !isNumericPID(pid) {
		// never pass an empty or malformed value to "sudo kill"
		slog.Warn("ignoring invalid primary_collection_script PID", slog.String("target", t.GetName()), slog.String("pid", pid))
		return
	}
	_, _, _, err = t.RunCommandEx(exec.Command("sudo", "kill", "-SIGINT", pid), signalCleanupCommandTimeout, false, true) // #nosec G204
	if err != nil {
		slog.Error("error sending signal to target primary_collection_script", slog.String("target", t.GetName()), slog.String("error", err.Error()))
	}
}

// waitForPrimaryCollectionScriptExit polls the target until the primary
// collection script is gone or the timeout elapses.
//
// The script is considered gone when its PID file can no longer be read, the
// file does not hold a numeric PID, or "ps -p <pid>" fails. It returns true
// when the script is gone and false when the timeout was exceeded first.
func waitForPrimaryCollectionScriptExit(t target.Target, timeout time.Duration) bool {
	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()
	for {
		if ctx.Err() != nil {
			return false
		}
		pid, ok, err := readPrimaryCollectionScriptPID(t)
		if err != nil || !ok || !isNumericPID(pid) {
			// pid file doesn't exist or is unusable
			return true
		}
		// determine if the process still exists
		_, _, exitcode, err := t.RunCommandEx(exec.Command("ps", "-p", pid), signalCleanupCommandTimeout, false, true) // #nosec G204
		if err != nil || exitcode != 0 {
			return true // process no longer exists, script has exited
		}
		// pause before checking again, but stop waiting as soon as the
		// timeout expires
		select {
		case <-ctx.Done():
			return false
		case <-time.After(signalCleanupPollInterval):
		}
	}
}

// DefaultInsightsFunc returns the insights table values from the table values
func DefaultInsightsFunc(allTableValues []table.TableValues, scriptOutputs map[string]script.ScriptOutput) table.TableValues {
insightsTableValues := table.TableValues{
Expand Down Expand Up @@ -554,7 +631,8 @@ func outputsFromTargets(cmd *cobra.Command, myTargets []target.Target, tables []
scriptsToRunOnTarget = append(scriptsToRunOnTarget, script)
}
// run the selected scripts on the target
go collectOnTarget(target, scriptsToRunOnTarget, localTempDir, scriptParams["Duration"], cmd.Name() == "telemetry", channelTargetScriptOutputs, channelError, statusUpdate)
ctrlCToStop := cmd.Name() == "telemetry" || cmd.Name() == "flamegraph"
go collectOnTarget(target, scriptsToRunOnTarget, localTempDir, scriptParams["Duration"], ctrlCToStop, channelTargetScriptOutputs, channelError, statusUpdate)
}
// wait for scripts to run on all targets
var allTargetScriptOutputs []TargetScriptOutputs
Expand Down Expand Up @@ -631,10 +709,10 @@ func elevatedPrivilegesRequired(tables []table.TableDefinition) bool {
}

// collectOnTarget runs the scripts on the target and sends the results to the appropriate channels
func collectOnTarget(myTarget target.Target, scriptsToRun []script.ScriptDefinition, localTempDir string, duration string, isTelemetry bool, channelTargetScriptOutputs chan TargetScriptOutputs, channelError chan error, statusUpdate progress.MultiSpinnerUpdateFunc) {
func collectOnTarget(myTarget target.Target, scriptsToRun []script.ScriptDefinition, localTempDir string, duration string, ctrlCToStop bool, channelTargetScriptOutputs chan TargetScriptOutputs, channelError chan error, statusUpdate progress.MultiSpinnerUpdateFunc) {
// run the scripts on the target
status := "collecting data"
if isTelemetry && duration == "0" { // telemetry is the only command that uses this common code that can run indefinitely
if ctrlCToStop && duration == "0" {
status += ", press Ctrl+c to stop"
} else if duration != "0" && duration != "" {
status += fmt.Sprintf(" for %s seconds", duration)
Expand Down
12 changes: 6 additions & 6 deletions internal/common/targets.go
Original file line number Diff line number Diff line change
Expand Up @@ -495,7 +495,7 @@ func parseMountOutput(mountOutput string) ([]mountRecord, error) {
// isDirNoExec checks if the target directory is on a file system that is mounted with noexec.
func isDirNoExec(t target.Target, dir string) (bool, error) {
dfCmd := exec.Command("df", "-P", dir)
dfOutput, _, _, err := t.RunCommand(dfCmd, 0, true)
dfOutput, _, _, err := t.RunCommand(dfCmd)
if err != nil {
err = fmt.Errorf("failed to run df command: %w", err)
return false, err
Expand All @@ -509,7 +509,7 @@ func isDirNoExec(t target.Target, dir string) (bool, error) {
return false, err
}
mountCmd := exec.Command("mount")
mountOutput, _, _, err := t.RunCommand(mountCmd, 0, true)
mountOutput, _, _, err := t.RunCommand(mountCmd)
if err != nil {
err = fmt.Errorf("failed to run mount command: %w", err)
return false, err
Expand Down Expand Up @@ -553,7 +553,7 @@ func GetTargetVendor(t target.Target) (string, error) {
if vendor == "" {
cmd := exec.Command("bash", "-c", "lscpu | grep -i \"^Vendor ID:\" | awk '{print $NF}'")
var err error
vendor, _, _, err = t.RunCommand(cmd, 0, true)
vendor, _, _, err = t.RunCommand(cmd)
if err != nil {
return "", fmt.Errorf("failed to get target CPU vendor: %v", err)
}
Expand All @@ -568,7 +568,7 @@ func GetTargetFamily(t target.Target) (string, error) {
if family == "" {
cmd := exec.Command("bash", "-c", "lscpu | grep -i \"^CPU family:\" | awk '{print $NF}'")
var err error
family, _, _, err = t.RunCommand(cmd, 0, true)
family, _, _, err = t.RunCommand(cmd)
if err != nil {
return "", fmt.Errorf("failed to get target CPU family: %v", err)
}
Expand All @@ -583,7 +583,7 @@ func GetTargetModel(t target.Target) (string, error) {
if model == "" {
cmd := exec.Command("bash", "-c", "lscpu | grep -i \"^Model:\" | awk '{print $NF}'")
var err error
model, _, _, err = t.RunCommand(cmd, 0, true)
model, _, _, err = t.RunCommand(cmd)
if err != nil {
return "", fmt.Errorf("failed to get target CPU model: %v", err)
}
Expand All @@ -598,7 +598,7 @@ func GetTargetStepping(t target.Target) (string, error) {
if stepping == "" {
cmd := exec.Command("bash", "-c", "lscpu | grep -i \"^Stepping:\" | awk '{print $NF}'")
var err error
stepping, _, _, err = t.RunCommand(cmd, 0, true)
stepping, _, _, err = t.RunCommand(cmd)
if err != nil {
return "", fmt.Errorf("failed to get target CPU stepping: %v", err)
}
Expand Down
Loading