From dd84b9d64fb98746a230cd24233ff50a562c39c9 Mon Sep 17 00:00:00 2001 From: 简律纯 Date: Fri, 28 Apr 2023 01:36:44 +0800 Subject: --- cli/internal/process/child.go | 406 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 406 insertions(+) create mode 100644 cli/internal/process/child.go (limited to 'cli/internal/process/child.go') diff --git a/cli/internal/process/child.go b/cli/internal/process/child.go new file mode 100644 index 0000000..1c3e6e7 --- /dev/null +++ b/cli/internal/process/child.go @@ -0,0 +1,406 @@ +package process + +/** + * Code in this file is based on the source code at + * https://github.com/hashicorp/consul-template/tree/3ea7d99ad8eff17897e0d63dac86d74770170bb8/child/child.go + * + * Major changes include removing the ability to restart a child process, + * requiring a fully-formed exec.Cmd to be passed in, and including cmd.Dir + * in the description of a child process. + */ + +import ( + "errors" + "fmt" + "math/rand" + "os" + "os/exec" + "strings" + "sync" + "syscall" + "time" + + "github.com/hashicorp/go-hclog" +) + +func init() { + // Seed the default rand Source with current time to produce better random + // numbers used with splay + rand.Seed(time.Now().UnixNano()) +} + +var ( + // ErrMissingCommand is the error returned when no command is specified + // to run. + ErrMissingCommand = errors.New("missing command") + + // ExitCodeOK is the default OK exit code. + ExitCodeOK = 0 + + // ExitCodeError is the default error code returned when the child exits with + // an error without a more specific code. + ExitCodeError = 127 +) + +// Child is a wrapper around a child process which can be used to send signals +// and manage the processes' lifecycle. +type Child struct { + sync.RWMutex + + timeout time.Duration + + killSignal os.Signal + killTimeout time.Duration + + splay time.Duration + + // cmd is the actual child process under management. + cmd *exec.Cmd + + // exitCh is the channel where the processes exit will be returned. + exitCh chan int + + // stopLock is the mutex to lock when stopping. stopCh is the circuit breaker + // to force-terminate any waiting splays to kill the process now. stopped is + // a boolean that tells us if we have previously been stopped. + stopLock sync.RWMutex + stopCh chan struct{} + stopped bool + + // whether to set process group id or not (default on) + setpgid bool + + Label string + + logger hclog.Logger +} + +// NewInput is input to the NewChild function. +type NewInput struct { + // Cmd is the unstarted, preconfigured command to run + Cmd *exec.Cmd + + // Timeout is the maximum amount of time to allow the command to execute. If + // set to 0, the command is permitted to run infinitely. + Timeout time.Duration + + // KillSignal is the signal to send to gracefully kill this process. This + // value may be nil. + KillSignal os.Signal + + // KillTimeout is the amount of time to wait for the process to gracefully + // terminate before force-killing. + KillTimeout time.Duration + + // Splay is the maximum random amount of time to wait before sending signals. + // This option helps reduce the thundering herd problem by effectively + // sleeping for a random amount of time before sending the signal. This + // prevents multiple processes from all signaling at the same time. This value + // may be zero (which disables the splay entirely). + Splay time.Duration + + // Logger receives debug log lines about the process state and transitions + Logger hclog.Logger +} + +// New creates a new child process for management with high-level APIs for +// sending signals to the child process, restarting the child process, and +// gracefully terminating the child process. +func newChild(i NewInput) (*Child, error) { + // exec.Command prepends the command to be run to the arguments list, so + // we only need the arguments here, it will include the command itself. + label := fmt.Sprintf("(%v) %v", i.Cmd.Dir, strings.Join(i.Cmd.Args, " ")) + child := &Child{ + cmd: i.Cmd, + timeout: i.Timeout, + killSignal: i.KillSignal, + killTimeout: i.KillTimeout, + splay: i.Splay, + stopCh: make(chan struct{}, 1), + setpgid: true, + Label: label, + logger: i.Logger.Named(label), + } + + return child, nil +} + +// ExitCh returns the current exit channel for this child process. This channel +// may change if the process is restarted, so implementers must not cache this +// value. +func (c *Child) ExitCh() <-chan int { + c.RLock() + defer c.RUnlock() + return c.exitCh +} + +// Pid returns the pid of the child process. If no child process exists, 0 is +// returned. +func (c *Child) Pid() int { + c.RLock() + defer c.RUnlock() + return c.pid() +} + +// Command returns the human-formatted command with arguments. +func (c *Child) Command() string { + return c.Label +} + +// Start starts and begins execution of the child process. A buffered channel +// is returned which is where the command's exit code will be returned upon +// exit. Any errors that occur prior to starting the command will be returned +// as the second error argument, but any errors returned by the command after +// execution will be returned as a non-zero value over the exit code channel. +func (c *Child) Start() error { + // log.Printf("[INFO] (child) spawning: %s", c.Command()) + c.Lock() + defer c.Unlock() + return c.start() +} + +// Signal sends the signal to the child process, returning any errors that +// occur. +func (c *Child) Signal(s os.Signal) error { + c.logger.Debug("receiving signal %q", s.String()) + c.RLock() + defer c.RUnlock() + return c.signal(s) +} + +// Kill sends the kill signal to the child process and waits for successful +// termination. If no kill signal is defined, the process is killed with the +// most aggressive kill signal. If the process does not gracefully stop within +// the provided KillTimeout, the process is force-killed. If a splay was +// provided, this function will sleep for a random period of time between 0 and +// the provided splay value to reduce the thundering herd problem. This function +// does not return any errors because it guarantees the process will be dead by +// the return of the function call. +func (c *Child) Kill() { + c.logger.Debug("killing process") + c.Lock() + defer c.Unlock() + c.kill(false) +} + +// Stop behaves almost identical to Kill except it suppresses future processes +// from being started by this child and it prevents the killing of the child +// process from sending its value back up the exit channel. This is useful +// when doing a graceful shutdown of an application. +func (c *Child) Stop() { + c.internalStop(false) +} + +// StopImmediately behaves almost identical to Stop except it does not wait +// for any random splay if configured. This is used for performing a fast +// shutdown of consul-template and its children when a kill signal is received. +func (c *Child) StopImmediately() { + c.internalStop(true) +} + +func (c *Child) internalStop(immediately bool) { + c.Lock() + defer c.Unlock() + + c.stopLock.Lock() + defer c.stopLock.Unlock() + if c.stopped { + return + } + c.kill(immediately) + close(c.stopCh) + c.stopped = true +} + +func (c *Child) start() error { + setSetpgid(c.cmd, c.setpgid) + if err := c.cmd.Start(); err != nil { + return err + } + + // Create a new exitCh so that previously invoked commands (if any) don't + // cause us to exit, and start a goroutine to wait for that process to end. + exitCh := make(chan int, 1) + go func() { + var code int + // It's possible that kill is called before we even + // manage to get here. Make sure we still have a valid + // cmd before waiting on it. + c.RLock() + var cmd = c.cmd + c.RUnlock() + var err error + if cmd != nil { + err = cmd.Wait() + } + if err == nil { + code = ExitCodeOK + } else { + code = ExitCodeError + if exiterr, ok := err.(*exec.ExitError); ok { + if status, ok := exiterr.Sys().(syscall.WaitStatus); ok { + code = status.ExitStatus() + } + } + } + + // If the child is in the process of killing, do not send a response back + // down the exit channel. + c.stopLock.RLock() + defer c.stopLock.RUnlock() + if !c.stopped { + select { + case <-c.stopCh: + case exitCh <- code: + } + } + + close(exitCh) + }() + + c.exitCh = exitCh + + // If a timeout was given, start the timer to wait for the child to exit + if c.timeout != 0 { + select { + case code := <-exitCh: + if code != 0 { + return fmt.Errorf( + "command exited with a non-zero exit status:\n"+ + "\n"+ + " %s\n"+ + "\n"+ + "This is assumed to be a failure. Please ensure the command\n"+ + "exits with a zero exit status.", + c.Command(), + ) + } + case <-time.After(c.timeout): + // Force-kill the process + c.stopLock.Lock() + defer c.stopLock.Unlock() + if c.cmd != nil && c.cmd.Process != nil { + c.cmd.Process.Kill() + } + + return fmt.Errorf( + "command did not exit within %q:\n"+ + "\n"+ + " %s\n"+ + "\n"+ + "Commands must exit in a timely manner in order for processing to\n"+ + "continue. Consider using a process supervisor or utilizing the\n"+ + "built-in exec mode instead.", + c.timeout, + c.Command(), + ) + } + } + + return nil +} + +func (c *Child) pid() int { + if !c.running() { + return 0 + } + return c.cmd.Process.Pid +} + +func (c *Child) signal(s os.Signal) error { + if !c.running() { + return nil + } + + sig, ok := s.(syscall.Signal) + if !ok { + return fmt.Errorf("bad signal: %s", s) + } + pid := c.cmd.Process.Pid + if c.setpgid { + // kill takes negative pid to indicate that you want to use gpid + pid = -(pid) + } + // cross platform way to signal process/process group + p, err := os.FindProcess(pid) + if err != nil { + return err + } + return p.Signal(sig) +} + +// kill sends the signal to kill the process using the configured signal +// if set, else the default system signal +func (c *Child) kill(immediately bool) { + + if !c.running() { + c.logger.Debug("Kill() called but process dead; not waiting for splay.") + return + } else if immediately { + c.logger.Debug("Kill() called but performing immediate shutdown; not waiting for splay.") + } else { + c.logger.Debug("Kill(%v) called", immediately) + select { + case <-c.stopCh: + case <-c.randomSplay(): + } + } + + var exited bool + defer func() { + if !exited { + c.logger.Debug("PKill") + c.cmd.Process.Kill() + } + c.cmd = nil + }() + + if c.killSignal == nil { + return + } + + if err := c.signal(c.killSignal); err != nil { + c.logger.Debug("Kill failed: %s", err) + if processNotFoundErr(err) { + exited = true // checked in defer + } + return + } + + killCh := make(chan struct{}, 1) + go func() { + defer close(killCh) + c.cmd.Process.Wait() + }() + + select { + case <-c.stopCh: + case <-killCh: + exited = true + case <-time.After(c.killTimeout): + c.logger.Debug("timeout") + } +} + +func (c *Child) running() bool { + select { + case <-c.exitCh: + return false + default: + } + return c.cmd != nil && c.cmd.Process != nil +} + +func (c *Child) randomSplay() <-chan time.Time { + if c.splay == 0 { + return time.After(0) + } + + ns := c.splay.Nanoseconds() + offset := rand.Int63n(ns) + t := time.Duration(offset) + + c.logger.Debug("waiting %.2fs for random splay", t.Seconds()) + + return time.After(t) +} -- cgit v1.2.3-70-g09d2