buildpack/vendor/autossh/autossh.c
2020-02-14 19:40:36 +02:00

1822 lines
42 KiB
C

/*
* Start an ssh session (or tunnel) and monitor it.
* If it fails or blocks, restart it.
*
* From the example of rstunnel.
*
* Copyright (c) Carson Harding, 2002-2018.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are freely permitted.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
* THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $Id: autossh.c,v 1.91 2019/01/05 01:23:39 harding Exp $
*
*/
#include "config.h"
#include <sys/types.h>
#include <sys/time.h>
#ifndef HAVE_SOCKLEN_T
typedef int32_t socklen_t;
#endif
#include <sys/socket.h>
#include <sys/utsname.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <fcntl.h>
#include <netdb.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <signal.h>
#include <limits.h>
#include <sys/wait.h>
#include <setjmp.h>
#include <stdarg.h>
#include <syslog.h>
#include <time.h>
#include <errno.h>
#ifndef HAVE_POLL
# ifdef HAVE_SELECT
# include "fakepoll.h"
# else
# error "System lacks both select() and poll()!"
# endif
#else
# include <poll.h>
#endif
#ifndef __attribute__
# if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 8) || __STRICT_ANSI__
# define __attribute__(x)
# endif
#endif
#ifndef _PATH_DEVNULL
# define _PATH_DEVNULL "/dev/null"
#endif
#ifndef HAVE_DAEMON
# include "daemon.h"
#endif
#ifdef HAVE___PROGNAME
extern char *__progname;
#else
char *__progname;
#endif
/* RCS identification string embedded in the binary. */
const char *rcsid = "$Id: autossh.c,v 1.91 2019/01/05 01:23:39 harding Exp $";
/* Default ssh binary; may be overridden at build time or via AUTOSSH_PATH. */
#ifndef SSH_PATH
# define SSH_PATH "/usr/bin/ssh"
#endif
/* Tunable defaults. */
#define POLL_TIME 600 /* 10 minutes default */
#define GATE_TIME 30 /* 30 seconds default */
#define MAX_LIFETIME 0 /* default max lifetime of forever */
#define TIMEO_NET 15000 /* poll on accept() and io (msecs) */
#define MAX_CONN_TRIES 3 /* how many attempts */
#define MAX_START (-1) /* max # of runs; <0 == forever */
#define MAX_MESSAGE 64 /* max length of message we can add */
/* Result codes returned by ssh_wait(), ssh_watch() and ssh_run(). */
#define P_CONTINUE 0 /* continue monitoring */
#define P_RESTART 1 /* restart ssh process */
#define P_EXITOK 2 /* exit ok */
#define P_EXITERR 3 /* exit with error */
/* Bit flags for logtype. */
#define L_FILELOG 0x01 /* log to file */
#define L_SYSLOG 0x02 /* log to syslog */
#define NO_RD_SOCK -2 /* magic flag for echo: no read socket */
#define N_FAST_TRIES 5 /* try this many times fast before slowing */
/* getopt() option string: autossh's own -M/-V/-f plus the ssh options. */
#define OPTION_STRING "M:V1246ab:c:e:fgi:kl:m:no:p:qstvw:xyACD:E:F:GI:MJKL:NO:PQ:R:S:TW:XY"
int logtype = L_SYSLOG; /* default log to syslog */
int loglevel = LOG_INFO; /* default loglevel */
int syslog_perror; /* use PERROR option? */
FILE *flog; /* log file */
char *writep; /* write port as string */
char readp[16]; /* read port as string */
char *echop; /* echo port as string */
char *mhost = "127.0.0.1"; /* host in port forwards */
char *env_port; /* port spec'd in environment */
char *echo_message = ""; /* message to append to echo string */
char *pid_file_name; /* path to pid file */
int pid_file_created; /* we have created pid file */
time_t pid_start_time; /* time autossh process started */
int poll_time = POLL_TIME; /* default connection poll time */
int first_poll_time = POLL_TIME; /* initial connection poll time */
double gate_time = GATE_TIME; /* time to "make it out of the gate" */
int max_start = MAX_START; /* how many times to run (default no limit) */
double max_lifetime = MAX_LIFETIME; /* how long can the process/daemon live */
int net_timeout = TIMEO_NET; /* timeout on network data */
char *ssh_path = SSH_PATH; /* default path to ssh */
int start_count; /* # of times exec()d ssh */
time_t start_time; /* time we exec()d ssh */
#if defined(__CYGWIN__)
int ntservice; /* set some stuff for running as nt service */
#endif
/* Argument vector handed to ssh; built incrementally by add_arg(). */
int newac; /* argc, argv for ssh */
char **newav;
#define START_AV_SZ 16 /* initial number of slots allocated for newav */
int cchild; /* current child */
/* Flags written by sig_catch() and read by the monitoring code. */
volatile sig_atomic_t exit_signalled; /* signalled outside of monitor loop */
volatile sig_atomic_t restart_ssh; /* signalled to restart ssh child */
/* When non-zero, sig_catch() siglongjmp()s to jumpbuf (armed in ssh_watch()). */
volatile sig_atomic_t dolongjmp;
sigjmp_buf jumpbuf; /* jump target set by sigsetjmp() in ssh_watch() */
void usage(int code) __attribute__ ((__noreturn__));
void get_env_args(void);
void add_arg(char *s);
void strip_arg(char *arg, char ch, char *opts);
int ssh_run(int sock, char **argv);
int ssh_watch(int sock);
int ssh_wait(int options);
void ssh_kill(void);
int conn_test(int sock, char *host, char *write_port);
int conn_poll_for_accept(int sock, struct pollfd *pfd);
int conn_send_and_receive(char *rp, char *wp, size_t len,
struct pollfd *pfd, int ntopoll);
/* conn_addr() has two signatures depending on getaddrinfo() availability. */
#ifndef HAVE_ADDRINFO
void conn_addr(char *host, char *port, struct sockaddr_in *resp);
#else
void conn_addr(char *host, char *port, struct addrinfo **resp);
#endif
int conn_listen(char *host, char *port);
int conn_remote(char *host, char *port);
void grace_time(time_t last_start);
void unlink_pid_file(void);
void errlog(int level, char *fmt, ...)
__attribute__ ((__format__ (__printf__, 2, 3)));
void xerrlog(int level, char *fmt, ...)
__attribute__ ((__format__ (__printf__, 2, 3)));
void doerrlog(int level, char *fmt, va_list ap);
char *timestr(void);
void set_exit_sig_handler(void);
void set_sig_handlers(void);
void unset_sig_handlers(void);
void sig_catch(int sig);
int exceeded_lifetime(void);
unsigned int clear_alarm_timer(void);
/*
 * Print the usage synopsis and exit with status `code`.
 *
 * With code == 0 only the one-line synopsis is written, to stdout.
 * With a non-zero code the synopsis plus the full option and
 * environment-variable help is written to stderr.  Never returns.
 */
void
usage(int code)
{
	FILE	*synopsis_out = code ? stderr : stdout;

	fprintf(synopsis_out,
	    "usage: %s [-V] [-M monitor_port[:echo_port]] [-f] [SSH_OPTIONS]\n",
	    __progname);

	/* success path: synopsis only */
	if (!code)
		exit(code);

	/* autossh's own options */
	fputs("\n"
	    " -M specifies monitor port. May be overridden by"
	    " environment\n"
	    " variable AUTOSSH_PORT. 0 turns monitoring"
	    " loop off.\n"
	    " Alternatively, a port for an echo service on"
	    " the remote\n"
	    " machine may be specified. (Normally port 7.)\n"
	    " -f run in background (autossh handles this, and"
	    " does not\n"
	    " pass it to ssh.)\n"
	    " -V print autossh version and exit.\n"
	    "\n"
	    "Environment variables are:\n", stderr);

	/* the only entry that needs formatting */
	fprintf(stderr,
	    " AUTOSSH_GATETIME "
	    "- how long must an ssh session be established\n"
	    " "
	    " before we decide it really was established\n"
	    " "
	    " (in seconds). Default is %d seconds; use of -f\n"
	    " "
	    " flag sets this to 0.\n", GATE_TIME);

	fputs(" AUTOSSH_LOGFILE "
	    "- file to log to (default is to use the syslog\n"
	    " "
	    " facility)\n"
	    " AUTOSSH_LOGLEVEL "
	    "- level of log verbosity\n"
	    " AUTOSSH_MAXLIFETIME "
	    "- set the maximum time to live (seconds)\n"
	    " AUTOSSH_MAXSTART "
	    "- max times to restart (default is no limit)\n"
	    " AUTOSSH_MESSAGE "
	    "- message to append to echo string (max 64 bytes)\n", stderr);
#if defined(__CYGWIN__)
	fputs(" AUTOSSH_NTSERVICE "
	    "- tweak some things for running under cygrunsrv\n", stderr);
#endif
	fputs(" AUTOSSH_PATH "
	    "- path to ssh if not default\n"
	    " AUTOSSH_PIDFILE "
	    "- write pid to this file\n"
	    " AUTOSSH_POLL "
	    "- how often to check the connection (seconds)\n"
	    " AUTOSSH_FIRST_POLL "
	    "- time before first connection check (seconds)\n"
	    " AUTOSSH_PORT "
	    "- port to use for monitor connection\n"
	    " AUTOSSH_DEBUG "
	    "- turn logging to maximum verbosity and log to\n"
	    " "
	    " stderr\n"
	    "\n", stderr);

	exit(code);
}
/*
 * Program entry point.
 *
 * Reads settings from the environment, scans the command line for
 * autossh's own options (-M, -V, -f), validates the monitor/echo
 * ports, builds the argument vector for ssh (dropping -M and -f and
 * inserting the monitoring port forwards), optionally daemonizes,
 * opens the listening socket used by the monitor loop, writes the pid
 * file if requested, and finally hands control to ssh_run().
 *
 * Exits with status 1 if ssh_run() returns P_EXITERR, otherwise 0.
 * Argument and setup errors are reported through usage()/xerrlog().
 */
int
main(int argc, char **argv)
{
	int	i;
	int	n;
	int	ch;
	char	*s;
	int	wp, rp = 0, ep = 0;
	char	wmbuf[256], rmbuf[256];
	FILE	*pid_file;
	int	retval = 0;
	int	sock = -1;
	int	done_fwds = 0;
	int	runasdaemon = 0;
	int	sawargstop = 0;
#if defined(__CYGWIN__)
	int	sawoptionn = 0;
#endif

#ifndef HAVE___PROGNAME
	__progname = "autossh";
#endif

	/*
	 * set up options from environment
	 */
	get_env_args();

	/*
	 * We accept all ssh args, and quietly pass them on
	 * to ssh when we call it.
	 */
	while ((ch = getopt(argc, argv, OPTION_STRING)) != -1) {
		switch(ch) {
		case 'M':
			if (!env_port)
				writep = optarg;
			break;
		case 'V':
			fprintf(stdout, "%s %s\n", __progname, VER);
			exit(0);
			break;
		case 'f':
			runasdaemon = 1;
			break;
#if defined(__CYGWIN__)
		case 'N':
			sawoptionn = 1;
			break;
#endif
		case '?':
			usage(1);
			break;
		default:
			/* other options get passed to ssh */
			break;
		}
	}

	/* if we got it from the environment */
	if (env_port)
		writep = env_port;

	/*
	 * We must at least have a monitor port and a remote host.
	 */
	if (!writep || argc == optind)
		usage(1);

	if (logtype & L_SYSLOG)
		openlog(__progname, LOG_PID|syslog_perror, LOG_USER);

	/*
	 * Check for echo port ("write_port:echo_port").  The echo port
	 * must be a usable port number, i.e. in the range 1..65535.
	 */
	if ((s = strchr(writep, ':')) != NULL) {
		*s = '\0';
		echop = s + 1;
		ep = strtoul(echop, &s, 0);
		if (*echop == '\0' || *s != '\0' || ep <= 0 || ep > 65535)
			xerrlog(LOG_ERR, "invalid echo port \"%s\"", echop);
	}

	/*
	 * Check, and get the read port (write port + 1);
	 * then construct port-forwarding arguments for ssh.
	 */
	wp = strtoul(writep, &s, 0);
	if (*writep == '\0' || *s != '\0')
		xerrlog(LOG_ERR, "invalid port \"%s\"", writep);
	if (wp == 0) {
		errlog(LOG_INFO, "port set to 0, monitoring disabled");
		writep = NULL;
	}
	else if (wp > 65534 || wp < 0)
		xerrlog(LOG_ERR, "monitor port (%d) out of range", wp);
	else {
		rp = wp+1;
		/* all this for solaris; we could use asprintf() */
		(void)snprintf(readp, sizeof(readp), "%d", rp);

		/*
		 * port-forward arg strings.  snprintf() returns the
		 * length the full string would have had, so a result
		 * >= the buffer size means the output was truncated;
		 * a negative result is an output error.
		 */
		n = snprintf(wmbuf, sizeof(wmbuf), "%d:%s:%d", wp, mhost,
		    echop ? ep : wp);
		if (n < 0 || (size_t)n >= sizeof(wmbuf))
			xerrlog(LOG_ERR,
			    "overflow building forwarding string");
		if (!echop) {
			n = snprintf(rmbuf, sizeof(rmbuf), "%d:%s:%d",
			    wp, mhost, rp);
			if (n < 0 || (size_t)n >= sizeof(rmbuf))
				xerrlog(LOG_ERR,
				    "overflow building forwarding string");
		}
	}

	/*
	 * Adjust timeouts if necessary: net_timeout is first
	 * the timeout for accept and then for io, so if the
	 * poll_time is set less than 2 timeouts, the timeouts need
	 * to be adjusted to be at least 1/2. Perhaps there should
	 * be some padding here as well....
	 */
	if ((poll_time * 1000) / 2 < net_timeout) {
		net_timeout = (poll_time * 1000) / 2;
		errlog(LOG_INFO,
		    "short poll time: adjusting net timeouts to %d",
		    net_timeout);
	}

	/*
	 * Build a new arg list, skipping -f, -M and inserting
	 * port forwards.
	 */
	add_arg(ssh_path);

#if defined(__CYGWIN__)
	if (ntservice && !sawoptionn)
		add_arg("-N");
#endif

	for (i = 1; i < argc; i++) {
		/*
		 * We step past the first '--', taking it as ours
		 * (autossh's). Any further ones we pass to ssh.
		 */
		if (argv[i][0] == '-' && argv[i][1] == '-') {
			if (!sawargstop) {
				sawargstop = 1;
				continue;
			}
		}
		if (wp && env_port && !done_fwds) {
			/* port came from the environment: insert forwards first */
			add_arg("-L");
			add_arg(wmbuf);
			if (!echop) {
				add_arg("-R");
				add_arg(rmbuf);
			}
			done_fwds = 1;
		} else if (!sawargstop && argv[i][0] == '-' && argv[i][1] == 'M') {
			/* "-M port" (two args) or "-Mport" (one arg) */
			if (argv[i][2] == '\0')
				i++;
			if (wp && !done_fwds) {
				add_arg("-L");
				add_arg(wmbuf);
				if (!echop) {
					add_arg("-R");
					add_arg(rmbuf);
				}
				done_fwds = 1;
			}
			continue;
		}
		/* look for -f in option args and strip out */
		strip_arg(argv[i], 'f', OPTION_STRING);
		add_arg(argv[i]);
	}

	if (runasdaemon) {
		if (daemon(0, 0) == -1) {
			xerrlog(LOG_ERR, "run as daemon failed: %s",
			    strerror(errno));
		}
		/*
		 * If running as daemon, the user likely wants it
		 * to just run and not fail early (perhaps machines
		 * are coming up, etc.)
		 */
		gate_time = 0;
	}

	/*
	 * Only if we're doing the network monitor thing.
	 * Socket once opened stays open for listening for
	 * the duration of the program.
	 */
	if (writep) {
		if (!echop) {
			sock = conn_listen(mhost, readp);
			/* set close-on-exec */
			(void)fcntl(sock, F_SETFD, FD_CLOEXEC);
		} else
			sock = NO_RD_SOCK;
	}

	if (pid_file_name) {
		pid_file = fopen(pid_file_name, "w");
		if (!pid_file) {
			xerrlog(LOG_ERR, "cannot open pid file \"%s\": %s",
			    pid_file_name, strerror(errno));
		}
		pid_file_created = 1;
		atexit(unlink_pid_file);
		/*
		 * fprintf() signals failure with a negative return;
		 * flush in the same test so that a failed write to the
		 * underlying file is reported too.
		 */
		if (fprintf(pid_file, "%d\n", (int)getpid()) < 0 ||
		    fflush(pid_file) != 0)
			xerrlog(LOG_ERR, "write failed to pid file \"%s\": %s",
			    pid_file_name, strerror(errno));
		fclose(pid_file);
	}

	retval = ssh_run(sock, newav);

	/* NO_RD_SOCK and -1 are both negative: nothing to close then */
	if (sock >= 0) {
		shutdown(sock, SHUT_RDWR);
		close(sock);
	}

	if (logtype & L_SYSLOG)
		closelog();

	if (retval == P_EXITERR)
		exit(1);
	exit(0);
}
/*
* Add an argument to the argument array.
*/
void
add_arg(char *s)
{
char *p;
size_t len;
static size_t newamax = START_AV_SZ;
len = strlen(s);
if (len == 0)
return;
if (!newav) {
newav = calloc(START_AV_SZ, sizeof(char *));
if (!newav)
xerrlog(LOG_ERR, "malloc: %s", strerror(errno));
} else if (newac >= newamax-1) {
newamax *= 2;
newav = realloc(newav, newamax * sizeof(char *));
if (!newav)
xerrlog(LOG_ERR, "realloc: %s", strerror(errno));
}
p = malloc(len+1);
if (!p) xerrlog(LOG_ERR, "malloc: %s", strerror(errno));
memmove(p, s, len);
p[len] = '\0';
newav[newac++] = p;
newav[newac] = NULL;
return;
}
/*
 * Strip every occurrence of the flag character ch from a bundled
 * option string such as "-Nf", editing arg in place.
 *
 *   arg   option word to edit; words that do not start with '-' and
 *         the lone word "-" are left untouched
 *   ch    flag character to remove
 *   opts  getopt()-style option string, used to recognise flags that
 *         take a parameter
 *
 * Scanning stops at the first flag that opts marks (with a trailing
 * ':') as taking a parameter: whatever follows it in the word is that
 * flag's parameter and may legitimately contain ch.  Flags are not
 * otherwise validated against opts; that is ssh's job, and the set of
 * options may change, so this is a best effort.
 *
 * If stripping leaves just "-", the word is truncated to the empty
 * string (add_arg() skips empty strings).
 */
void
strip_arg(char *arg, char ch, char *opts)
{
	char	*f, *o;

	if (arg[0] != '-' || arg[1] == '\0')
		return;

	f = arg;
	while (*f != '\0') {
		/* flag with a parameter: the rest of the word is its value */
		if ((o = strchr(opts, *f)) != NULL && *(o+1) == ':')
			return;

		if (*f == ch) {
			/*
			 * Close the gap, moving the terminating NUL too.
			 * Do not advance: the character that has just
			 * moved into this position must be examined as
			 * well, otherwise "-ff" would leave "-f" behind
			 * and "-fof" would lose the parameter of -o.
			 */
			(void)memmove(f, f+1, strlen(f+1) + 1);
			continue;
		}
		f++;
	}

	/* left with "-" alone? then truncate */
	if (arg[1] == '\0')
		arg[0] = '\0';
}
/*
 * Load run-time settings from the AUTOSSH_* environment variables
 * into the corresponding globals.  Malformed values are reported
 * through xerrlog().  The variables are processed in a fixed order
 * because later ones depend on earlier results (first poll time
 * defaults to poll time; max lifetime caps both poll times; the log
 * file setting overrides the NT service log target).
 */
void
get_env_args(void)
{
	char	*val;	/* current environment value */
	char	*end;	/* end pointer from numeric conversion */

	val = getenv("AUTOSSH_PATH");
	if (val != NULL)
		ssh_path = val;

	/* AUTOSSH_DEBUG takes precedence over AUTOSSH_LOGLEVEL */
	if ((val = getenv("AUTOSSH_DEBUG")) != NULL) {
#ifdef HAVE_LOG_PERROR
		syslog_perror = LOG_PERROR;
#else
		syslog_perror = 0;
		logtype |= L_FILELOG;
		flog = stderr;
#endif
		loglevel = LOG_DEBUG;
	} else if ((val = getenv("AUTOSSH_LOGLEVEL")) != NULL) {
		loglevel = strtoul(val, &end, 0);
		if (*val == '\0' || *end != '\0' ||
		    loglevel < LOG_EMERG || loglevel > LOG_DEBUG)
			xerrlog(LOG_ERR, "invalid log level \"%s\"", val);
	}

	val = getenv("AUTOSSH_POLL");
	if (val != NULL) {
		poll_time = strtoul(val, &end, 0);
		if (*val == '\0' || *end != '\0' || poll_time == 0)
			xerrlog(LOG_ERR,
			    "invalid poll time \"%s\"", val);
		if (poll_time <= 0)
			poll_time = POLL_TIME;
	}

	val = getenv("AUTOSSH_FIRST_POLL");
	if (val == NULL) {
		/*
		 * If first poll time not explicitly set, first
		 * poll time should equal poll time.
		 */
		first_poll_time = poll_time;
	} else {
		first_poll_time = strtoul(val, &end, 0);
		if (*val == '\0' || *end != '\0' || first_poll_time == 0)
			xerrlog(LOG_ERR,
			    "invalid first poll time \"%s\"", val);
		if (first_poll_time <= 0)
			first_poll_time = POLL_TIME;
	}

	val = getenv("AUTOSSH_GATETIME");
	if (val != NULL) {
		gate_time = (double)strtol(val, &end, 0);
		if (*val == '\0' || *end != '\0' || gate_time < 0)
			xerrlog(LOG_ERR, "invalid gate time \"%s\"", val);
	}

	val = getenv("AUTOSSH_MAXSTART");
	if (val != NULL) {
		max_start = (int)strtol(val, &end, 0);
		if (*val == '\0' || *end != '\0' || max_start < -1)
			xerrlog(LOG_ERR, "invalid max start number \"%s\"", val);
	}

	val = getenv("AUTOSSH_MESSAGE");
	if (val != NULL) {
		if (*val != '\0')
			echo_message = val;
		if (strlen(echo_message) > MAX_MESSAGE)
			xerrlog(LOG_ERR, "echo message may only be %d bytes long",
			    MAX_MESSAGE);
	}

	val = getenv("AUTOSSH_PORT");
	if (val != NULL && *val != '\0')
		env_port = val;

	val = getenv("AUTOSSH_MAXLIFETIME");
	if (val != NULL) {
		max_lifetime = (double)strtoul(val, &end, 0);
		if (*val == '\0' || *end != '\0')
			xerrlog(LOG_ERR,
			    "invalid max lifetime \"%s\"", val);
		/* can't really be < 0, as converted as unsigned long */
		if (max_lifetime <= 0) {
			max_lifetime = MAX_LIFETIME;
		} else {
			/* never poll less often than the lifetime itself */
			if (poll_time > max_lifetime) {
				errlog(LOG_INFO,
				    "poll time is greater than lifetime,"
				    " dropping poll time to %.0f", max_lifetime);
				poll_time = max_lifetime;
			}
			if (first_poll_time > max_lifetime) {
				errlog(LOG_INFO,
				    "first poll time is greater than lifetime,"
				    " dropping first poll time to %.0f", max_lifetime);
				first_poll_time = max_lifetime;
			}
			/* lifetime is measured from this moment */
			time(&pid_start_time);
		}
	}

	val = getenv("AUTOSSH_PIDFILE");
	if (val != NULL && *val != '\0')
		pid_file_name = val;

#if defined(__CYGWIN__)
	val = getenv("AUTOSSH_NTSERVICE");
	if (val != NULL &&
	    *val != '\0' && strncasecmp("yes", val, strlen(val)) == 0) {
		ntservice = 1;
		logtype = L_FILELOG;
		flog = stdout;
	}
#endif

	/*
	 * Look for this after nt service; in case we may wish to log
	 * elsewhere than stdout when running under cygrunsrv.
	 */
	val = getenv("AUTOSSH_LOGFILE");
	if (val != NULL) {
		flog = fopen(val, "a");
		if (flog == NULL)
			xerrlog(LOG_ERR, "%s: %s", val, strerror(errno));
		logtype = L_FILELOG;
	}
}
/*
 * Main supervision loop: fork and exec ssh (argument vector av), then
 * watch it with ssh_watch() until it has to be restarted or autossh
 * should exit.
 *
 * sock is passed through to ssh_watch().  Returns P_EXITOK or
 * P_EXITERR; P_EXITOK is also returned once max_start runs have been
 * used up or the configured lifetime has been exceeded.
 */
int
ssh_run(int sock, char **av)
{
	struct timeval	seed_tv;
	int		watch_rc;

	/*
	 * There are much better things. and we all wait
	 * for solaris to get /dev/random.
	 */
	gettimeofday(&seed_tv, NULL);
	srandom(getpid() ^ seed_tv.tv_usec ^ seed_tv.tv_sec);

	set_exit_sig_handler();

	for (;;) {
		/* a negative max_start means no limit on the number of runs */
		if (max_start >= 0 && start_count >= max_start)
			break;

		if (exceeded_lifetime())
			return P_EXITOK;

		restart_ssh = 0;
		start_count++;

		/* back off if recent restarts came too close together */
		grace_time(start_time);

		if (exit_signalled) {
			errlog(LOG_ERR, "signalled to exit");
			return P_EXITERR;
		}

		time(&start_time);

		if (max_start >= 0)
			errlog(LOG_INFO, "starting ssh (count %d of %d)",
			    start_count, max_start);
		else
			errlog(LOG_INFO, "starting ssh (count %d)",
			    start_count);

		cchild = fork();

		if (cchild == 0) {
			/* child: become ssh */
			errlog(LOG_DEBUG, "child of %d execing %s",
			    getppid(), av[0]);
			execvp(av[0], av);
			/* only reached if the exec failed */
			errlog(LOG_ERR, "%s: %s", av[0], strerror(errno));
			/* else can loop restarting! */
			kill(getppid(), SIGTERM);
			_exit(1);
		}

		if (cchild == -1) {
			cchild = 0;
			xerrlog(LOG_ERR, "fork: %s", strerror(errno));
			continue;
		}

		/* parent: monitor the child until it needs attention */
		errlog(LOG_INFO, "ssh child pid is %d", (int)cchild);
		set_sig_handlers();
		watch_rc = ssh_watch(sock);
		dolongjmp = 0;
		clear_alarm_timer();
		unset_sig_handlers();
		if (watch_rc == P_EXITOK || watch_rc == P_EXITERR)
			return watch_rc;
	}

	errlog(LOG_INFO, "max start count reached; exiting");
	return P_EXITOK;
}
/*
 * Periodically test network connection. On signals, determine what
 * happened or what to do with child. Return as necessary for exit
 * or restart of child.
 *
 * sock is the listening socket for the monitor loop (or NO_RD_SOCK
 * when an echo service is used, or -1 when monitoring is off).
 *
 * Returns P_RESTART, P_EXITOK or P_EXITERR.
 *
 * Control flow: each pass through the loop arms sigsetjmp(), checks
 * on the child, sets an alarm and pause()s.  sig_catch() jumps back
 * to the sigsetjmp() with the signal number as its value whenever
 * dolongjmp is set, and the else branch then handles that signal.
 */
int
ssh_watch(int sock)
{
int r;
int val;
/* NOTE(review): presumably static so the value is not lost across
siglongjmp() - confirm */
static int secs_left;
/*
 * NOTE(review): my_poll_time is a non-volatile automatic variable
 * that is modified after sigsetjmp() and read again after a
 * siglongjmp(); its value at that point may be indeterminate -
 * confirm whether it should be volatile or static.
 */
int my_poll_time = first_poll_time;
time_t now;
double secs_to_shutdown;
#if defined(HAVE_SETPROCTITLE)
setproctitle("parent of %d (%d)",
(int)cchild, start_count);
#endif
for (;;) {
/* restart_ssh is set by sig_catch() on SIGUSR1 */
if (restart_ssh) {
errlog(LOG_INFO, "signalled to kill and restart ssh");
ssh_kill();
return P_RESTART;
}
/* val is 0 on the direct call, else the signal number passed
to siglongjmp() by sig_catch() */
if ((val = sigsetjmp(jumpbuf, 1)) == 0) {
errlog(LOG_DEBUG, "check on child %d", cchild);
/* poll for expired child */
r = ssh_wait(WNOHANG);
if (r != P_CONTINUE) {
errlog(LOG_DEBUG,
"expired child, returning %d", r);
return r;
}
/* resume an interrupted wait; otherwise start a full
interval (the first one uses first_poll_time) */
secs_left = clear_alarm_timer();
if (secs_left == 0)
secs_left = my_poll_time;
my_poll_time = poll_time;
/* wake no later than the end of the configured lifetime */
/* NOTE(review): secs_to_shutdown is not checked for being
<= 0 before it is passed to alarm() - confirm */
if (max_lifetime != 0) {
time(&now);
secs_to_shutdown = max_lifetime - difftime(now,pid_start_time);
if (secs_to_shutdown < poll_time)
secs_left = secs_to_shutdown;
}
errlog(LOG_DEBUG,
"set alarm for %d secs", secs_left);
/* from here on sig_catch() will jump back to sigsetjmp() */
dolongjmp = 1;
alarm(secs_left);
/* In case we were signalled while setting
all this up */
if (exit_signalled) {
errlog(LOG_INFO, "signalled to exit");
ssh_kill();
return P_EXITERR;
}
/* sleep until a signal arrives */
pause();
} else {
/* arrived here via siglongjmp(); val is the signal */
switch(val) {
case SIGINT:
case SIGTERM:
case SIGQUIT:
case SIGABRT:
errlog(LOG_INFO,
"received signal to exit (%d)", val);
ssh_kill();
return P_EXITERR;
break;
case SIGALRM:
r = exceeded_lifetime();
errlog(LOG_DEBUG,
"received SIGALRM (end-of-life %d)", r);
/* exit if user-configured lifetime exceeded */
if (r) {
ssh_kill();
return P_EXITOK;
}
/* test the monitor loop only when monitoring is enabled;
sock is -1 when no monitor socket was set up */
if (writep && sock != -1 &&
!conn_test(sock, mhost, writep)) {
errlog(LOG_INFO,
"port down, restarting ssh");
ssh_kill();
return P_RESTART;
}
#ifdef TOUCH_PIDFILE
/*
* utimes() with a NULL time argument sets
* file access and modification times to
* the current time
*/
if (pid_file_name &&
utimes(pid_file_name, NULL) != 0) {
errlog(LOG_ERR,
"could not touch pid file: %s",
strerror(errno));
}
#endif
break;
default:
/* any other caught signal: go round the loop again, where
restart_ssh and the child's state are re-checked */
break;
}
}
}
}
/*
 * Clear any pending signal timer alarm and return the number of seconds
 * before it would have gone off. Return 0 if there was no alarm pending.
 */
unsigned int
clear_alarm_timer(void)
{
	unsigned int	secs_left = alarm(0);

	/* secs_left is unsigned, so it is printed with %u */
	errlog(LOG_DEBUG,
	    "clear alarm timer (%u secs left)", secs_left);
	return secs_left;
}
/*
 * Report whether the configured maximum lifetime has run out.
 *
 * Returns 1 (and logs the fact) when max_lifetime is positive and at
 * least that many seconds have passed since pid_start_time; returns 0
 * otherwise, including when no lifetime limit is set.
 */
int
exceeded_lifetime(void)
{
	time_t	now;

	/* no limit configured */
	if (!(max_lifetime > 0))
		return 0;

	time(&now);
	if (difftime(now, pid_start_time) >= max_lifetime) {
		errlog(LOG_INFO,
		    "exceeded maximum time to live, shutting down");
		return 1;
	}
	return 0;
}
/*
 * Wait on the ssh child.  With options == WNOHANG this polls for a
 * dead child; with options == 0 it waits for a child known to be dead.
 *
 * Returns:
 *   P_CONTINUE  no status was collected; keep monitoring
 *   P_RESTART   the child is gone and ssh should be started again
 *   P_EXITOK    the child exited with status 0; autossh should give up
 *   P_EXITERR   the child failed in a way that should end autossh
 *
 * A child that died on a signal is always restarted.  (Treating
 * SIGINT/SIGTERM/SIGKILL of the child as "someone meant it" and
 * exiting was considered by the original author but left disabled.)
 *
 * If the child exits within gate_time seconds on the very first run,
 * there is some startup error (anything from network connection to
 * authentication failure), so autossh exits.  On a later run it keeps
 * trying, as it must have worked once.  This doesn't necessarily work
 * if the user did an interactive authentication and then isn't there
 * on the restart to enter a password....  But we can only know very
 * little about what's going on inside ssh.
 *
 * This is further complicated by the behaviour of OpenSSH when sent
 * SIGTERM (15).  It is possible to kill it before it installs the
 * handler for that signal, in which case ssh dies on the signal.
 * But, in at least interactive use, it appears that once the session
 * is established ssh installs a handler, and then when signalled
 * (killed) it exits with status 255.  autossh does not know it (ssh)
 * was signalled, so restarts it.
 */
int
ssh_wait(int options) {
	int	wstatus;
	int	code;
	time_t	now;

	if (waitpid(cchild, &wstatus, options) <= 0)
		return P_CONTINUE;	/* do nothing */

	if (WIFSIGNALED(wstatus)) {
		/* continue on and restart */
		errlog(LOG_INFO,
		    "ssh exited on signal %d, restarting ssh",
		    WTERMSIG(wstatus));
		return P_RESTART;
	}

	if (!WIFEXITED(wstatus))
		return P_CONTINUE;	/* do nothing */

	code = WEXITSTATUS(wstatus);

	/*
	 * If ssh exits too quickly, give up.
	 */
	if (start_count == 1 && gate_time != 0) {
		time(&now);
		if (difftime(now, start_time) <= gate_time) {
			errlog(LOG_ERR,
			    "ssh exited prematurely "
			    "with status %d; %s exiting",
			    code, __progname);
			return P_EXITERR;
		}
	}

	/* exited on success */
	if (code == 0) {
#if defined(__CYGWIN__)
		if (ntservice)
			return P_RESTART;
#endif
		errlog(LOG_INFO,
		    "ssh exited with status %d; %s exiting",
		    code, __progname);
		return P_EXITOK;
	}

	/*
	 * 255: we can get this on an initial connection if the
	 * connection itself is ok, but authentication fails.  There is
	 * no way to do this nicely: we don't have enough info from the
	 * ssh session and we get the same exit status from a dropped
	 * connection.  Hence the gate_time check above.
	 *
	 * 1 (and 2, which ssh can return for temporary issues setting
	 * up tunnels): the first time, it could be any of a number of
	 * errors, so we exit and let the user fix it.  But if it has
	 * been running ok already, the network may be down and ssh
	 * fails with exit(1) on the attempt to reconnect, so we try to
	 * restart.
	 */
	if (code == 255 ||
	    ((code == 1 || code == 2) &&
	    (start_count > 1 || gate_time == 0))) {
		errlog(LOG_INFO,
		    "ssh exited with error "
		    "status %d; restarting ssh",
		    code);
		return P_RESTART;
	}

	/* remote command error status */
	errlog(LOG_INFO,
	    "ssh exited with status %d; %s exiting",
	    code, __progname);
	return P_EXITERR;
}
/*
 * Terminate the ssh child with SIGTERM and reap it.  Does nothing if
 * there is no current child.  Only SIGTERM is sent; a follow-up
 * SIGKILL was judged overly aggressive, since it could arrive before
 * TERM has had time to take effect.
 */
void
ssh_kill(void)
{
	int	wstatus;
	int	reaped;

	if (!cchild)
		return;

	kill(cchild, SIGTERM);

	/* wait for the child, retrying if interrupted by a signal */
	for (;;) {
		errno = 0;
		reaped = waitpid(cchild, &wstatus, 0);
		if (reaped >= 0 || errno != EINTR)
			break;
	}

	if (reaped <= 0)
		errlog(LOG_ERR,
		    "waitpid() not successful: %s",
		    strerror(errno));
}
/*
 * Throttle rapid-fire restarts (connection refused and the like).
 *
 * last_start is the time ssh was last started.  A restart that comes
 * sooner than a minimum up-time after it counts as a "fast" try; once
 * more than N_FAST_TRIES of those have happened in a row, sleep for a
 * grace period that grows with the number of tries and is capped at
 * poll_time.  A run that lasted long enough resets the count.
 *
 * Open question from the original author: should it back off faster?
 * slower?
 */
void
grace_time(time_t last_start)
{
	static int	tries;		/* consecutive fast restarts */
	time_t		now;
	double		floor_secs;
	double		over;
	int		delay;

	/*
	 * Minimum time we have to stay up to avoid backoff
	 * behaviour. With default poll_time this is 60 secs.
	 * This may be too complicated.
	 */
	floor_secs = (double)(poll_time / 10);
	if (floor_secs < 10)
		floor_secs = 10;

	time(&now);
	tries = (difftime(now, last_start) >= floor_secs) ? 0 : tries + 1;

	errlog(LOG_DEBUG,
	    "checking for grace period, tries = %d", tries);

	if (tries <= N_FAST_TRIES)
		return;

	over = (double)(tries - N_FAST_TRIES);
	delay = (int)((poll_time / 100.0) * (over * (over/3)));
	if (delay > poll_time)
		delay = poll_time;

	if (delay) {
		errlog(LOG_DEBUG,
		    "sleeping for grace time %d secs", delay);
		sleep(delay);
	}
}
void
set_exit_sig_handler()
{
struct sigaction act;
memset(&act, 0, sizeof(act));
act.sa_handler = sig_catch;
sigemptyset(&act.sa_mask);
act.sa_flags = 0;
sigaction(SIGTERM, &act, NULL);
sigaction(SIGINT, &act, NULL);
}
void
set_sig_handlers(void)
{
struct sigaction act;
sigset_t blockmask;
memset(&act, 0, sizeof(act));
act.sa_handler = sig_catch;
act.sa_flags = 0;
/* create mask to ensure sig_catch() processes one signal at-a-time */
sigemptyset(&blockmask);
sigaddset(&blockmask, SIGTERM);
sigaddset(&blockmask, SIGINT);
sigaddset(&blockmask, SIGHUP);
sigaddset(&blockmask, SIGUSR1);
sigaddset(&blockmask, SIGUSR2);
sigaddset(&blockmask, SIGCHLD);
sigaddset(&blockmask, SIGALRM);
sigaddset(&blockmask, SIGPIPE);
act.sa_mask = blockmask;
sigaction(SIGTERM, &act, NULL);
sigaction(SIGINT, &act, NULL);
sigaction(SIGHUP, &act, NULL);
sigaction(SIGUSR1, &act, NULL);
sigaction(SIGUSR2, &act, NULL);
sigaction(SIGCHLD, &act, NULL);
act.sa_flags |= SA_RESTART;
sigaction(SIGALRM, &act, NULL);
act.sa_handler = SIG_IGN;
act.sa_flags = 0;
sigaction(SIGPIPE, &act, NULL);
}
void
unset_sig_handlers(void)
{
struct sigaction act;
memset(&act, 0, sizeof(act));
act.sa_handler = SIG_DFL;
sigemptyset(&act.sa_mask);
act.sa_flags = 0;
/* We don't reset SIGTERM, as we want the
handler to persist in the run_ssh() loop */
/* This seems a little hidden down in here... */
/* sigaction(SIGTERM, &act, NULL); */
/* sigaction(SIGINT, &act, NULL); */
sigaction(SIGHUP, &act, NULL);
sigaction(SIGUSR1, &act, NULL);
sigaction(SIGUSR2, &act, NULL);
sigaction(SIGCHLD, &act, NULL);
sigaction(SIGALRM, &act, NULL);
sigaction(SIGPIPE, &act, NULL);
}
/*
 * Common signal handler.  Records SIGUSR1 in restart_ssh and
 * SIGTERM/SIGINT in exit_signalled; then, if a jump has been primed
 * (dolongjmp set), disarms it and siglongjmp()s to jumpbuf with the
 * signal number as the value.
 */
void
sig_catch(int sig)
{
	switch (sig) {
	case SIGUSR1:
		restart_ssh = 1;
		break;
	case SIGTERM:
	case SIGINT:
		exit_signalled = 1;
		break;
	default:
		break;
	}

	if (!dolongjmp)
		return;

	dolongjmp = 0;
	siglongjmp(jumpbuf, sig);
}
/*
 * Test the connection monitor loop can pass traffic, and that
 * we get back what we send. This needs the most testing.
 *
 *   sock        listening socket on which the looped-back connection
 *               is accepted, or NO_RD_SOCK when talking to an echo
 *               service (the write connection is then read as well)
 *   host        host to connect the write side to
 *   write_port  port to connect the write side to
 *
 * Returns 1 if a message was sent and the identical bytes came back,
 * 0 otherwise.
 *
 * NOTE(review): because tries is incremented in the loop condition
 * and checked against MAX_CONN_TRIES again at the top of the body, at
 * most MAX_CONN_TRIES - 1 exchanges are actually attempted.  Left
 * as is.
 */
int
conn_test(int sock, char *host, char *write_port)
{
	int		rval;		/* default return value (failure) */
	int		tries;		/* message attempts */
	int		send_error;	/* did it go/come ok? */
	struct pollfd	pfd[2];		/* poll fds */
	int		ntopoll;	/* # fds to poll */
	int		rd, wd;		/* read and write descriptors */
	long		id;		/* for a random number */
	struct utsname	uts;
	char		wbuf[64+sizeof(uts.nodename)+MAX_MESSAGE];
	char		rbuf[sizeof(wbuf)];

	wd = -1;			/* default desc. values */
	rd = -1;
	rval = 0;			/* default return value : no success */
	tries = 0;			/* number of attempts */

	uts.nodename[0] = '\0';
	(void)uname(&uts);
	id = random();

	if (dolongjmp != 0)
		errlog(LOG_ERR, "conn_test(): error: dolongjmp != 0");

	/* set up write connection */
	if ((wd = conn_remote(host, write_port)) == -1)
		return 0;
	pfd[1].fd = wd;
	pfd[1].events = POLLOUT;

	while (tries++ < MAX_CONN_TRIES) {
		if (tries >= MAX_CONN_TRIES) {
			errlog(LOG_DEBUG,
			    "tried connection %d times and failed",
			    tries);
			break;		/* give up */
		}

		/* close read socket if we're coming around again */
		if (sock != NO_RD_SOCK && rd != -1) {
			shutdown(rd, SHUT_RDWR);
			close(rd);
			rd = -1;
		}

		/*
		 * Some data to send: something that is identifiable
		 * as coming from ourselves. Any user can still trash
		 * our listening port. We'd really like to be able to
		 * connect and accept connections from certain pids
		 * (ourself, our children).
		 */
		if (snprintf(wbuf, sizeof(wbuf),
		    "%s %s %d %ld %s\r\n", uts.nodename, __progname,
		    (int)getpid(), id, echo_message) >= sizeof(wbuf))
			xerrlog(LOG_ERR, "conn_test: buffer overflow");
		memset(rbuf, '\0', sizeof(rbuf));

		if (sock != NO_RD_SOCK) {
			/*
			 * If doing loop of connections, then accept() the read
			 * connection and use both read and write fds for
			 * poll(). Replace poll fd with accepted connection fd.
			 */
			rd = conn_poll_for_accept(sock, pfd);
			if (rd < 0)
				break;	/* give up */
			pfd[0].fd = rd;
			pfd[0].events = POLLIN;
			ntopoll = 2;
		} else {
			/*
			 * For talking to echo service, shift over and
			 * just use the one descriptor for both read and
			 * write.
			 */
			pfd[0].fd = wd;
			pfd[0].events = POLLIN|POLLOUT;
			ntopoll = 1;
		}

		send_error = conn_send_and_receive(rbuf, wbuf,
		    strlen(wbuf), pfd, ntopoll);
		if (send_error == 0) {
			/* we try again if received does not match sent */
			if (strcmp(rbuf, wbuf) == 0) {
				errlog(LOG_DEBUG, "connection ok");
				rval = 1;	/* success */
				break;		/* out of here */
			} else {
				errlog(LOG_DEBUG,
				    "not what I sent: \"%s\" : \"%s\"",
				    wbuf, rbuf);
				/* loop again */
			}
		} else if (send_error == 1) {
			errlog(LOG_DEBUG,
			    "timeout on io poll, looping to accept again");
		} else {
			errlog(LOG_DEBUG, "error on poll: %s",
			    strerror(errno));
			break;	/* hard error, we're out of here */
		}
	}

	shutdown(wd, SHUT_RDWR);
	close(wd);

	/*
	 * Only close the read side if a connection was actually
	 * accepted; rd is still negative when the accept failed or was
	 * never attempted.
	 */
	if (sock != NO_RD_SOCK && rd >= 0) {
		shutdown(rd, SHUT_RDWR);
		close(rd);
	}

	return rval;
}
/*
 * Poll the listening socket for an incoming connection and accept it.
 *
 *   sock  listening socket
 *   pfd   poll array owned by the caller; pfd[0] is overwritten with
 *         the listening socket for the duration of the poll
 *
 * Waits at most net_timeout milliseconds.  Returns the descriptor of
 * the accepted connection, or -1 on timeout or error.
 */
int
conn_poll_for_accept(int sock, struct pollfd *pfd)
{
	int		rd;		/* new descriptor on accept */
	struct sockaddr	cliaddr;
	socklen_t	len = sizeof(cliaddr);	/* listen socket info */

	/*
	 * first we're going to poll for accept()
	 */
	pfd[0].fd = sock;
	pfd[0].events = POLLIN;

	switch(poll(pfd, 1, net_timeout)) {
	case 0:
		errlog(LOG_INFO,
		    "timeout polling to accept read connection");
		return -1;
	case -1:
		errlog(LOG_ERR,
		    "error polling to accept read connection: %s",
		    strerror(errno));
		return -1;
	default:
		break;
	}

	/*
	 * poll() can report a descriptor as ready without POLLIN (for
	 * example POLLERR, POLLHUP or POLLNVAL).  There is nothing to
	 * accept then; report failure rather than returning a
	 * descriptor that was never opened here.
	 */
	if (!(pfd[0].revents & POLLIN)) {
		errlog(LOG_ERR,
		    "error polling to accept read connection: "
		    "unexpected poll events 0x%x",
		    (unsigned int)pfd[0].revents);
		return -1;
	}

	rd = accept(sock, &cliaddr, &len);
	if (rd == -1) {
		errlog(LOG_ERR,
		    "error accepting read connection: %s",
		    strerror(errno));
		return -1;
	}

	return rd;
}
/*
* Send from wp and receive into rp.
* 1 = try again
* 0 = ok
* -1 = error
*/
int
conn_send_and_receive(char *rp, char *wp, size_t len,
struct pollfd *pfd, int ntopoll)
{
ssize_t nwrite, nread;
size_t rleft, wleft;
int timeo_pollio;
int ird, iwr;
int loops = 0;
timeo_pollio = net_timeout; /* timeout value for net io */
rleft = wleft = len;
/*
* If two fds, one is to read, one is to write,
* else read and write on the same fd.
*/
if (ntopoll == 2) {
ird = 0;
iwr = 1;
} else {
iwr = ird = 0;
}
/*
* Now, send and receive. When we're doing the loop thing, we stop
* polling for write() once we've sent the whole message.
*/
while (rleft > 0) {
switch(poll(pfd, ntopoll, timeo_pollio)) {
case 0:
return 1;
break;
case -1:
return -1;
break;
default:
break;
}
if (wleft && pfd[iwr].revents & POLLOUT) {
while (wleft > 0) {
nwrite = write(pfd[iwr].fd, wp, wleft);
if (nwrite == 0) {
wleft = 0; /* EOF */
break;
} else if (nwrite == -1) {
if (errno == EINTR || errno == EAGAIN)
break;
else
return -1;
}
wleft -= nwrite;
wp += nwrite;
}
/* if complete, turn off polling for write */
if (wleft == 0) {
ntopoll = 1;
/*
* if we are reading and writing to the
* same fd then we must clear the write bit
* so that poll doesn't loop tight.
*/
if (iwr == ird)
pfd[ird].events = POLLIN;
}
}
if (pfd[ird].revents & POLLIN || pfd[ird].revents & POLLHUP) {
while (rleft > 0) {
nread = read(pfd[ird].fd, rp, rleft);
if (nread == 0) {
rleft = 0; /* EOF */
break;
} else if (nread == -1) {
if (errno == EINTR || errno == EAGAIN)
break;
else
return -1;
}
rleft -= nread;
rp += nread;
}
}
/*
* we can run into situations where the data gets black-holed
* and poll() can't tell. And then we loop fast and
* things go nuts. So if we do that, give up after a while.
*/
if (loops++ > 5) {
sleep(1);
if (loops > 10) {
errlog(LOG_INFO,
"too many loops without data");
return -1;
}
}
}
return 0;
}
#ifndef HAVE_ADDRINFO
/*
 * Convert names to addresses, setup for connection.
 *
 * Looks host up with gethostbyname() and fills *resp with the address
 * family, the port (parsed from the decimal string "port") and the
 * first address returned.  A failed lookup is fatal: xerrlog() logs
 * the resolver error and exits.
 *
 * *resp is cleared before it is filled in, so a caller passing an
 * uninitialised automatic struct does not leak stack garbage into the
 * fields this function leaves alone.
 */
void
conn_addr(char *host, char *port, struct sockaddr_in *resp)
{
	struct hostent *h;

	if ((h = gethostbyname(host)) == NULL)
		xerrlog(LOG_ERR, "%s: %s", host, hstrerror(h_errno));

	memset(resp, 0, sizeof(*resp));
	resp->sin_family = h->h_addrtype;
	resp->sin_port = htons(atoi(port));
	/*
	 * h_addr_list[0] is a char pointer with no alignment promise;
	 * copy the bytes rather than dereferencing it as a struct in_addr.
	 */
	memcpy(&resp->sin_addr, h->h_addr_list[0], sizeof(resp->sin_addr));

	return;
}
/*
 * Open the connection we write to.
 *
 * The resolved address is kept in a function-local static and looked
 * up only while its family is still AF_UNSPEC, i.e. on first use.
 *
 * Returns the connected socket, or -1 if connect() fails (logged at
 * LOG_INFO).  Failure to create the socket is fatal via xerrlog().
 */
int
conn_remote(char *host, char *port)
{
	static struct sockaddr_in cached = {AF_UNSPEC};
	int fd;

	/* Resolve once, reuse afterwards */
	if (cached.sin_family == AF_UNSPEC)
		conn_addr(host, port, &cached);

	fd = socket(cached.sin_family, SOCK_STREAM, 0);
	if (fd == -1)
		xerrlog(LOG_ERR, "socket: %s", strerror(errno));

	if (connect(fd, (struct sockaddr *)&cached, sizeof(cached)) != -1)
		return fd;

	errlog(LOG_INFO, "%s:%s: %s", host, port, strerror(errno));
	close(fd);
	return -1;
}
/*
 * Create a TCP socket listening on host:port, with SO_REUSEADDR set
 * and a listen backlog of 1.
 *
 * Returns the listening socket.  Every failure on the way (socket,
 * setsockopt, bind, listen) is fatal and goes through xerrlog().
 */
int
conn_listen(char *host, char *port)
{
	struct sockaddr_in addr;
	int reuse = 1;
	int lsock;

	/*
	 * No caching here, unlike conn_remote: this is called once
	 * at start.
	 */
	conn_addr(host, port, &addr);

	lsock = socket(addr.sin_family, SOCK_STREAM, 0);
	if (lsock == -1)
		xerrlog(LOG_ERR, "socket: %s", strerror(errno));

	if (setsockopt(lsock, SOL_SOCKET, SO_REUSEADDR,
	    (char *)&reuse, sizeof reuse) != 0)
		xerrlog(LOG_ERR, "setsockopt: %s", strerror(errno));

	if (bind(lsock, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
		xerrlog(LOG_ERR, "bind on %s:%s: %s",
		    host, port, strerror(errno));
	}

	if (listen(lsock, 1) < 0)
		xerrlog(LOG_ERR, "listen: %s", strerror(errno));

	return lsock;
}
#else /* HAVE_ADDRINFO */
/*
 * Resolve host/port with getaddrinfo() and store the result list in
 * *resp.  The request is for IPv4 TCP stream sockets with AI_PASSIVE
 * set.  A resolver failure is fatal: the gai_strerror() text is logged
 * through xerrlog().
 */
void
conn_addr(char *host, char *port, struct addrinfo **resp)
{
	struct addrinfo	want;
	int		rc;

	memset(&want, 0, sizeof(want));
	want.ai_family = AF_INET;
	want.ai_socktype = SOCK_STREAM;
	want.ai_protocol = IPPROTO_TCP;

	/* Allow nodename to be null */
	want.ai_flags |= AI_PASSIVE;

	/*
	 * A wildcard bind with no family preference falls back to
	 * IPv4.  With the family fixed to AF_INET above this test
	 * cannot currently fire; it is kept as is.
	 */
	if (want.ai_family == AF_UNSPEC && host == NULL)
		want.ai_family = AF_INET;

	rc = getaddrinfo(host, port, &want, resp);
	if (rc != 0)
		xerrlog(LOG_ERR, "%s", gai_strerror(rc));
}
/*
 * Open the connection we write to.
 *
 * The getaddrinfo() result is kept in a function-local static and
 * resolved only while that pointer is still NULL; only the first
 * entry of the list is used.
 *
 * Returns the connected socket, or -1 if connect() fails (logged at
 * LOG_INFO).  Failure to create the socket is fatal via xerrlog().
 */
int
conn_remote(char *host, char *port)
{
	static struct addrinfo *cached;
	int fd;

	/* Resolve once, reuse afterwards */
	if (cached == NULL)
		conn_addr(host, port, &cached);

	fd = socket(cached->ai_family, cached->ai_socktype,
	    cached->ai_protocol);
	if (fd == -1)
		xerrlog(LOG_ERR, "socket: %s", strerror(errno));

	if (connect(fd, cached->ai_addr, cached->ai_addrlen) != -1)
		return fd;

	errlog(LOG_INFO, "%s:%s: %s", host, port, strerror(errno));
	close(fd);
	return -1;
}
/*
 * Create a socket listening on host:port, with SO_REUSEADDR set and a
 * listen backlog of 1.  The address comes from conn_addr(); its result
 * list is released before returning.
 *
 * Returns the listening socket.  Every failure on the way (socket,
 * setsockopt, bind, listen) is fatal and goes through xerrlog().
 */
int
conn_listen(char *host, char *port)
{
	struct addrinfo *ai;
	int reuse = 1;
	int lsock;

	/*
	 * No caching here, unlike conn_remote: this is called once
	 * at start.
	 */
	conn_addr(host, port, &ai);

	lsock = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
	if (lsock == -1)
		xerrlog(LOG_ERR, "socket: %s", strerror(errno));

	if (setsockopt(lsock, SOL_SOCKET, SO_REUSEADDR,
	    (char *)&reuse, sizeof reuse) != 0)
		xerrlog(LOG_ERR, "setsockopt: %s", strerror(errno));

	if (bind(lsock, ai->ai_addr, ai->ai_addrlen) == -1) {
		xerrlog(LOG_ERR, "bind on %s:%s: %s",
		    host, port, strerror(errno));
	}

	if (listen(lsock, 1) < 0)
		xerrlog(LOG_ERR, "listen: %s", strerror(errno));

	freeaddrinfo(ai);

	return lsock;
}
#endif /* ! HAVE_ADDRINFO */
/*
* On OpenBSD _exit() calls atexit() registered functions.
* Solaris has a function, _exithandle(), you can call
* before _exit().
*/
void
unlink_pid_file(void)
{
	/*
	 * Remove the pid file only if the pid_file_created flag is set,
	 * and clear the flag so a second call does nothing.
	 */
	if (pid_file_created) {
		(void)unlink(pid_file_name);
		pid_file_created = 0;
	}
}
/*
 * Nicely formatted time string for logging: local time rendered as
 * "YYYY/MM/DD HH:MM:SS".
 *
 * Returns a pointer to a static buffer that the next call overwrites.
 * If the time cannot be converted or formatted, the buffer holds an
 * empty string.
 */
char *
timestr(void)
{
	static char	stamp[32];
	struct tm	*tm;
	time_t		now;

	(void)time(&now);
	tm = localtime(&now);

	/*
	 * localtime() may return NULL, and strftime() returning 0 leaves
	 * the buffer contents unspecified; hand back "" in both cases.
	 */
	if (tm == NULL ||
	    strftime(stamp, sizeof(stamp), "%Y/%m/%d %H:%M:%S", tm) == 0)
		stamp[0] = '\0';

	return stamp;
}
/*
 * Log a printf-style message at the given level.
 * Thin variadic front end: collects the arguments and passes them
 * on to doerrlog().
 */
void
errlog(int level, char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	doerrlog(level, fmt, args);
	va_end(args);
}
/*
 * Log a printf-style message through doerrlog(), then terminate:
 * ssh_kill() and unlink_pid_file() are called, followed by _exit(1).
 * Does not return.
 */
void
xerrlog(int level, char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	doerrlog(level, fmt, args);
	va_end(args);

	/* clean up, then leave with error status */
	ssh_kill();
	unlink_pid_file();
	_exit(1);
}
/*
 * Log to file and/or syslog as directed. We want different
 * behaviour before syslog has been called and set up; and
 * different behaviour before we fork for ssh: errors before
 * that point result in exit.
 *
 * level	priority of this message; it is dropped unless
 *		loglevel >= level
 * fmt, ap	printf-style format and its arguments
 *
 * With L_SYSLOG set in logtype the message goes to syslog.  Otherwise,
 * if no log file is open, stderr stands in for it.  With L_FILELOG set
 * and a stream available, the message is written as one line prefixed
 * with the timestamp, the program name and the pid.
 */
void
doerrlog(int level, char *fmt, va_list ap)
{
	FILE	*fl;
	va_list	 sl_ap;		/* private copy for the syslog path */
#ifndef HAVE_VSYSLOG
	char	logbuf[1024];
#endif

	fl = flog;	/* only set per-call */

	if (loglevel >= level) {
		if (logtype & L_SYSLOG) {
			/*
			 * Work on a copy: ap may still be needed by
			 * vfprintf() below, and a va_list must not be
			 * reused once another function has consumed it.
			 */
			va_copy(sl_ap, ap);
#ifndef HAVE_VSYSLOG
			(void)vsnprintf(logbuf, sizeof(logbuf), fmt, sl_ap);
			/*
			 * logbuf is already formatted; pass it as data so
			 * any '%' in it is not interpreted a second time.
			 */
			syslog(level, "%s", logbuf);
#else
			vsyslog(level, fmt, sl_ap);
#endif
			va_end(sl_ap);
		} else if (!fl) {
			/*
			 * if we're not using syslog, and we
			 * don't have a log file, then use
			 * stderr.
			 */
			fl = stderr;
		}
		if ((logtype & L_FILELOG) && fl) {
			fprintf(fl,
			    "%s %s[%d]: ", timestr(),
			    __progname, (int)getpid());
			vfprintf(fl, fmt, ap);
			fprintf(fl, "\n");
			fflush(fl);
		}
	}
	return;
}
/* END */