Logo Search packages:      
Sourcecode: salinfo version File versions  Download package

salinfo_decode_all.c

/*
 * salinfo_decode_all.c - start and monitor the salinfo_decode tasks.
 *
 * Copyright (c) 2005 Silicon Graphics, Inc
 *    Keith Owens <kaos@sgi.com>
 * 2005-12-14 Initial release.
 *          Keith Owens <kaos@sgi.com>
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */

/* This program takes no arguments, instead everything is passed via
 * environment variables to make it easier to set data in /etc/sysconfig.
 * All variables come in two forms, global (applies to all record types) and
 * per record (only applies to that record type).  The per record variables
 * have a prefix of 'CMC_', 'CPE_', 'INIT_' or 'MCA_', global settings have no
 * prefix.  The global value is used if there is no record specific variable in
 * the environment.
 *
 * Required variables are :-
 *
 * DIRECTORY            The value passed as parameter -D to salinfo_decode.
 *
 * RETRIES        How many times a version of salinfo_decode is restarted
 *                before we give up and log the failure.
 *
 * Optional variables are :-
 *
 * INODE_PCT            Passed as -i <value> to salinfo_decode.
 *
 * SPACE_PCT            Passed as -s <value> to salinfo_decode.
 *
 * RATE_LIMIT           Passed as -l <value> to salinfo_decode.
 *
 * TRIGGER        Passed as -T <value> to salinfo_decode.
 */

#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <syslog.h>
#include <time.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>

/* String terminator, spelled out for readability. */
#ifndef NUL
#define NUL '\0'
#endif

/* Number of elements in a true array (not valid for pointers). */
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(a) (sizeof (a) / sizeof ((a)[0]))
#endif

/* Program name (argv[0]), used as a prefix on messages written to stderr and
 * as the syslog identity.
 */
static char *prefix;

/* Record types.  One salinfo_decode task is started per entry; the name is
 * also the prefix of the per record environment variables.
 */
static const char *type[] = { "CMC", "CPE", "INIT", "MCA" };
/* Number of record types.  Also used as the index of the extra column in
 * value[][] that holds the global (unprefixed) setting.
 */
#define GLOBAL (ARRAY_SIZE(type))

/* Per record type bookkeeping for the salinfo_decode task. */
struct child {
      /* Return value of fork(): pid of the task, or -1 if the fork failed. */
      int pid;
      /* Wait status stored by the SIGCHLD handler when the task was reaped. */
      int status;
      /* 0 = believed to be running, 1 = has died and is waiting for the
       * monitor loop to decide what to do, 2 = given up, never respawn.
       */
      int died;
      /* Number of respawns attempted so far. */
      int retries;
      /* Respawn limit, parsed from the RETRIES environment variable. */
      int max_retries;
      /* Time of the most recent successful fork of this task. */
      time_t start_time;
};
/* One slot per record type, indexed like type[]. */
static struct child child[GLOBAL];

/* Index of each supported environment variable; used to index var[], parm[]
 * and the first dimension of value[][].
 */
enum {
      DIRECTORY,
      RETRIES,
      INODE_PCT,
      SPACE_PCT,
      RATE_LIMIT,
      TRIGGER,
} varnum;

/* Environment variable names (without the per record prefix). */
static const char *var[] = {
      [ DIRECTORY ]     = "DIRECTORY",
      [ RETRIES ] = "RETRIES",
      [ INODE_PCT ]     = "INODE_PCT",
      [ SPACE_PCT ]     = "SPACE_PCT",
      [ RATE_LIMIT ]    = "RATE_LIMIT",
      [ TRIGGER ] = "TRIGGER",
};

/* Option letter used to pass each variable to salinfo_decode.  NUL means the
 * variable is consumed by this program and is not passed on.
 */
static const char parm[] = {
      [ DIRECTORY ]     = 'D',
      [ RETRIES ] = NUL,
      [ INODE_PCT ]     = 'i',
      [ SPACE_PCT ]     = 's',
      [ RATE_LIMIT ]    = 'l',
      [ TRIGGER ] = 'T',
};

/* value[v][t] is the setting of variable v for record type t, or NULL if it
 * is unset.  Column GLOBAL holds the unprefixed (global) setting.
 */
static char *value[ARRAY_SIZE(var)][GLOBAL + 1];

/* Count of problems detected so far; non-zero makes start up fail. */
static int errors;

/* Read every configuration variable from the environment into value[][].
 *
 * For each entry in var[] the per record name ("<TYPE>_<VAR>") is looked up
 * for every record type and the global name ("<VAR>") is stored in column
 * GLOBAL.  An empty string is treated as unset.  Record types without a
 * setting of their own then inherit the global one.
 *
 * DIRECTORY and RETRIES must end up with a value for every record type.
 * RETRIES must be a complete number that fits in an int; it is stored in
 * child[t].max_retries.
 *
 * Every problem is reported on stderr and counted in 'errors'.  If any were
 * found the program exits with status 1.
 */
static void
get_all_env(void)
{
      char name[200], *end;
      size_t v, t;
      long retries;

      /* Pass 1: fetch the raw settings, per record and global. */
      for (v = 0; v < ARRAY_SIZE(var); ++v) {
            for (t = 0; t <= GLOBAL; ++t) {
                  if (t == GLOBAL)
                        snprintf(name, sizeof(name), "%s", var[v]);
                  else
                        snprintf(name, sizeof(name), "%s_%s", type[t], var[v]);
                  value[v][t] = getenv(name);
                  if (value[v][t] && !*value[v][t])
                        value[v][t] = NULL;
            }
      }

      /* Pass 2: fall back to the global setting where needed. */
      for (v = 0; v < ARRAY_SIZE(var); ++v) {
            for (t = 0; t < GLOBAL; ++t)
                  if (!value[v][t])
                        value[v][t] = value[v][GLOBAL];
      }

      /* Pass 3: the required variables must be set for every record type. */
      for (v = 0; v < ARRAY_SIZE(var); ++v) {
            if (v != DIRECTORY && v != RETRIES)
                  continue;
            for (t = 0; t < GLOBAL; ++t) {
                  if (!value[v][t]) {
                        fprintf(stderr,
                              "%s: no value for environment variable %s_%s nor %s\n",
                              prefix, type[t], var[v], var[v]);
                        ++errors;
                  }
            }
      }

      /* Pass 4: convert RETRIES.  A missing value was already reported. */
      for (t = 0; t < GLOBAL; ++t) {
            if (!value[RETRIES][t])
                  continue;
            errno = 0;
            retries = strtol(value[RETRIES][t], &end, 0);
            if (end == value[RETRIES][t] || *end) {
                  fprintf(stderr,
                        "%s: non-numeric value for %s retries (%s)\n",
                        prefix, type[t], value[RETRIES][t]);
                  ++errors;
            } else if (errno == ERANGE ||
                     retries < INT_MIN || retries > INT_MAX) {
                  /* max_retries is an int, do not silently truncate. */
                  fprintf(stderr,
                        "%s: value for %s retries is out of range (%s)\n",
                        prefix, type[t], value[RETRIES][t]);
                  ++errors;
            } else
                  child[t].max_retries = (int)retries;
      }

      if (errors)
            exit(1);
}

/* Start the salinfo_decode task for record type t.
 *
 * The command line is "salinfo_decode -t <type in lower case>" followed by
 * "-<letter> <value>" for every variable that has a value for this record
 * type and an option letter in parm[].
 *
 * In the parent, child[t].pid is set to the result of fork().  On success
 * child[t].start_time is set to the current time; if the fork fails the
 * failure is reported on stderr and counted in 'errors'.
 *
 * In the child, a failed exec is reported on stderr and the child then
 * terminates with _exit(127).  It must never return from this function,
 * otherwise it would carry on running the parent's code as a second copy of
 * the supervisor.
 */
static void
fork_one(int t)
{
      char *argv[2 * (ARRAY_SIZE(var) + 3)];
      char type_arg[16];                  /* lower case copy of type[t] */
      char flag[ARRAY_SIZE(var)][3];      /* "-X" option strings */
      char *p;
      size_t v;
      int i = 0;

      argv[i++] = "salinfo_decode";
      argv[i++] = "-t";
      snprintf(type_arg, sizeof(type_arg), "%s", type[t]);
      for (p = type_arg; *p; ++p)
            *p = (char)tolower((unsigned char)*p);
      argv[i++] = type_arg;

      for (v = 0; v < ARRAY_SIZE(var); ++v) {
            if (!value[v][t] || parm[v] == NUL)
                  continue;
            flag[v][0] = '-';
            flag[v][1] = parm[v];
            flag[v][2] = NUL;
            argv[i++] = flag[v];
            argv[i++] = value[v][t];
      }
      argv[i] = NULL;

      child[t].pid = fork();
      if (child[t].pid == 0) {
            execvp(argv[0], argv);
            /* Only reached if the exec failed. */
            fprintf(stderr, "%s: exec of %s for %s task failed (%m)\n",
                  prefix, argv[0], type[t]);
            _exit(127);
      }
      if (child[t].pid < 0) {
            fprintf(stderr, "%s: fork for %s task failed (%m)\n",
                  prefix, type[t]);
            ++errors;
            return;
      }
      child[t].start_time = time(NULL);
}

/* Start one task per record type.  If anything was counted in 'errors',
 * SIGKILL every task that did get started and exit with status 1.
 */
static void
fork_all(void)
{
      int idx = 0;

      while (idx < (int)GLOBAL)
            fork_one(idx++);

      if (errors == 0)
            return;

      /* Start up failed: take down whatever is already running. */
      for (idx = 0; idx < (int)GLOBAL; ++idx) {
            if (child[idx].pid <= 0)
                  continue;
            kill(child[idx].pid, SIGKILL);
      }
      exit(1);
}

/* SIGCHLD handler.  Reap every task that has terminated, recording its wait
 * status and marking it as died so that the monitor loop can decide whether
 * to respawn it.
 *
 * Slots without a real pid are skipped: a pid of -1 (left behind by a failed
 * fork) would make waitpid() reap an arbitrary child and credit it to the
 * wrong slot, and a pid of 0 would wait on the whole process group.
 *
 * errno is preserved because the handler can interrupt code that is about to
 * inspect it.
 */
static void
sig_chld (int sig)
{
      int saved_errno = errno;
      int t, status, moretodo = 1;

      (void)sig;
      /* Keep sweeping until a full pass finds nothing new to reap. */
      while (moretodo) {
            moretodo = 0;
            for (t = 0; t < (int)GLOBAL; ++t) {
                  if (child[t].died || child[t].pid <= 0)
                        continue;
                  if (waitpid(child[t].pid, &status, WNOHANG) > 0) {
                        child[t].died = 1;
                        child[t].status = status;
                        moretodo = 1;
                  }
            }
      }
      /* Re-arm, in case the handler was reset when the signal was delivered. */
      signal(SIGCHLD, sig_chld);
      errno = saved_errno;
}

/* Shutdown typically only kills this program, not its children.  Catch all
 * signals and pass them on to the children, then terminate.
 *
 * Only slots that hold a real pid of a task that has not been seen to die
 * are signalled.  A pid of 0 (task never started) would signal the whole
 * process group, a pid of -1 (failed fork) would signal every process that
 * this program is allowed to signal, and the pid of a task that has already
 * been reaped may since have been reused by an unrelated process.
 *
 * _exit() is used instead of exit() because exit() is not safe to call from
 * a signal handler.
 */
static void
sig_all (int sig)
{
      int t;
      for (t = 0; t < (int)GLOBAL; ++t) {
            if (child[t].pid > 0 && !child[t].died)
                  kill(child[t].pid, sig);
      }
      _exit(0);
}

/* Monitor all the children until none of them is left running.
 *
 * Each pass sleeps for up to 10*60*60 seconds (the sleep ends early when a
 * signal such as SIGCHLD is handled) and then checks every child.  A child
 * that has died is respawned unless it hit its retry limit.  If a child dies
 * within 5 seconds (arbitrary) of start up then do not respawn it, it will
 * almost certainly do exactly the same thing again.
 *
 * A slot whose last fork failed has no valid pid.  It is treated as a dead
 * child so that it goes through the normal retry accounting; probing it with
 * kill(-1, 0) would instead test "any process" and make the slot look alive
 * for ever.
 *
 * Returns when every child is in the "given up" state (died == 2).
 */

static void
monitor_all(void)
{
      int t, moretodo = 1;
      while (moretodo) {
            moretodo = 0;
            sleep(10*60*60);
            for (t = 0; t < (int)GLOBAL; ++t) {
                  if (!child[t].died) {
                        if (child[t].pid <= 0 || kill(child[t].pid, 0)) {
                              /* Either the fork failed, or the child
                               * went away without us noticing.
                               */
                              child[t].died = 1;
                              signal(SIGCHLD, sig_chld);
                        } else {
                              /* Still running, nothing to do. */
                              moretodo = 1;
                              continue;
                        }
                  }
                  if (child[t].died == 2)
                        continue;     /* already given up on this one */
                  if (time(NULL) - child[t].start_time <= 5) {
                        child[t].died = 2;
                        syslog(LOG_ERR,
                               "Type %s died very quickly, no respawn, last status was %d",
                               type[t], child[t].status);
                  } else if (++child[t].retries > child[t].max_retries) {
                        child[t].died = 2;
                        syslog(LOG_ERR,
                               "Retries for type %s exceeded, last status was %d",
                               type[t], child[t].status);
                  } else {
                        child[t].died = 0;
                        syslog(LOG_WARNING,
                               "Retry %d for type %s, previous status was %d",
                               child[t].retries, type[t], child[t].status);
                        moretodo = 1;
                        fork_one(t);
                  }
            }
      }
}


/* Entry point.  Takes no command line parameters; all configuration comes
 * from the environment (see get_all_env()).
 *
 * After validating the environment the program forks: the original process
 * returns 0 and the new process carries on as the supervisor.  A failure of
 * that fork is reported and gives exit status 1, rather than being mistaken
 * for the successful parent side of the fork.
 *
 * The supervisor installs sig_all() for signals 1-31 (except SIGSTOP and
 * SIGCONT) and sig_chld() for SIGCHLD, starts all tasks and monitors them.
 * It returns 1 once monitoring ends because every task has been given up on.
 */
int main(int argc, char **argv)
{
      int i;
      pid_t pid;

      prefix = argv[0];
      if (argc != 1) {
            fprintf(stderr, "%s takes no parameters\n", prefix);
            exit(1);
      }
      get_all_env();

      pid = fork();
      if (pid < 0) {
            fprintf(stderr, "%s: fork failed (%m)\n", prefix);
            exit(1);
      }
      if (pid != 0)
            return 0;

      openlog(prefix, LOG_PID, LOG_DAEMON);
      for (i = 1; i < 32; ++i) {
            if (i != SIGSTOP && i != SIGCONT)
                  signal(i, sig_all);
      }
      /* SIGCHLD was covered by the loop above, give it its real handler. */
      signal(SIGCHLD, sig_chld);
      fork_all();
      monitor_all();
      syslog(LOG_ERR, "All children have died, giving up");
      return 1;
}

Generated by  Doxygen 1.6.0   Back to index