source: trunk/athena/etc/larvnetd/ws.c @ 19803

Revision 19803, 7.6 KB checked in by ghudson, 21 years ago (diff)
Fix fencepost error in packet-reading code.
Line 
1/* Copyright 1998 by the Massachusetts Institute of Technology.
2 *
3 * Permission to use, copy, modify, and distribute this
4 * software and its documentation for any purpose and without
5 * fee is hereby granted, provided that the above copyright
6 * notice appear in all copies and that both that copyright
7 * notice and this permission notice appear in supporting
8 * documentation, and that the name of M.I.T. not be used in
9 * advertising or publicity pertaining to distribution of the
10 * software without specific, written prior permission.
11 * M.I.T. makes no representations about the suitability of
12 * this software for any purpose.  It is provided "as is"
13 * without express or implied warranty.
14 */
15
16/* This file is part of larvnetd, a monitoring server.  It implements
17 * functions to poll and receive notifications of workstation status.
18 */
19
20static const char rcsid[] = "$Id: ws.c,v 1.4 2003-09-15 17:08:35 ghudson Exp $";
21
22#include <sys/types.h>
23#include <sys/socket.h>
24#include <netinet/in.h>
25#include <arpa/inet.h>
26#include <stdlib.h>
27#include <string.h>
28#include <time.h>
29#include <syslog.h>
30#include <netdb.h>
31#include "larvnetd.h"
32#include "larvnet.h"
33#include "timer.h"
34
/* Begin polling this many seconds after the last status notification.
 * The expansion is parenthesized so that the macro behaves as a single
 * value inside larger expressions (e.g. "x / POLLINT").
 */
#define POLLINT         (30 * 60)

/* After this many polls have gone unanswered, decide that we don't know
 * what a machine's status is.
 */
#define POLLTRIES       8

/* After sending this many polls, reschedule for one second later and
 * stop scanning machines.
 */
#define POLLMAX         20

/* Retry schedule, in seconds between tries.  The table is indexed by the
 * number of polls already sent to a machine; the index is capped at the
 * last entry, so the final interval repeats.  The table is read-only.
 */
static const int schedule[] = { 0, 5, 5, 10, 10, 30, 30, 60, 60, 5 * 60,
                                5 * 60, 10 * 60, 10 * 60, 30 * 60, 30 * 60,
                                60 * 60, 60 * 60 };
52
/* Context handed to poll_callback() through ares_gethostbyname().  It is
 * allocated by ws_poll() for each name lookup and freed by the callback.
 */
struct wsarg {
  struct serverstate *state;    /* Server state the lookup belongs to */
  struct machine *machine;      /* Machine whose name is being resolved */
};

/* Comparison helpers for qsort()/bsearch() over the machine list, and the
 * name-resolution completion handler which sends the actual poll packet.
 */
static int ws_sortcomp(const void *elem1, const void *elem2);
static int ws_searchcomp(const void *key, const void *elem);
static void poll_callback(void *arg, int status, struct hostent *host);
61
62void ws_poll(void *arg)
63{
64    struct serverstate *state = (struct serverstate *) arg;
65    struct config *config = &state->config;
66    struct machine *machine;
67    struct wsarg *wsarg;
68    int i, polls_sent = 0, nexttimeout = POLLINT, nextpoll;
69    time_t now;
70
71    syslog(LOG_DEBUG, "ws_poll: startmachine %d", state->startmachine);
72
73    time(&now);
74    for (i = state->startmachine; i < config->nmachines; i++)
75      {
76        machine = &config->machines[i];
77        if (now - machine->laststatus >= POLLINT
78            && now - machine->lastpoll >= schedule[machine->numpolls])
79          {
80            syslog(LOG_DEBUG, "ws_poll: machine %s laststatus %d lastpoll "
81                   "%d numpolls %d", machine->name, machine->laststatus,
82                   machine->lastpoll, machine->numpolls);
83
84            /* Resolve the machine name so that we can send a poll. */
85            wsarg = (struct wsarg *) emalloc(sizeof(struct wsarg));
86            wsarg->state = state;
87            wsarg->machine = machine;
88            ares_gethostbyname(state->channel, machine->name, AF_INET,
89                               poll_callback, wsarg);
90
91            /* If we've already had POLLTRIES polls go unanswered, decide
92             * we don't know if the machine is free or not any more.
93             */
94            if (machine->numpolls == POLLTRIES)
95              {
96                syslog(LOG_DEBUG, "ws_poll: machine %s busy state set to "
97                       "unknown", machine->name);
98                machine->busy = UNKNOWN_BUSYSTATE;
99              }
100
101            /* Update the number of polls sent and the time of the last
102             * poll. */
103            if (machine->numpolls < (sizeof(schedule) / sizeof(int)) - 1)
104              machine->numpolls++;
105            machine->lastpoll = now;
106
107            polls_sent++;
108            if (polls_sent >= POLLMAX)
109              {
110                syslog(LOG_DEBUG, "ws_poll: hit POLLMAX, stalling");
111                nexttimeout = 1;
112                state->startmachine = i + 1;
113                break;
114              }
115          }
116
117        /* Compute the time to the next poll. */
118        if (now - machine->laststatus < POLLINT)
119          nextpoll = machine->laststatus + POLLINT - now;
120        else
121          nextpoll = machine->lastpoll + schedule[machine->numpolls] - now;
122
123        if (nexttimeout > nextpoll)
124          nexttimeout = nextpoll;
125      }
126
127    if (i == config->nmachines)
128      state->startmachine = 0;
129    syslog(LOG_DEBUG, "ws_poll: rescheduling for %d seconds with start "
130           "machine %d", nexttimeout, state->startmachine);
131    timer_set_rel(nexttimeout, ws_poll, state);
132}
133
134void ws_handle_status(int s, struct config *config)
135{
136  struct sockaddr_in sin;
137  int sz = sizeof(sin), count;
138  char buf[LARVNET_MAX_PACKET + 1];
139  const char *name, *arch;
140  struct machine *machine;
141  enum busystate busystate;
142
143  /* Read a packet from the server socket. */
144  count = recvfrom(s, buf, sizeof(buf) - 1, 0, (struct sockaddr *) &sin, &sz);
145  if (count == -1)
146    {
147      syslog(LOG_ERR, "ws_handle_status: recvfrom: %m");
148      return;
149    }
150  if (count == 0)
151    {
152      syslog(LOG_NOTICE, "ws_handle_status: empty packet from %s",
153             inet_ntoa(sin.sin_addr));
154      return;
155    }
156
157  /* Pull the busy state, name, and arch name out of the packet. */
158  buf[count] = 0;
159  busystate = (buf[0] == '1') ? BUSY : (buf[0] == '0') ? FREE
160    : UNKNOWN_BUSYSTATE;
161  name = buf + 1;
162  arch = name + strlen(name) + 1;
163  if (arch >= buf + count)
164    {
165      syslog(LOG_NOTICE, "ws_handle_status: invalid packet from %s",
166             inet_ntoa(sin.sin_addr));
167      return;
168    }
169
170  syslog(LOG_DEBUG, "ws_handle_status: addr %s busy %c name %s arch %s",
171         inet_ntoa(sin.sin_addr), buf[0], name, arch);
172
173  /* We could, at this point, resolve the given name and see if
174   * sin.sin_addr is one of its interface addresses.  That would make
175   * it a little harder to spoof, but not much, so we won't bother.
176   */
177
178  machine = ws_find(config, name);
179  if (machine)
180    {
181      syslog(LOG_DEBUG, "ws_handle_status: machine %s set to state %s arch %s",
182             machine->name, (busystate == BUSY) ? "busy"
183             : (busystate == FREE) ? "free" : "unknown", arch);
184      machine->busy = busystate;
185      if (machine->arch)
186        free(machine->arch);
187      machine->arch = estrdup(arch);
188      time(&machine->laststatus);
189      machine->numpolls = 0;
190    }
191}
192
193struct machine *ws_find(struct config *config, const char *name)
194{
195  return bsearch(name, config->machines, config->nmachines,
196                 sizeof(struct machine), ws_searchcomp);
197}
198
199void ws_sort(struct config *config)
200{
201  qsort(config->machines, config->nmachines, sizeof(struct machine),
202        ws_sortcomp);
203}
204
205static void poll_callback(void *arg, int status, struct hostent *host)
206{
207  struct wsarg *wsarg = (struct wsarg *) arg;
208  struct serverstate *state = wsarg->state;
209  struct machine *machine = wsarg->machine;
210  struct sockaddr_in sin;
211  char dummy = 0, *errmem;
212
213  free(wsarg);
214  if (status != ARES_SUCCESS)
215    {
216      if (status == ARES_EDESTRUCTION)
217        {
218          syslog(LOG_DEBUG, "poll_callback: query for %s halted for channel "
219                 "destruction", machine->name);
220        }
221      else
222        {
223          syslog(LOG_ERR, "poll_callback: could not resolve ws name %s: %s",
224                 machine->name, ares_strerror(status, &errmem));
225          ares_free_errmem(errmem);
226        }
227      return;
228    }
229  /* Send a poll. */
230  memset(&sin, 0, sizeof(sin));
231  sin.sin_family = AF_INET;
232  memcpy(&sin.sin_addr, host->h_addr, sizeof(sin.sin_addr));
233  sin.sin_port = state->poll_port;
234  sendto(state->server_socket, &dummy, 1, 0,
235         (struct sockaddr *) &sin, sizeof(sin));
236
237  syslog(LOG_DEBUG, "poll_callback: query for ws name %s yielded %s; poll "
238         "sent", machine->name, inet_ntoa(sin.sin_addr));
239}
240
241static int ws_searchcomp(const void *key, const void *elem)
242{
243  const char *s = (const char *) key;
244  const struct machine *m = (const struct machine *) elem;
245
246  return strcasecmp(s, m->name);
247}
248
249static int ws_sortcomp(const void *elem1, const void *elem2)
250{
251  const struct machine *m1 = (const struct machine *) elem1;
252  const struct machine *m2 = (const struct machine *) elem2;
253
254  return strcasecmp(m1->name, m2->name);
255}
Note: See TracBrowser for help on using the repository browser.