Rack power control script
This script started out quite simple and turned into a monster through the gradual addition of features. We use it to reboot, power up or power down our various development machines. Here's what it does:
- uses native SNMP to talk to APC networked power bars, turning machines on/off or rebooting them
- alternatively (or additionally) uses ipmitool to do the same thing directly at the machine, through its IPMI management controller
- finally, talks to a conserver to make sure that it doesn't do something nasty to a machine that someone is currently using
It probably doesn't make much sense outside ETH in its current form, but there are bits of code in there that might prove useful. An obvious first step might be factoring them out into modules (and moving the configuration out of the program!).
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 # Andrew Baumann <andrewb@inf.ethz.ch>, 2008/02/27 2008/11/17 2009/07/02
5
import os
import popen2
import socket
import subprocess
import sys
import time
from optparse import OptionParser

from pysnmp.entity.rfc3413.oneliner import cmdgen
from pysnmp.proto import rfc1902
10
# delay in seconds to wait before querying IPMI status after sending a command;
# main() sleeps for this long before re-reading the power state
IPMI_DELAY = 4

# enable verbose/debug output: debug() prints only while this is true, and
# main() overwrites it from the -v command-line option
debug_enable = False
16
# command constants
class commands:
    """Power commands; used as keys of the IPMI and APC command maps."""
    ON, OFF, RESET = 1, 2, 3
22
class config:
    """Site configuration: SNMP access, power bars and per-host wiring."""

    # SNMP community data passed to every get/set request
    community = cmdgen.CommunityData('my-agent', 'private', 0)

    # transport address (hostname, UDP port) of each power bar
    snmp_port = socket.getservbyname('snmp', 'udp')
    power1 = ('power1', snmp_port)
    power2 = ('power2', snmp_port)

    # host/console name -> list of (powerbar, outlet) pairs for that machine
    ports = dict(
        nos1=[(power1, 16)],
        nos2=[(power2, 16)],
        nos3=[(power1, 15)],
        nos4=[(power2, 15)],
        nos5=[(power1, 14)],
        nos6=[(power2, 14)],
        gruyere=[(power1, 10), (power1, 11), (power1, 12), (power1, 13)],
        sbrinz1=[(power2, 7), (power2, 8)],
        sbrinz2=[(power2, 9), (power2, 10)],
    )

    # host/console name -> (ipmi-host, user, password)
    ipmi = dict(
        gruyere=('gruyere-mgmt', 'foo', 'bar'),
        sbrinz1=('sbrinz1-mgmt', 'foo', 'bar'),
        sbrinz2=('sbrinz2-mgmt', 'foo', 'bar'),
    )
49
# print a message if debug is enabled
def debug(msg):
    """Print msg, prefixed with the program's base name, when debug_enable is set."""
    if not debug_enable:
        return
    progname = os.path.basename(sys.argv[0])
    print(progname + ": " + msg)
54
# run a process and capture its output
def runcmd(cmdline):
    """Run a shell command and return its standard output as a list of lines.

    cmdline is a single string that is handed to the shell.  The returned
    lines keep their line endings.

    Anything the command writes to stderr is reported through debug().  A
    non-zero exit status raises Exception carrying the command line, the
    exit status and any stderr text; stderr output on its own is not
    treated as a failure.
    """
    child = subprocess.Popen(cmdline, shell=True,
                             stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                             universal_newlines=True)
    # communicate() drains stdout and stderr together, so a child that fills
    # one pipe while we are reading the other cannot deadlock us
    out, err = child.communicate()
    # returncode is the real exit code (negative if killed by a signal),
    # not a raw wait() status
    ret = child.returncode

    if ret != 0 or err:
        msg = "'%s' exited %d" % (cmdline, ret)
        if err:
            msg = msg + ', stderr follows:\n' + err
        debug(msg)

        # only raise an exception if they returned non-zero
        if ret != 0:
            raise Exception(msg)

    return out.splitlines(True)
73
class conserver_client:
    """Snapshot, taken at construction, of who holds each console for writing."""

    def __init__(self):
        self.consoles = self.__getstate()

    def who_owns(self, consolename):
        """Return the user attached for writing to consolename.

        Returns None when nobody is, or when the console is unknown.
        """
        return self.consoles.get(consolename)

    def __getstate(self):
        """Run 'console -i' and build a {console name: writer or None} dict.

        Each output line is split on ':'; the first field is used as the
        console name and the fifth as a comma-separated user list.  Each user
        entry is split on '@' and its first two parts are read as the access
        mode and the user name.  A user whose mode contains 'w' is recorded
        as the owner; if several match, the last one listed wins.
        """
        owners = {}
        for raw in runcmd('console -i'):
            fields = raw.strip().split(':')
            conname, _child, _contype, _details, users, _state = fields[:6]
            owners[conname] = None
            if not users:
                continue
            for entry in users.split(','):
                mode, username, _host, _port = entry.split('@')[:4]
                if 'w' in mode:
                    owners[conname] = username
        return owners
93
class ipmi_client:
    """Read and change a machine's power state by running ipmitool."""

    # command constant -> ipmitool "power" sub-command
    command_map = {
        commands.ON: 'on',
        commands.OFF: 'off',
        commands.RESET: 'reset',
    }

    def _mkcmd(self, controller, cmd):
        """Return the ipmitool command line for power sub-command cmd.

        controller is a (host, user, password) tuple.
        """
        host, user, password = controller
        template = 'ipmitool -H %s -U "%s" -P "%s" power %s'
        return template % (host, user, password, cmd)

    def get(self, controller):
        """Return the last word of the first line printed by 'power status'."""
        first_line = runcmd(self._mkcmd(controller, 'status'))[0]
        return first_line.split()[-1]

    def set(self, controller, cmd):
        """Send the power command cmd (a commands constant) to controller."""
        subcommand = self.command_map[cmd]
        runcmd(self._mkcmd(controller, subcommand))
109
class apc_control:
    """Query and switch individual power-bar outlets over SNMP."""

    # OID prefix for outlet control; the outlet number is appended as the
    # final component to address one outlet
    port_control_oid = (1, 3, 6, 1, 4, 1, 318, 1, 1, 12, 3, 3, 1, 1, 4)

    # state names, indexed by (value read from the outlet - 1)
    _state_names = ('on', 'off', 'rebooting')

    # command constant -> value written to the outlet control OID
    command_map = {
        commands.ON: 1,     # immediateOn
        commands.OFF: 2,    # immediateOff
        commands.RESET: 3,  # immediateReboot
        # values not used here:
        #   4 delayedOn, 5 delayedOff, 6 delayedReboot, 7 cancelPendingCommand
    }

    def __init__(self):
        self.cg = cmdgen.CommandGenerator()

    def state_to_string(self, state):
        """Translate a numeric outlet state (1..3) into its name.

        Raises Exception for any value outside that range.  The range is
        checked explicitly because plain list indexing would accept 0 and
        negative numbers and silently return the wrong name.
        """
        if not 1 <= state <= len(self._state_names):
            raise Exception('Invalid state %d' % state)
        return self._state_names[state - 1]

    def _single_varbind(self, reply):
        """Validate an SNMP reply and return its only (oid, value) pair.

        reply is the (errorIndication, errorStatus, errorIndex, varBinds)
        tuple returned by getCmd/setCmd.  Raises Exception if the reply
        reports an error or does not hold exactly one variable binding.
        """
        errorIndication, errorStatus, errorIndex, varBinds = reply
        # explicit check rather than assert, so it survives "python -O"
        if errorIndication or errorStatus:
            raise Exception("SNMP command failed: %s"
                            % (errorIndication or errorStatus,))
        try:
            [(obj, retval)] = varBinds
        except (TypeError, ValueError):
            raise Exception("unexpected data returned from SNMP command")
        return obj, retval

    def get(self, port):
        """Return the state of one outlet as 'on', 'off' or 'rebooting'.

        port is a (dst, portnum) pair: dst is the transport address handed
        to UdpTransportTarget and portnum the outlet number.  Raises
        Exception on an SNMP error, an unexpected reply or an unknown state.
        """
        dst, portnum = port

        # construct a get request
        target = cmdgen.UdpTransportTarget(dst)
        oid = self.port_control_oid + (portnum,)

        reply = self.cg.getCmd(config.community, target, oid)
        obj, retval = self._single_varbind(reply)
        if not (obj == oid) or retval is None:
            raise Exception("unexpected data returned from SNMP command")
        return self.state_to_string(int(retval))

    def set(self, port, cmd):
        """Apply the power command cmd (a commands constant) to one outlet.

        port is a (dst, portnum) pair as for get().  Raises Exception on an
        SNMP error or if the reply does not echo the value that was written.
        """
        dst, portnum = port

        # construct a set request
        target = cmdgen.UdpTransportTarget(dst)
        oid = self.port_control_oid + (portnum,)
        val = rfc1902.Integer32(self.command_map[cmd])

        reply = self.cg.setCmd(config.community, target, (oid, val))
        obj, retval = self._single_varbind(reply)
        if not (retval == val):
            raise Exception("unexpected data returned from SNMP command")
166
def parse_args():
    """Parse the command line and return (victim, options).

    victim is the single positional argument, or None when none was given.
    options carries cmd (a commands constant, or None when no command flag
    was given), ipmi (False when -i was given) and verbose.

    Exits through OptionParser.error() when the victim is not a key of
    config.ports or config.ipmi, when more than one victim is given, or
    when a command is given without a victim.
    """
    parser = OptionParser(usage='%prog [options] [victim]',
                          description='APC powerbar / IPMI control utility')

    # the three command flags all store their constant into options.cmd
    command_flags = (
        ('-u', commands.ON, 'switch outlet on'),
        ('-d', commands.OFF, 'switch outlet off'),
        ('-r', commands.RESET,
         'power cycle (reboot) if already on, switch outlet on if off'),
    )
    for flag, const, text in command_flags:
        parser.add_option(flag, action='store_const', dest='cmd',
                          const=const, help=text)
    parser.add_option('-i', action='store_false', dest='ipmi', default=True,
                      help="don't use IPMI, force use of the power bar")
    parser.add_option('-v', action='store_true', dest='verbose', default=False,
                      help="verbose output")
    parser.set_defaults(cmd=None)

    options, args = parser.parse_args()
    if len(args) > 1:
        parser.error('more than one victim specified')

    victim = None
    if args:
        victim = args[0]
        if victim not in config.ports and victim not in config.ipmi:
            parser.error('unknown victim %s' % victim)

    if victim is None and options.cmd is not None:
        parser.error('no victim specified for command')
    return victim, options
195
def _outlet_states(apc, apccfg):
    """Return the state string of every outlet in apccfg ([] if it is empty or None)."""
    return [apc.get(port) for port in apccfg or []]


def _ipmi_command(ipmi, ipmicfg, cmd):
    """Send cmd through IPMI, then re-check the state and retry once if it did not take."""
    # get current status
    status = ipmi.get(ipmicfg)
    debug("using IPMI: current status is %s" % status)

    # if they asked for a reset but the machine is off, turn it on
    if status == 'off' and cmd == commands.RESET:
        cmd = commands.ON

    # do it
    debug("sending IPMI %s command..." % ipmi_client.command_map[cmd])
    ipmi.set(ipmicfg, cmd)

    # make sure it really happened
    debug("waiting for %d seconds to check status" % IPMI_DELAY)
    time.sleep(IPMI_DELAY)
    status = ipmi.get(ipmicfg)
    debug("IPMI status is now %s" % status)
    if ((cmd == commands.OFF and status == 'on')
            or (cmd in (commands.ON, commands.RESET) and status == 'off')):
        print("Warning: IPMI status is still %s, trying again" % status)
        if cmd == commands.RESET:
            ipmi.set(ipmicfg, commands.ON)
        else:
            ipmi.set(ipmicfg, cmd)


def _run_command(victim, options, apc, conserver, ipmi):
    """Carry out options.cmd on victim; return the process exit status."""
    # check for console ownership; .get() means an unset LOGNAME is treated
    # as "not the owner" instead of crashing with KeyError
    owner = conserver.who_owns(victim)
    if owner and owner != os.environ.get('LOGNAME'):
        sys.stderr.write(
            "Error: according to conserver %s currently owns this console\n"
            "If you really need to do this, force them off first\n" % owner)
        return 1

    apccfg = config.ports.get(victim)
    ipmicfg = config.ipmi.get(victim)

    # find status of powerbar
    apcstate = _outlet_states(apc, apccfg)
    if apcstate:
        debug("current APC status: " + " ".join(apcstate))

    # try to use IPMI if enabled and the port is switched on (or the machine
    # has no power-bar outlets at all)
    if options.ipmi and ipmicfg and (not apccfg or 'on' in apcstate):
        _ipmi_command(ipmi, ipmicfg, options.cmd)
    elif apccfg:
        # use APC on every configured port
        for port in apccfg:
            debug("APC: port %d on %s" % (port[1], port[0][0]))
            apc.set(port, options.cmd)
    else:
        # neither route is usable, e.g. an IPMI-only machine with -i given
        sys.stderr.write(
            "Error: no power bar outlet is configured for %s "
            "and IPMI is not available\n" % victim)
        return 1
    return 0


def _print_status(victim, options, apc, conserver, ipmi):
    """Print a status table for victim, or for every configured machine if it is None."""
    if victim:
        victims = [victim]
    else:
        victims = sorted(set(config.ports) | set(config.ipmi))

    formatstr = "%-10s %-15s %-4s %s"
    print(formatstr % ('VICTIM', 'POWER', 'IPMI', 'OWNER'))
    for name in victims:
        owner = conserver.who_owns(name) or ""

        apccfg = config.ports.get(name)
        apcstate = _outlet_states(apc, apccfg)

        # same rule as the command path: ask IPMI when an outlet is on, or
        # when the machine has no power-bar outlets at all
        ipmicfg = config.ipmi.get(name)
        ipmistate = ""
        if options.ipmi and ipmicfg and (not apccfg or 'on' in apcstate):
            try:
                ipmistate = ipmi.get(ipmicfg)
            except Exception:
                # best effort: show the failure in the table, but let
                # KeyboardInterrupt/SystemExit propagate
                ipmistate = "ERR"

        print(formatstr % (name, " ".join(apcstate), ipmistate, owner))
    return 0


def main():
    """Program entry point; returns the process exit status.

    With a command option (-u/-d/-r) the command is applied to the victim,
    through IPMI when that is enabled, configured and the machine's outlets
    are not all off, otherwise through every configured power-bar outlet.
    Returns 1 without doing anything if conserver reports that another user
    holds the victim's console for writing, or if no control route exists.

    Without a command, prints the power, IPMI and console-owner status of
    the victim, or of every configured machine when no victim was named.
    """
    global debug_enable
    victim, options = parse_args()
    debug_enable = options.verbose

    apc = apc_control()
    c = conserver_client()
    i = ipmi_client()

    if options.cmd:
        return _run_command(victim, options, apc, c, i)
    return _print_status(victim, options, apc, c, i)
290
# when run as a script, main()'s return value becomes the exit status
if __name__ == '__main__':
    raise SystemExit(main())
293