※リポジトリは、https://github.com/linux-ha-japan/pm_logconv-hb-1.0 へ移行しました。
Pacemaker 対応ログメッセージ変換機能。
Heartbeat-2.1.4 用 hb-logconv(*) のPacemaker1.0 + Heartbeat スタック対応版。
(*) http://sourceforge.jp/projects/linux-ha/releases/?package_id=10282
Revision | 2d98f677a5a2ca5b17b934d3a60418dff44808b8 (tree) |
---|---|
Time | 2010-10-06 10:47:39 |
Author | YoshihikoSATO |
Committer | YoshihikoSATO |
Initial commit for pm_logconv - Pacemaker and Heartbeat log convert tool
@@ -0,0 +1,34 @@ | ||
# Automake input for pm_logconv: installs the log converter script and its
# default configuration file, and provides srpm/rpm packaging targets.

logconv_NAME = pm_logconv
logconv_SCRIPTS = $(logconv_NAME).py
logconv_CONFIG = $(logconv_NAME).conf

MAINTAINERCLEANFILES = Makefile.in
# @HA_NOARCHDATAHBDIR@ and @CONFIG_DIR@ are substituted by configure
# (see configure.ac: AC_SUBST of HA_NOARCHDATAHBDIR / CONFIG_DIR).
logconvdir = @HA_NOARCHDATAHBDIR@/$(logconv_NAME)
logconvcfdir = @CONFIG_DIR@

SHAREDIR = $(logconvdir)
SPEC = $(logconv_NAME).spec
TARFILE = $(PACKAGE_NAME)-$(VERSION).tar.gz
EXTRA_DIST = $(logconv_SCRIPTS) $(logconv_CONFIG) $(SPEC)

# Install the default configuration file into @CONFIG_DIR@ after the
# normal data install step.
install-data-hook:
	@$(NORMAL_INSTALL)
	test -z "$(logconvcfdir)" || $(mkdir_p) "$(DESTDIR)$(logconvcfdir)"
	$(INSTALL_DATA) "$(logconv_CONFIG)" "$(DESTDIR)$(logconvcfdir)"

# Build the distribution tarball used as the rpm source.
$(TARFILE):
	$(MAKE) dist

RPM_ROOT = $(shell pwd)
RPMBUILDOPTS = --define "_sourcedir $(RPM_ROOT)" --define "_specdir $(RPM_ROOT)"

# Build a source rpm from a freshly generated tarball.
srpm: clean
	rm -f $(TARFILE)
	$(MAKE) $(SPEC) $(TARFILE)
	rpmbuild $(RPMBUILDOPTS) --nodeps -bs --rmsource $(SPEC)

# Build binary and source rpms from a freshly generated tarball.
rpm: clean
	rm -f $(TARFILE)
	$(MAKE) $(SPEC) $(TARFILE)
	rpmbuild $(RPMBUILDOPTS) -ba --rmsource $(SPEC)
@@ -0,0 +1,11 @@ | ||
#!/bin/sh
# Bootstrap script: regenerate the autoconf/automake build files.
# Run this to generate all the initial makefiles, etc.

echo Building configuration system...
if ! autoreconf -i; then
	exit 1
fi
rm -rf autom4te.cache
echo Now run ./configure
@@ -0,0 +1,56 @@ | ||
# -*- Autoconf -*-
# Process this file with autoconf to produce a configure script.
#
# configure.ac for pm_logconv-hb: locates python and computes the
# installation directories substituted into Makefile.am.

AC_PREREQ([2.65])
AC_INIT([pm_logconv-hb], [1.0])
AM_INIT_AUTOMAKE
AC_PREFIX_DEFAULT(/usr)
# Sub-directory name under $datadir; forms HA_NOARCHDATAHBDIR below.
PM_PKG="pacemaker"

#
# check for python
#
AM_PATH_PYTHON(2.4,,:)
AC_PATH_PROGS(PYTHON, python)
AC_MSG_CHECKING(where is python installed)
if test "x${PYTHON}" = x; then
	PYTHON="/usr/bin/env python";
fi
AC_MSG_RESULT(using $PYTHON);

# Expand any ${...} references inside $prefix and fall back to /usr
# when the user gave no --prefix (autoconf leaves the value as NONE).
prefix_orig="$prefix"
prefix=`eval echo "$prefix"`
case $prefix in
	NONE) prefix=/usr;;
esac
# var VALUE DEFAULT: echo VALUE with ${...} expanded, or echo DEFAULT
# when the (expanded) VALUE is empty or NONE.
var(){
	case $1 in
		*'${'*) res=`eval echo "$1"`;;
		*) res="$1";;
	esac
	case "$res" in
		""|NONE) echo "$2";;
		*) echo "$res";;
	esac
}

#
# Keep copy of original (default) localstatedir
#
localstatedir_orig="$localstatedir"

exec_prefix=`var "$exec_prefix" "$prefix"`
datadir=`var "$datadir" "$prefix/share"`
config_dir=`var "$config_dir" "/etc/"`

# Output variables consumed by Makefile.am (@CONFIG_DIR@ etc.).
CONFIG_DIR="$config_dir"
AC_SUBST(CONFIG_DIR)
HA_DATADIR="$datadir"
AC_SUBST(HA_DATADIR)
HA_NOARCHDATAHBDIR="$HA_DATADIR/$PM_PKG"
AC_SUBST(HA_NOARCHDATAHBDIR)
AC_PROG_LN_S

AC_CONFIG_FILES([Makefile])
AC_OUTPUT
@@ -0,0 +1,416 @@ | ||
1 | +# | |
2 | +# pm_logconv.conf : Config file of pm_logconv for Pacemaker and Heartbeat | |
3 | +# | |
4 | +# support version | |
5 | +# Pacemaker : stable-1.0 (1.0.9 or more) | |
6 | +# Heartbeat : 3.0.3 | |
7 | +# | |
8 | + | |
9 | +[Settings] | |
10 | +#ha_log_path = /var/log/ha-log | |
11 | +#output_path = /var/log/pm_logconv.out | |
12 | +#hostcache_path = /var/lib/heartbeat/hostcache | |
13 | +#syslogformat = True | |
14 | +#reset_interval = 60 | |
15 | +#attribute_pingd = default_ping_set, lt, 100 | |
16 | +#attribute_diskd = diskcheck_status, eq, ERROR | |
17 | +#attribute_diskd_inner = diskcheck_status_internal, eq, ERROR | |
18 | +#logconv_logfacility = daemon | |
19 | +#act_rsc = prmExPostgreSQLDB, prmApPostgreSQLDB | |
20 | + | |
21 | + | |
22 | +### | |
23 | +# For Resource event. | |
24 | +### | |
25 | +#MsgNo.1-1, 2-1, 4-1, 5-1, 17-1, 18-1 | |
26 | +[Resource tries to operation] | |
27 | +pattern_start=crmd,info:,do_lrm_rsc_op: Performing key,op,start | |
28 | +pattern_stop=crmd,info:,do_lrm_rsc_op: Performing key,op,stop | |
29 | +pattern_promote=crmd,info:,do_lrm_rsc_op:,Performing key, op,promote | |
30 | +pattern_demote=crmd,info:,do_lrm_rsc_op:,Performing key,op,demote | |
31 | +func=try_to_operate | |
32 | + | |
33 | +#MsgNo.1-2, 2-2, 4-2, 5-2, 17-2, 18-2 | |
34 | +[Resource operation succeeded] | |
35 | +pattern_start=crmd,info:,process_lrm_event:,LRM operation,start,rc=0,!status=,ok | |
36 | +pattern_stop=crmd,info:,process_lrm_event:,LRM operation,stop,rc=0,!status=,ok | |
37 | +pattern_promote=crmd,info:,process_lrm_event,LRM operation,promote,rc=0,!status=,ok | |
38 | +pattern_demote=crmd,info:,process_lrm_event,LRM operation,demote,rc=0,!status=,ok | |
39 | +func=operation_succeeded | |
40 | + | |
41 | +#MsgNo.1-3, 2-3, 3-1, 4-3, 5-3, 17-3, 19-1 | |
42 | +[Resource operation failed] | |
43 | +pattern_start=crmd,info:,process_lrm_event:,LRM operation,start,!rc=0,!status= | |
44 | +pattern_stop=crmd,info:,process_lrm_event:,LRM operation,stop,!rc=0,!status= | |
45 | +pattern_monitor=crmd,info:,process_lrm_event:,LRM operation,monitor,!monitor_0,!rc=0,!rc=8,!rc=7,!status= | |
46 | +pattern_promote=crmd,info:,process_lrm_event:,LRM operation,promote,!rc=0,!status= | |
47 | +pattern_demote=crmd,info:,process_lrm_event:,LRM operation,demote,!rc=0,!status= | |
48 | +func=operation_failed | |
49 | +loglevel=ERROR | |
50 | + | |
51 | +#MsgNo.1-4, 2-4, 3-3, 4-4, 5-4 | |
52 | +[OCF resource operation timedout] | |
53 | +pattern_start=crmd,ERROR:,process_lrm_event:,LRM operation,start,!status=,Timed Out | |
54 | +pattern_stop=crmd,ERROR:,process_lrm_event:,LRM operation,stop,!status=,Timed Out | |
55 | +pattern_monitor=crmd,ERROR:,process_lrm_event:,LRM operation,monitor,!monitor_0,!status=,Timed Out | |
56 | +pattern_promote=crmd,ERROR:,process_lrm_event,LRM operation,promote,!status=,Timed Out | |
57 | +pattern_demote=crmd,ERROR:,process_lrm_event,LRM operation,demote,!status=,Timed Out | |
58 | +func=operation_timedout_ocf | |
59 | + | |
60 | +#MsgNo.3-2, 19-2 | |
61 | +[Resource failure] | |
62 | +pattern_monitor_rcs=crmd,info:,process_lrm_event:,LRM operation,monitor,!monitor_0,rc=7,!status= | |
63 | +pattern_monitor_stonith=crmd,info:,process_lrm_event:,LRM operation,monitor,!monitor_0,rc=7,!status= | |
64 | +func=detect_rsc_failure | |
65 | +loglevel=ERROR | |
66 | + | |
67 | +### | |
68 | +# For Node status event. | |
69 | +### | |
70 | +#MsgNo.6-1, 6-2 | |
71 | +[Node status updated] | |
72 | +pattern_dead=crmd,notice:,crmd_ha_status_callback:,Status update:,Node,now has status,dead | |
73 | +pattern_active=crmd,notice:,crmd_ha_status_callback:,Status update:,Node,now has status,active | |
74 | +func=node_status_updated | |
75 | + | |
76 | +### | |
77 | +# For Interconnect-LAN status event | |
78 | +# and Network status event (detected by pingd). | |
79 | +### | |
80 | +#MsgNo.7-1 | |
81 | +[Interconnect-LAN status dead] | |
82 | +pattern=heartbeat,info:,Link,dead | |
83 | +func=detect_iconnlan_dead | |
84 | +loglevel=WARN | |
85 | + | |
86 | +#Msg No.7-2 | |
87 | +[Interconnect-LAN or Network status up] | |
88 | +pattern=heartbeat,info:,Link,up | |
89 | +func=detect_network_up | |
90 | + | |
91 | +#MsgNo.8-1 | |
92 | +[Network status dead] | |
93 | +pattern=pingd,info:,stand_alone_ping:,is unreachable | |
94 | +func=detect_node_dead | |
95 | +loglevel=ERROR | |
96 | + | |
97 | +### | |
98 | +# For Disk status event (detected by diskd). | |
99 | +### | |
100 | +#MsgNo.9-1 | |
101 | +[Detect disk error] | |
102 | +pattern=diskd,WARN:,check_status:,disk status is changed,new_status,ERROR | |
103 | +func=detect_disk_error | |
104 | +loglevel=ERROR | |
105 | + | |
106 | +### | |
107 | +# For respawn process event. | |
108 | +### | |
109 | +#MsgNo.10-1 | |
110 | +[Respawn process starts] | |
111 | +pattern=heartbeat,info:,Starting,as,uid,gid | |
112 | +func=respawn_start | |
113 | + | |
114 | +#MsgNo.10-2 | |
115 | +[Respawn process exited abnormally] | |
116 | +pattern=heartbeat,Managed,process,exited with return code | |
117 | +func=respawn_exited_abnormally | |
118 | +loglevel=WARN | |
119 | + | |
120 | +#MsgNo.10-3 | |
121 | +[Respawn process killed] | |
122 | +pattern=heartbeat,WARN:,Managed,process,killed by signal | |
123 | +func=respawn_killed | |
124 | + | |
125 | +#MsgNo.10-4 | |
126 | +[Respawn process dumped core] | |
127 | +pattern=heartbeat,ERROR:,Managed,process,dumped core | |
128 | +func=respawn_dumped_core | |
129 | +loglevel=WARN | |
130 | + | |
131 | +#MsgNo.10-5 | |
132 | +[Respawn process went away] | |
133 | +pattern=heartbeat,ERROR:,Managed,process,went away strangely | |
134 | +func=respawn_went_away | |
135 | +loglevel=WARN | |
136 | + | |
137 | +#MsgNo.10-6 | |
138 | +[Respawn process exited normally] | |
139 | +pattern=heartbeat,info:,killing,process group,with signal | |
140 | +func=respawn_exited_normally | |
141 | + | |
142 | +#MsgNo.10-7 | |
143 | +[Respawning too fast in a short term] | |
144 | +pattern=heartbeat,ERROR:,Client,respawning too fast | |
145 | +func=respawn_too_fast | |
146 | + | |
147 | +### | |
148 | +# For Fail Over. These are only for DC node. | |
149 | +### | |
150 | +#MsgNo.F0-1, F9-1, F10-1 | |
151 | +[Detect calculation starts] | |
152 | +pattern=crmd,info:,do_state_transition:,State transition,-> S_POLICY_ENGINE,!I_SHUTDOWN | |
153 | +func=detect_pe_calc | |
154 | +loglevel=WARN | |
155 | + | |
156 | +#MsgNo.F0-2, F12-1, F12-2 | |
157 | +[FailOver complete] | |
158 | +pattern=crmd,info:,do_state_transition:,State transition,-> S_IDLE | |
159 | +func=detect_fo_complete | |
160 | +loglevel=WARN | |
161 | + | |
162 | +#MsgNo.F1-1, F1-2, F2-1, F2-2, F3-1, F3-2, F4-1, F4-2, F6-1, F6-2 | |
163 | +[Action failure] | |
164 | +pattern=crmd,WARN:,update_failcount:,Updating failcount for | |
165 | +func=dc_detect_failure | |
166 | +loglevel=WARN | |
167 | +fotrigger=1 | |
168 | + | |
169 | +#MsgNo.F7-1, F7-2, F7-3, F7-4, F8-1 | |
170 | +[Node failure] | |
171 | +pattern_shut=crmd,WARN:,match_down_event:,No match for shutdown action on | |
172 | +func=dc_detect_node_failure | |
173 | +fotrigger=3 | |
174 | + | |
175 | +#MsgNo.F11-1 | |
176 | +#The message is not output immediately, output when F/O is complete. | |
177 | +[Add Resource start action] | |
178 | +pattern=pengine,notice:,LogActions: Start | |
179 | +func=add_rsc_start | |
180 | + | |
181 | +#MsgNo.F11-2 | |
182 | +#The message is not output immediately, output when F/O is complete. | |
183 | +[Add Resource stop action] | |
184 | +pattern=pengine,notice:,LogActions: Stop resource | |
185 | +func=add_rsc_stop | |
186 | + | |
187 | +#MsgNo.F11-3, F11-8, F11-9 | |
188 | +#The message is not output immediately, output when F/O is complete. | |
189 | +[Add no action] | |
190 | +pattern_leave_start=pengine,notice:,LogActions: Leave resource | |
191 | +pattern_restart=pengine,notice:,LogActions: Restart resource | |
192 | +func=add_no_action | |
193 | + | |
194 | +#MsgNo.F11-4 | |
195 | +#The message is not output immediately, output when F/O is complete. | |
196 | +[Resource cannot run anywhere] | |
197 | +pattern=pengine,WARN:,native_color:,Resource,cannot run anywhere | |
198 | +func=detect_cannot_run_anywhere | |
199 | + | |
200 | +#MsgNo.F11-5 | |
201 | +#The message is not output immediately, output when F/O is complete. | |
202 | +[Detect resource unmanaged] | |
203 | +pattern=pengine,info:,native_color:,Unmanaged resource,allocated to | |
204 | +func=detect_rsc_unmanaged | |
205 | + | |
206 | +#MsgNo.F11-6 | |
207 | +#The message is not output immediately, output when F/O is complete. | |
208 | +[Add Resource move action] | |
209 | +pattern=pengine,notice:,LogActions: Move resource | |
210 | +func=add_rsc_move | |
211 | + | |
212 | +### | |
213 | +# For DC election. | |
214 | +### | |
215 | +#Msg No.13-2 | |
216 | +[DC election is complete] | |
217 | +pattern=crmd,info:,update_dc:,Set DC to | |
218 | +func=dc_election_complete | |
219 | + | |
220 | +#Msg No.13-5 | |
221 | +[Detect unset DC] | |
222 | +pattern=crmd,info:,update_dc:,Unset DC | |
223 | +func=detect_unset_dc | |
224 | + | |
225 | +### | |
226 | +# For Heartbeat service shutdown. | |
227 | +### | |
228 | +#Msg No.14-1 (only for DC) | |
229 | +[Corosync on the node in the cluster want to shutdown] | |
230 | +pattern=crmd,info:,handle_shutdown_request:,Creating shutdown request for | |
231 | +func=detect_shutdown_request | |
232 | + | |
233 | +#Msg No.14-2 | |
234 | +[Heartbeat shutdown complete.] | |
235 | +pattern=heartbeat,info:,Heartbeat shutdown complete | |
236 | +func=detect_hb_shutdown | |
237 | + | |
238 | +#Msg No.14-3 | |
239 | +[Pacemaker is shutting down.] | |
240 | +pattern=crmd,info:,crm_shutdown: Requesting shutdown | |
241 | +func=detect_pcmk_shutting_down | |
242 | + | |
243 | +#Msg No.14-4 (for DC node shutdown) | |
244 | +[DC node want to shutdown] | |
245 | +pattern=cib,info:,cib_process_shutdown_req:,Shutdown REQ from | |
246 | +func=detect_dc_shutdown_request | |
247 | + | |
248 | +#Msg No.14-5 | |
249 | +[Send shutdown request to DC.] | |
250 | +pattern=crmd,info:,do_shutdown_req: Sending shutdown request to DC: | |
251 | +func=detect_send_shutdown | |
252 | + | |
253 | +### | |
254 | +# For logging daemon event. | |
255 | +### | |
256 | +#Msg No.15-1 | |
257 | +[Detect logd started] | |
258 | +pattern=logd,info:,logd started with | |
259 | +func=output_original_log | |
260 | + | |
261 | +#Msg No.16-1 | |
262 | +[logd is shutting down.] | |
263 | +pattern=logd,info:,logd_term_write_action:,received | |
264 | +func=output_static_msg | |
265 | + | |
266 | +#Msg No.16-2 | |
267 | +[logd stopped.] | |
268 | +pattern=logd,info:,Exiting write process | |
269 | +func=output_static_msg | |
270 | + | |
271 | +### | |
272 | +# For STONITH resource operation timed out. | |
273 | +### | |
274 | +#For Msg No.17-4, 19-3 | |
275 | +#The message is not output immediately, output when operation complete. | |
276 | +[Resource operation timed out for stonith] | |
277 | +pattern=stonithd,WARN:,process,timed out,try,Killing with signal | |
278 | +func=detect_rscop_timedout_stonithd | |
279 | + | |
280 | +### | |
281 | +# For fence operation. | |
282 | +### | |
283 | +#Msg No.20-1, No21-1 | |
284 | +[fence operation start] | |
285 | +pattern=stonithd,info:,stonith_operate_locally,sending fencing op,for,to | |
286 | +func=fence_op_started | |
287 | + | |
288 | +#Msg No.20-2 | |
289 | +[fence operation succeeded] | |
290 | +pattern=stonithd,info:,Succeeded to STONITH the node | |
291 | +func=fence_op_succeeded | |
292 | + | |
293 | +#Msg No.20-3, 21-3 | |
294 | +[fence operation failed] | |
295 | +pattern=stonithd,info:,failed to STONITH node,with local device | |
296 | +func=fence_op_failed | |
297 | +loglevel=ERROR | |
298 | + | |
299 | +#Msg No.20-4, 21-4 | |
300 | +[fence operation timedout] | |
301 | +pattern=stonithd,ERROR:,Failed to STONITH the node,optype,op_result,TIMEOUT | |
302 | +func=fence_op_timedout | |
303 | + | |
304 | +### | |
305 | +# For attribute event. | |
306 | +### | |
307 | +#Msg No.22-1 | |
308 | +[Detect attribute updated] | |
309 | +pattern=attrd,info:,attrd_perform_update:,Sent update,!fail-count-,!last-failure-,!probe_complete,!shutdown,!master- | |
310 | +func=detect_attr_updated | |
311 | + | |
312 | +#Msg No.22-2 | |
313 | +[Detect attribute deleted] | |
314 | +pattern=attrd,info:,attrd_perform_update:,Sent delete,!delete -,!fail-count-,!last-failure-,!probe_complete,!shutdown,!master- | |
315 | +func=detect_attr_deleted | |
316 | + | |
317 | +### | |
318 | +# For Heartbeat service starts. | |
319 | +### | |
320 | +#Msg No.23-1 | |
321 | +[Detect heartbeat is starting] | |
322 | +pattern=heartbeat,info:,Configuration validated,Starting heartbeat | |
323 | +func=detect_hb_start | |
324 | + | |
325 | +#Msg No.23-3 | |
326 | +#It's just for clear ConvertStatus. Output nothing. | |
327 | +[Detect localhost status is set to up] | |
328 | +pattern=heartbeat,info:,Local status now set to,up | |
329 | +func=detect_localstat_up | |
330 | +ignoremsg=True | |
331 | + | |
332 | +### | |
333 | +# For log message dropping. | |
334 | +### | |
335 | +#Msg No.25-1 | |
336 | +[Detect log dropped] | |
337 | +pattern=ERROR:,cl_log:,messages were dropped | |
338 | +func=output_original_log | |
339 | + | |
340 | +### | |
341 | +# For Core process event. | |
342 | +### | |
343 | +#Msg No.28-1 | |
344 | +[FIFO process start to restart] | |
345 | +pattern=heartbeat,WARN:,Restarting,process | |
346 | +func=output_original_log | |
347 | + | |
348 | +#Msg No.28-2 | |
349 | +[FIFO process restart failed] | |
350 | +pattern=heartbeat,ERROR:,restart failed,Restarting heartbeat | |
351 | +func=output_original_log | |
352 | + | |
353 | +#Msg No.28-3 | |
354 | +[I/O processes failed] | |
355 | +pattern=heartbeat,ERROR:,process died,Beginning communications restart process for comm channel | |
356 | +func=output_original_log | |
357 | +loglevel=WARN | |
358 | + | |
359 | +#Msg No.28-4 | |
360 | +[I/O processes start to restart] | |
361 | +pattern=heartbeat,ERROR:,Both comm processes for channel,have died,Restarting | |
362 | +func=output_original_log | |
363 | +loglevel=WARN | |
364 | + | |
365 | +#Msg No.28-5 | |
366 | +[I/O processes restart succeeded] | |
367 | +pattern=heartbeat,info:,Communications restart succeeded | |
368 | +func=output_original_log | |
369 | + | |
370 | +#Msg No.28-6 | |
371 | +[I/O processes failed to restart] | |
372 | +pattern=heartbeat,ERROR:,Communications restart failed,Will try again later | |
373 | +func=output_original_log | |
374 | + | |
375 | +### | |
376 | +# For pengine event. | |
377 | +### | |
378 | +#Msg No.29-1 | |
379 | +[pengine start] | |
380 | +pattern=crmd,info:,start_subsystem:,Starting sub-system | |
381 | +func=crmd_subsystem_start | |
382 | + | |
383 | +#Msg No.29-2 | |
384 | +[pengine exit] | |
385 | +pattern=crmd,info:,crmdManagedChildDied:,Process,exited \(signal=0,exitcode= | |
386 | +func=crmd_subsystem_exit | |
387 | + | |
388 | +#Msg No.29-3 | |
389 | +[pengine kill] | |
390 | +pattern=crmd,info:,crmdManagedChildDied:,Process,exited \(signal=,exitcode=,!\(signal=0 | |
391 | +func=crmd_subsystem_kill | |
392 | +loglevel=ERROR | |
393 | + | |
394 | +### | |
395 | +# Other process's failure | |
396 | +### | |
397 | +#Msg No.30-1 | |
398 | +[master control process failure] | |
399 | +pattern=heartbeat,CRIT:,Emergency Shutdown:,Master Control process died | |
400 | +func=output_original_log | |
401 | +loglevel=ERROR | |
402 | + | |
403 | +#Msg No.30-2 | |
404 | +[OS reboot because of process's failure] | |
405 | +pattern=heartbeat,EMERG:,Rebooting system,Reason: | |
406 | +func=output_original_log | |
407 | +loglevel=ERROR | |
408 | + | |
409 | +### | |
410 | +# Others. | |
411 | +### | |
412 | +#Msg No.27-1 | |
413 | +[Detect a request for getting DC node state] | |
414 | +pattern=crmd,info:,handle_request:,Current ping state: | |
415 | +func=detect_dcstat_req | |
416 | +ignoremsg=True |
@@ -0,0 +1,3423 @@ | ||
1 | +#!/usr/bin/python | |
2 | +# -*- coding: utf-8 -*- | |
3 | + | |
4 | +# pm_logconv : Pacemaker and Heartbeat log converter | |
5 | +# | |
6 | +# support version | |
7 | +# Pacemaker : stable-1.0 (1.0.9 or more) | |
8 | +# Heartbeat : 3.0.3 | |
9 | +# | |
10 | +# Copyright (C) 2010 NIPPON TELEGRAPH AND TELEPHONE CORPORATION | |
11 | +# | |
12 | +# This program is free software; you can redistribute it and/or modify | |
13 | +# it under the terms of the GNU General Public License as published by | |
14 | +# the Free Software Foundation; either version 2 of the License, or | |
15 | +# (at your option) any later version. | |
16 | +# | |
17 | +# This program is distributed in the hope that it will be useful, | |
18 | +# but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 | +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
20 | +# GNU General Public License for more details. | |
21 | +# | |
22 | +# You should have received a copy of the GNU General Public License | |
23 | +# along with this program; if not, write to the Free Software | |
24 | +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
25 | + | |
import os, sys, signal, time, datetime, syslog, types, glob, pickle
import ConfigParser, re, commands, operator, string
from optparse import OptionParser
from stat import ST_INO, ST_NLINK, ST_SIZE, S_IRUSR, S_IWUSR
from socket import gethostname
from errno import ESRCH

#
# version number of pm_logconv.
#
VERSION = "1.0"

#
# system's host name; used when formatting file-based log output.
#
try:
	HOSTNAME = gethostname()
except Exception, strerror:
	print >> sys.stderr, "Error: gethostname() error occurred.", strerror
	sys.exit(1)

#
# default settings.
# (used when not specified with the configuration file or a
# command line option.)
#
CONFIGFILE = "/etc/pm_logconv.conf"
HA_LOGFILE = "/var/log/ha-log"
OUTPUTFILE = "/var/log/pm_logconv.out"
SYSLOGFORMAT = True
HOSTCACHE = "/var/lib/heartbeat/hostcache"
HACFFILE = "/etc/ha.d/ha.cf"

#
# Timeout for resetting the log convert status.
# NOTE(review): the original comment said "(ms)", but the default of 60
# and the matching "reset_interval = 60" in pm_logconv.conf suggest
# seconds — confirm against the code that consumes it.
#
RESET_INTERVAL = 60

# Flags describing the kind of failure:
#   resource failure 1 (resource error)
#   score failure    2 (pingd rsc location)
#   node failure     3 (split brain)
FAIL_RSC = "1"
FAIL_SCORE = "2"
FAIL_NODE = "3"

# Flags describing resource status (used together with the failure
# flags above):
#   resource start   1
#   resource move    2
#   resource stop    3
#   resource stopped 4
FAIL_STR = "1"
FAIL_MOVE = "2"
FAIL_STP = "3"
FAIL_STPD = "4"

#
# A list of [attribute_name, operation, attribute_value].
# The setting is described in CONFIGFILE.
# These are to decide whether some failure occurred or not
# when the cluster status changes to S_POLICY_ENGINE.
#
attrRuleList = list()

# A list of resource-ids.
# If all of the specified resources are active,
# it means "F/O succeeded."
# If not, "F/O failed."
# The setting is described in CONFIGFILE.
actRscList = list()

#
# A list of log-conversion patterns.
# The setting is described in CONFIGFILE.
#
lconvRuleList = list()

#
# shutdown flag; set to True when SIGINT or SIGTERM is received.
#
do_shutdown = False

#
# command name for getting the current status of the cluster.
#
CMD_CRM_ATTR = "crm_attribute"

#
# command name for getting the current node status of the cluster.
#
CMD_CRM_NODE = "crm_node"

#
# command name for getting the DC node status.
#
CMD_CRMADMIN = "crmadmin"

#
# print the pm_logconv version number on stdout and terminate.
#
def print_version(option, opt, value, parser):
	# optparse callback signature; none of the arguments are used.
	sys.stdout.write(VERSION + "\n")
	sys.exit(0)
128 | + | |
#
# signal handler: only set the shutdown flag to True; the main loop is
# expected to notice it and terminate cleanly.
#
def shutdown_logconv(signum, frame):
	global do_shutdown
	pm_log.info("shutdown_logconv: received signal [%d], " \
		"scheduling shutdown.." % signum)
	do_shutdown = True

#
# install the signal handler for SIGINT and SIGTERM.
#
signal.signal(signal.SIGINT, shutdown_logconv)
signal.signal(signal.SIGTERM, shutdown_logconv)
143 | + | |
144 | + | |
145 | +class LogconvLog: | |
146 | + LOG_EMERG = 0 | |
147 | + LOG_ALERT = 1 | |
148 | + LOG_CRIT = 2 | |
149 | + LOG_ERR = 3 | |
150 | + LOG_WARNING = 4 | |
151 | + LOG_NOTICE = 5 | |
152 | + LOG_INFO = 6 | |
153 | + LOG_DEBUG = 7 | |
154 | + | |
155 | + syspriority = [ syslog.LOG_EMERG, syslog.LOG_ALERT, syslog.LOG_CRIT, | |
156 | + syslog.LOG_ERR, syslog.LOG_WARNING, syslog.LOG_NOTICE, | |
157 | + syslog.LOG_INFO, syslog.LOG_DEBUG ] | |
158 | + | |
159 | + prioritystr = [ "EMERG", "ALERT", "CRIT", "ERROR", "WARN", | |
160 | + "notice", "info", "debug" ] | |
161 | + | |
162 | + DEFAULT_LOGOPT = syslog.LOG_CONS | |
163 | + DEFAULT_FACILITY = syslog.LOG_DAEMON | |
164 | + | |
165 | + facility_map = { | |
166 | + "kern": syslog.LOG_KERN, | |
167 | + "user": syslog.LOG_USER, | |
168 | + "mail": syslog.LOG_MAIL, | |
169 | + "daemon": syslog.LOG_DAEMON, | |
170 | + "auth": syslog.LOG_AUTH, | |
171 | + "syslog": syslog.LOG_SYSLOG, | |
172 | + "lpr": syslog.LOG_LPR, | |
173 | + "news": syslog.LOG_NEWS, | |
174 | + "uucp": syslog.LOG_UUCP, | |
175 | + "cron": syslog.LOG_CRON, | |
176 | + "authpriv": 10<<3, | |
177 | + "ftp": 11<<3, | |
178 | + "local0": syslog.LOG_LOCAL0, | |
179 | + "local1": syslog.LOG_LOCAL1, | |
180 | + "local2": syslog.LOG_LOCAL2, | |
181 | + "local3": syslog.LOG_LOCAL3, | |
182 | + "local4": syslog.LOG_LOCAL4, | |
183 | + "local5": syslog.LOG_LOCAL5, | |
184 | + "local6": syslog.LOG_LOCAL6, | |
185 | + "local7": syslog.LOG_LOCAL7, | |
186 | + } | |
187 | + | |
188 | + facilitystr_map = { | |
189 | + syslog.LOG_KERN: "kern", | |
190 | + syslog.LOG_USER: "user", | |
191 | + syslog.LOG_MAIL: "mail", | |
192 | + syslog.LOG_DAEMON: "daemon", | |
193 | + syslog.LOG_AUTH: "auth", | |
194 | + syslog.LOG_SYSLOG: "syslog", | |
195 | + syslog.LOG_LPR: "lpr", | |
196 | + syslog.LOG_NEWS: "news", | |
197 | + syslog.LOG_UUCP: "uucp", | |
198 | + syslog.LOG_CRON: "cron", | |
199 | + 10<<3: "authpriv", | |
200 | + 11<<3: "ftp", | |
201 | + syslog.LOG_LOCAL0: "local0", | |
202 | + syslog.LOG_LOCAL1: "local1", | |
203 | + syslog.LOG_LOCAL2: "local2", | |
204 | + syslog.LOG_LOCAL3: "local3", | |
205 | + syslog.LOG_LOCAL4: "local4", | |
206 | + syslog.LOG_LOCAL5: "local5", | |
207 | + syslog.LOG_LOCAL6: "local6", | |
208 | + syslog.LOG_LOCAL7: "local7", | |
209 | + } | |
210 | + | |
211 | + facilitystr = facilitystr_map[DEFAULT_FACILITY] | |
212 | + | |
213 | + def __init__(self, priority, path): | |
214 | + self.pid = os.getpid() | |
215 | + | |
216 | + if not isinstance(priority, int) and not isinstance(priority, long): | |
217 | + self.priority = self.LOG_INFO | |
218 | + else: | |
219 | + self.priority = priority | |
220 | + | |
221 | + if not isinstance(path, types.StringTypes): | |
222 | + self.output = None | |
223 | + else: | |
224 | + self.output = path | |
225 | + | |
226 | + self.facility = self.DEFAULT_FACILITY | |
227 | + syslog.openlog("pm_logconv", self.DEFAULT_LOGOPT, self.facility) | |
228 | + | |
229 | + def __setattr__(self, name, val): | |
230 | + if name != "LOG_EMERG" and name != "LOG_ALERT" and \ | |
231 | + name != "LOG_CRIT" and name != "LOG_ERR" and \ | |
232 | + name != "LOG_WARNING" and name != "LOG_NOTICE" and \ | |
233 | + name != "LOG_INFO" and name != "LOG_DEBUG" and \ | |
234 | + name != "DEFAULT_LOGOPT" and name != "DEFAULT_FACILITY": | |
235 | + self.__dict__[name] = val | |
236 | + | |
237 | + def set_priority(self, priority): | |
238 | + if not isinstance(priority, int) and not isinstance(priority, long): | |
239 | + return False | |
240 | + if self.LOG_EMERG < priority and self.DEBUG > priority: | |
241 | + return False | |
242 | + self.priority = priority | |
243 | + return True | |
244 | + | |
245 | + def set_output(self, path): | |
246 | + if not isinstance(path, types.StringTypes): | |
247 | + return False | |
248 | + self.output = path | |
249 | + return True | |
250 | + | |
251 | + def set_facility(self, facility): | |
252 | + # FYI: LOG_AUTHPRIV : 10<<3 | |
253 | + # LOG_FTP : 11<<3 | |
254 | + if self.facility == facility: | |
255 | + return True | |
256 | + if self.facilitystr_map.has_key(facility): | |
257 | + pm_log.notice("syslog facility changed [%s] to [%s]" | |
258 | + % (self.facilitystr, self.facilitystr_map[facility])) | |
259 | + syslog.closelog() | |
260 | + self.facility = facility | |
261 | + syslog.openlog("pm_logconv", self.DEFAULT_LOGOPT, self.facility) | |
262 | + self.facilitystr = self.facilitystr_map[facility] | |
263 | + return True | |
264 | + return False | |
265 | + | |
266 | + def emerg(self, message): | |
267 | + if self.output == None or self.priority >= self.LOG_EMERG: | |
268 | + return self.logging(self.LOG_EMERG, message) | |
269 | + return True | |
270 | + | |
271 | + def alert(self, message): | |
272 | + if self.output == None or self.priority >= self.LOG_ALERT: | |
273 | + return self.logging(self.LOG_ALERT, message) | |
274 | + return True | |
275 | + | |
276 | + def crit(self, message): | |
277 | + if self.output == None or self.priority >= self.LOG_CRIT: | |
278 | + return self.logging(self.LOG_CRIT, message) | |
279 | + return True | |
280 | + | |
281 | + def error(self, message): | |
282 | + if self.output == None or self.priority >= self.LOG_ERR: | |
283 | + return self.logging(self.LOG_ERR, message) | |
284 | + return True | |
285 | + | |
286 | + def warn(self, message): | |
287 | + if self.output == None or self.priority >= self.LOG_WARNING: | |
288 | + return self.logging(self.LOG_WARNING, message) | |
289 | + return True | |
290 | + | |
291 | + def notice(self, message): | |
292 | + if self.output == None or self.priority >= self.LOG_NOTICE: | |
293 | + return self.logging(self.LOG_NOTICE, message) | |
294 | + return True | |
295 | + | |
296 | + def info(self, message): | |
297 | + if self.output == None or self.priority >= self.LOG_INFO: | |
298 | + return self.logging(self.LOG_INFO, message) | |
299 | + return True | |
300 | + | |
301 | + def debug(self, message): | |
302 | + if self.output == None or self.priority >= self.LOG_DEBUG: | |
303 | + return self.logging(self.LOG_DEBUG, message) | |
304 | + return True | |
305 | + | |
306 | + def logging(self, priority, message): | |
307 | + try: | |
308 | + if not isinstance(priority, int) and not isinstance(priority, long): | |
309 | + return False | |
310 | + if not isinstance(message, types.StringTypes): | |
311 | + return False | |
312 | + | |
313 | + if self.output == None: | |
314 | + syslog.syslog(self.syspriority[priority], "[%d]: %-7s %s" % | |
315 | + (self.pid, self.prioritystr[priority] + ':', message.rstrip())) | |
316 | + else: | |
317 | + t = datetime.datetime.today() | |
318 | + tfmt = "%s %2d %s" % \ | |
319 | + (t.strftime('%b'), int(t.strftime('%d')), t.strftime('%X')) | |
320 | + f = open(self.output, 'a') | |
321 | + f.write("%s %s [%d]: %-7s %s\n" % (tfmt, HOSTNAME, self.pid, | |
322 | + self.prioritystr[priority] + ':', message.rstrip())) | |
323 | + f.close() | |
324 | + return True | |
325 | + except Exception, strerror: | |
326 | + print >> sys.stderr, "Error: logging() error occurred.", strerror | |
327 | + sys.exit(1) | |
328 | + | |
329 | +class PIDFile: | |
330 | + ''' | |
331 | + status of the PID file operation. | |
332 | + ''' | |
333 | + SYSTEM_ERROR = -1 | |
334 | + FILE_NOTEXIST = -2 | |
335 | + FILE_INVALID = -3 | |
336 | + NOTRUNNING = -4 | |
337 | + | |
338 | + def __init__(self, path): | |
339 | + self.path = path | |
340 | + | |
341 | + ''' | |
342 | + status is set as read-only. | |
343 | + ''' | |
344 | + def __setattr__(self, name, val): | |
345 | + if name != "SYSTEM_ERROR" and name != "FILE_NOTEXIST" and \ | |
346 | + name != "FILE_INVALID" and name != "NOTRUNNING": | |
347 | + self.__dict__[name] = val | |
348 | + | |
349 | + ''' | |
350 | + check whether the process of the PID file has running. | |
351 | + return 0 > : process is running. | |
352 | + SYSTEM_ERROR : system error occurred. | |
353 | + NOTRUNNING : process is NOT running. | |
354 | + ''' | |
355 | + def is_running(self, pid, cmdline): | |
356 | + try: | |
357 | + os.kill(pid, 0) | |
358 | + except Exception, (errNo, strerror): | |
359 | + if errNo == ESRCH: | |
360 | + pm_log.debug("is_running: pm_logconv isn't running.") | |
361 | + return self.NOTRUNNING | |
362 | + else: | |
363 | + pm_log.error("is_running: kill(%d, 0) error occurred." % pid) | |
364 | + pm_log.debug("is_running: kill(%d, 0) error occurred. [%s]" | |
365 | + % (pid, strerror)) | |
366 | + return self.SYSTEM_ERROR | |
367 | + | |
368 | + # check to make sure pid hasn't been reused by another process. | |
369 | + try: | |
370 | + proc_path = "/proc/%d/cmdline" % pid | |
371 | + f = open(proc_path, 'r') | |
372 | + cmdline_now = f.readline().replace('\0', ' ').strip() | |
373 | + f.close() | |
374 | + | |
375 | + pm_log.debug("is_running: tracked[%s], /proc/%d/cmdline[%s]" | |
376 | + % (cmdline, pid, cmdline_now)) | |
377 | + if cmdline != cmdline_now: | |
378 | + return self.NOTRUNNING | |
379 | + except Exception, strerror: | |
380 | + pm_log.error("is_running: couldn't read from '%s'." % proc_path) | |
381 | + pm_log.debug("is_running: couldn't read from '%s'. %s" | |
382 | + % (proc_path, strerror)) | |
383 | + return self.SYSTEM_ERROR | |
384 | + return pid | |
385 | + | |
386 | + ''' | |
387 | + read PID file. | |
388 | + return 0 > : process is running. return running process's PID. | |
389 | + SYSTEM_ERROR : system error occurred. | |
390 | + FILE_NOTEXIST : PID file doesn't exist. | |
391 | + FILE_INVALID : PID file is broken... | |
392 | + NOTRUNNING : succeeded. process is NOT running. | |
393 | + ''' | |
394 | + def read(self): | |
395 | + try: | |
396 | + if os.path.exists(self.path): | |
397 | + f = open(self.path, 'r') | |
398 | + pid = f.readline().strip() | |
399 | + cmdline = f.readline().strip('\n') | |
400 | + f.close() | |
401 | + | |
402 | + if pid.isdigit() and int(pid) != os.getpid(): | |
403 | + return self.is_running(int(pid), cmdline) | |
404 | + else: | |
405 | + pm_log.warn("PIDFile.read: PID file is screwed up.") | |
406 | + return self.FILE_INVALID | |
407 | + else: | |
408 | + pm_log.info("PIDFile.read: PID file doesn't exist.") | |
409 | + return self.FILE_NOTEXIST | |
410 | + except Exception, strerror: | |
411 | + pm_log.error("PIDFile.read: I/O error occurred.") | |
412 | + pm_log.debug("PIDFile.read: I/O error occurred. [%s]" % strerror) | |
413 | + return self.SYSTEM_ERROR | |
414 | + | |
415 | + ''' | |
416 | + lock PID file. | |
417 | + return 0 : succeeded. | |
418 | + 0 > : return already running process's PID. | |
419 | + SYSTEM_ERROR : system error occurred. | |
420 | + ''' | |
421 | + def lock(self): | |
422 | + try: | |
423 | + ret = self.read() | |
424 | + if ret > 0 or ret == self.SYSTEM_ERROR: | |
425 | + return ret | |
426 | + elif ret == self.FILE_NOTEXIST: | |
427 | + pass | |
428 | + elif ret == self.FILE_INVALID or ret == self.NOTRUNNING: | |
429 | + os.remove(self.path) | |
430 | + else: | |
431 | + return self.SYSTEM_ERROR | |
432 | + except Exception, strerror: | |
433 | + pm_log.error("PIDFile.lock: I/O error occurred.") | |
434 | + pm_log.debug("PIDFile.lock: I/O error occurred. [%s]" % strerror) | |
435 | + return self.SYSTEM_ERROR | |
436 | + | |
437 | + try: | |
438 | + pid = os.getpid() | |
439 | + f = open("/proc/%d/cmdline" % pid, 'r') | |
440 | + cmdline = f.readline().replace('\0', ' ').strip() | |
441 | + f.close() | |
442 | + | |
443 | + tfile = ("%s.%d" % (self.path, pid)) | |
444 | + f = open(tfile, 'w') | |
445 | + f.write("%d\n%s\n" % (pid, cmdline)) | |
446 | + f.close() | |
447 | + | |
448 | + os.link(tfile, self.path) | |
449 | + nlink = os.stat(tfile)[ST_NLINK] | |
450 | + os.remove(tfile) | |
451 | + except Exception, strerror: | |
452 | + pm_log.error("PIDFile.lock: I/O error occurred.") | |
453 | + pm_log.debug("PIDFile.lock: I/O error occurred. [%s]" % strerror) | |
454 | + | |
455 | + try: | |
456 | + f.close() | |
457 | + os.remove(tfile) | |
458 | + except: | |
459 | + pass | |
460 | + return self.SYSTEM_ERROR | |
461 | + | |
462 | + if nlink < 2: | |
463 | + # somehow, it didn't get through - NFS trouble? | |
464 | + return self.SYSTEM_ERROR | |
465 | + return 0 | |
466 | + | |
class ConvertStatus:
	'''
	In-memory snapshot of the converter's state: the read position in
	ha-log (inode + byte offset) plus the convert-status flags and the
	sets tracked across restarts.
	'''
	def __init__(self):
		# read position of ha-log.
		self.ino = 0
		self.offset = 0
		# convert status flags.
		self.FAILURE_OCCURRED = False
		self.IN_CALC = False
		self.ACTRSC_MOVE = False
		self.IN_FO_PROCESS = False
		# tracking sets (timed-out resource operations / nodes shutting down).
		self.timedoutRscopSet = set()
		self.shutNodeSet = set()

# module-wide current status, shared by the conversion routines.
cstat = ConvertStatus()
479 | + | |
480 | +class StatusFile: | |
481 | + def __init__(self, path): | |
482 | + self.path = path | |
483 | + self.w_ino = 0 | |
484 | + self.w_offset = 0 | |
485 | + self.in_calc = False | |
486 | + | |
487 | + ''' | |
488 | + read from status(read position of ha-log and status of convert) file. | |
489 | + ''' | |
490 | + def read(self): | |
491 | + try: | |
492 | + if os.path.exists(self.path): | |
493 | + f = os.open(self.path, os.O_RDONLY) | |
494 | + c = pickle.loads(os.read(f, os.stat(self.path)[ST_SIZE])) | |
495 | + os.close(f) | |
496 | + cstat.ino = self.w_ino = c.ino | |
497 | + cstat.offset = self.w_offset = c.offset | |
498 | + cstat.FAILURE_OCCURRED = c.FAILURE_OCCURRED | |
499 | + cstat.IN_CALC = self.in_calc = c.IN_CALC | |
500 | + cstat.ACTRSC_MOVE = c.ACTRSC_MOVE | |
501 | + cstat.IN_FO_PROCESS = c.IN_FO_PROCESS | |
502 | + cstat.timedoutRscopSet = c.timedoutRscopSet | |
503 | + cstat.shutNodeSet = c.shutNodeSet | |
504 | + else: | |
505 | + pm_log.info("StatusFile.read: status file doesn't exist.") | |
506 | + self.clear_cstat() | |
507 | + pm_log.debug("StatusFile.read: [%d:%d], FAIL[%s], IN_CALC[%s], "\ | |
508 | + "RSC_MOVE[%s], IN_FO[%s], Rscop%s, Node%s" % | |
509 | + (cstat.ino, cstat.offset, cstat.FAILURE_OCCURRED, | |
510 | + cstat.IN_CALC, cstat.ACTRSC_MOVE, cstat.IN_FO_PROCESS, | |
511 | + list(cstat.timedoutRscopSet), list(cstat.shutNodeSet))) | |
512 | + return True | |
513 | + except Exception, strerror: | |
514 | + pm_log.error("StatusFile.read: I/O error occurred.") | |
515 | + pm_log.debug("StatusFile.read: I/O error occurred. [%s]" % strerror) | |
516 | + self.clear_cstat() | |
517 | + return False | |
518 | + | |
519 | + ''' | |
520 | + write to status(reading ha-log's position and status of convert) file. | |
521 | + ''' | |
522 | + def write(self): | |
523 | + if cstat.IN_CALC: | |
524 | + if self.in_calc: | |
525 | + return True | |
526 | + self.in_calc = True | |
527 | + else: | |
528 | + self.in_calc = False | |
529 | + self.w_ino = cstat.ino | |
530 | + self.w_offset = cstat.offset | |
531 | + | |
532 | + try: | |
533 | + # current implementation writes to the statfile with os.write(). | |
534 | + # since between built-in function write() and close(), file becomes empty. | |
535 | + f = os.open(self.path, os.O_WRONLY | os.O_CREAT, S_IRUSR | S_IWUSR) | |
536 | + l = os.write(f, pickle.dumps(cstat, pickle.HIGHEST_PROTOCOL)) | |
537 | + os.ftruncate(f, l) | |
538 | + os.close(f) | |
539 | + pm_log.debug("StatusFile.write: [%d:%d], FAIL[%s], IN_CALC[%s], "\ | |
540 | + "RSC_MOVE[%s], IN_FO[%s], Rscop%s, Node%s" % | |
541 | + (cstat.ino, cstat.offset, cstat.FAILURE_OCCURRED, | |
542 | + cstat.IN_CALC, cstat.ACTRSC_MOVE, cstat.IN_FO_PROCESS, | |
543 | + list(cstat.timedoutRscopSet), list(cstat.shutNodeSet))) | |
544 | + return True | |
545 | + except Exception, strerror: | |
546 | + pm_log.error("StatusFile.write: I/O error occurred.") | |
547 | + pm_log.debug("StatusFile.write: I/O error occurred. [%s]" % strerror) | |
548 | + return False | |
549 | + | |
550 | + def clear_cstat(self): | |
551 | + global cstat | |
552 | + pm_log.debug("clear_cstat: called.") | |
553 | + cstat = ConvertStatus() | |
554 | + self.w_ino = cstat.ino | |
555 | + self.w_offset = cstat.offset | |
556 | + self.in_calc = cstat.IN_CALC | |
557 | + return | |
558 | + | |
559 | +statfile = None | |
560 | + | |
561 | +class ParseConfigFile: | |
562 | + ''' | |
563 | + Initialization to parse config file. | |
564 | + Open the config file. Its fd should be close in __del__(). | |
565 | + ''' | |
566 | + def __init__(self, config_file): | |
567 | + self.SEC_SETTINGS = "Settings" | |
568 | + self.OPT_HA_LOG_PATH = "ha_log_path" | |
569 | + self.OPT_HACF_PATH = "hacf_path" | |
570 | + self.OPT_OUTPUT_PATH = "output_path" | |
571 | + self.OPT_DATEFORMAT = "syslogformat" | |
572 | + self.OPT_HOSTCACHE = "hostcache_path" | |
573 | + self.OPT_MANAGE_ATTR = "attribute" | |
574 | + self.OPT_PATTERN = "pattern" | |
575 | + self.OPT_RESET_INTERVAL = "reset_interval" | |
576 | + self.OPT_FUNCNAME = "func" | |
577 | + self.OPT_LOGLEVEL = "loglevel" | |
578 | + self.OPT_FOTRIGGER = "fotrigger" | |
579 | + self.OPT_IGNOREMSG = "ignoremsg" | |
580 | + | |
581 | + self.OPT_LOGFACILITY = "logconv_logfacility" | |
582 | + self.logfacility = None | |
583 | + | |
584 | + self.OPT_ACTRSC = "act_rsc" | |
585 | + | |
586 | + self.fp = None | |
587 | + self.cf = ConfigParser.RawConfigParser() | |
588 | + # open the config file to read. | |
589 | + if not os.path.exists(config_file): | |
590 | + pm_log.error("ParseConfigFile.__init__(): " + | |
591 | + "config file [%s] does not exist." % (config_file)) | |
592 | + #__init__ should return None... | |
593 | + sys.exit(1) | |
594 | + try: | |
595 | + self.fp = open(config_file) | |
596 | + self.cf.readfp(self.fp) | |
597 | + except Exception, strerror: | |
598 | + pm_log.error("ParseConfigFile.__init__(): " + | |
599 | + "failed to read config file [%s]." % (config_file)) | |
600 | + pm_log.debug("ParseConfigFile.__init__(): %s" % (strerror)) | |
601 | + #__init__ should return None... | |
602 | + sys.exit(1) | |
603 | + | |
604 | + def __del__(self): | |
605 | + if self.fp is not None: | |
606 | + self.fp.close() | |
607 | + | |
608 | + def get_optval(self, secname, optname): | |
609 | + optval = None | |
610 | + try: | |
611 | + optval = self.cf.get(secname, optname) | |
612 | + except Exception, strerror: | |
613 | + pm_log.warn("get_optval(): " + | |
614 | + "failed to get value of \"%s\" in [%s] section. " % | |
615 | + (optname, secname)) | |
616 | + pm_log.debug("get_optval(): %s" % (strerror)) | |
617 | + return None | |
618 | + | |
619 | + if optval == "": | |
620 | + pm_log.warn("get_optval(): " + | |
621 | + "the value of \"%s\" in [%s] section is empty. " % | |
622 | + (optname, secname)) | |
623 | + return None | |
624 | + return optval | |
625 | + | |
626 | + ''' | |
627 | + Parse [Settings] section. | |
628 | + return 0 : succeeded. | |
629 | + 0 > : error occurs. | |
630 | + ''' | |
631 | + def parse_basic_settings(self): | |
632 | + global HA_LOGFILE | |
633 | + global HACFFILE | |
634 | + global OUTPUTFILE | |
635 | + global SYSLOGFORMAT | |
636 | + global HOSTCACHE | |
637 | + global RESET_INTERVAL | |
638 | + global attrRuleList | |
639 | + global actRscList | |
640 | + | |
641 | + # Get all options in the section. | |
642 | + try: | |
643 | + setting_opts = self.cf.options(self.SEC_SETTINGS) | |
644 | + except: | |
645 | + pm_log.warn("parse_basic_settings(): " + | |
646 | + "[%s] section does not exist. " % (self.SEC_SETTINGS)) | |
647 | + return (-1) | |
648 | + | |
649 | + for optname in setting_opts: | |
650 | + optval = self.get_optval(self.SEC_SETTINGS, optname) | |
651 | + if not optval: | |
652 | + pm_log.warn("parse_basic_settings(): " + | |
653 | + "Ignore the setting of \"%s\"." % (optname)) | |
654 | + continue # To the next option in [Settings]. | |
655 | + | |
656 | + if optname == self.OPT_HA_LOG_PATH: | |
657 | + HA_LOGFILE = optval | |
658 | + elif optname == self.OPT_HACF_PATH: | |
659 | + HACFFILE = optval | |
660 | + elif optname == self.OPT_OUTPUT_PATH: | |
661 | + OUTPUTFILE = optval | |
662 | + elif optname == self.OPT_DATEFORMAT: | |
663 | + if optval.lower() == "true": | |
664 | + SYSLOGFORMAT = True | |
665 | + elif optval.lower() == "false": | |
666 | + SYSLOGFORMAT = False | |
667 | + else: | |
668 | + pm_log.warn("parse_basic_settings(): " + | |
669 | + "the value of \"%s\" is invalid. " % (optname) + | |
670 | + "Ignore the setting.") | |
671 | + elif optname == self.OPT_HOSTCACHE: | |
672 | + HOSTCACHE = optval | |
673 | + elif optname == self.OPT_RESET_INTERVAL: | |
674 | + try: | |
675 | + tmpval = int(optval) | |
676 | + # 1 to 32bit integer max value | |
677 | + if tmpval > 0 and tmpval <= 2147483647: | |
678 | + RESET_INTERVAL = tmpval | |
679 | + else: | |
680 | + raise | |
681 | + except: | |
682 | + pm_log.warn("parse_basic_settings(): " + | |
683 | + "the value of \"%s\" is invalid. " % (optname) + | |
684 | + "set an default value(60).") | |
685 | + elif optname.startswith(self.OPT_MANAGE_ATTR): | |
686 | + attrRule = optval.split(',') | |
687 | + if len(attrRule) != 3: | |
688 | + pm_log.warn("parse_basic_settings(): " + | |
689 | + "the format of \"%s\" is invalid. " % (optname) + | |
690 | + "Ignore the setting.") | |
691 | + continue # To the next option in [Settings]. | |
692 | + (attrname, op, attrval) = tuple(attrRule) | |
693 | + attrname = attrname.strip() | |
694 | + op = op.strip() | |
695 | + attrval = attrval.strip() | |
696 | + if attrname == "" or op == "" or attrval == "": | |
697 | + pm_log.warn("parse_basic_settings(): " + | |
698 | + "the value of \"%s\" is invalid. " % (optname) + | |
699 | + "Ignore the setting.") | |
700 | + continue # To the next option in [Settings]. | |
701 | + | |
702 | + ''' | |
703 | + op string should be [lt|gt|lte|gte|eq|ne] in cib.xml. | |
704 | + However, with operator module of Python, | |
705 | + "lte" is expressed "le", and "gte" is "ge". | |
706 | + Here, replace op string to use it as function name. | |
707 | + ''' | |
708 | + opList = ["lt", "gt", "le", "ge", "eq", "ne"] | |
709 | + opmatch = False | |
710 | + for opstr in opList: | |
711 | + if op == opstr: | |
712 | + opmatch = True | |
713 | + if not opmatch: | |
714 | + if op == "lte": | |
715 | + op = "le" | |
716 | + elif op == "gte": | |
717 | + op = "ge" | |
718 | + else: | |
719 | + pm_log.warn("parse_basic_settings(): " + | |
720 | + "operation \"%s\" (in \"%s\") is invalid. " % | |
721 | + (op, optname) + | |
722 | + "Ignore the setting.") | |
723 | + continue # To the next option in [Settings]. | |
724 | + | |
725 | + attrRule = [attrname, op, attrval] | |
726 | + attrRuleList.append(attrRule) | |
727 | + elif optname == self.OPT_LOGFACILITY: | |
728 | + if LogconvLog.facility_map.has_key(optval.lower()): | |
729 | + self.logfacility = LogconvLog.facility_map[optval.lower()] | |
730 | + else: | |
731 | + pm_log.warn("parse_basic_settings(): " + | |
732 | + "the value of \"%s\" is invalid. " % (optname) + | |
733 | + "Ignore the setting.") | |
734 | + elif optname == self.OPT_ACTRSC: | |
735 | + for rstr in optval.split(','): | |
736 | + rstr = rstr.strip() | |
737 | + if rstr != "": | |
738 | + if rstr in actRscList: | |
739 | + pm_log.warn("parse_basic_settings(): " + | |
740 | + "resource id \"%s\" is written redundantly. " % | |
741 | + (rstr) + | |
742 | + "Ignore the redundancy.") | |
743 | + else: | |
744 | + actRscList.append(rstr) | |
745 | + # __if optname == xxx: | |
746 | + # __for optname in setting_opts: | |
747 | + | |
748 | + return 0 | |
749 | + | |
750 | + ''' | |
751 | + Parse sections for log-convertion. | |
752 | + return 0 : succeeded. | |
753 | + 0 > : error occurs. | |
754 | + ''' | |
755 | + def parse_logconv_settings(self): | |
756 | + logconv_sections = self.cf.sections() | |
757 | + try: | |
758 | + logconv_sections.remove(self.SEC_SETTINGS) | |
759 | + except: | |
760 | + pm_log.warn("parse_logconv_settings(): " + | |
761 | + "[%s] section does not exist. " % (self.SEC_SETTINGS)) | |
762 | + | |
763 | + # | |
764 | + # Parse each section. | |
765 | + # | |
766 | + for secname in logconv_sections: | |
767 | + # Get all options in the section. | |
768 | + try: | |
769 | + logconv_opts = self.cf.options(secname) | |
770 | + except: | |
771 | + pm_log.warn("parse_logconv_settings(): " + | |
772 | + "[%s] section does not exist. " % (secname) + | |
773 | + "Ignore this section.") | |
774 | + continue #To the next section. | |
775 | + | |
776 | + lconvfrm = LogconvFrame() | |
777 | + lconvfrm.rulename = secname | |
778 | + for optname in logconv_opts: | |
779 | + optval = self.get_optval(secname, optname) | |
780 | + if not optval: | |
781 | + pm_log.warn("parse_logconv_settings(): " + | |
782 | + "Ignore the setting of \"%s\"." % (optname)) | |
783 | + continue # To the next option. | |
784 | + | |
785 | + if optname == self.OPT_FUNCNAME: | |
786 | + defined = hasattr(LogConvertFuncs, optval) | |
787 | + if defined == False: | |
788 | + pm_log.error("parse_logconv_settings(): " + | |
789 | + "function %s() specified in " % (optval) + | |
790 | + "[%s] section is not defined." % (secname)) | |
791 | + break # Break off parsing this section. | |
792 | + lconvfrm.func = optval | |
793 | + elif optname == self.OPT_LOGLEVEL: | |
794 | + lconvfrm.loglevel = optval | |
795 | + elif optname == self.OPT_FOTRIGGER: | |
796 | + lconvfrm.fotrigger = optval | |
797 | + elif optname == self.OPT_IGNOREMSG: | |
798 | + if optval.lower() == "true": | |
799 | + lconvfrm.ignoremsg = True | |
800 | + elif optval.lower() == "false": | |
801 | + lconvfrm.ignoremsg = False | |
802 | + else: | |
803 | + pm_log.warn("parse_logconv_settings(): " + | |
804 | + "the value of \"%s\" is invalid. " % (optname) + | |
805 | + "Ignore the setting.") | |
806 | + elif optname.startswith(self.OPT_PATTERN): | |
807 | + pstrList = list() | |
808 | + tmpList = list() | |
809 | + pstrList = self.parse_ptn_strings(optval) | |
810 | + if len(pstrList) <= 0: | |
811 | + pm_log.error("parse_logconv_settings(): " + | |
812 | + "match pattern string of \"%s\" is empty." % | |
813 | + (optname)) | |
814 | + break # Break off parsing this section. | |
815 | + tmpList = self.compile_ptn_strings(pstrList) | |
816 | + if tmpList is None: | |
817 | + pm_log.error("parse_logconv_settings(): " + | |
818 | + "failed to compile the pattern string in \"%s\"." % | |
819 | + (optname)) | |
820 | + break # Break off parsing this section. | |
821 | + lconvfrm.ptnList.append(tmpList) | |
822 | + else: | |
823 | + pm_log.debug("parse_logconv_settings(): " + | |
824 | + "\"%s\" is not valid option string." % (optname) + | |
825 | + "Ignore the setting.") | |
826 | + # __for optname in logconv_opts: | |
827 | + | |
828 | + if len(lconvfrm.ptnList) == 0 or lconvfrm.func == None: | |
829 | + pm_log.warn("parse_logconv_settings(): " + | |
830 | + "\"%s\" and \"%s*\" setting is required in section [%s]. " % | |
831 | + (self.OPT_FUNCNAME, self.OPT_PATTERN, secname) + | |
832 | + "Ignore the section.") | |
833 | + del lconvfrm | |
834 | + else: | |
835 | + lconvRuleList.append(lconvfrm) | |
836 | + #To the next section. | |
837 | + #__for secname in logconv_sections: | |
838 | + return 0 | |
839 | + | |
840 | + ''' | |
841 | + Parse match pattern strings (written in a line) and | |
842 | + make a list of them. | |
843 | + Strings are set apart by ','. | |
844 | + arg1 : match pattern strings. | |
845 | + return: a list of pattern strings. | |
846 | + ''' | |
847 | + def parse_ptn_strings(self, pstrings): | |
848 | + pstrList = list() | |
849 | + for pstr in pstrings.split(','): | |
850 | + pstr = pstr.strip() | |
851 | + if pstr != "": | |
852 | + pstrList.append(pstr) | |
853 | + return pstrList | |
854 | + | |
855 | + ''' | |
856 | + Compile each pattern string. | |
857 | + arg1 : a list of pattern strings (made with parse_ptn_strings()). | |
858 | + return: a list of compiled objects. | |
859 | + ''' | |
860 | + def compile_ptn_strings(self, pstrList): | |
861 | + compiledList = list() | |
862 | + for pstr in pstrList: | |
863 | + #If it is a negative pattern, compile is as so. | |
864 | + if pstr.startswith('!'): | |
865 | + pstr = ur"^(?!.*" + pstr.lstrip('!') + ur").*$" | |
866 | + compiledList.append(re.compile(pstr)) | |
867 | + return compiledList | |
868 | + | |
class LogconvFrame:
	'''
	Class to hold rules to convert log message.

	rulename : convert rule name. set section name.
	ptnList  : list of compiled object list of match patterns
	           (list of lists).
	func     : function name to convert log message which matches the rule.
	loglevel : log level of converted log.
	fotrigger: the log message is trigger of F/O or not. [True|False]
	ignoremsg: whether set the time of output log message for auto reset
	           function. [True|False]
	'''
	def __init__(self, rulename=None, ptnList=None, func=None, loglevel=None,
		fotrigger=False, ignoremsg=False):
		self.rulename = rulename
		# BUGFIX: the original assigned the ptnList argument and then
		# immediately rebound the attribute to a fresh list, silently
		# discarding any caller-supplied value. Honor the argument and
		# default to a new (per-instance) empty list, avoiding the
		# shared-mutable-default trap.
		if ptnList is None:
			ptnList = list()
		self.ptnList = ptnList
		self.func = func
		self.loglevel = loglevel
		self.fotrigger = fotrigger
		self.ignoremsg = ignoremsg

	def print_frmval(self):
		'''
		Only for debug: dump all fields of the rule to stdout.
		'''
		# parenthesized single-argument form prints identically under
		# Python 2 while also being valid Python 3 syntax.
		print(self.rulename)
		print(self.ptnList)
		print(self.func)
		print(self.loglevel)
		print(self.fotrigger)
		print(self.ignoremsg)
903 | + | |
class LogConvert:
	# fixed file paths; LogConvert.__setattr__ keeps them read-only.
	PIDFILE = "/var/run/pm_logconv.pid"
	STATFILE = "/var/run/pm_logconv.stat"
907 | + | |
908 | + def __init__(self): | |
909 | + self.daemonize = False | |
910 | + self.stop_logconv = False | |
911 | + self.ask_status = False | |
912 | + self.is_continue = False | |
913 | + self.is_present = False | |
914 | + self.configfile = CONFIGFILE | |
915 | + now = datetime.datetime.now() | |
916 | + self.last_logoutput_t = now | |
917 | + self.last_reset_t = now | |
918 | + | |
919 | + # Get obj of functions to convert log. | |
920 | + self.funcs = LogConvertFuncs() | |
921 | + signal.signal(signal.SIGUSR1, self.check_dc_and_reset) | |
922 | + | |
923 | + if not self.parse_args(): | |
924 | + sys.exit(1) | |
925 | + | |
926 | + pm_log.debug("option: daemon[%d], stop[%d], status[%d], continue[%d], " \ | |
927 | + "present[%d], config[%s], facility[%s]" % (self.daemonize, self.stop_logconv, | |
928 | + self.ask_status, self.is_continue, self.is_present, self.configfile, pm_log.facilitystr)) | |
929 | + if not self.stop_logconv and not self.ask_status: | |
930 | + pm_log.debug("option: target[%s], output[%s], syslogfmt[%s], ha.cf[%s], hcache[%s], reset_interval[%d], actrsc%s" % (HA_LOGFILE, OUTPUTFILE, SYSLOGFORMAT, HACFFILE, HOSTCACHE, RESET_INTERVAL, actRscList)) | |
931 | + | |
932 | + ''' | |
933 | + PID and status(read position of ha-log and status of convert) file path | |
934 | + is set as read-only. | |
935 | + ''' | |
936 | + def __setattr__(self, name, val): | |
937 | + if name != "PIDFILE" and name != "STATFILE": | |
938 | + self.__dict__[name] = val | |
939 | + | |
940 | + ''' | |
941 | + parse options - command line option and configure file. | |
942 | + ''' | |
943 | + def parse_args(self): | |
944 | + myusage = "\n%prog [options]" | |
945 | + psr = OptionParser(usage=myusage) | |
946 | + | |
947 | + psr.add_option("-d", action="store_true", dest="daemonize", | |
948 | + default=False, help="make the program a daemon") | |
949 | + psr.add_option("-k", action="store_true", dest="stop_logconv", | |
950 | + default=False, help="stop the pm_logconv if it is already running") | |
951 | + psr.add_option("-s", action="store_true", dest="ask_status", | |
952 | + default=False, help="return pm_logconv status") | |
953 | + psr.add_option("-c", action="store_true", dest="is_continue", | |
954 | + default=False, help="start with a continuous mode (\"-p\" option is mutually exclusive)") | |
955 | + psr.add_option("-p", action="store_true", dest="is_present", | |
956 | + default=False, help="start with a present mode (\"-c\" option is mutually exclusive)") | |
957 | + psr.add_option("-f", dest="config_file", default=CONFIGFILE, | |
958 | + help="the specified configuration file is used") | |
959 | + psr.add_option("-v", "--version", action="callback", callback=print_version, | |
960 | + help="print out this program's version and exit") | |
961 | + | |
962 | + opts = psr.parse_args(sys.argv)[0] | |
963 | + | |
964 | + args = '' | |
965 | + for arg in sys.argv: | |
966 | + args = args + arg + ' ' | |
967 | + pm_log.info("starting... [%s]" % args[:len(args)-1]) | |
968 | + | |
969 | + self.daemonize = opts.daemonize | |
970 | + self.stop_logconv = opts.stop_logconv | |
971 | + self.ask_status = opts.ask_status | |
972 | + self.is_continue = opts.is_continue | |
973 | + self.is_present = opts.is_present | |
974 | + self.configfile = opts.config_file | |
975 | + | |
976 | + ''' | |
977 | + Parse config file. | |
978 | + ''' | |
979 | + pcfobj = ParseConfigFile(self.configfile) | |
980 | + # Parse pm_logconv's basic settings. | |
981 | + pcfobj.parse_basic_settings() | |
982 | + | |
983 | + if pcfobj.logfacility != None: | |
984 | + pm_log.set_facility(pcfobj.logfacility) | |
985 | + pm_log.info("starting... [%s]" % args[:len(args)-1]) | |
986 | + | |
987 | + # check command line option. | |
988 | + true_opts = 0 | |
989 | + for opt in (self.daemonize, self.stop_logconv, self.ask_status): | |
990 | + if opt: | |
991 | + true_opts = true_opts + 1 | |
992 | + if true_opts > 1: | |
993 | + pm_log.error("parse_args: option -d, -k, " \ | |
994 | + "and -s cannot be specified at the same time.") | |
995 | + return False | |
996 | + | |
997 | + if (self.stop_logconv or self.ask_status) and self.is_continue: | |
998 | + pm_log.error("parse_args: option -k and -s cannot be specified with -c.") | |
999 | + return False | |
1000 | + | |
1001 | + if (self.stop_logconv or self.ask_status) and self.is_present: | |
1002 | + pm_log.error("parse_args: option -k and -s cannot be specified with -p.") | |
1003 | + return False | |
1004 | + | |
1005 | + if self.is_continue and self.is_present: | |
1006 | + pm_log.error("parse_args: options -c and -p are mutually exclusive.") | |
1007 | + return False | |
1008 | + | |
1009 | + if not self.is_continue and not self.is_present: | |
1010 | + # check Heartbeat active or dead. | |
1011 | + ret = self.funcs.is_heartbeat() | |
1012 | + if ret == None: | |
1013 | + return False | |
1014 | + elif ret: | |
1015 | + self.is_continue = True | |
1016 | + else: | |
1017 | + self.is_present = True | |
1018 | + | |
1019 | + # check file path. isn't the same path specified? | |
1020 | + try: | |
1021 | + fileList = list() | |
1022 | + if not self.stop_logconv and not self.ask_status: | |
1023 | + fileList.append((OUTPUTFILE, "output file for converted message")) | |
1024 | + fileList.append((HA_LOGFILE, "Pacemaker and Heartbeat log file")) | |
1025 | + fileList.append((HACFFILE, "Heartbeat's configuration file")) | |
1026 | + fileList.append((HOSTCACHE, "Heartbeat's hostcache file")) | |
1027 | + fileList.append((self.STATFILE, | |
1028 | + "pm_logconv's status file (can't specify by user)")) | |
1029 | + fileList.append((self.configfile, "pm_logconv's configuration file")) | |
1030 | + fileList.append((self.PIDFILE, | |
1031 | + "pm_logconv's PID file (can't specify by user)")) | |
1032 | + | |
1033 | + for i in range(0, len(fileList) - 1): | |
1034 | + for j in range(i + 1, len(fileList)): | |
1035 | + pathi, desci = tuple(fileList[i]) | |
1036 | + pathj, descj = tuple(fileList[j]) | |
1037 | + pm_log.debug("path check: [%s] [%s]" | |
1038 | + % (os.path.realpath(pathi), os.path.realpath(pathj))) | |
1039 | + if os.path.realpath(pathi) == os.path.realpath(pathj): | |
1040 | + pm_log.error("parse_args: specified same path [%s] " \ | |
1041 | + "as \"%s\" and \"%s\"." % (pathi, desci, descj)) | |
1042 | + return False | |
1043 | + except Exception, strerror: | |
1044 | + pm_log.error("checking path: error occurred.") | |
1045 | + pm_log.debug("checking path: error occurred. [%s]" % strerror) | |
1046 | + return False | |
1047 | + | |
1048 | + if not self.stop_logconv and not self.ask_status: | |
1049 | + # Parse settings for log convertion. | |
1050 | + pcfobj.parse_logconv_settings() | |
1051 | + return True | |
1052 | + | |
1053 | + ''' | |
1054 | + run in the background as a daemon, if option -d is specified. | |
1055 | + and create PID file. | |
1056 | + ''' | |
1057 | + def make_daemon(self, pidfile): | |
1058 | + if self.daemonize: | |
1059 | + try: | |
1060 | + pid = os.fork() | |
1061 | + if pid > 0: | |
1062 | + sys.exit(0) | |
1063 | + pm_log.debug("make_daemon: fork() #1 succeeded. pid[%d]" % os.getpid()) | |
1064 | + pm_log.pid = os.getpid() | |
1065 | + except OSError, strerror: | |
1066 | + pm_log.error("make_daemon: fork() #1 error occurred.") | |
1067 | + pm_log.debug("make_daemon: fork() #1 error occurred. [%s]" % strerror) | |
1068 | + sys.exit(1) | |
1069 | + | |
1070 | + try: | |
1071 | + os.setsid() | |
1072 | + except OSError, strerror: | |
1073 | + pm_log.error("make_daemon: setsid() error occurred.") | |
1074 | + pm_log.debug("make_daemon: setsid() error occurred. [%s]" % strerror) | |
1075 | + sys.exit(1) | |
1076 | + | |
1077 | + try: | |
1078 | + pid = os.fork() | |
1079 | + if pid > 0: | |
1080 | + sys.exit(0) | |
1081 | + pm_log.debug("make_daemon: fork() #2 succeeded. pid[%d]" % os.getpid()) | |
1082 | + pm_log.pid = os.getpid() | |
1083 | + except OSError, strerror: | |
1084 | + pm_log.error("make_daemon: fork() #2 error occurred.") | |
1085 | + pm_log.debug("make_daemon: fork() #2 error occurred. [%s]" % strerror) | |
1086 | + sys.exit(1) | |
1087 | + | |
1088 | + ret = pidfile.lock() | |
1089 | + if ret > 0: | |
1090 | + print >> sys.stderr, "pm_logconv: already running [pid %d]" % ret | |
1091 | + pm_log.info("make_daemon: pm_logconv is already running [pid %d]" % ret) | |
1092 | + sys.exit(0) | |
1093 | + elif ret == pidfile.SYSTEM_ERROR: | |
1094 | + pm_log.info("make_daemon: couldn't start pm_logconv.") | |
1095 | + sys.exit(1) | |
1096 | + | |
1097 | + if self.daemonize: | |
1098 | + try: | |
1099 | + os.chdir("/") | |
1100 | + os.umask(0) | |
1101 | + sys.stdin.close(); sys.stdin = None | |
1102 | + sys.stdout.close(); sys.stdout = None | |
1103 | + sys.stderr.close(); sys.stderr = None | |
1104 | + os.close(0) | |
1105 | + os.close(1) | |
1106 | + os.close(2) | |
1107 | + except: | |
1108 | + pass | |
1109 | + return True | |
1110 | + | |
	'''
	    stop running pm_logconv.
	    return 0   : succeeded. or already stopped.
	           1   : error occurred. it may not have stopped...
	'''
	def logconv_stop(self, pidfile):
		# pidfile.read() returns the running daemon's pid (> 0) or one of
		# the negative sentinel codes handled below.
		logconv_pid = pidfile.read()
		if logconv_pid <= 0:
			if logconv_pid == pidfile.SYSTEM_ERROR:
				pm_log.info("logconv_stop: couldn't try to stop pm_logconv.")
				return 1
			elif logconv_pid == pidfile.FILE_NOTEXIST:
				# No pid file at all: nothing to stop, treat as success.
				pm_log.info("logconv_stop: couldn't try to stop pm_logconv.")
				return 0
			elif logconv_pid == pidfile.FILE_INVALID:
				pm_log.info("logconv_stop: couldn't try to stop pm_logconv.")
				return 1
			elif logconv_pid == pidfile.NOTRUNNING:
				# Stale pid file: the recorded process is already gone.
				pm_log.info("logconv_stop: pm_logconv already stopped.")
				return 0
			# Unknown sentinel value: report failure.
			return 1

		pm_log.info("logconv_stop: stopping pm_logconv with pid [%d]." % logconv_pid)
		try:
			# Ask the daemon to terminate.
			os.kill(logconv_pid, signal.SIGTERM)

			# wait for the running pm_logconv to die.
			pm_log.info("logconv_stop: waiting for pid [%d] to exit." % logconv_pid)

			# Poll with signal 0 (pure existence check); os.kill() raises
			# OSError(ESRCH) once the process has exited, which leaves the
			# loop via the except clause below.
			while 1:
				os.kill(logconv_pid, 0)
				time.sleep(1)
		except Exception, (errNo, strerror):
			# NOTE(review): ESRCH is presumably imported from errno at the
			# top of this file -- confirm.
			if errNo != ESRCH:
				# Some other failure (e.g. EPERM): the process may still
				# be running.
				pm_log.warn("logconv_stop: pid %d not killed." % logconv_pid)
				pm_log.debug("logconv_stop: pid %d not killed. [%s]"
					% (logconv_pid, strerror))
				return 1
			else:
				pm_log.info("logconv_stop: pid %ld exited." % logconv_pid)
				return 0
1152 | + | |
	'''
	    get file descriptor which matched the contents of the status file
	    (read position of ha-log).
	'''
	def get_fd(self, statfile):
		try:
			if self.is_continue:
				# Resume mode: restore cstat (inode/offset) from the status
				# file.  read() succeeding while cstat.ino stays 0 means the
				# file was missing/empty.
				if statfile.read() and cstat.ino == 0:
					pm_log.error("get_fd: status file doesn't exist.")

			if cstat.ino > 0:
				# We have a remembered read position; find the file that
				# still carries that inode.
				if os.path.exists(HA_LOGFILE) and \
					cstat.ino == os.stat(HA_LOGFILE)[ST_INO]:
					log = HA_LOGFILE
				else:
					# ha-log's inode didn't match, logrotate?
					# look for the file which inode matches.
					for log in glob.glob(HA_LOGFILE + "?*"):
						if cstat.ino == os.stat(log)[ST_INO]:
							break
					else:
						# for/else: no rotated file matched the inode.
						pm_log.warn("get_fd: Pacemaker and Heartbeat log" \
							"(inode:%d) doesn't exist." % cstat.ino)
						log = None
						statfile.clear_cstat()

				if log != None:
					f = open(log, 'r')
					if os.fstat(f.fileno()).st_size >= cstat.offset:
						# Continue from the saved position.
						f.seek(cstat.offset)
					else:
						# The file shrank (truncated?): restart from the top
						# and drop all conversion state.
						pm_log.warn("get_fd: there is possibility that " \
							"Pacemaker and Heartbeat log was clear.")
						pm_log.debug("get_fd: reset offset, since " \
							"offset[%d] > file size[%d]"
							% (cstat.offset, os.fstat(f.fileno()).st_size))
						cstat.offset = 0
						self.funcs.clear_status()
					pm_log.info("get_fd: target to convert [%s(inode:%d)]"
						% (log, cstat.ino))
					return f

			# No usable saved position: open the live log.
			if os.path.exists(HA_LOGFILE):
				f = open(HA_LOGFILE, 'r')
				if not self.is_continue:
					# Fresh start: skip everything already in the file.
					f.seek(os.fstat(f.fileno()).st_size)
			else:
				# Wait until Heartbeat creates the log (or we are told to
				# shut down).
				while not os.path.exists(HA_LOGFILE):
					if do_shutdown:
						return None
					time.sleep(1)
				f = open(HA_LOGFILE, 'r')
			pm_log.info("get_fd: target to convert [%s(inode:%d)]"
				% (HA_LOGFILE, os.fstat(f.fileno()).st_ino))
			return f
		except Exception, strerror:
			pm_log.error("get_fd: I/O error occurred.")
			pm_log.debug("get_fd: I/O error occurred. [%s]" % strerror)
			statfile.clear_cstat()
			return None
1213 | + | |
	'''
	    get the Pacemaker and Heartbeat log path, when `logrotate` occurs.
	'''
	def get_nextlog(self, ino, statfile):
		# NOTE(review): the path found by the loop below is never returned --
		# after "break" the function falls off the end and implicitly returns
		# None, so the caller always falls back to HA_LOGFILE and any unread
		# tail of the rotated file is skipped.  Confirm whether a
		# "return log" after the loop was intended (a naive fix would make
		# the caller re-read the rotated file from offset 0).
		def get_nextlog(self, ino, statfile):
			pass
		try:
			for log in glob.glob(HA_LOGFILE + "?*"):
				pm_log.debug("get_nextlog: searching previous target[%s(inode:%d)]"
					% (log, os.stat(log)[ST_INO]))
				if ino == os.stat(log)[ST_INO]:
					pm_log.debug("get_nextlog: searching.. found it[%s].size[%d]"
						% (log, os.stat(log)[ST_SIZE]))
					break
			else:
				# for/else: no rotated file carries the remembered inode;
				# it was probably removed.  Drop the saved position.
				pm_log.warn("get_nextlog: target(inode:%d) was lost. " \
					"there is possibility that file was remove." % ino)
				statfile.clear_cstat()
				return None

		except Exception, strerror:
			pm_log.warn("get_nextlog: error occurred.")
			pm_log.debug("get_nextlog: error occurred. [%s]" % strerror)
			statfile.clear_cstat()
			return None
1237 | + | |
1238 | + ''' | |
1239 | + Check DC node is idle or not with crmadmin command. | |
1240 | + When DC is idle, crmadmin returns "S_IDLE" status. | |
1241 | + return: True -> DC is idle. | |
1242 | + False -> DC is not idle. | |
1243 | + None -> error occurs. | |
1244 | + cannot execute command or maybe during DC election. | |
1245 | + ''' | |
1246 | + def is_idle(self): | |
1247 | + # Connection timeout (ms). | |
1248 | + # crmadmin command's default value is 30sec. | |
1249 | + TIMEOUT = 30 * 1000 | |
1250 | + | |
1251 | + # Heartbeat status check | |
1252 | + if self.funcs.is_heartbeat() != True: | |
1253 | + return False | |
1254 | + | |
1255 | + # Get DC node name. | |
1256 | + options = ("-D -t %s" % (TIMEOUT)) | |
1257 | + (status, output) = \ | |
1258 | + self.funcs.exec_outside_cmd(CMD_CRMADMIN, options, False) | |
1259 | + if status == None: | |
1260 | + # Failed to exec command. | |
1261 | + pm_log.warn("is_idle(): failed to get DC node name.") | |
1262 | + return None | |
1263 | + if status != 0: | |
1264 | + # Maybe during DC election. | |
1265 | + return False | |
1266 | + try: | |
1267 | + dcnode = output.split()[-1] | |
1268 | + except: | |
1269 | + # Failed to parse output strings. | |
1270 | + pm_log.warn("is_idle(): failed to parse output strings." + | |
1271 | + "(DC node name)") | |
1272 | + return None | |
1273 | + | |
1274 | + # Get DC status. | |
1275 | + options = ("-S %s -t %s" % (dcnode, TIMEOUT)) | |
1276 | + (status, output) = \ | |
1277 | + self.funcs.exec_outside_cmd(CMD_CRMADMIN, options, False) | |
1278 | + if status == None: | |
1279 | + # Failed to exec command. | |
1280 | + pm_log.warn("is_idle(): failed to get DC node status.") | |
1281 | + return None | |
1282 | + if status != 0: | |
1283 | + # Maybe during DC election. | |
1284 | + return False | |
1285 | + try: | |
1286 | + dcstat = output.split()[-2] | |
1287 | + except: | |
1288 | + # Failed to parse output strings. | |
1289 | + pm_log.warn("is_idle(): failed to parse output strings." + | |
1290 | + "DC node status") | |
1291 | + return None | |
1292 | + if dcstat == "S_IDLE": | |
1293 | + return True | |
1294 | + return False | |
1295 | + | |
	'''
	    Reset log convert status when Pacemaker doesn't output any log message
	    over RESET_INTERVAL sec.
	    Before reset process, check whether DC node is idle or not.
	    arg1   : signal number. for use this func as signal handler.
	    arg2   : stack frame. for use this func as signal handler.
	    return nothing.
	'''
	def check_dc_and_reset(self, signum, frame):
		# signum == None means we were called from the polling loop in
		# convert(), not as a signal handler: throttle the reset so it runs
		# at most once per RESET_INTERVAL of silence.
		if signum == None:
			now = datetime.datetime.now()
			if ((self.last_logoutput_t +
				datetime.timedelta(seconds=RESET_INTERVAL)) > now) or \
				((self.last_reset_t +
				datetime.timedelta(seconds=RESET_INTERVAL)) > now):
				return
		if signum == None:
			# Remember when we last attempted a reset (polling path only).
			self.last_reset_t = datetime.datetime.now()
		pm_log.debug("check_dc_and_reset(): try to reset log convert status.")
		self.funcs.debug_status()
		# Only reset while the DC reports S_IDLE; otherwise a conversion
		# sequence may still be in progress.
		ret = self.is_idle()
		if ret == True:
			self.funcs.clear_status()
			pm_log.debug("check_dc_and_reset(): " +
				"reset log convert status complete.")
			# Persist the cleared state (statfile is a module global set
			# up by convert()).
			if statfile: statfile.write()
		elif ret == False:
			pm_log.debug("check_dc_and_reset(): DC node is not idle. " +
				"Avoid to reset log convert status.")
		elif ret == None:
			pm_log.error("check_dc_and_reset(): failed to check DC status. " +
				"Avoid to reset log convert status.")
		return
1329 | + | |
1330 | + ''' | |
1331 | + Check a line of log message matched or not matched with each re-objects. | |
1332 | + NOTE: pattern strings which are written in a line (in a option which is | |
1333 | + named "pattern*") are treated as "AND condition". | |
1334 | + If one section has two or more options named "pattern*", | |
1335 | + these are treated as "OR condition". | |
1336 | + ex.) | |
1337 | + pattern1 = aa, bb | |
1338 | + pattern2 = cc, dd | |
1339 | + means | |
1340 | + "if (($0 ~ /aa/) && ($0 ~ /bb/) || ($0 ~ /cc/) && ($0 ~ /dd/))" | |
1341 | + True : matched | |
1342 | + False : not matched | |
1343 | + None : error occurs. | |
1344 | + ''' | |
1345 | + def is_matched(self, logline, lconvfrm): | |
1346 | + matched = False | |
1347 | + for ptnobjList in lconvfrm.ptnList: | |
1348 | + # Matching with each re-object which came from strings | |
1349 | + # written in a option "pattern*" | |
1350 | + matchcnt = 0 | |
1351 | + for ptnobj in ptnobjList: | |
1352 | + try: | |
1353 | + if ptnobj.search(logline) != None: | |
1354 | + matchcnt += 1 | |
1355 | + except Exception, strerror: | |
1356 | + # Error occurs. | |
1357 | + pm_log.debug("is_matched(): %s" % (strerror)) | |
1358 | + return None | |
1359 | + if matchcnt == len(ptnobjList): | |
1360 | + # If the log message matched with all object in a pattern line, | |
1361 | + # it is a target log message to convert. | |
1362 | + matched = True | |
1363 | + break | |
1364 | + # If not matched with objects in a pattern line, | |
1365 | + # continue to check with the next line. | |
1366 | + return matched | |
1367 | + | |
	'''
	    Check the log message is a target to convert or not
	    with all rules which are specified in config file.
	    and call specified function when a target log message appears.
	    return nothing
	'''
	def do_ptn_matching(self, logline):
		# When an "ignoremsg" rule fires, the auto-reset timestamp is NOT
		# refreshed at the end of this method.
		setdate = True
		for lconvfrm in lconvRuleList:
			matched = self.is_matched(logline, lconvfrm)
			if matched == True:
				# Split the raw line into date / proc / level / message.
				logelm = LogElements()
				if logelm.parse_logmsg(logline, self.funcs) != 0:
					pm_log.error("do_ptn_matching(): " +
						"failed to parse log message. [%s]" % (logline))
					# Set the time of output log message for auto reset.
					self.last_logoutput_t = datetime.datetime.now()
					return # Break off converting this log message.
				# Set original date string and log level.
				outputobj = OutputConvertedLog()
				outputobj.set_datestr(logelm.datestr)
				outputobj.set_orgloglevel(logelm.haloglevel)
				outputobj.set_orglogmsg(logelm.halogmsg)

				# Call specified function.
				# The rule names one of LogConvertFuncs' converter methods;
				# dispatch dynamically via getattr.
				try:
					pm_log.debug("do_ptn_matching(): execute %s()." %
						(lconvfrm.func))
					ret = getattr(self.funcs, lconvfrm.func)(\
						outputobj, logelm, lconvfrm)
				except Exception, strerror:
					pm_log.error("do_ptn_matching(): " +
						"failed to execute %s()." % (lconvfrm.func))
					pm_log.debug("do_ptn_matching(): %s" % (strerror))
					continue # To check next rule.

				if ret == CONV_OK:
					# convertion succeeded.
					# If the log is a trigger of FailOver, tell to funcs.
					if lconvfrm.fotrigger:
						cstat.FAILURE_OCCURRED = lconvfrm.fotrigger
						# FailOver pattern
						#   resource failer + resource move
						#   score failer + resource move
						#   node failer + resource start
						#   resource failer + resource stop
						#   score failer + resource stop
						#   node failer + resource stopped
						if \
						(cstat.FAILURE_OCCURRED == FAIL_RSC and cstat.ACTRSC_MOVE == FAIL_MOVE) or \
						(cstat.FAILURE_OCCURRED == FAIL_SCORE and cstat.ACTRSC_MOVE == FAIL_MOVE) or \
						(cstat.FAILURE_OCCURRED == FAIL_NODE and cstat.ACTRSC_MOVE == FAIL_STR) or \
						(cstat.FAILURE_OCCURRED == FAIL_RSC and cstat.ACTRSC_MOVE == FAIL_STP) or \
						(cstat.FAILURE_OCCURRED == FAIL_SCORE and cstat.ACTRSC_MOVE == FAIL_STP) or \
						(cstat.FAILURE_OCCURRED == FAIL_NODE and cstat.ACTRSC_MOVE == FAIL_STPD):
							self.funcs.detect_fo_start(outputobj)
					if lconvfrm.ignoremsg:
						setdate = False
				elif ret == CONV_SHUT_NODE:
					# Message concerns a node already in the shutdown list;
					# skip it and keep checking the remaining rules.
					continue
				else:
					# Conversion failed: build a diagnostic per return code.
					if ret == CONV_PARSE_ERROR:
						errmsg = ("%s(): " % (lconvfrm.func) +
							"failed to parse log message. [%s]" %
							(logelm.halogmsg))
					elif ret == CONV_ITEM_EMPTY:
						errmsg = ("%s(): " % (lconvfrm.func) +
							"invalid log message format. [%s]" %
							(logelm.halogmsg))
					elif ret == CONV_GETINFO_ERROR:
						errmsg = ("%s(): " % (lconvfrm.func) +
							"failed to get some information to output log. " +
							"[%s]" % (logelm.halogmsg))
					else:
						errmsg = ("%s(): " % (lconvfrm.func) +
							"unknown error occurred. " +
							"[%s]" % (logelm.halogmsg))
					# When log convertion failed, output original message.
					pm_log.error(errmsg)
					outputobj.output_log(lconvfrm.loglevel, None)
			elif matched == None:
				pm_log.error("do_ptn_matching(): " +
					"pattern matching about [%s] failed." %
					(lconvfrm.rulename))
			else:
				# Not matched.
				pass
			#__for lconvfrm in lconvRuleList: (check next rule)

		# Set the time of output log message for auto reset.
		if setdate:
			self.last_logoutput_t = datetime.datetime.now()
		return
1461 | + | |
	'''
	    read the Pacemaker and Heartbeat log and convert it.
	'''
	def convert(self):
		global statfile
		try:
			statfile = StatusFile(self.STATFILE)
			logfile = self.get_fd(statfile)
			if logfile == None:
				# get_fd() returns None both on shutdown request and on
				# error; distinguish via the do_shutdown flag.
				if do_shutdown:
					return 0
				return 1
			cstat.ino = os.fstat(logfile.fileno()).st_ino

			# tail -f style main loop.
			while 1:
				logline = logfile.readline()
				cstat.offset = logfile.tell()

				if not logline:
					# At EOF: possibly auto-reset the convert status after
					# a long silence.
					self.check_dc_and_reset(None, None)

					# Persist the read position if it moved since the last
					# write.
					if cstat.ino != statfile.w_ino or \
						cstat.offset != statfile.w_offset:
						statfile.write()

					# The file shrank (truncated?): restart from the top
					# and drop all conversion state.
					if os.fstat(logfile.fileno()).st_size < cstat.offset:
						pm_log.warn("convert: there is possibility that " \
							"Pacemaker and Heartbeat log was clear.")
						pm_log.debug("convert: reset offset, since " \
							"offset[%d] > file size[%d]" % (cstat.offset,
							os.fstat(logfile.fileno()).st_size))
						logfile.seek(0)
						cstat.offset = 0
						self.funcs.clear_status()
						statfile.write()

					# Still reading the live log file: just wait for more
					# data (or shut down).
					if os.path.exists(HA_LOGFILE) and \
						cstat.ino == os.stat(HA_LOGFILE)[ST_INO]:
						if do_shutdown:
							logfile.close()
							return 0
						time.sleep(1)
						continue
					# Inode changed: the log was rotated.  Switch target.
					logfile.close()

					path = self.get_nextlog(cstat.ino, statfile)
					if path == None:
						path = HA_LOGFILE
					while not os.path.exists(path):
						if do_shutdown:
							return 0
						time.sleep(1)
					pm_log.info("convert: change target[%s(inode:%d)]"
						% (path, os.stat(path)[ST_INO]))
					logfile = open(path, 'r')
					cstat.ino = os.fstat(logfile.fileno()).st_ino
				else:
					# Got a full line: run it through the rule table and
					# persist the new position.
					self.do_ptn_matching(logline)
					statfile.write()
		except Exception, strerror:
			pm_log.error("convert: error occurred.")
			pm_log.debug("convert: error occurred. [%s]" % strerror)
			return 1
1525 | + | |
1526 | + ''' | |
1527 | + main method. | |
1528 | + ''' | |
1529 | + def main(self): | |
1530 | + signal.alarm(0) | |
1531 | + pidfile = PIDFile(self.PIDFILE) | |
1532 | + | |
1533 | + if self.ask_status: | |
1534 | + ret = pidfile.read() | |
1535 | + if ret > 0: | |
1536 | + pm_log.info("status: pm_logconv is running [pid = %d]" % ret) | |
1537 | + return 0 | |
1538 | + elif ret == pidfile.FILE_NOTEXIST or ret == pidfile.NOTRUNNING: | |
1539 | + pm_log.info("status: pm_logconv is stopped.") | |
1540 | + return 1 | |
1541 | + else: | |
1542 | + pm_log.info("status: couldn't check status of pm_logconv.") | |
1543 | + return 2 | |
1544 | + | |
1545 | + if self.stop_logconv: | |
1546 | + return self.logconv_stop(pidfile) | |
1547 | + | |
1548 | + self.make_daemon(pidfile) | |
1549 | + time.sleep(1) | |
1550 | + pm_log.info("started: pid[%d], ppid[%d], pgid[%d]" | |
1551 | + % (os.getpid(), os.getppid(), os.getpgrp())) | |
1552 | + return self.convert() | |
1553 | + | |
class LogElements:
	# Holder for the four parts of one ha-log/syslog line:
	# process name, date string, log level, and message body.
	def __init__(self, procname=None, datestr=None,
		haloglevel=None, halogmsg=None):
		self.procname = procname
		self.datestr = datestr
		self.haloglevel = haloglevel
		self.halogmsg = halogmsg

	'''
	    Divide ha-log message into process-name, date-string, log-level, and
	    log-message.
	    arg1   : a line of log message.
	    return : 0  -> succeeded.
	             0 > -> error occurrs.
	'''
	def parse_logmsg(self, logline, funcs):
		# Token positions for the syslog-style format, e.g.
		# "Jan  1 00:00:00 node proc: [pid]: info: msg"
		# -- assumes the process/pid prefix occupies tokens 4-5;
		# TODO(review): confirm against an actual Heartbeat log line.
		SYSFMT_PROC_POS = 4
		SYSFMT_DATE_START_POS = 0
		SYSFMT_DATE_END_POS = 2 + 1
		SYSFMT_LOGLV_POS = 6

		# Token positions for the native ha-log format, e.g.
		# "proc[pid]: 2009/01/01_00:00:00 info: msg"
		HBFMT_PROC_POS = 0
		HBFMT_DATE_POS = 1
		HBFMT_LOGLV_POS = 2

		try:
			elementList = logline.split()
			# A leading alphabetic token means the line starts with a month
			# name, i.e. syslog date format.
			if elementList[0].isalpha():
				# Case of syslogmsgfmt = True (default)
				pm_log.debug("parse log message as syslog format.")
				self.datestr = ' '.join(elementList[SYSFMT_DATE_START_POS:SYSFMT_DATE_END_POS])
				self.procname = funcs.trimmark(elementList[SYSFMT_PROC_POS])
				self.haloglevel = funcs.trimmark(elementList[SYSFMT_LOGLV_POS])
				msgpos = SYSFMT_LOGLV_POS + 1
				self.halogmsg = ' '.join(elementList[msgpos:]).strip()
			else:
				# Case of syslogmsgfmt = False
				pm_log.debug("parse log message as ha-log format.")
				# "proc[pid]" -> keep only the part before '['.
				self.procname = elementList[HBFMT_PROC_POS].split('[')[0]
				self.datestr = elementList[HBFMT_DATE_POS]
				self.haloglevel = funcs.trimmark(elementList[HBFMT_LOGLV_POS])
				msgpos = HBFMT_LOGLV_POS + 1
				self.halogmsg = ' '.join(elementList[msgpos:])

			return 0
		except Exception, strerror:
			pm_log.debug("parse_logmsg(): %s" % (strerror))
			return -1

	'''
	    Only for debug.
	'''
	def print_logelements(self):
		print self.procname
		print self.datestr
		print self.haloglevel
		print self.halogmsg
1611 | + | |
1612 | +''' | |
1613 | + Class for output converted log message. | |
1614 | +''' | |
1615 | +class OutputConvertedLog: | |
1616 | + def __init__(self, datestr=None, loglevel=None, logmsg=None): | |
1617 | + self.datestr = datestr | |
1618 | + self.loglevel = loglevel | |
1619 | + self.logmsg = logmsg | |
1620 | + self.monthnumDic = { | |
1621 | + '01':'Jan', | |
1622 | + '02':'Feb', | |
1623 | + '03':'Mar', | |
1624 | + '04':'Apr', | |
1625 | + '05':'May', | |
1626 | + '06':'Jun', | |
1627 | + '07':'Jul', | |
1628 | + '08':'Aug', | |
1629 | + '09':'Sep', | |
1630 | + '10':'Oct', | |
1631 | + '11':'Nov', | |
1632 | + '12':'Dec' | |
1633 | + } | |
1634 | + self.monthstrDic = { | |
1635 | + 'Jan':'01', | |
1636 | + 'Feb':'02', | |
1637 | + 'Mar':'03', | |
1638 | + 'Apr':'04', | |
1639 | + 'May':'05', | |
1640 | + 'Jun':'06', | |
1641 | + 'Jul':'07', | |
1642 | + 'Aug':'08', | |
1643 | + 'Sep':'09', | |
1644 | + 'Oct':'10', | |
1645 | + 'Nov':'11', | |
1646 | + 'Dec':'12' | |
1647 | + } | |
1648 | + | |
1649 | + def set_datestr(self, datestr): | |
1650 | + if SYSLOGFORMAT: | |
1651 | + tmp_datestr = self.to_syslog_dateformat(datestr) | |
1652 | + else: | |
1653 | + tmp_datestr = self.to_halog_dateformat(datestr) | |
1654 | + | |
1655 | + if tmp_datestr != None: | |
1656 | + self.datestr = tmp_datestr | |
1657 | + else: | |
1658 | + pm_log.error("set_datestr(): " + | |
1659 | + "invalid date format. [%s] " % (datestr) + | |
1660 | + "output in original format.") | |
1661 | + self.datestr = datestr | |
1662 | + | |
1663 | + def set_orgloglevel(self, loglevel): | |
1664 | + self.orgloglevel = loglevel | |
1665 | + | |
1666 | + def set_orglogmsg(self, logmsg): | |
1667 | + self.orglogmsg = logmsg | |
1668 | + | |
1669 | + ''' | |
1670 | + Output log message. | |
1671 | + loglevel and log message is variable, but date is not | |
1672 | + (output original log's date). | |
1673 | + arg1 : loglevel string. | |
1674 | + arg2 : log message | |
1675 | + return: 0 -> succeeded. | |
1676 | + 0 > -> error occurrs. | |
1677 | + ''' | |
1678 | + def output_log(self, convloglevel, convlogmsg): | |
1679 | + output_loglevel = self.orgloglevel | |
1680 | + if convloglevel != None: | |
1681 | + output_loglevel = convloglevel | |
1682 | + output_logmsg = self.orglogmsg | |
1683 | + if convlogmsg != None: | |
1684 | + output_logmsg = convlogmsg | |
1685 | + | |
1686 | + try: | |
1687 | + outputstr = ("%s %s %s: %s" % | |
1688 | + (self.datestr, HOSTNAME, output_loglevel, output_logmsg)) | |
1689 | + f = open(OUTPUTFILE, 'a') | |
1690 | + f.write("%s\n" % (outputstr)) | |
1691 | + f.close() | |
1692 | + except Exception, strerror: | |
1693 | + pm_log.error("output_log(): " + | |
1694 | + "failed to output converted log message. [%s]" % | |
1695 | + (outputstr)) | |
1696 | + pm_log.debug("output_log(): %s" % (strerror)) | |
1697 | + return -1 | |
1698 | + return 0 | |
1699 | + | |
1700 | + ''' | |
1701 | + Convert dateformat form ha-log format to syslog format. | |
1702 | + "2009/01/01_00:00:00" -> "Jan 1 00:00:00" | |
1703 | + arg1 : date string of ha-log format. | |
1704 | + return : date string which is converted to syslog format. | |
1705 | + None -> error occurs. | |
1706 | + ''' | |
1707 | + def to_syslog_dateformat(self, orgdatestr): | |
1708 | + DATE_POS = 0 #YYYY/MM/DD | |
1709 | + TIME_POS = 1 #hh:mm:ss | |
1710 | + MONTH_POS = 1 #MM | |
1711 | + DAY_POS = 2 #DD | |
1712 | + | |
1713 | + if orgdatestr.split()[0].isalpha(): | |
1714 | + pm_log.debug("It seems already syslog date format.") | |
1715 | + return orgdatestr | |
1716 | + | |
1717 | + try: | |
1718 | + datestr = orgdatestr.split('_')[DATE_POS].strip() | |
1719 | + timestr = orgdatestr.split('_')[TIME_POS].strip() | |
1720 | + if datestr == "" or timestr == "": | |
1721 | + return None | |
1722 | + | |
1723 | + monthstr = datestr.split('/')[MONTH_POS].strip() | |
1724 | + daystr = datestr.split('/')[DAY_POS].strip().lstrip('0') | |
1725 | + if monthstr == "" or daystr == "": | |
1726 | + return None | |
1727 | + if monthstr in self.monthnumDic == False: | |
1728 | + return None | |
1729 | + monthstr = self.monthnumDic[monthstr] | |
1730 | + syslog_datestr = ("%s %s %s" % (monthstr, daystr, timestr)) | |
1731 | + return syslog_datestr | |
1732 | + except Exception, strerror: | |
1733 | + pm_log.debug("to_syslog_dateformat(): %s" % (strerror)) | |
1734 | + return None | |
1735 | + | |
1736 | + ''' | |
1737 | + Convert dateformat form syslog format to ha-log format. | |
1738 | + "Jan 1 00:00:00" -> "2009/01/01_00:00:00" | |
1739 | + arg1 : date string of syslog format. | |
1740 | + return : date string which is converted to ha-log original format. | |
1741 | + None -> error occurs. | |
1742 | + ''' | |
1743 | + def to_halog_dateformat(self, orgdatestr): | |
1744 | + MONTH_POS = 0 | |
1745 | + DAY_POS = 1 | |
1746 | + TIME_POS = 2 | |
1747 | + | |
1748 | + strList = orgdatestr.split() | |
1749 | + if strList[0].isalpha() == False: | |
1750 | + pm_log.debug("It seems already ha-log date format.") | |
1751 | + return orgdatestr | |
1752 | + try: | |
1753 | + monthstr = strList[MONTH_POS].strip() | |
1754 | + daystr = strList[DAY_POS].strip() | |
1755 | + timestr = strList[TIME_POS].strip() | |
1756 | + if monthstr == "" or daystr == "" or timestr == "": | |
1757 | + return None | |
1758 | + if monthstr in self.monthstrDic == False: | |
1759 | + return None | |
1760 | + monthstr = self.monthstrDic[monthstr] | |
1761 | + now = datetime.datetime.now() | |
1762 | + yearstr = str(now.timetuple().tm_year) | |
1763 | + hblog_datestr = ("%s/%s/%02d_%s" % | |
1764 | + (yearstr, monthstr, int(daystr), timestr)) | |
1765 | + | |
1766 | + # If date string is future, minus year value. | |
1767 | + hblog_date = datetime.datetime(\ | |
1768 | + *time.strptime(hblog_datestr, "%Y/%m/%d_%H:%M:%S")[0:6]) | |
1769 | + if hblog_date > now: | |
1770 | + year = int(yearstr) - 1 | |
1771 | + hblog_datestr = hblog_datestr.replace(yearstr, str(year), 1) | |
1772 | + | |
1773 | + return hblog_datestr | |
1774 | + except Exception, strerror: | |
1775 | + pm_log.debug("to_halog_dateformat(): %s" % (strerror)) | |
1776 | + return None | |
1777 | + | |
'''
	Class to hold resource status in F/O process.
'''
class RscStat:
	'''
	    rscid    : resource id.
	    status   : [Started on node|Stopped]
	    fofailed : True  -> F/O failed. ("cannot run anywhere" appeared.)
	               False -> "cannot run anywhere" didn't appear.
	    unmanaged: True  -> resource is unmanaged.
	               False -> resource is managed.
	'''
	def __init__(self, rscid=None, status=None, fofailed=False,
		unmanaged=False):
		self.rscid = rscid
		self.status = status
		self.fofailed = fofailed
		self.unmanaged = unmanaged

	''' operator eq '''
	# Equality is identity of rscid only; status and the flags are ignored,
	# so list membership/lookup works by resource id.
	def __eq__(self,other):
		return (self.rscid == other.rscid)

	''' replace status and flags'''
	# Merge the non-empty/True fields of "new" into self; a False or None
	# value in "new" never clears an already-set field.
	def replace(self,new):
		if new.status:
			self.status = new.status
		if new.fofailed:
			self.fofailed = new.fofailed
		if new.unmanaged:
			self.unmanaged = new.unmanaged

	'''
	    Only for debug.
	'''
	def print_rscstat(self):
		print "rsc:%s\tstatus:%s\tfofailed:%s\tunmanaged:%s\t" % (self.rscid,self.status,self.fofailed,self.unmanaged)
1819 | + | |
'''
	Return codes for functions to convert log.
'''
CONV_SHUT_NODE = 1	#shutdown list existed. (message skipped, not an error)
CONV_OK = 0	#log conversion succeeded.
CONV_PARSE_ERROR = -1	#failed to parse log message.
CONV_ITEM_EMPTY = -2	#parsing succeeded, but some gotten items are empty.
CONV_GETINFO_ERROR = -3	#failed to get info which is required to conversion.
1828 | +''' | |
1829 | + Class for functions to convert log message. | |
1830 | + convert-functions' arguments are: | |
1831 | + arg1: outputobj -> object for output converted log. | |
1832 | + arg2: logelm -> elements which constructs target log. date, msg etc. | |
1833 | + arg3: lconvfrm -> info for conversion. loglevel, F/Otrigger etc. | |
1834 | + return codes are: | |
1835 | + [CONV_OK|CONV_PARSE_ERROR|CONV_ITEM_EMPTY|CONV_GETINFO_ERROR] | |
1836 | + See the head of this file. | |
1837 | +''' | |
class LogConvertFuncs:
	# Log-level label strings used when rewriting a message's severity.
	LOG_ERR_LV = "ERROR"
	LOG_WARN_LV = "WARN"
	LOG_INFO_LV = "info"
	LOG_DEBUG_LV = "debug"

	def __init__(self, rscstatList=None):
		# This list is used only in F/O process.
		# If hg_logconv exits abnormally during parsing F/O process's log,
		# read from start of F/O, so it doesn't need to output status file.
		# BUGFIX: the original assigned the argument and then unconditionally
		# overwrote it with list(), silently discarding the parameter.
		# Honor a caller-supplied list; default to a fresh empty list
		# (the default is built per-call, avoiding a shared mutable default).
		if rscstatList == None:
			rscstatList = list()
		self.rscstatList = rscstatList
1850 | + | |
	'''
	    Check Heartbeat service is active or dead.
	    return: True  -> active
	            False -> dead
	            None  -> error occurs.
	'''
	def is_heartbeat(self):
		# Query the init script: "service heartbeat status"; only the exit
		# status matters here.
		status = self.exec_outside_cmd("service", "heartbeat status", False)[0]
		if status == None:
			# Failed to exec command.
			pm_log.warn("is_heartbeat(): failed to get status.")
			return None
		if status != 0:
			# Non-zero exit status: Heartbeat is not running.
			return False
		return True
1868 | + | |
	'''
	    triming mark from value.
	    arg1   : word to trim.
	    arg2   : optional string of characters to EXCLUDE from trimming.
	    return : word with punctuation characters removed.
	'''
	def trimmark(self, word, minus=None):
		# Punctuation characters deleted from the word.
		marklist = "(),.;:[]=<>'"
		if minus:
			# Keep the characters listed in "minus" (set difference); note
			# the resulting deletion order is arbitrary, which is harmless.
			markset = set(marklist) - set(minus)
			marklist = "".join(markset)
		# Python 2 str.translate(table, deletechars): identity table plus
		# deletion of every character in marklist.
		trimword = word.translate(string.maketrans("",""),marklist)
		return trimword
1879 | + | |
1880 | + ''' | |
1881 | + Check specified strings are empty or not. | |
1882 | + arg* : target strings. | |
1883 | + return : True -> there is at least an empty string | |
1884 | + in specified strings. | |
1885 | + False -> there is no empty string in specified strings. | |
1886 | + ''' | |
1887 | + def is_empty(self, *args): | |
1888 | + for arg in args: | |
1889 | + if arg == "": | |
1890 | + return True | |
1891 | + return False | |
1892 | + | |
1893 | + ''' | |
1894 | + Get node dictionary from hostcache. | |
1895 | + the dic's key is uuid, and its value is nodename. | |
1896 | + return : node dictionary in the cluster. | |
1897 | + None -> error occurs. | |
1898 | + ''' | |
1899 | + def get_nodedic(self): | |
1900 | + HOSTNAME_POS = 0 | |
1901 | + UUID_POS = 1 | |
1902 | + | |
1903 | + nodeDic = dict() | |
1904 | + try: | |
1905 | + f = open (HOSTCACHE, 'r') | |
1906 | + while 1: | |
1907 | + nodeinfo = f.readline() | |
1908 | + if not nodeinfo: | |
1909 | + break | |
1910 | + else: | |
1911 | + nodename = nodeinfo.split()[HOSTNAME_POS] | |
1912 | + uuid = nodeinfo.split()[UUID_POS] | |
1913 | + nodeDic[uuid] = nodename | |
1914 | + f.close() | |
1915 | + except: | |
1916 | + pm_log.error("get_nodedic(): " + | |
1917 | + "failed to get node list from hostcache [%s]." % (HOSTCACHE)) | |
1918 | + return None | |
1919 | + return nodeDic | |
1920 | + | |
1921 | + ''' | |
1922 | + Get nodename from uuid. | |
1923 | + arg1 : target uuid. | |
1924 | + return : name string of the node which has specified uuid. | |
1925 | + None -> error occurs. | |
1926 | + ''' | |
1927 | + def get_nodename(self, uuid): | |
1928 | + nodeDic = self.get_nodedic() | |
1929 | + if nodeDic == None: | |
1930 | + return None | |
1931 | + if uuid not in nodeDic.keys(): | |
1932 | + return None | |
1933 | + return nodeDic[uuid] | |
1934 | + | |
1935 | + ''' | |
1936 | + Parse operation id (resourceid_opname_interval) | |
1937 | + arg1 : operationid | |
1938 | + return : resourceid, opname, interval | |
1939 | + ''' | |
1940 | + def parse_opid(self, opid): | |
1941 | + # please detect parse error in caller. | |
1942 | + tmp = opid.split('_') | |
1943 | + rscid = '_'.join(tmp[:-2]) | |
1944 | + op = tmp[-2] | |
1945 | + interval = tmp[-1] | |
1946 | + return rscid, op, interval | |
1947 | + | |
    '''
    Execute commandline command.
    arg1 : command name to execute.
    arg2 : command options.
    arg3 : check return code or not.
    return : [status, output]
        status -> exit status.
        output -> output strings of the command.
        None -> error occurs.
    '''
    def exec_outside_cmd(self, cmdname, options, checkrc):
        # Get full path of specified command.
        # NOTE: commands.getstatusoutput (Python 2 only) returns a
        # wait(2)-style status, hence the os.WIF*/os.WEXITSTATUS below.
        try:
            status, cmdpath = \
                commands.getstatusoutput("which " + cmdname)
        except Exception, strerror:
            pm_log.error("exec_outside_cmd(): " +
                "failed to execute which command to get command path. " +
                "[%s]" % (cmdname))
            pm_log.debug("exec_outside_cmd(): %s" % (strerror))
            return None, None
        # "which" failed or exited non-zero -> command not found.
        if (os.WIFEXITED(status) == False or os.WEXITSTATUS(status) != 0):
            pm_log.error("exec_outside_cmd(): " +
                "failed to get command path. [%s]" % (cmdname))
            return None, None

        # Check whether it is able to execute the command.
        if os.access(cmdpath, os.F_OK | os.X_OK) == False:
            return None, None

        # Execute command.
        exec_cmd = ("%s %s" % (cmdpath, options))
        pm_log.debug("exec_outside_cmd(): " +
            "execute command. [%s]" % (exec_cmd))
        try:
            status, output = commands.getstatusoutput(exec_cmd)
        except Exception, strerror:
            pm_log.error("exec_outside_cmd(): " +
                "failed to exec command. [%s]" % (exec_cmd))
            pm_log.debug("exec_outside_cmd(): %s" % (strerror))
            return None, None

        # Check return status.
        # Abnormal termination (e.g. by signal) -> treat as error.
        if os.WIFEXITED(status) == False:
            pm_log.error("exec_outside_cmd(): " +
                "command [%s] exited abnormally. (status=%s)" %
                (exec_cmd, status))
            return None, None
        rc = os.WEXITSTATUS(status)
        # Non-zero rc is an error only when the caller asked for the check;
        # some callers (e.g. check_attribute) interpret rc themselves.
        if checkrc == True and rc != 0:
            pm_log.warn("exec_outside_cmd(): " +
                "command [%s] returns error. (rc=%s, msg=\"%s\")" %
                (exec_cmd, rc, output))
            return None, None
        return rc, output
2003 | + | |
2004 | + ''' | |
2005 | + Compare specified attribute's value with specified value. | |
2006 | + Operations to compare is [lt|gt|le|ge|eq|ne]. | |
2007 | + arg1 : target attribute name. | |
2008 | + arg2 : operation to compare. | |
2009 | + arg3 : the value to compare with current attribute value. | |
2010 | + arg4 : node name which has the attribute. | |
2011 | + return : (result_of_comparision, current_attr_val) | |
2012 | + result_of_comparision: | |
2013 | + True -> matched. | |
2014 | + False -> not matched. | |
2015 | + None -> error occurs or attribute doesn't exist. | |
2016 | + ''' | |
2017 | + def check_attribute(self, attrname, op, attrval, node): | |
2018 | + | |
2019 | + # Execute command. | |
2020 | + options = ("-G -U %s -t status -n %s" % (node, attrname)) | |
2021 | + (status, output) = \ | |
2022 | + self.exec_outside_cmd(CMD_CRM_ATTR, options, False) | |
2023 | + if status == None: | |
2024 | + # Failed to exec command, or | |
2025 | + # The node is dead, or | |
2026 | + # Specified attribute doesn't exist. | |
2027 | + pm_log.warn("check_attribute(): " + | |
2028 | + "failed to get %s's value." % (attrname)) | |
2029 | + return None, None | |
2030 | + | |
2031 | + pm_log.debug("check_attribute(): " + | |
2032 | + "%s's status[%s] output[%s] node[%s] attr[%s]" % | |
2033 | + (CMD_CRM_ATTR, status, output, node, attrname)) | |
2034 | + | |
2035 | + if status != 0: | |
2036 | + # crm_attribute returns error value. | |
2037 | + # Maybe local node is shutting down. | |
2038 | + return None, None | |
2039 | + # In normal case, crm_attribute command shows like the following. | |
2040 | + # " name=default_ping_set value=100" | |
2041 | + # So parse it to get current attribute value. | |
2042 | + try: | |
2043 | + valuepos = output.index('value=') | |
2044 | + currentval = output[valuepos + len('value='):].strip() | |
2045 | + result = getattr(operator, op)(currentval, attrval) | |
2046 | + except: | |
2047 | + pm_log.error("check_attribute(): " + | |
2048 | + "failed to comparison %s's value. " % (attrname) + | |
2049 | + "(currentval=%s, op=%s, specifiedval=%s)" % | |
2050 | + (currentval, op, attrval)) | |
2051 | + return None, None | |
2052 | + return result, currentval | |
2053 | + | |
2054 | + ''' | |
2055 | + Check the specified node is ping node or not. | |
2056 | + To get ping node information, parse ha.cf. | |
2057 | + arg1 : target node name. | |
2058 | + return : True -> the node is ping node. | |
2059 | + False -> the node is not ping node. | |
2060 | + None -> error occurs. | |
2061 | + ''' | |
2062 | + def is_pingnode(self, nodename): | |
2063 | + pingnodeList = list() | |
2064 | + # parse ha.cf to get ping nodes. | |
2065 | + try: | |
2066 | + if os.access(HACFFILE, os.F_OK | os.R_OK) == False: | |
2067 | + pm_log.error("is_pingnode(): " + | |
2068 | + "failed to read ha.cf file. [%s]" % (HACFFILE)) | |
2069 | + return None | |
2070 | + | |
2071 | + cf = open(HACFFILE, 'r') | |
2072 | + for line in cf: | |
2073 | + wordList = line.split() | |
2074 | + if len(wordList) < 1: | |
2075 | + # Ignore empty line. | |
2076 | + continue | |
2077 | + if wordList[0] == "ping": | |
2078 | + pingnodeList.extend(wordList[1:]) | |
2079 | + elif wordList[0] == "ping_group": | |
2080 | + pingnodeList.extend(wordList[2:]) | |
2081 | + else: | |
2082 | + pass | |
2083 | + cf.close() | |
2084 | + except: | |
2085 | + pm_log.error("is_pingnode(): " + | |
2086 | + "failed to parse ha.cf file. [%s]" % (HACFFILE)) | |
2087 | + return None | |
2088 | + | |
2089 | + if nodename in pingnodeList: | |
2090 | + return True | |
2091 | + | |
2092 | + return False | |
2093 | + | |
2094 | + ''' | |
2095 | + Get online node from command. | |
2096 | + return : active node in the cluster. | |
2097 | + None -> error occurs. | |
2098 | + ''' | |
2099 | + def get_onlinenode(self): | |
2100 | + onlineset = set() | |
2101 | + ret = self.is_heartbeat() | |
2102 | + if ret == None: | |
2103 | + return ret | |
2104 | + elif ret == False: | |
2105 | + return onlineset | |
2106 | + options = ("-p") | |
2107 | + (status, nodelist) = self.exec_outside_cmd(CMD_CRM_NODE, options, False) | |
2108 | + if status == None: | |
2109 | + # Failed to exec command. | |
2110 | + pm_log.warn("get_onlinenode(): failed to get active nodelist.") | |
2111 | + return None | |
2112 | + | |
2113 | + for nodename in nodelist.split(): | |
2114 | + options = ("-N %s -n standby -G -l forever -d off" % (nodename)) | |
2115 | + (status, output) = self.exec_outside_cmd(CMD_CRM_ATTR, options, False) | |
2116 | + if status == None: | |
2117 | + # Failed to exec command. | |
2118 | + pm_log.warn("get_onlinenode(): failed to get online nodelist.") | |
2119 | + return None | |
2120 | + standby = output[output.index("value"):] | |
2121 | + if standby.split("=")[1] == "off": | |
2122 | + onlineset.add(nodename) | |
2123 | + pm_log.debug("get_onlinenode(): node %s is online node." % (list(onlineset))) | |
2124 | + return onlineset | |
2125 | + | |
2126 | + ''' | |
2127 | + Set specified values to RscStat object list. | |
2128 | + If the same rscid is already in the list, update the elements' value. | |
2129 | + If not, append the new RscStat object to the list. | |
2130 | + When the arg's value is None, don't update the element's value. | |
2131 | + | |
2132 | + arg1 : resource id. | |
2133 | + arg2 : the rsc's status. [Started on node|Stopped] | |
2134 | + arg3 : the rsc's F/O failed or not. (depends on "cannot run anywhere") | |
2135 | + arg4 : the rsc is managed or not. | |
2136 | + return Nothing. | |
2137 | + ''' | |
2138 | + def set_rscstat(self, rscid, statstr, fofailed, unmanaged): | |
2139 | + newrsc = RscStat(rscid,statstr,fofailed,unmanaged) | |
2140 | + if newrsc in self.rscstatList: | |
2141 | + idx = self.rscstatList.index(newrsc) | |
2142 | + self.rscstatList[idx].replace(newrsc) | |
2143 | + else: | |
2144 | + self.rscstatList.append(newrsc) | |
2145 | + | |
    '''
    Debug print for ConvertStatus (exclude ino and offset).
    Dumps the module-global cstat flags and the timed-out-operation /
    shutting-down-node sets in one debug line.
    '''
    def debug_status(self):
        pm_log.debug("debug_status(): FAIL[%s], IN_CALC[%s], "\
            "RSC_MOVE[%s], IN_FO[%s], Rscop%s, Node%s" %
            (cstat.FAILURE_OCCURRED, cstat.IN_CALC,
            cstat.ACTRSC_MOVE, cstat.IN_FO_PROCESS,
            list(cstat.timedoutRscopSet), list(cstat.shutNodeSet)))
2155 | + | |
2156 | + ''' | |
2157 | + Clear ConvertStatus (exclude ino and offset). | |
2158 | + ''' | |
2159 | + def clear_status(self): | |
2160 | + pm_log.debug("clear_status():" + | |
2161 | + "clear convert status (exclude ino and offset).") | |
2162 | + self.debug_status() | |
2163 | + cstat.FAILURE_OCCURRED = False | |
2164 | + cstat.IN_CALC = False | |
2165 | + cstat.ACTRSC_MOVE = False | |
2166 | + cstat.IN_FO_PROCESS = False | |
2167 | + cstat.timedoutRscopSet = set() | |
2168 | + cstat.shutNodeSet = set() | |
2169 | + self.debug_status() | |
2170 | + | |
    ##########
    # General-purpose functions.
    ##########
    '''
    Output original ha-log message.
    Passing None as the message makes output_log() emit the
    original log line unchanged at the rule's log level.
    '''
    def output_original_log(self, outputobj, logelm, lconvfrm):
        # Output original log message
        outputobj.output_log(lconvfrm.loglevel, None)
        return CONV_OK
2181 | + | |
    '''
    Output static message.
    This function just outputs the rule's section name as the
    converted message.
    '''
    def output_static_msg(self, outputobj, logelm, lconvfrm):
        # Output rulename (= section name).
        outputobj.output_log(lconvfrm.loglevel, lconvfrm.rulename)
        return CONV_OK
2190 | + | |
2191 | + ########## | |
2192 | + # For Resource event. | |
2193 | + ########## | |
2194 | + ''' | |
2195 | + Convert log message which means HB tries to operate. | |
2196 | + This function is common for OCF resource's start, stop, promote, demote | |
2197 | + and STONITH resource's start, stop. | |
2198 | + NOTE: monitor operation is not a target. | |
2199 | + | |
2200 | + MsgNo.1-1) | |
2201 | + Jan 6 14:16:27 x3650a crmd: [9874]: info: do_lrm_rsc_op: Performing key=17:2:0:dae9d86d-9c4b-44f2-822c-b559db044ba2 op=prmApPostgreSQLDB_start_0 ) | |
2202 | + MsgNo.2-1) | |
2203 | + Jan 6 15:05:00 x3650a crmd: [9874]: info: do_lrm_rsc_op: Performing key=20:7:0:dae9d86d-9c4b-44f2-822c-b559db044ba2 op=prmApPostgreSQLDB_stop_0 ) | |
2204 | + MsgNo.4-1) | |
2205 | + Jan 12 18:34:51 x3650a crmd: [15901]: info: do_lrm_rsc_op: Performing key=32:13:0:9d68ec4b-527f-4dda-88b3-9203fef16f56 op=prmStateful:1_promote_0 ) | |
2206 | + MsgNo.5-1) | |
2207 | + Jan 12 18:34:49 x3650a crmd: [3464]: info: do_lrm_rsc_op: Performing key=35:11:0:9d68ec4b-527f-4dda-88b3-9203fef16f56 op=prmStateful:0_demote_0 ) | |
2208 | + MsgNo.17-1) | |
2209 | + Jan 7 10:21:41 x3650a crmd: [25493]: info: do_lrm_rsc_op: Performing key=35:1:0:683d57a3-6623-46ae-bbc9-6b7930aec9c2 op=prmStonith2-3_start_0 ) | |
2210 | + MsgNo.18-1) | |
2211 | + Jan 7 10:22:11 x3650a crmd: [25493]: info: do_lrm_rsc_op: Performing key=30:5:0:683d57a3-6623-46ae-bbc9-6b7930aec9c2 op=prmStonith2-3_stop_0 ) | |
2212 | + ''' | |
2213 | + def try_to_operate(self, outputobj, logelm, lconvfrm): | |
2214 | + try: | |
2215 | + # In the case of example above, tmp's value is | |
2216 | + # "op=master_slave_Stateful0:1_promote_0". | |
2217 | + tmp = logelm.halogmsg.split()[3] | |
2218 | + # remove "op=" at the head. | |
2219 | + opid = tmp[3:] | |
2220 | + rscid, op = self.parse_opid(opid)[:2] | |
2221 | + except: | |
2222 | + return CONV_PARSE_ERROR | |
2223 | + if self.is_empty(rscid, op): | |
2224 | + return CONV_ITEM_EMPTY | |
2225 | + | |
2226 | + convertedlog = ("Resource %s tries to %s." % (rscid, op)) | |
2227 | + outputobj.output_log(lconvfrm.loglevel, convertedlog) | |
2228 | + return CONV_OK | |
2229 | + | |
2230 | + ''' | |
2231 | + Convert log message which means HB succeeded in operation. | |
2232 | + This function is common for OCF resource's start, stop, promote, demote | |
2233 | + and STONITH resource's start, stop. | |
2234 | + NOTE: monitor operation is not a target. | |
2235 | + | |
2236 | + MsgNo.1-2) | |
2237 | + Jan 6 14:16:28 x3650a crmd: [9874]: info: process_lrm_event: LRM operation prmApPostgreSQLDB_start_0 (call=25, rc=0, cib-update=69, confirmed=true) ok | |
2238 | + MsgNo.2-2) | |
2239 | + Jan 6 15:05:01 x3650a crmd: [9874]: info: process_lrm_event: LRM operation prmApPostgreSQLDB_stop_0 (call=27, rc=0, cib-update=79, confirmed=true) ok | |
2240 | + MsgNo.4-2) | |
2241 | + Jan 12 18:34:51 x3650a crmd: [15901]: info: process_lrm_event: LRM operation prmStateful:1_promote_0 (call=18, rc=0, cib-update=27, confirmed=true) ok | |
2242 | + MsgNo.5-2) | |
2243 | + Jan 12 18:34:49 x3650a crmd: [3464]: info: process_lrm_event: LRM operation prmStateful:0_demote_0 (call=37, rc=0, cib-update=79, confirmed=true) ok | |
2244 | + MsgNo.17-2) | |
2245 | + Jan 7 10:21:41 x3650a crmd: [25493]: info: process_lrm_event: LRM operation prmStonith2-3_start_0 (call=11, rc=0, cib-update=42, confirmed=true) ok | |
2246 | + MsgNo.18-2) | |
2247 | + Jan 7 10:22:11 x3650a crmd: [25493]: info: process_lrm_event: LRM operation prmStonith2-3_stop_0 (call=34, rc=0, cib-update=71, confirmed=true) ok | |
2248 | + ''' | |
2249 | + def operation_succeeded(self, outputobj, logelm, lconvfrm): | |
2250 | + completeopDic = { | |
2251 | + 'start' : 'started', | |
2252 | + 'stop' : 'stopped', | |
2253 | + 'promote': 'promoted', | |
2254 | + 'demote' : 'demoted' | |
2255 | + } | |
2256 | + try: | |
2257 | + wordlist = logelm.halogmsg.split() | |
2258 | + rscid, op = self.parse_opid(wordlist[3])[:2] | |
2259 | + rcstr = self.trimmark(wordlist[5],"=") | |
2260 | + except: | |
2261 | + return CONV_PARSE_ERROR | |
2262 | + if self.is_empty(rscid, op, rcstr): | |
2263 | + return CONV_ITEM_EMPTY | |
2264 | + | |
2265 | + if op in completeopDic.keys(): | |
2266 | + opstr = completeopDic[op] | |
2267 | + else: | |
2268 | + #Just in case. It shuoldn't occur unless cf file is modified. | |
2269 | + opstr = ("%s ok" % (op)) | |
2270 | + convertedlog = ("Resource %s %s. (%s)" % (rscid, opstr, rcstr)) | |
2271 | + outputobj.output_log(lconvfrm.loglevel, convertedlog) | |
2272 | + return CONV_OK | |
2273 | + | |
2274 | + ''' | |
2275 | + Convert log message which means HB failed to do the operation. | |
2276 | + This function is common for OCF resource's start, stop, | |
2277 | + monitor (exclude rc=OCF_NOT_RUNNING), promote, demote, | |
2278 | + and STONITH resource's start, stop. | |
2279 | + MsgNo.1-3) | |
2280 | + Jan 6 15:22:45 x3650a crmd: [26989]: info: process_lrm_event: LRM operation prmApPostgreSQLDB_start_0 (call=25, rc=1, cib-update=58, confirmed=true) unknown error | |
2281 | + MsgNo.2-3) | |
2282 | + Jan 6 18:11:34 x3650a crmd: [4144]: info: process_lrm_event: LRM operation prmApPostgreSQLDB_stop_0 (call=27, rc=1, cib-update=76, confirmed=true) unknown error | |
2283 | + MsgNo.3-1) | |
2284 | + Jan 6 19:23:01 x3650a crmd: [19038]: info: process_lrm_event: LRM operation prmExPostgreSQLDB_monitor_10000 (call=16, rc=1, cib-update=72, confirmed=false) unknown error | |
2285 | + MsgNo.4-3) | |
2286 | + Jan 6 15:22:45 x3650a crmd: [26989]: info: process_lrm_event: LRM operation prmStateful:1_promote_0 (call=25, rc=1, cib-update=58, confirmed=true) unknown error | |
2287 | + MsgNo.5-3) | |
2288 | + Jan 6 15:22:45 x3650a crmd: [26989]: info: process_lrm_event: LRM operation prmStateful:1_demote_0 (call=25, rc=1, cib-update=58, confirmed=true) unknown error | |
2289 | + MsgNo.17-3) | |
2290 | + Jan 7 10:54:45 x3650a crmd: [32714]: info: process_lrm_event: LRM operation prmStonith2-3_start_0 (call=11, rc=1, cib-update=56, confirmed=true) unknown error | |
2291 | + MsgNo.19-1) | |
2292 | + Jan 7 13:47:57 x3650a crmd: [19263]: info: process_lrm_event: LRM operation prmStonith2-3_monitor_30000 (call=30, rc=14, cib-update=89, confirmed=false) status: unknown | |
2293 | + ''' | |
2294 | + def operation_failed(self, outputobj, logelm, lconvfrm): | |
2295 | + try: | |
2296 | + wordlist = logelm.halogmsg.split() | |
2297 | + rscid, op = self.parse_opid(wordlist[3])[:2] | |
2298 | + rcstr = self.trimmark(wordlist[5],"=") | |
2299 | + except: | |
2300 | + return CONV_PARSE_ERROR | |
2301 | + if self.is_empty(rscid, op, rcstr): | |
2302 | + return CONV_ITEM_EMPTY | |
2303 | + | |
2304 | + # If lrmd detected this operation's timeout, treated this log as | |
2305 | + # resource operation timed out. | |
2306 | + # It's for STONITH [start|stop|monitor] operation. | |
2307 | + convertedlog = ("Resource %s failed to %s." % (rscid, op)) | |
2308 | + rscid_and_op = (rscid + ":" + op) | |
2309 | + if rscid_and_op in cstat.timedoutRscopSet: | |
2310 | + convertedlog = ("%s (Timed Out)" % (convertedlog)) | |
2311 | + cstat.timedoutRscopSet.discard(rscid_and_op) | |
2312 | + else: | |
2313 | + convertedlog = ("%s (%s)" % (convertedlog, rcstr)) | |
2314 | + | |
2315 | + outputobj.output_log(lconvfrm.loglevel, convertedlog) | |
2316 | + return CONV_OK | |
2317 | + | |
2318 | + ''' | |
2319 | + Convert log message which means operation for OCF resource timed out. | |
2320 | + This function is common for start, stop, monitor, promote, demote. | |
2321 | + MsgNo.1-4) | |
2322 | + Jan 6 17:41:35 x3650a crmd: [1404]: ERROR: process_lrm_event: LRM operation prmApPostgreSQLDB_start_0 (25) Timed Out (timeout=30000ms) | |
2323 | + MsgNo.2-4) | |
2324 | + Jan 6 18:19:47 x3650a crmd: [7948]: ERROR: process_lrm_event: LRM operation prmApPostgreSQLDB_stop_0 (27) Timed Out (timeout=30000ms) | |
2325 | + MsgNo.3-3) | |
2326 | + Jan 6 19:55:31 x3650a crmd: [28183]: ERROR: process_lrm_event: LRM operation prmExPostgreSQLDB_monitor_10000 (27) Timed Out (timeout=30000ms) | |
2327 | + MsgNo.4-4) | |
2328 | + Jan 6 17:41:35 x3650a crmd: [1404]: ERROR: process_lrm_event: LRM operation prmStateful:1_promote_0 (25) Timed Out (timeout=30000ms) | |
2329 | + MsgNo.5-4) | |
2330 | + Jan 6 17:41:35 x3650a crmd: [1404]: ERROR: process_lrm_event: LRM operation prmStateful:1_demote_0 (25) Timed Out (timeout=30000ms) | |
2331 | + ''' | |
2332 | + def operation_timedout_ocf(self, outputobj, logelm, lconvfrm): | |
2333 | + try: | |
2334 | + opid = logelm.halogmsg.split()[3] | |
2335 | + rscid, op = self.parse_opid(opid)[:2] | |
2336 | + except: | |
2337 | + return CONV_PARSE_ERROR | |
2338 | + if self.is_empty(rscid, op): | |
2339 | + return CONV_ITEM_EMPTY | |
2340 | + | |
2341 | + # remove from timed out rscop list. | |
2342 | + # Because it became clear that the operation timed out. | |
2343 | + rscid_and_op = ("%s:%s" % (rscid, op)) | |
2344 | + cstat.timedoutRscopSet.discard(rscid_and_op) | |
2345 | + | |
2346 | + convertedlog = ("Resource %s failed to %s. (Timed Out)" % (rscid, op)) | |
2347 | + outputobj.output_log(lconvfrm.loglevel, convertedlog) | |
2348 | + return CONV_OK | |
2349 | + | |
2350 | + ''' | |
2351 | + Convert log message which means resource is not running. | |
2352 | + This function is only for OCF and STONITH resource's monitor | |
2353 | + (rc=OCF_NOT_RUNNING). | |
2354 | + | |
2355 | + MsgNo.3-2) | |
2356 | + Jan 6 19:45:58 x3650a crmd: [23987]: info: process_lrm_event: LRM operation prmExPostgreSQLDB_monitor_10000 (call=16, rc=7, cib-update=60, confirmed=false) not running | |
2357 | + MsgNo.19-2) | |
2358 | + Jan 7 13:47:57 x3650a crmd: [19263]: info: process_lrm_event: LRM operation prmStonith2-3_monitor_30000 (call=30, rc=14, cib-update=89, confirmed=false) status: unknown | |
2359 | + ''' | |
2360 | + def detect_rsc_failure(self, outputobj, logelm, lconvfrm): | |
2361 | + try: | |
2362 | + wordlist = logelm.halogmsg.split() | |
2363 | + rscid = self.parse_opid(wordlist[3])[0] | |
2364 | + rcstr = self.trimmark(wordlist[5],"=") | |
2365 | + except: | |
2366 | + return CONV_PARSE_ERROR | |
2367 | + if self.is_empty(rscid, rcstr): | |
2368 | + return CONV_ITEM_EMPTY | |
2369 | + | |
2370 | + convertedlog = ("Resource %s does not work. (%s)" % (rscid, rcstr)) | |
2371 | + outputobj.output_log(lconvfrm.loglevel, convertedlog) | |
2372 | + return CONV_OK | |
2373 | + | |
2374 | + ######### | |
2375 | + # For Node status event. | |
2376 | + ######### | |
2377 | + ''' | |
2378 | + Convert log message which means Node status updated. | |
2379 | + | |
2380 | + MsgNo.6-1) | |
2381 | + Jul 16 14:07:57 x3650a crmd: [7361]: notice: crmd_ha_status_callback: Status update: Node x3650b now has status [dead] (DC=true) | |
2382 | + MsgNo.6-2) | |
2383 | + Jul 16 13:41:04 x3650a crmd: [2114]: notice: crmd_ha_status_callback: Status update: Node x3650b now has status [active] (DC=true) | |
2384 | + ''' | |
2385 | + def node_status_updated(self, outputobj, logelm, lconvfrm): | |
2386 | + try: | |
2387 | + wordList = logelm.halogmsg.split() | |
2388 | + nodename = wordList[4] | |
2389 | + status = wordList[8].lstrip('[').rstrip(']') | |
2390 | + except: | |
2391 | + return CONV_PARSE_ERROR | |
2392 | + if self.is_empty(nodename, status): | |
2393 | + return CONV_ITEM_EMPTY | |
2394 | + | |
2395 | + ret = self.is_pingnode(nodename) | |
2396 | + if ret == True: | |
2397 | + #Ignore the network status's change. | |
2398 | + return CONV_OK | |
2399 | + elif ret == None: | |
2400 | + return CONV_GETINFO_ERROR | |
2401 | + | |
2402 | + # It's node status's change. | |
2403 | + output_loglevel = self.LOG_INFO_LV | |
2404 | + if status == "dead": | |
2405 | + output_loglevel = self.LOG_WARN_LV | |
2406 | + status = "lost" | |
2407 | + elif status == "active": | |
2408 | + if nodename in cstat.shutNodeSet: | |
2409 | + cstat.shutNodeSet.discard(nodename) | |
2410 | + status = "member" | |
2411 | + | |
2412 | + convertedlog = ("Node %s is %s." % (nodename, status)) | |
2413 | + outputobj.output_log(output_loglevel, convertedlog) | |
2414 | + return CONV_OK | |
2415 | + | |
2416 | + ########## | |
2417 | + # For Interconnect-LAN status event and | |
2418 | + # Network status event (detected by pingd). | |
2419 | + ########## | |
2420 | + ''' | |
2421 | + Convert log message which means Interconnect-LAN status changed to "dead" | |
2422 | + | |
2423 | + MsgNo.7-1) | |
2424 | + Jul 15 11:27:46 x3650a heartbeat: [17442]: info: Link x3650b:eth2 dead. | |
2425 | + ''' | |
2426 | + def detect_iconnlan_dead(self, outputobj, logelm, lconvfrm): | |
2427 | + try: | |
2428 | + wordlist = logelm.halogmsg.split() | |
2429 | + nodename, linkname = wordlist[1].split(':') | |
2430 | + except: | |
2431 | + return CONV_PARSE_ERROR | |
2432 | + if self.is_empty(nodename): | |
2433 | + return CONV_ITEM_EMPTY | |
2434 | + | |
2435 | + ret = self.is_pingnode(nodename) | |
2436 | + if ret == True: | |
2437 | + #Ignore the network failure. | |
2438 | + return CONV_OK | |
2439 | + elif ret == False: | |
2440 | + convertedlog = ("Link %s:%s is FAULTY." % (nodename, linkname)) | |
2441 | + outputobj.output_log(lconvfrm.loglevel, convertedlog) | |
2442 | + return CONV_OK | |
2443 | + else: | |
2444 | + return CONV_GETINFO_ERROR | |
2445 | + | |
2446 | + ''' | |
2447 | + Convert log message which means network status changed to "up". | |
2448 | + The same log appears when Interconnect-LAN's event occurs and | |
2449 | + Ping node's one. | |
2450 | + | |
2451 | + MsgNo.7-2) | |
2452 | + Jul 15 11:12:14 x3650a heartbeat: [17442]: info: Link x3650b:eth2 up. | |
2453 | + ''' | |
2454 | + def detect_network_up(self, outputobj, logelm, lconvfrm): | |
2455 | + try: | |
2456 | + wordlist = logelm.halogmsg.split() | |
2457 | + nodename, linkname = wordlist[1].split(':') | |
2458 | + except: | |
2459 | + return CONV_PARSE_ERROR | |
2460 | + if self.is_empty(nodename, linkname): | |
2461 | + return CONV_ITEM_EMPTY | |
2462 | + | |
2463 | + ret = self.is_pingnode(nodename) | |
2464 | + if ret == True: | |
2465 | + return CONV_OK | |
2466 | + elif ret == False: | |
2467 | + convertedlog = ("Link %s:%s is up." % (nodename, linkname)) | |
2468 | + outputobj.output_log(lconvfrm.loglevel, convertedlog) | |
2469 | + return CONV_OK | |
2470 | + else: | |
2471 | + return CONV_GETINFO_ERROR | |
2472 | + | |
2473 | + ''' | |
2474 | + Convert log message which means Network to ping node status changed | |
2475 | + to "dead" | |
2476 | + See also the comment on detect_iconnlan_dead(). | |
2477 | + | |
2478 | + MsgNo.8-1) | |
2479 | + Jan 13 16:24:13 x3650a pingd: [8849]: info: stand_alone_ping: Node 192.168.201.254 is unreachable (write) | |
2480 | + Jan 28 12:51:51 x3650a pingd: [16908]: info: stand_alone_ping: Node 192.168.201.254 is unreachable (read) | |
2481 | + ''' | |
2482 | + def detect_node_dead(self, outputobj, logelm, lconvfrm): | |
2483 | + try: | |
2484 | + nodename = logelm.halogmsg.split()[2] | |
2485 | + except: | |
2486 | + return CONV_PARSE_ERROR | |
2487 | + if self.is_empty(nodename): | |
2488 | + return CONV_ITEM_EMPTY | |
2489 | + | |
2490 | + convertedlog = ("Network to %s is unreachable." % (nodename)) | |
2491 | + outputobj.output_log(lconvfrm.loglevel, convertedlog) | |
2492 | + return CONV_OK | |
2493 | + | |
2494 | + ########## | |
2495 | + # For Disk status event (detected by diskd). | |
2496 | + ########## | |
2497 | + ''' | |
2498 | + Convert log message which means disk error. | |
2499 | + | |
2500 | + MsgNo.9-1) | |
2501 | + Jun 24 20:19:53 x3650a diskd: [22126]: WARN: check_status: disk status is changed, attr_name=diskcheck_status_internal, target=/tmp, new_status=ERROR | |
2502 | + ''' | |
2503 | + def detect_disk_error(self, outputobj, logelm, lconvfrm): | |
2504 | + try: | |
2505 | + wordlist = logelm.halogmsg.split(',') | |
2506 | + attrname = wordlist[1].split('=')[1] | |
2507 | + target = wordlist[2].split('=')[1] | |
2508 | + status = wordlist[3].split('=')[1] | |
2509 | + except: | |
2510 | + return CONV_PARSE_ERROR | |
2511 | + if self.is_empty(attrname, target, status): | |
2512 | + return CONV_ITEM_EMPTY | |
2513 | + | |
2514 | + convertedlog = ("Disk connection to %s is %s. (attr_name=%s)" % (target, status, attrname)) | |
2515 | + outputobj.output_log(lconvfrm.loglevel, convertedlog) | |
2516 | + return CONV_OK | |
2517 | + | |
2518 | + ######### | |
2519 | + # For respawn process event. | |
2520 | + ######### | |
2521 | + ''' | |
2522 | + Convert log message which means respawn process start. | |
2523 | + | |
2524 | + MsgNo.10-1) | |
2525 | + Jul 27 17:29:52 x3650a heartbeat: [25800]: info: Starting "/usr/lib64/heartbeat/attrd" as uid 500 gid 501 (pid 25800) | |
2526 | + ''' | |
2527 | + def respawn_start(self, outputobj, logelm, lconvfrm): | |
2528 | + try: | |
2529 | + keyword="Starting " | |
2530 | + start_pos = logelm.halogmsg.index(keyword) + len(keyword) | |
2531 | + end_pos = logelm.halogmsg.rindex("as uid") | |
2532 | + procname = logelm.halogmsg[start_pos:end_pos].strip().split('/')[-1].split()[0].strip("\"") | |
2533 | + leftwordList = logelm.halogmsg[end_pos:].split() | |
2534 | + pid = leftwordList[-1].split(')')[0] | |
2535 | + except: | |
2536 | + return CONV_PARSE_ERROR | |
2537 | + if self.is_empty(procname, pid): | |
2538 | + return CONV_ITEM_EMPTY | |
2539 | + | |
2540 | + convertedlog = ("Start \"%s\" process. (pid=%s)" % (procname, pid)) | |
2541 | + outputobj.output_log(lconvfrm.loglevel, convertedlog) | |
2542 | + return CONV_OK | |
2543 | + | |
2544 | + ''' | |
2545 | + Convert log message which means respawn process exited with error. | |
2546 | + | |
2547 | + MsgNo.10-2) | |
2548 | + Jul 20 15:47:47 x3650a heartbeat: [21753]: info: Managed /usr/lib64/heartbeat/attrd process 30930 exited with return code 0. | |
2549 | + ''' | |
2550 | + def respawn_exited_abnormally(self, outputobj, logelm, lconvfrm): | |
2551 | + try: | |
2552 | + keyword="Managed " | |
2553 | + start_pos = logelm.halogmsg.index(keyword) + len(keyword) | |
2554 | + end_pos = logelm.halogmsg.rindex("process") | |
2555 | + procname = logelm.halogmsg[start_pos:end_pos].strip().split('/')[-1].split()[0] | |
2556 | + leftwordList = logelm.halogmsg[end_pos:].split() | |
2557 | + pid = leftwordList[1] | |
2558 | + exitcode = leftwordList[6].rstrip(".") | |
2559 | + except: | |
2560 | + return CONV_PARSE_ERROR | |
2561 | + if self.is_empty(procname, pid, exitcode): | |
2562 | + return CONV_ITEM_EMPTY | |
2563 | + | |
2564 | + convertedlog = ("Managed \"%s\" process exited. (pid=%s, rc=%s)" % (procname, pid, exitcode)) | |
2565 | + outputobj.output_log(lconvfrm.loglevel, convertedlog) | |
2566 | + return CONV_OK | |
2567 | + | |
2568 | + ''' | |
2569 | + Convert log message which means respawn process killed by signal. | |
2570 | + | |
2571 | + MsgNo.10-3) | |
2572 | + Jul 20 15:46:43 x3650a heartbeat: [21753]: WARN: Managed /usr/lib64/heartbeat/attrd process 21772 killed by signal 9 [SIGKILL - Kill, unblockable]. | |
2573 | + ''' | |
2574 | + def respawn_killed(self, outputobj, logelm, lconvfrm): | |
2575 | + try: | |
2576 | + keyword="Managed " | |
2577 | + start_pos = logelm.halogmsg.index(keyword) + len(keyword) | |
2578 | + end_pos = logelm.halogmsg.rindex("process") | |
2579 | + procname = logelm.halogmsg[start_pos:end_pos].strip().split('/')[-1].split()[0] | |
2580 | + leftwordList = logelm.halogmsg[end_pos:].split() | |
2581 | + pid = leftwordList[1] | |
2582 | + signum = leftwordList[5].rstrip('.') | |
2583 | + except: | |
2584 | + return CONV_PARSE_ERROR | |
2585 | + if self.is_empty(procname, pid, signum): | |
2586 | + return CONV_ITEM_EMPTY | |
2587 | + | |
2588 | + convertedlog = ("Managed \"%s\" process terminated with signal %s. (pid=%s)" % (procname, signum, pid)) | |
2589 | + outputobj.output_log(lconvfrm.loglevel, convertedlog) | |
2590 | + return CONV_OK | |
2591 | + | |
2592 | + ''' | |
2593 | + Convert log message which means respawn process dumped core. | |
2594 | + | |
2595 | + MsgNo.10-4) | |
2596 | + Jul 20 17:08:38 x3650a heartbeat: [6154]: ERROR: Managed /usr/lib64/heartbeat/attrd process 6173 dumped core | |
2597 | + ''' | |
2598 | + def respawn_dumped_core(self, outputobj, logelm, lconvfrm): | |
2599 | + try: | |
2600 | + keyword="Managed " | |
2601 | + start_pos = logelm.halogmsg.index(keyword) + len(keyword) | |
2602 | + end_pos = logelm.halogmsg.rindex("process") | |
2603 | + procname = logelm.halogmsg[start_pos:end_pos].strip().split('/')[-1].split()[0] | |
2604 | + pid = logelm.halogmsg[end_pos:].split()[1] | |
2605 | + except: | |
2606 | + return CONV_PARSE_ERROR | |
2607 | + if self.is_empty(procname, pid): | |
2608 | + return CONV_ITEM_EMPTY | |
2609 | + | |
2610 | + convertedlog = ("Managed \"%s\" process dumped core. (pid=%s)" % (procname, pid)) | |
2611 | + outputobj.output_log(lconvfrm.loglevel, convertedlog) | |
2612 | + return CONV_OK | |
2613 | + | |
2614 | + ''' | |
2615 | + Convert log message which means respawn process went away strangely. | |
2616 | + | |
2617 | + MsgNo.10-5) | |
2618 | + Jul 27 17:30:34 x3650a heartbeat: [25793]: ERROR: Managed /usr/lib64/heartbeat/attrd process 6173 went away strangely (!) | |
2619 | + ''' | |
2620 | + def respawn_went_away(self, outputobj, logelm, lconvfrm): | |
2621 | + try: | |
2622 | + keyword="Managed " | |
2623 | + start_pos = logelm.halogmsg.index(keyword) + len(keyword) | |
2624 | + end_pos = logelm.halogmsg.rindex("process") | |
2625 | + procname = logelm.halogmsg[start_pos:end_pos].strip().split('/')[-1].split()[0] | |
2626 | + pid = logelm.halogmsg[end_pos:].split()[1] | |
2627 | + except: | |
2628 | + return CONV_PARSE_ERROR | |
2629 | + if self.is_empty(procname, pid): | |
2630 | + return CONV_ITEM_EMPTY | |
2631 | + | |
2632 | + convertedlog = ("Managed \"%s\" process went away strangely. (pid=%s)" % (procname, pid)) | |
2633 | + outputobj.output_log(lconvfrm.loglevel, convertedlog) | |
2634 | + return CONV_OK | |
2635 | + | |
2636 | + ''' | |
2637 | + Convert log message which means respawn process exited normally in shutdown process. | |
2638 | + | |
2639 | + MsgNo.10-6) | |
2640 | + Jul 27 17:30:34 x3650a heartbeat: [25793]: info: killing /usr/lib64/heartbeat/attrd process group 25803 with signal 15 | |
2641 | + ''' | |
2642 | + def respawn_exited_normally(self, outputobj, logelm, lconvfrm): | |
2643 | + try: | |
2644 | + keyword="killing " | |
2645 | + start_pos = logelm.halogmsg.index(keyword) + len(keyword) | |
2646 | + end_pos = logelm.halogmsg.rindex("process") | |
2647 | + procname = logelm.halogmsg[start_pos:end_pos].strip().split('/')[-1].split()[0] | |
2648 | + leftwordList = logelm.halogmsg[end_pos:].split() | |
2649 | + pgid = leftwordList[2] | |
2650 | + except: | |
2651 | + return CONV_PARSE_ERROR | |
2652 | + if self.is_empty(procname, pgid): | |
2653 | + return CONV_ITEM_EMPTY | |
2654 | + | |
2655 | + convertedlog = ("Stop \"%s\" process normally. (pid=%s)" % (procname, pgid)) | |
2656 | + outputobj.output_log(lconvfrm.loglevel, convertedlog) | |
2657 | + return CONV_OK | |
2658 | + | |
2659 | + ''' | |
2660 | + Convert log message which means do respawning too frequently in a short term. | |
2661 | + | |
2662 | + MsgNo.10-7) | |
2663 | + Jul 27 17:23:40 x3650a heartbeat: [23265]: ERROR: Client /usr/lib64/heartbeat/attrd "respawning too fast" | |
2664 | + ''' | |
2665 | + def respawn_too_fast(self, outputobj, logelm, lconvfrm): | |
2666 | + try: | |
2667 | + keyword="Client " | |
2668 | + start_pos = logelm.halogmsg.index(keyword) + len(keyword) | |
2669 | + end_pos = logelm.halogmsg.rindex("respawning") - 2 | |
2670 | + procname = logelm.halogmsg[start_pos:end_pos].strip().split('/')[-1].split()[0] | |
2671 | + except: | |
2672 | + return CONV_PARSE_ERROR | |
2673 | + if self.is_empty(procname): | |
2674 | + return CONV_ITEM_EMPTY | |
2675 | + | |
2676 | + convertedlog = ("Respawn count exceeded by \"%s\"." % (procname)) | |
2677 | + outputobj.output_log(lconvfrm.loglevel, convertedlog) | |
2678 | + return CONV_OK | |
2679 | + | |
2680 | + ########## | |
2681 | + # For Fail Over. | |
2682 | + ########## | |
2683 | + ''' | |
2684 | + Output the log which tells F/O starts. | |
2685 | + ''' | |
2686 | + def detect_fo_start(self, outputobj): | |
2687 | + self.debug_status() | |
2688 | + if cstat.IN_FO_PROCESS == True: | |
2689 | + return | |
2690 | + cstat.IN_FO_PROCESS = True | |
2691 | + convertedlog = ("Start to fail-over.") | |
2692 | + outputobj.output_log(self.LOG_ERR_LV, convertedlog) | |
2693 | + self.debug_status() | |
2694 | + return | |
2695 | + | |
2696 | + ''' | |
2697 | + Detect pengine starts the calculation for transition. | |
2698 | + This function is called when cluster status became "S_POLICY_ENGINE" | |
2699 | + and input data is not I_SHUTDOWN (do shutdown process). | |
2700 | + It considers a failure occurred when specified attributes are | |
2701 | + updated to abnormal value. | |
2702 | + When the failure occurred, this function outputs the log to tell it. | |
2703 | + If not or it is already in F/O process, it outputs nothing. | |
2704 | + | |
2705 | + MsgNo.F0-1, F9-1, F10-1) | |
2706 | + Jan 5 15:19:20 x3650a crmd: [17659]: info: do_state_transition: State transition S_IDLE -> S_POLICY_ENGINE [ input=I_PE_CALC cause=C_FSA_INTERNAL origin=abort_transition_graph ] | |
2707 | + ''' | |
2708 | + def detect_pe_calc(self, outputobj, logelm, lconvfrm): | |
2709 | + cstat.IN_CALC = True | |
2710 | + | |
2711 | + # Initialize resource status list. | |
2712 | + # See the comment on detect_rsc_unmanaged(). | |
2713 | + self.rscstatList = None | |
2714 | + self.rscstatList = list() | |
2715 | + | |
2716 | + # If any failure didn't occur and Heartbeat is not in shutdown process, | |
2717 | + # and the node on localhost is not in shutting down, | |
2718 | + # check each attribute's value to decide whether it is F/O or not. | |
2719 | + if cstat.FAILURE_OCCURRED == False and HOSTNAME not in cstat.shutNodeSet: | |
2720 | + nodeset = self.get_onlinenode() | |
2721 | + if nodeset == None: | |
2722 | + return CONV_GETINFO_ERROR | |
2723 | + for node in (nodeset - cstat.shutNodeSet): | |
2724 | + # Check each attribute's value. | |
2725 | + for attrRule in attrRuleList: | |
2726 | + attrname, op, attrval = tuple(attrRule) | |
2727 | + # Check attribute's value for each node. | |
2728 | + # Now, the node seems to be active. | |
2729 | + result = self.check_attribute(attrname, op, attrval, node)[0] | |
2730 | + if result == True: | |
2731 | + # attribute's value means "failure(s) occurred"! | |
2732 | + cstat.FAILURE_OCCURRED = FAIL_SCORE | |
2733 | + if cstat.ACTRSC_MOVE == FAIL_MOVE or \ | |
2734 | + cstat.ACTRSC_MOVE == FAIL_STP: | |
2735 | + self.detect_fo_start(outputobj) | |
2736 | + # [COMMENT] | |
2737 | + # result == False: | |
2738 | + # attribute did not change or | |
2739 | + # it was updated to normal value. | |
2740 | + # result == None: | |
2741 | + # some errors occurred in check_attribute() or | |
2742 | + # the node is not running or | |
2743 | + # specified attribute does not exist. | |
2744 | + return CONV_OK | |
2745 | + | |
2746 | + ''' | |
2747 | + Output the log which tells F/O finished. | |
2748 | + In addition, output all resources' status. | |
2749 | + It considers that F/O succeeded when all of specified resources | |
2750 | + (with the parameter OPT_ACTRSC in config file) are running, | |
2751 | + and if any resource at all stops, it considers F/O failed. | |
2752 | + This function is called when cluster status became "S_IDLE". | |
2753 | + | |
2754 | + MsgNo.F0-2, F12-1, F12-2) | |
2755 | + Jan 5 14:50:07 x3650a crmd: [13198]: info: do_state_transition: State transition S_TRANSITION_ENGINE -> S_IDLE [ input=I_TE_SUCCESS cause=C_FSA_INTERNAL origin=notify_crmd ] | |
2756 | + ''' | |
2757 | + def detect_fo_complete(self, outputobj, logelm, lconvfrm): | |
2758 | + | |
2759 | + # Check specified resources exist in this cluster. | |
2760 | + if len(self.rscstatList) > 0: | |
2761 | + for actrsc in actRscList: | |
2762 | + newrsc = RscStat(actrsc) | |
2763 | + if newrsc not in self.rscstatList: | |
2764 | + pm_log.error("detect_fo_complete(): " + | |
2765 | + "resource [%s] is not in this cluster." % (actrsc)) | |
2766 | + break | |
2767 | + | |
2768 | + if cstat.IN_FO_PROCESS == False: | |
2769 | + self.clear_status() | |
2770 | + return CONV_OK | |
2771 | + self.clear_status() | |
2772 | + | |
2773 | + # When one or more Unmanaged resource exists in the cluster, | |
2774 | + # (even if the resource is not set in act_rsc) | |
2775 | + # it is unusual state, so consider it "F/O failed". | |
2776 | + detect_fo_failed = False | |
2777 | + unmanaged_rsc_exists = False | |
2778 | + for rscstat in self.rscstatList: | |
2779 | + if rscstat.unmanaged: | |
2780 | + convertedlog = ("Unmanaged resource exists.") | |
2781 | + outputobj.output_log(self.LOG_ERR_LV, convertedlog) | |
2782 | + detect_fo_failed = True | |
2783 | + unmanaged_rsc_exists = True | |
2784 | + break | |
2785 | + | |
2786 | + if unmanaged_rsc_exists == False: | |
2787 | + # Confirm each resource's status. | |
2788 | + detect_fo_failed = False | |
2789 | + for rscstat in self.rscstatList: | |
2790 | + if rscstat.rscid in actRscList: | |
2791 | + if rscstat.fofailed or rscstat.status == "Stopped" : | |
2792 | + output_loglevel = self.LOG_ERR_LV | |
2793 | + output_status = ("Stopped") | |
2794 | + detect_fo_failed = True | |
2795 | + else: | |
2796 | + output_loglevel = self.LOG_INFO_LV | |
2797 | + output_status = rscstat.status | |
2798 | + convertedlog = ("Resource %s : %s" % (rscstat.rscid, output_status)) | |
2799 | + outputobj.output_log(output_loglevel, convertedlog) | |
2800 | + | |
2801 | + if detect_fo_failed: | |
2802 | + outputobj.output_log(self.LOG_ERR_LV, "fail-over failed.") | |
2803 | + else: | |
2804 | + outputobj.output_log(self.LOG_INFO_LV, "fail-over succeeded.") | |
2805 | + | |
2806 | + return CONV_OK | |
2807 | + | |
2808 | + ''' | |
2809 | + Node detects some failures in the cluster. | |
2810 | + Output nothing. | |
2811 | + | |
2812 | + MsgNo.F1-1, F1-2, F2-1, F2-2, F3-1, F3-2, F4-1, F4-2, F6-1, F6-2) | |
2813 | + Feb 25 13:31:37 x3650a crmd: [11105]: WARN: update_failcount: Updating failcount for prmApPostgreSQLDB on x3650a after failed monitor: rc=1 (update=value++, time=1267072297) | |
2814 | + ''' | |
2815 | + def dc_detect_failure(self, outputobj, logelm, lconvfrm): | |
2816 | + return CONV_OK | |
2817 | + | |
2818 | + ''' | |
2819 | + Node detects some failures in the cluster. | |
2820 | + Output nothing. | |
2821 | + | |
2822 | + MsgNo.F7-1, F7-2, F7-3, F7-4, F8-1) | |
2823 | + Jul 15 13:14:59 x3650a crmd: [31869]: WARN: match_down_event: No match for shutdown action on f8d52aae-518b-4b06-b1a1-b23486f8b410 | |
2824 | + ''' | |
2825 | + def dc_detect_node_failure(self, outputobj, logelm, lconvfrm): | |
2826 | + try: | |
2827 | + wordlist = logelm.halogmsg.split() | |
2828 | + nodename = self.get_nodename(wordlist[-1]) | |
2829 | + except: | |
2830 | + return CONV_PARSE_ERROR | |
2831 | + if self.is_empty(nodename): | |
2832 | + return CONV_ITEM_EMPTY | |
2833 | + | |
2834 | + if nodename in cstat.shutNodeSet: | |
2835 | + pm_log.debug("The [%s] exists in the shutdown list." % (nodename)) | |
2836 | + pm_log.debug("Ignore the fotrigger flag setting.") | |
2837 | + return CONV_SHUT_NODE | |
2838 | + | |
2839 | + return CONV_OK | |
2840 | + | |
2841 | + ''' | |
2842 | + Detect resource start action added. | |
2843 | + This is to get resource status when F/O finished. | |
2844 | + So it outputs nothing. | |
2845 | + | |
2846 | + MsgNo. F11-1) | |
2847 | + Jan 5 15:12:25 x3650a pengine: [16657]: notice: LogActions: Start prmExPostgreSQLDB (x3650a) | |
2848 | + ''' | |
2849 | + def add_rsc_start(self, outputobj, logelm, lconvfrm): | |
2850 | + try: | |
2851 | + wordlist = logelm.halogmsg.split() | |
2852 | + nodename = self.trimmark(wordlist[-1]) | |
2853 | + rscid = wordlist[2] | |
2854 | + except: | |
2855 | + return CONV_PARSE_ERROR | |
2856 | + if self.is_empty(nodename, rscid): | |
2857 | + return CONV_ITEM_EMPTY | |
2858 | + | |
2859 | + # Set the resource's status to the list. | |
2860 | + statstr = ("Started on %s" % (nodename)) | |
2861 | + self.set_rscstat(rscid, statstr, None, None) | |
2862 | + | |
2863 | + if rscid in actRscList: | |
2864 | + cstat.ACTRSC_MOVE = FAIL_STR | |
2865 | + if cstat.FAILURE_OCCURRED == FAIL_NODE: | |
2866 | + self.detect_fo_start(outputobj) | |
2867 | + return CONV_OK | |
2868 | + | |
2869 | + ''' | |
2870 | + Detect resource stop action added. | |
2871 | + This is to get resource status when F/O finished. | |
2872 | + | |
2873 | + MsgNo. F11-2) | |
2874 | + Jan 5 15:19:23 x3650a pengine: [17658]: notice: LogActions: Stop resource prmExPostgreSQLDB (x3650a) | |
2875 | + ''' | |
2876 | + def add_rsc_stop(self, outputobj, logelm, lconvfrm): | |
2877 | + try: | |
2878 | + wordlist = logelm.halogmsg.split() | |
2879 | + rscid = wordlist[-2] | |
2880 | + except: | |
2881 | + return CONV_PARSE_ERROR | |
2882 | + if self.is_empty(rscid): | |
2883 | + return CONV_ITEM_EMPTY | |
2884 | + | |
2885 | + # Set the resource's status to the list. | |
2886 | + statstr = ("Stopped") | |
2887 | + self.set_rscstat(rscid, statstr, None, None) | |
2888 | + | |
2889 | + if rscid in actRscList: | |
2890 | + cstat.ACTRSC_MOVE = FAIL_STP | |
2891 | + if cstat.FAILURE_OCCURRED == FAIL_RSC or cstat.FAILURE_OCCURRED == FAIL_SCORE: | |
2892 | + self.detect_fo_start(outputobj) | |
2893 | + return CONV_OK | |
2894 | + | |
2895 | + ''' | |
2896 | + Detect no action added for the resource. | |
2897 | + This is to get resource status when F/O finished. | |
2898 | + So it outputs nothing. | |
2899 | + | |
2900 | + MsgNo.F11-3) | |
2901 | + Jan 5 15:36:42 x3650a pengine: [27135]: notice: LogActions: Leave resource prmFsPostgreSQLDB1 (Started x3650a) | |
2902 | + MsgNo.F11-8) | |
2903 | + Jan 5 14:50:05 x3650a pengine: [13197]: notice: LogActions: Restart resource prmIpPostgreSQLDB (Started x3650b) | |
2904 | + MsgNo.F11-9) | |
2905 | + Jan 5 14:50:41 x3650a pengine: [13197]: notice: LogActions: Leave resource prmPingd:0 (Stopped) | |
2906 | + ''' | |
2907 | + def add_no_action(self, outputobj, logelm, lconvfrm): | |
2908 | + try: | |
2909 | + wordlist = logelm.halogmsg.split() | |
2910 | + rscid = wordlist[3] | |
2911 | + status = self.trimmark(wordlist[4]) | |
2912 | + node = "" | |
2913 | + if len(wordlist) >= 6: | |
2914 | + node = self.trimmark(wordlist[5]) | |
2915 | + except: | |
2916 | + return CONV_PARSE_ERROR | |
2917 | + if self.is_empty(rscid, status): | |
2918 | + return CONV_ITEM_EMPTY | |
2919 | + | |
2920 | + # Set the resource's status to the list. | |
2921 | + if node != "": | |
2922 | + statstr = ("%s on %s" % (status, node)) | |
2923 | + else: | |
2924 | + statstr = ("%s" % (status)) | |
2925 | + self.set_rscstat(rscid, statstr, None, None) | |
2926 | + | |
2927 | + if statstr == "Stopped": | |
2928 | + if rscid in actRscList: | |
2929 | + cstat.ACTRSC_MOVE = FAIL_STPD | |
2930 | + if cstat.FAILURE_OCCURRED == FAIL_NODE: | |
2931 | + self.detect_fo_start(outputobj) | |
2932 | + return CONV_OK | |
2933 | + | |
2934 | + ''' | |
2935 | + Detect resouce cannot run anywhere. | |
2936 | + This is to get resource status when F/O finished. | |
2937 | + So it outputs nothing. | |
2938 | + | |
2939 | + MsgNo. F11-4) | |
2940 | + Jan 5 15:19:20 x3650a pengine: [17658]: WARN: native_color: Resource prmApPostgreSQLDB cannot run anywhere | |
2941 | + ''' | |
2942 | + def detect_cannot_run_anywhere(self, outputobj, logelm, lconvfrm): | |
2943 | + try: | |
2944 | + wordlist = logelm.halogmsg.split() | |
2945 | + rscid = wordlist[2] | |
2946 | + except: | |
2947 | + return CONV_PARSE_ERROR | |
2948 | + if self.is_empty(rscid): | |
2949 | + return CONV_ITEM_EMPTY | |
2950 | + | |
2951 | + # Set the resource's status to the list. | |
2952 | + self.set_rscstat(rscid, None, True, None) | |
2953 | + return CONV_OK | |
2954 | + | |
2955 | + ''' | |
2956 | + Detect resouce became unmanaged. | |
2957 | + This is to get resource status when F/O finished. | |
2958 | + So it outputs nothing. | |
2959 | + When resource become *managed*, no particular log appears like | |
2960 | + "resource A is managed", the cluster just becomes S_POLICY_ENGINE and | |
2961 | + starts PE calcuration. | |
2962 | + So, to clear the "unmanaged" flag in RscStat, | |
2963 | + initialize the rscstatusList object in detect_pe_calc(). | |
2964 | + | |
2965 | + MsgNo. F11-5) | |
2966 | + Jan 5 10:04:09 x3650a pengine: [9727]: info: native_color: Unmanaged resource prmApPostgreSQLDB allocated to 'nowhere': inactive | |
2967 | + ''' | |
2968 | + def detect_rsc_unmanaged(self, outputobj, logelm, lconvfrm): | |
2969 | + try: | |
2970 | + wordlist = logelm.halogmsg.split() | |
2971 | + rscid = wordlist[3] | |
2972 | + except: | |
2973 | + return CONV_PARSE_ERROR | |
2974 | + if self.is_empty(rscid): | |
2975 | + return CONV_ITEM_EMPTY | |
2976 | + | |
2977 | + # Set the resource's status to the list. | |
2978 | + self.set_rscstat(rscid, None, None, True) | |
2979 | + return CONV_OK | |
2980 | + | |
2981 | + ''' | |
2982 | + Detect resource move action added. | |
2983 | + This is to get resource status when F/O started. | |
2984 | + | |
2985 | + MsgNo. F11-6) | |
2986 | + Jan 5 15:12:27 x3650a pengine: [16657]: notice: LogActions: Move resource prmExPostgreSQLDB (Started x3650a -> x3650b) | |
2987 | + ''' | |
2988 | + def add_rsc_move(self, outputobj, logelm, lconvfrm): | |
2989 | + try: | |
2990 | + wordlist = logelm.halogmsg.split() | |
2991 | + a_nodename = self.trimmark(wordlist[-1]) | |
2992 | + f_nodename = self.trimmark(wordlist[-3]) | |
2993 | + rscid = wordlist[3] | |
2994 | + except: | |
2995 | + return CONV_PARSE_ERROR | |
2996 | + | |
2997 | + if self.is_empty(a_nodename, rscid): | |
2998 | + return CONV_ITEM_EMPTY | |
2999 | + | |
3000 | + # Set the resource's status to the list. | |
3001 | + statstr = ("Move %s -> %s" % (f_nodename,a_nodename)) | |
3002 | + self.set_rscstat(rscid, statstr, None, None) | |
3003 | + | |
3004 | + if rscid in actRscList: | |
3005 | + cstat.ACTRSC_MOVE = FAIL_MOVE | |
3006 | + if cstat.FAILURE_OCCURRED == FAIL_RSC or cstat.FAILURE_OCCURRED == FAIL_SCORE: | |
3007 | + self.detect_fo_start(outputobj) | |
3008 | + | |
3009 | + return CONV_OK | |
3010 | + | |
3011 | + ########## | |
3012 | + # For DC election. | |
3013 | + ########## | |
3014 | + ''' | |
3015 | + Convert log message which means DC election is complete. | |
3016 | + | |
3017 | + MsgNo.13-2) | |
3018 | + Jan 6 14:16:18 x3650a crmd: [9874]: info: update_dc: Set DC to x3650a (3.0.1) | |
3019 | + ''' | |
3020 | + def dc_election_complete(self, outputobj, logelm, lconvfrm): | |
3021 | + try: | |
3022 | + nodename = logelm.halogmsg.split()[-2] | |
3023 | + except: | |
3024 | + return CONV_PARSE_ERROR | |
3025 | + if self.is_empty(nodename): | |
3026 | + return CONV_ITEM_EMPTY | |
3027 | + | |
3028 | + convertedlog = ("Set DC node to %s." % (nodename)) | |
3029 | + outputobj.output_log(lconvfrm.loglevel, convertedlog) | |
3030 | + return CONV_OK | |
3031 | + | |
3032 | + ''' | |
3033 | + Convert log message which means unset DC node. | |
3034 | + | |
3035 | + MsgNo.13-5) | |
3036 | + Jan 12 11:22:18 x3650a crmd: [5796]: info: update_dc: Unset DC x3650a | |
3037 | + ''' | |
3038 | + def detect_unset_dc(self, outputobj, logelm, lconvfrm): | |
3039 | + try: | |
3040 | + nodename = logelm.halogmsg.split()[-1] | |
3041 | + except: | |
3042 | + return CONV_PARSE_ERROR | |
3043 | + if self.is_empty(nodename): | |
3044 | + return CONV_ITEM_EMPTY | |
3045 | + | |
3046 | + convertedlog = ("Unset DC node %s." % (nodename)) | |
3047 | + outputobj.output_log(lconvfrm.loglevel, convertedlog) | |
3048 | + return CONV_OK | |
3049 | + | |
3050 | + ########## | |
3051 | + # For Pacemaker and Heartbeat service shutdown. | |
3052 | + ########## | |
3053 | + ''' | |
3054 | + Convert log message which means Pacemaker service on the node | |
3055 | + in the cluster send shutdown request. | |
3056 | + | |
3057 | + MsgNo.14-1) | |
3058 | + Jan 18 10:35:08 x3650a crmd: [10975]: info: handle_shutdown_request: Creating shutdown request for x3650b (state=S_IDLE) | |
3059 | + ''' | |
3060 | + def detect_shutdown_request(self, outputobj, logelm, lconvfrm): | |
3061 | + try: | |
3062 | + nodename = logelm.halogmsg.split()[-2] | |
3063 | + except: | |
3064 | + return CONV_PARSE_ERROR | |
3065 | + if self.is_empty(nodename): | |
3066 | + return CONV_ITEM_EMPTY | |
3067 | + | |
3068 | + cstat.shutNodeSet.add(nodename) | |
3069 | + convertedlog = ("Pacemaker on %s is shutting down." % (nodename)) | |
3070 | + outputobj.output_log(lconvfrm.loglevel, convertedlog) | |
3071 | + return CONV_OK | |
3072 | + | |
3073 | + ''' | |
3074 | + Detect Heartbeat service on localhost shutdown complete. | |
3075 | + Output message is static, but to remove the node name from | |
3076 | + shutting down node list, detect the message with | |
3077 | + peculiar function. | |
3078 | + | |
3079 | + MsgNo.14-2) | |
3080 | + Jul 15 15:35:37 x3650a heartbeat: [16986]: info: x3650a Heartbeat shutdown complete. | |
3081 | + ''' | |
3082 | + def detect_hb_shutdown(self, outputobj, logelm, lconvfrm): | |
3083 | + outputobj.output_log(lconvfrm.loglevel, lconvfrm.rulename) | |
3084 | + cstat.shutNodeSet.discard(HOSTNAME) | |
3085 | + return CONV_OK | |
3086 | + | |
3087 | + ''' | |
3088 | + Detect Pacemaker service on localhost starts to shutdown. | |
3089 | + Output message is static, but to add localhost name to | |
3090 | + shutting down node list, detect the message with | |
3091 | + peculiar function. | |
3092 | + | |
3093 | + MsgNo.14-3) | |
3094 | + Jan 18 10:36:18 x3650a crmd: [12294]: info: crm_shutdown: Requesting shutdown | |
3095 | + ''' | |
3096 | + def detect_pcmk_shutting_down(self, outputobj, logelm, lconvfrm): | |
3097 | + cstat.shutNodeSet.add(HOSTNAME) | |
3098 | + outputobj.output_log(lconvfrm.loglevel, lconvfrm.rulename) | |
3099 | + return CONV_OK | |
3100 | + | |
3101 | + ''' | |
3102 | + Convert log message which means Pacemaker service on node | |
3103 | + send shutdown request. | |
3104 | + | |
3105 | + MsgNo.14-4) | |
3106 | + Jan 18 10:35:26 x3650a cib: [10971]: info: cib_process_shutdown_req: Shutdown REQ from x3650b | |
3107 | + ''' | |
3108 | + def detect_dc_shutdown_request(self, outputobj, logelm, lconvfrm): | |
3109 | + try: | |
3110 | + nodename = logelm.halogmsg.split()[-1] | |
3111 | + except: | |
3112 | + return CONV_PARSE_ERROR | |
3113 | + if self.is_empty(nodename): | |
3114 | + return CONV_ITEM_EMPTY | |
3115 | + | |
3116 | + cstat.shutNodeSet.add(nodename) | |
3117 | + return CONV_OK | |
3118 | + | |
3119 | + ''' | |
3120 | + Detect the send shutdown request to DC. | |
3121 | + Add localhost name to shutting down node list. | |
3122 | + Output nothing. | |
3123 | + | |
3124 | + MsgNo.14-5) | |
3125 | + Sep 16 13:11:51 x3650a crmd: [11369]: info: do_shutdown_req: Sending shutdown request to DC: x3650a | |
3126 | + ''' | |
3127 | + def detect_send_shutdown(self, outputobj, logelm, lconvfrm): | |
3128 | + cstat.shutNodeSet.add(HOSTNAME) | |
3129 | + return CONV_OK | |
3130 | + | |
3131 | + ########## | |
3132 | + # For logging daemon event. | |
3133 | + ########## | |
3134 | + # use output_static_msg() only. | |
3135 | + | |
3136 | + ########## | |
3137 | + # For STONITH resource operation timed out. | |
3138 | + ########## | |
3139 | + ''' | |
3140 | + Get resource id and operation type which stonithd detected timed out. | |
3141 | + | |
3142 | + MsgNo.17-4) | |
3143 | + Jul 15 16:02:35 x3650a stonithd: [22087]: WARN: external_prmStonith2-2_start process (PID 22291) timed out (try 1). Killing with signal SIGTERM (15). | |
3144 | + MsgNo.19-3) | |
3145 | + Jan 7 14:20:16 x3650a stonithd: [14714]: WARN: external_prmStonith2-3_monitor process (PID 16383) timed out (try 1). Killing with signal SIGTERM (15). | |
3146 | + ''' | |
3147 | + def detect_rscop_timedout_stonithd(self, outputobj, logelm, lconvfrm): | |
3148 | + try: | |
3149 | + tmp = logelm.halogmsg.split()[0] | |
3150 | + wordlist = tmp.split('_') | |
3151 | + if len(wordlist) > 2: | |
3152 | + rscid = wordlist[1] | |
3153 | + op = wordlist[-1] | |
3154 | + else: | |
3155 | + rscid = wordlist[0] | |
3156 | + op = wordlist[-1] | |
3157 | + except: | |
3158 | + return CONV_PARSE_ERROR | |
3159 | + if self.is_empty(rscid, op): | |
3160 | + return CONV_ITEM_EMPTY | |
3161 | + | |
3162 | + rscid_and_op = ("%s:%s" % (rscid, op)) | |
3163 | + # Append to the list. | |
3164 | + cstat.timedoutRscopSet.add(rscid_and_op) | |
3165 | + return CONV_OK | |
3166 | + | |
3167 | + ########## | |
3168 | + # For fence operation. | |
3169 | + ########## | |
3170 | + ''' | |
3171 | + Convert log message which means fence operation started. | |
3172 | + | |
3173 | + MsgNo.20-1, No21-1) | |
3174 | + Jan 13 15:23:28 x3650a stonithd: [23731]: info: stonith_operate_locally::2713: sending fencing op RESET for x3650b to prmStonith2-1 (external/ssh) (pid=23852) | |
3175 | + ''' | |
3176 | + def fence_op_started(self, outputobj, logelm, lconvfrm): | |
3177 | + try: | |
3178 | + wordlist = logelm.halogmsg.split() | |
3179 | + op = wordlist[4] | |
3180 | + target = wordlist[6] | |
3181 | + msg = ' '.join(wordlist[8:]) | |
3182 | + except: | |
3183 | + return CONV_PARSE_ERROR | |
3184 | + if self.is_empty(op, target, msg): | |
3185 | + return CONV_ITEM_EMPTY | |
3186 | + | |
3187 | + convertedlog = ("Try to STONITH (%s) the Node %s to %s" % (op, target, msg)) | |
3188 | + outputobj.output_log(lconvfrm.loglevel, convertedlog) | |
3189 | + return CONV_OK | |
3190 | + | |
3191 | + ''' | |
3192 | + Convert log message which means fence operation succeeded. | |
3193 | + | |
3194 | + MsgNo.20-2) | |
3195 | + Jan 13 12:51:46 x3650a stonithd: [15595]: info: Succeeded to STONITH the node x3650b: optype=RESET. whodoit: x3650a | |
3196 | + ''' | |
3197 | + def fence_op_succeeded(self, outputobj, logelm, lconvfrm): | |
3198 | + try: | |
3199 | + wordlist = logelm.halogmsg.split() | |
3200 | + target = self.trimmark(wordlist[5]) | |
3201 | + | |
3202 | + oplist = wordlist[6].split('=') | |
3203 | + op = self.trimmark(oplist[1]) | |
3204 | + | |
3205 | + sniper = wordlist[-1] | |
3206 | + except: | |
3207 | + return CONV_PARSE_ERROR | |
3208 | + if self.is_empty(target, sniper, op): | |
3209 | + return CONV_ITEM_EMPTY | |
3210 | + | |
3211 | + convertedlog = ("Succeeded to STONITH (%s) " % (op) + "the Node %s by Node %s." % (target, sniper)) | |
3212 | + outputobj.output_log(lconvfrm.loglevel, convertedlog) | |
3213 | + return CONV_OK | |
3214 | + | |
3215 | + ''' | |
3216 | + Convert log message which means fence operation failed. | |
3217 | + | |
3218 | + MsgNo.20-3, 21-3) | |
3219 | + Jan 13 15:48:06 x3650a stonithd: [25195]: info: failed to STONITH node x3650b with local device prmStonith2-1 (exitcode 5), gonna try the next local device | |
3220 | + ''' | |
3221 | + def fence_op_failed(self, outputobj, logelm, lconvfrm): | |
3222 | + try: | |
3223 | + wordlist = logelm.halogmsg.split() | |
3224 | + nodename = wordlist[4] | |
3225 | + exitcode = self.trimmark(wordlist[10]) | |
3226 | + except: | |
3227 | + return CONV_PARSE_ERROR | |
3228 | + if self.is_empty(nodename, exitcode): | |
3229 | + return CONV_ITEM_EMPTY | |
3230 | + | |
3231 | + convertedlog = ("Failed to STONITH the Node %s " % (nodename) + "with one local device (exitcode=%s). " % (exitcode) + "Will try to use the next local device.") | |
3232 | + outputobj.output_log(lconvfrm.loglevel, convertedlog) | |
3233 | + return CONV_OK | |
3234 | + | |
3235 | + ''' | |
3236 | + Convert log message which means fence operation timed out. | |
3237 | + | |
3238 | + MsgNo.20-4, 21-4) | |
3239 | + Jan 13 14:08:01 x3650a stonithd: [20372]: ERROR: Failed to STONITH the node x3650b: optype=RESET, op_result=TIMEOUT | |
3240 | + ''' | |
3241 | + def fence_op_timedout(self, outputobj, logelm, lconvfrm): | |
3242 | + try: | |
3243 | + wordlist = logelm.halogmsg.split() | |
3244 | + nodename = self.trimmark(wordlist[5]) | |
3245 | + | |
3246 | + oplist = wordlist[6].split('=') | |
3247 | + op = self.trimmark(oplist[1]) | |
3248 | + except: | |
3249 | + return CONV_PARSE_ERROR | |
3250 | + if self.is_empty(nodename, op): | |
3251 | + return CONV_ITEM_EMPTY | |
3252 | + | |
3253 | + convertedlog = ("Failed to STONITH (%s) " % (op) + "the Node %s (Timed Out)." % (nodename)) | |
3254 | + outputobj.output_log(lconvfrm.loglevel, convertedlog) | |
3255 | + return CONV_OK | |
3256 | + | |
3257 | + ########## | |
3258 | + # For attribute event. | |
3259 | + ########## | |
3260 | + ''' | |
3261 | + Convert log message which means attribute value on own node updated. | |
3262 | + | |
3263 | + MsgNo.22-1) | |
3264 | + Jun 24 09:49:58 x3650a attrd: [16121]: info: attrd_perform_update: Sent update 45: diskcheck_status_internal=ERROR | |
3265 | + ''' | |
3266 | + def detect_attr_updated(self, outputobj, logelm, lconvfrm): | |
3267 | + try: | |
3268 | + # attribute name can has empty char. | |
3269 | + funcname_endpos = logelm.halogmsg.index(':') | |
3270 | + callid_endpos = logelm.halogmsg.index(':', (funcname_endpos + 1)) | |
3271 | + attr_and_val = \ | |
3272 | + logelm.halogmsg[(callid_endpos + 1):].strip().split('=') | |
3273 | + attrname = attr_and_val[0] | |
3274 | + attrval = attr_and_val[1] | |
3275 | + except: | |
3276 | + return CONV_PARSE_ERROR | |
3277 | + if self.is_empty(attrname, attrval): | |
3278 | + return CONV_ITEM_EMPTY | |
3279 | + | |
3280 | + convertedlog = ("Attribute \"%s\" is updated to \"%s\"." % | |
3281 | + (attrname, attrval)) | |
3282 | + outputobj.output_log(lconvfrm.loglevel, convertedlog) | |
3283 | + return CONV_OK | |
3284 | + | |
3285 | + ''' | |
3286 | + Convert log message which means attribute value on own node deleted. | |
3287 | + | |
3288 | + MsgNo.22-2) | |
3289 | + Jul 15 13:09:34 x3650a attrd: [17459]: info: attrd_perform_update: Sent delete 68: node=410de9dc-4458-4c0f-9d06-e7c8c2f0593e, attr=diskcheck_status, id=<n/a>, set=(null), section=status | |
3290 | + ''' | |
3291 | + def detect_attr_deleted(self, outputobj, logelm, lconvfrm): | |
3292 | + try: | |
3293 | + attrname = logelm.halogmsg.split(',')[1].strip().split("=")[1] | |
3294 | + except: | |
3295 | + return CONV_PARSE_ERROR | |
3296 | + if self.is_empty(attrname): | |
3297 | + return CONV_ITEM_EMPTY | |
3298 | + | |
3299 | + convertedlog = ("Attribute \"%s\" is deleted." % attrname) | |
3300 | + outputobj.output_log(lconvfrm.loglevel, convertedlog) | |
3301 | + return CONV_OK | |
3302 | + | |
3303 | + ########## | |
3304 | + # For Heartbeat service starts. | |
3305 | + ########## | |
3306 | + ''' | |
3307 | + Heartbeat log message which means Heartbeat service is starting. | |
3308 | + | |
3309 | + MsgNo.23-1) | |
3310 | + Jul 15 15:50:31 x3650a heartbeat: [22780]: info: Configuration validated. Starting heartbeat 3.0.3 | |
3311 | + ''' | |
3312 | + def detect_hb_start(self, outputobj, logelm, lconvfrm): | |
3313 | + try: | |
3314 | + wordlist = logelm.halogmsg.split() | |
3315 | + version = wordlist[-1] | |
3316 | + except: | |
3317 | + return CONV_PARSE_ERROR | |
3318 | + if self.is_empty(version): | |
3319 | + return CONV_ITEM_EMPTY | |
3320 | + | |
3321 | + convertedlog = ("Starting Heartbeat %s." % (version)) | |
3322 | + outputobj.output_log(lconvfrm.loglevel, convertedlog) | |
3323 | + return CONV_OK | |
3324 | + | |
3325 | + ''' | |
3326 | + Detect localhost status is set to up. | |
3327 | + Then clear all status (exclude ino, and offset). | |
3328 | + The message which is detected by detect_hb_start() appears when | |
3329 | + service Heartbeat start on the node which Heartbeat is already running, | |
3330 | + too. | |
3331 | + So, detect the following message to clear all status. | |
3332 | + | |
3333 | + MsgNo.23-3) | |
3334 | + Jul 15 11:12:13 x3650a heartbeat: [17442]: info: Local status now set to: 'up' | |
3335 | + ''' | |
3336 | + def detect_localstat_up(self, outputobj, logelm, lconvfrm): | |
3337 | + self.clear_status() | |
3338 | + return CONV_OK | |
3339 | + | |
3340 | + ########## | |
3341 | + # For pengine and tengine event. | |
3342 | + ########## | |
3343 | + ''' | |
3344 | + Convert log message which means pengine start. | |
3345 | + | |
3346 | + MsgNo.29-1) | |
3347 | + Aug 09 14:48:25 x3650a crmd: [5766]: info: start_subsystem: Starting sub-system "pengine" | |
3348 | + | |
3349 | + | |
3350 | + "crmd[2465]: 2009/06/08_17:36:36 info: start_subsystem: | |
3351 | + Starting sub-system "tengine"" | |
3352 | + ''' | |
3353 | + def crmd_subsystem_start(self, outputobj, logelm, lconvfrm): | |
3354 | + try: | |
3355 | + sysname = logelm.halogmsg.split()[-1].strip('"') | |
3356 | + except: | |
3357 | + return CONV_PARSE_ERROR | |
3358 | + if self.is_empty(sysname): | |
3359 | + return CONV_ITEM_EMPTY | |
3360 | + | |
3361 | + convertedlog = ("Start \"%s\" process." % (sysname)) | |
3362 | + outputobj.output_log(lconvfrm.loglevel, convertedlog) | |
3363 | + return CONV_OK | |
3364 | + | |
3365 | + ''' | |
3366 | + Convert log message which means pengine exits. | |
3367 | + | |
3368 | + MsgNo.29-2) | |
3369 | + Jul 20 15:48:33 x3650a crmd: [28373]: info: crmdManagedChildDied: Process pengine:[28390] exited (signal=0, exitcode=0) | |
3370 | + ''' | |
3371 | + def crmd_subsystem_exit(self, outputobj, logelm, lconvfrm): | |
3372 | + try: | |
3373 | + wordList = logelm.halogmsg.split() | |
3374 | + sys_and_pid = wordList[2].split(':') | |
3375 | + sysname = sys_and_pid[0] | |
3376 | + pid = sys_and_pid[1].lstrip('[').rstrip(']') | |
3377 | + except: | |
3378 | + return CONV_PARSE_ERROR | |
3379 | + if self.is_empty(sysname, pid): | |
3380 | + return CONV_ITEM_EMPTY | |
3381 | + | |
3382 | + convertedlog = ("Stop \"%s\" process normally. (pid=%s)" % (sysname, pid)) | |
3383 | + outputobj.output_log(lconvfrm.loglevel, convertedlog) | |
3384 | + return CONV_OK | |
3385 | + | |
3386 | + ''' | |
3387 | + Convert log message which means pengine killed by signal. | |
3388 | + | |
3389 | + MsgNo.29-3) | |
3390 | + Jul 20 15:48:33 x3650a crmd: [28373]: info: crmdManagedChildDied: Process pengine:[28390] exited (signal=9, exitcode=0) | |
3391 | + ''' | |
3392 | + def crmd_subsystem_kill(self, outputobj, logelm, lconvfrm): | |
3393 | + try: | |
3394 | + wordList = logelm.halogmsg.split() | |
3395 | + sys_and_pid = wordList[2].split(':') | |
3396 | + sysname = sys_and_pid[0] | |
3397 | + pid = sys_and_pid[1].lstrip('[').rstrip(']') | |
3398 | + signum = wordList[4].split('=')[1].rstrip(',') | |
3399 | + except: | |
3400 | + return CONV_PARSE_ERROR | |
3401 | + if self.is_empty(sysname, pid, signum): | |
3402 | + return CONV_ITEM_EMPTY | |
3403 | + | |
3404 | + convertedlog = ("Managed \"%s\" process terminated with signal %s. (pid=%s)" % (sysname, signum, pid)) | |
3405 | + outputobj.output_log(lconvfrm.loglevel, convertedlog) | |
3406 | + return CONV_OK | |
3407 | + | |
3408 | + ########## | |
3409 | + # Others. | |
3410 | + ########## | |
3411 | + ''' | |
3412 | + Detect a request for getting DC node name and DC status. | |
3413 | + For auto reset function. | |
3414 | + | |
3415 | + MsgNo.27-1) | |
3416 | + Jan 6 19:55:28 x3650a crmd: [28183]: info: handle_request: Current ping state: S_IDLE | |
3417 | + ''' | |
3418 | + def detect_dcstat_req(self, outputobj, logelm, lconvfrm): | |
3419 | + return CONV_OK | |
3420 | + | |
if __name__ == "__main__":
    # Bootstrap logger (INFO level, no logfile yet); replaced once the
    # configuration file has been read.
    pm_log = LogconvLog(LogconvLog.LOG_INFO, None)
    sys.exit(LogConvert().main())
@@ -0,0 +1,93 @@ | ||
########################################
# Derived definitions
########################################
%define name pm_logconv
%define cluster hb
%define version 1.0
%define release 1.el5
%define prefix /usr
%define instdir pm_logconv
%define ORGARCH %{name}-%{version}
#
#
Summary: Pacemaker and Heartbeat log converter
Name: %{name}-%{cluster}
Version: %{version}
Release: %{release}
Group: Applications
Source: %{name}-%{version}.tar.gz
License: GPL
Vendor: NIPPON TELEGRAPH AND TELEPHONE CORPORATION
# Include the release in the buildroot path (packaging convention) so
# builds of different releases cannot collide in _tmppath.
BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root
BuildRequires: make
BuildArch: noarch
Requires: python >= 2.4, python < 3.0
Requires: pacemaker >= 1.0.9
Requires: heartbeat >= 3.0.3

########################################
%description
Log message converter for Pacemaker and Heartbeat.
support version
 Pacemaker : stable-1.0 (1.0.9 or more)
 Heartbeat : 3.0.3

########################################
%prep
########################################
rm -rf $RPM_BUILD_ROOT

########################################
%setup -q
########################################

########################################
%build
########################################

########################################
%configure
########################################

########################################
%pre
########################################

########################################
%install
########################################
make DESTDIR=$RPM_BUILD_ROOT install

########################################
%clean
########################################
# Guard against an empty or "/" buildroot before the recursive delete.
if
	[ -n "${RPM_BUILD_ROOT}" -a "${RPM_BUILD_ROOT}" != "/" ]
then
	rm -rf $RPM_BUILD_ROOT
fi
rm -rf $RPM_BUILD_DIR/%{ORGARCH}

########################################
%post
########################################
true
########################################
%preun
########################################
true
########################################
%postun
########################################
true

########################################
%files
########################################
%defattr(-,root,root)
# The /etc directory itself belongs to the 'filesystem' package, so this
# package must not list a dir entry for it (dropped the bogus ownership).
# (noreplace) preserves a locally edited config file across upgrades.
%config(noreplace) /etc/pm_logconv.conf
%dir %{prefix}/share/pacemaker/%{instdir}
%{prefix}/share/pacemaker/%{instdir}/pm_logconv.py
# Byte-compiled files are created at runtime, not packaged: mark as ghost.
%ghost %{prefix}/share/pacemaker/%{instdir}/pm_logconv.pyc
%ghost %{prefix}/share/pacemaker/%{instdir}/pm_logconv.pyo