• R/O
  • SSH

pm_logconv-cs: Commit

Pacemaker 対応ログメッセージ変換機能。

Heartbeat-2.1.4 用 hb-logconv(*) のPacemaker 1.0.x + Corosync スタック対応版。
(*) http://sourceforge.jp/projects/linux-ha/releases/?package_id=10282


Commit MetaInfo

Revision: a17af55091b7f5e27bef59d98602a791b2d4003c (tree)
Time: 2013-06-03 17:15:04
Author: Yoshihiko SATO <satoyoshi@inte...>
Committer: Yoshihiko SATO

Log Message

STONITH関連メッセージを精査

Change Summary

Incremental Difference

diff -r 9408666e7b2a -r a17af55091b7 pm_logconv.conf
--- a/pm_logconv.conf Wed May 29 17:29:23 2013 +0900
+++ b/pm_logconv.conf Mon Jun 03 17:15:04 2013 +0900
@@ -244,6 +244,12 @@
244244 func=fence_op_ended
245245 loglevel=error
246246
247+#MsgNo.21-4
248+[fence too many failures]
249+pattern=crmd,notice:,too_many_st_failures:,Too many failures to fence
250+func=fence_too_many_failures
251+loglevel=error
252+
247253 #MsgNo.21-5
248254 [executing stonith device start]
249255 pattern=stonith-ng,info:,call_remote_stonith:,Requesting that, perform op
@@ -251,22 +257,16 @@
251257
252258 #MsgNo.21-6
253259 [executing stonith device succeeded]
254-pattern=stonith-ng,notice:,log_operation:,Operation,for host,with device,returned: 0
260+pattern=stonith-ng,notice:,process_remote_stonith_exec:,Call to,on behalf of,OK
255261 func=exec_st_device_ended
256262 loglevel=info
257263
258264 #MsgNo.21-7
259265 [executing stonith device failed]
260-pattern=stonith-ng,error:,log_operation:,Operation,for host,with device,!returned: 0
266+pattern=stonith-ng,notice:,process_remote_stonith_exec:,Call to,on behalf of,!OK
261267 func=exec_st_device_ended
262268 loglevel=warning
263269
264-#MsgNo.21-8
265-[fence too many failures]
266-pattern=crmd,notice:,too_many_st_failures:,Too many failures to fence
267-func=fence_too_many_failures
268-loglevel=error
269-
270270 ###
271271 # For attribute event.
272272 ###
diff -r 9408666e7b2a -r a17af55091b7 pm_logconv.py
--- a/pm_logconv.py Wed May 29 17:29:23 2013 +0900
+++ b/pm_logconv.py Mon Jun 03 17:15:04 2013 +0900
@@ -3043,20 +3043,23 @@
30433043 Convert log message which means fence operation ended.
30443044
30453045 MsgNo.21-2)
3046- May 22 11:41:23 node01 crmd[798]: notice: tengine_stonith_notify:
3047- Peer node02 was terminated (reboot) by node01 for node01: OK
3048- (ref=2514dc6b-3497-41ea-8ade-60ccefb84123) by client crmd.798
3046+ Jan 1 00:00:00 node01 crmd[777]: notice: tengine_stonith_notify:
3047+ Peer node02 was terminated (reboot) by node03 for node01: OK
3048+ (ref=00000000-0000-0000-0000-000000000000) by client crmd.777
3049+ Jan 1 00:00:00 node01 crmd[777]: notice: tengine_stonith_notify:
3050+ Peer node02 was terminated (off) by a human for node01: OK
3051+ (ref=00000000-0000-0000-0000-000000000000) by client stonith_admin.888
30493052 MsgNo.21-3)
3050- May 21 14:00:28 node01 crmd[657]: notice: tengine_stonith_notify:
3051- Peer node02 was not terminated (reboot) by node01 for node01: Timer expired
3052- (ref=ff1ce466-ffd4-4415-a71d-063f5df1df8c) by client crmd.657
3053+ Jan 1 00:00:00 node01 crmd[777]: notice: tengine_stonith_notify:
3054+ Peer node02 was not terminated (reboot) by node03 for node01: Timer expired
3055+ (ref=00000000-0000-0000-0000-000000000000) by client crmd.777
30533056 '''
30543057 def fence_op_ended(self, outputobj, logelm, lconvfrm):
30553058 try:
30563059 wordlist = logelm.halogmsg.split()
30573060 tgt_node = wordlist[wordlist.index("Peer") + 1]
30583061 op = self.trimmark(wordlist[wordlist.index("terminated") + 1])
3059- by_node = wordlist[wordlist.index("by") + 1]
3062+ by_node = " ".join(wordlist[wordlist.index("by") + 1 : wordlist.index("for")])
30603063 for_node = self.trimmark(wordlist[wordlist.index("for") + 1])
30613064 result = wordlist[wordlist.index("for") + 2]
30623065 except:
@@ -3074,71 +3077,11 @@
30743077 return CONV_OK
30753078
30763079 '''
3077- Convert log message which means executing stonith device started.
3078-
3079- MsgNo.21-5)
3080- Mar 29 15:21:12 pm01 stonith-ng[31958]: info: call_remote_stonith:
3081- Requesting that pm01 perform op reboot pm02 with prmStonith1 for crmd.31962 (72s)
3082- '''
3083- def exec_st_device_started(self, outputobj, logelm, lconvfrm):
3084- try:
3085- wordlist = logelm.halogmsg.split()
3086- op = wordlist[6]
3087- target = wordlist[7]
3088- if wordlist[8] == "with":
3089- rsc = " " + wordlist[9]
3090- else:
3091- rsc = ""
3092-
3093- except:
3094- return CONV_PARSE_ERROR
3095- if self.is_empty(op, target):
3096- return CONV_ITEM_EMPTY
3097-
3098- convertedlog = ("Try to execute STONITH device%s for %s %s." % (rsc, op, target))
3099- outputobj.output_log(lconvfrm.loglevel, convertedlog)
3100- return CONV_OK
3101-
3102- '''
3103- Convert log message which means executing stonith device ended.
3104-
3105- MsgNo.21-6)
3106- May 22 11:41:23 node01 stonith-ng[794]: notice: log_operation:
3107- Operation 'reboot' [465] (call 2 from crmd.798) for host 'node02'
3108- with device 'prmStonith2' returned: 0 (OK)
3109- MsgNo.21-7)
3110- May 21 13:59:57 node01 stonith-ng[653]: error: log_operation:
3111- Operation 'reboot' [994] (call 2 from crmd.657) for host 'node02'
3112- with device 'prmStonith1' returned: -201 (Generic Pacemaker error)
3113- May 22 11:53:15 node01 stonith-ng[794]: error: log_operation:
3114- Operation 'reboot' [737] (call 3 from crmd.798) for host 'node02'
3115- with device 'prmStonith1' returned: -62 (Timer expired)
3116- '''
3117- def exec_st_device_ended(self, outputobj, logelm, lconvfrm):
3118- try:
3119- wordlist = logelm.halogmsg.split()
3120- op = self.trimmark(wordlist[wordlist.index("Operation") + 1])
3121- target = self.trimmark(wordlist[wordlist.index("host") + 1])
3122- rsc = self.trimmark(wordlist[wordlist.index("device") + 1])
3123- rtn = self.trimmark(wordlist[wordlist.index("returned:") + 1])
3124- except:
3125- return CONV_PARSE_ERROR
3126- if self.is_empty(op, target, rsc, rtn):
3127- return CONV_ITEM_EMPTY
3128-
3129- if rtn == "0":
3130- convertedlog = ("Succeeded to execute STONITH device %s for %s %s." % (rsc, op, target))
3131- else:
3132- convertedlog = ("Failed to execute STONITH device %s for %s %s." % (rsc, op, target))
3133-
3134- outputobj.output_log(lconvfrm.loglevel, convertedlog)
3135- return CONV_OK
3136-
3137- '''
31383080 Convert log message which means too many failuers to stonith.
31393081
3140- MsgNo.21-8)
3141- May 21 14:06:10 node01 crmd[657]: notice: too_many_st_failures: Too many failures to fence node02 (11), giving up
3082+ MsgNo.21-4)
3083+ Jan 1 00:00:00 node01 crmd[777]: notice: too_many_st_failures:
3084+ Too many failures to fence node02 (11), giving up
31423085 '''
31433086 def fence_too_many_failures(self, outputobj, logelm, lconvfrm):
31443087 try:
@@ -3154,6 +3097,62 @@
31543097 outputobj.output_log(lconvfrm.loglevel, convertedlog)
31553098 return CONV_OK
31563099
3100+ '''
3101+ Convert log message which means executing stonith device started.
3102+
3103+ MsgNo.21-5)
3104+ Jan 1 00:00:00 node01 stonith-ng[777]: info: call_remote_stonith:
3105+ Requesting that node03 perform op reboot node02 with prmStonith1 for crmd.888 (72s)
3106+ '''
3107+ def exec_st_device_started(self, outputobj, logelm, lconvfrm):
3108+ try:
3109+ wordlist = logelm.halogmsg.split()
3110+ by_node = wordlist[3]
3111+ op = wordlist[6]
3112+ for_node = wordlist[7]
3113+ if wordlist[8] == "with":
3114+ rsc = " " + wordlist[9]
3115+ else:
3116+ rsc = ""
3117+
3118+ except:
3119+ return CONV_PARSE_ERROR
3120+ if self.is_empty(by_node, op, for_node):
3121+ return CONV_ITEM_EMPTY
3122+
3123+ convertedlog = ("Try to execute STONITH device%s on %s for %s %s." % (rsc, by_node, op, for_node))
3124+ outputobj.output_log(lconvfrm.loglevel, convertedlog)
3125+ return CONV_OK
3126+
3127+ '''
3128+ Convert log message which means executing stonith device ended.
3129+
3130+ MsgNo.21-6)
3131+ Jan 1 00:00:00 node01 stonith-ng[777]: notice: process_remote_stonith_exec:
3132+ Call to prmStonith1 for node02 on behalf of crmd.888@node01: OK (0)
3133+ MsgNo.21-7)
3134+ Jan 1 00:00:00 node01 stonith-ng[777]: notice: process_remote_stonith_exec:
3135+ Call to prmStonith1 for node02 on behalf of crmd.888@node01: Generic Pacemaker error (-201)
3136+ '''
3137+ def exec_st_device_ended(self, outputobj, logelm, lconvfrm):
3138+ try:
3139+ wordlist = logelm.halogmsg.split()
3140+ target = self.trimmark(wordlist[wordlist.index("for") + 1])
3141+ rsc = self.trimmark(wordlist[wordlist.index("to") + 1])
3142+ rtn = self.trimmark(wordlist[-1])
3143+ except:
3144+ return CONV_PARSE_ERROR
3145+ if self.is_empty(target, rsc, rtn):
3146+ return CONV_ITEM_EMPTY
3147+
3148+ if rtn == "0":
3149+ convertedlog = ("Succeeded to execute STONITH device %s for %s." % (rsc, target))
3150+ else:
3151+ convertedlog = ("Failed to execute STONITH device %s for %s." % (rsc, target))
3152+
3153+ outputobj.output_log(lconvfrm.loglevel, convertedlog)
3154+ return CONV_OK
3155+
31573156 ##########
31583157 # For attribute event.
31593158 ##########
Show on old repository browser