[Spread-cvs] commit: r400 - in trunk: daemon docs

jonathan at spread.org jonathan at spread.org
Sat Oct 11 11:01:29 EDT 2008


Author: jonathan
Date: 2008-10-11 11:01:29 -0400 (Sat, 11 Oct 2008)
New Revision: 400

Modified:
   trunk/daemon/Changelog
   trunk/daemon/config_gram.l
   trunk/daemon/config_parse.y
   trunk/daemon/configuration.c
   trunk/daemon/configuration.h
   trunk/daemon/protocol.c
   trunk/daemon/spread_params.h
   trunk/docs/sample.spread.conf
Log:
Trigger a membership change when the token sequence value gets close
to wrapping around at 2^32, by dropping the token until a membership
change occurs. The change will trigger when the sequence reaches
MAX_WRAP_SEQUENCE_VALUE, defined in spread_params.h.

Add spread.conf parameter to debug sequence wrap by initializing
sequence to a value just under the wrap. Option DebugInitialSequence
can be enabled in spread.conf to trigger this. You never want
to enable this on production daemons. 

The bug in which daemons hung because sequence numbers were not reset
was reported by several people, including Nico Meyer.


Modified: trunk/daemon/Changelog
===================================================================
--- trunk/daemon/Changelog	2008-10-09 06:39:34 UTC (rev 399)
+++ trunk/daemon/Changelog	2008-10-11 15:01:29 UTC (rev 400)
@@ -1,3 +1,19 @@
+Sat Oct 11 10:52:09 2008  Jonathan Stanton  <jonathan at spreadconcepts.com>
+
+	* spread_params.h, protocol.c (Prot_handle_token): Trigger membership change if 
+	token sequence value gets close to wrapping around at 2^32 by
+	dropping token until membership change occurs. The change will
+	trigger when sequence reaches MAX_WRAP_SEQUENCE_VALUE defined
+	in spread_params.h. Bug with hung daemons when sequence numbers 
+	were not reset was reported by several people on spread-users 
+	including Nico Meyer.
+
+	* config_parse.y, config_gram.l, configuration.c, configuration.h: 
+	Add spread.conf parameter to debug sequence wrap by initializing
+	sequence to a value just under the wrap. Option DebugInitialSequence
+	can be enabled in spread.conf to trigger this. You never want
+	to enable this on production daemons. 
+
 Thu Oct  9 02:33:50 2008  Jonathan Stanton  <jonathan at spreadconcepts.com>
 
 	* stdutil.c (stdhcode_sfh_get16bits): Fix broken compile because 

Modified: trunk/daemon/config_gram.l
===================================================================
--- trunk/daemon/config_gram.l	2008-10-09 06:39:34 UTC (rev 399)
+++ trunk/daemon/config_gram.l	2008-10-11 15:01:29 UTC (rev 400)
@@ -84,6 +84,7 @@
 DebugFlags                      { return DEBUGFLAGS; }
 DangerousMonitor                { return DANGEROUSMONITOR; }
 SocketPortReuse                 { return SOCKETPORTREUSE; }
+DebugInitialSequence            { return DEBUGINITIALSEQUENCE; }
 RuntimeDir                      { return RUNTIMEDIR; }
 DaemonUser                      { return SPUSER; }
 DaemonGroup                     { return SPGROUP; }

Modified: trunk/daemon/config_parse.y
===================================================================
--- trunk/daemon/config_parse.y	2008-10-09 06:39:34 UTC (rev 399)
+++ trunk/daemon/config_parse.y	2008-10-11 15:01:29 UTC (rev 400)
@@ -319,6 +319,7 @@
 %token DDEBUG DEXIT DPRINT DDATA_LINK DNETWORK DPROTOCOL DSESSION
 %token DCONF DMEMB DFLOW_CONTROL DSTATUS DEVENTS DGROUPS DMEMORY
 %token DSKIPLIST DACM DSECURITY DALL DNONE
+%token DEBUGINITIALSEQUENCE
 %token DANGEROUSMONITOR SOCKETPORTREUSE RUNTIMEDIR SPUSER SPGROUP ALLOWEDAUTHMETHODS REQUIREDAUTHMETHODS ACCESSCONTROLPOLICY
 %token SP_BOOL SP_TRIVAL LINKPROTOCOL PHOP PTCPHOP
 %token IMONITOR ICLIENT IDAEMON
@@ -441,6 +442,10 @@
                         {
                             Conf_set_runtime_dir($3.string);
                         }
+                |       DEBUGINITIALSEQUENCE
+                        {
+                            Conf_set_debug_initial_sequence();
+                        }
                 |       SPUSER EQUALS STRING
                         {
                             Conf_set_user($3.string);

Modified: trunk/daemon/configuration.c
===================================================================
--- trunk/daemon/configuration.c	2008-10-09 06:39:34 UTC (rev 399)
+++ trunk/daemon/configuration.c	2008-10-11 15:01:29 UTC (rev 400)
@@ -102,6 +102,8 @@
 
 static  int     Link_Protocol;
 
+static  bool    Conf_Debug_Initial_Sequence = FALSE;
+
 static  bool    Conf_Reload_State = FALSE;
 static  configuration *Config_Previous;
 static  proc    *Config_Previous_Procs;
@@ -174,6 +176,15 @@
         Conf_Reload_State = FALSE;
 }
 
+void    Conf_set_debug_initial_sequence(void)
+{
+        Conf_Debug_Initial_Sequence = TRUE;
+}
+bool    Conf_debug_initial_sequence(void)
+{
+        return( Conf_Debug_Initial_Sequence );
+}
+
 /* Basic algorithm:
  * 1) copy Config to oldConfig
  * 2) load new spread.conf file into Config

Modified: trunk/daemon/configuration.h
===================================================================
--- trunk/daemon/configuration.h	2008-10-09 06:39:34 UTC (rev 399)
+++ trunk/daemon/configuration.h	2008-10-11 15:01:29 UTC (rev 400)
@@ -39,6 +39,9 @@
 #include "arch.h"
 #include "spread_params.h"
 
+/* Part of sequence number debugging */
+#define INITIAL_SEQUENCE_NEAR_WRAP      ( MAX_WRAP_SEQUENCE_VALUE - 5 )
+
 /* For what spread services should listen on what interfaces */
 
 #define IFTYPE_MONITOR  0x1
@@ -111,6 +114,9 @@
 void            Conf_reload_state_end(void);
 bool            Conf_reload_initiate(void);
 
+void            Conf_set_debug_initial_sequence(void);
+bool            Conf_debug_initial_sequence(void);
+
 bool            Conf_get_dangerous_monitor_state(void);
 void            Conf_set_dangerous_monitor_state(bool new_state);
 port_reuse      Conf_get_port_reuse_type(void);

Modified: trunk/daemon/protocol.c
===================================================================
--- trunk/daemon/protocol.c	2008-10-09 06:39:34 UTC (rev 399)
+++ trunk/daemon/protocol.c	2008-10-11 15:01:29 UTC (rev 400)
@@ -112,13 +112,23 @@
 	for( i=0; i < MAX_PACKETS_IN_STRUCT; i++ )
 		Packets[i].exist = 0;
 
-	Highest_seq 	 = 0;
-	Highest_fifo_seq = 0;
-	My_aru	    	 = 0;
-	Aru		 = 0;
-	Set_aru		 = -1;
-	Last_discarded	 = 0;
-	Last_delivered	 = 0;
+        if ( Conf_debug_initial_sequence() ) {
+            Highest_seq 	 = INITIAL_SEQUENCE_NEAR_WRAP;
+            Highest_fifo_seq     = INITIAL_SEQUENCE_NEAR_WRAP;
+            My_aru	    	 = INITIAL_SEQUENCE_NEAR_WRAP;
+            Aru		         = INITIAL_SEQUENCE_NEAR_WRAP;
+            Set_aru		 = INITIAL_SEQUENCE_NEAR_WRAP -1;
+            Last_discarded	 = INITIAL_SEQUENCE_NEAR_WRAP;
+            Last_delivered	 = INITIAL_SEQUENCE_NEAR_WRAP;
+        } else {
+            Highest_seq 	 = 0;
+            Highest_fifo_seq     = 0;
+            My_aru	    	 = 0;
+            Aru		         = 0;
+            Set_aru		 = -1;
+            Last_discarded	 = 0;
+            Last_delivered	 = 0;
+        }
 
 	New_pack.num_elements = 2;
 	New_pack.elements[0].len = sizeof(packet_header);
@@ -468,6 +478,14 @@
 	if( !Same_endian( Token->type ) ) 
 		Flip_token_body( New_token.elements[1].buf, Token );
 
+        /* Deal with wrapping seq values (2^32) by triggering a membership by dropping the token */
+        if( (Memb_state() != EVS ) && (Token->seq > MAX_WRAP_SEQUENCE_VALUE ) )
+        {
+            Alarm( PROTOCOL, "Prot_handle_token: Token Sequence number (%ld) approaching 2^31 so trigger membership to reset it.\n", Token->seq);
+
+            return;
+        }
+
 	if( Conf_leader( Memb_active_ptr() ) == My.id )
 	{
 		if( Get_arq(Token->type) != Get_arq(Last_token->type) )
@@ -1224,17 +1242,29 @@
                 E_queue( Memb_lookup_new_members, 0, NULL, Zero_timeout);
                 Conf_reload_state_end();
         }
+
 	/* set variables for next membership */
-	Last_token->aru	 = 0;
-	Highest_seq 	 = 0;
-	Highest_fifo_seq = 0;
-	My_aru	    	 = 0;
-	Aru		 = 0;
-	Set_aru		 = -1;
-	Last_discarded	 = 0;
-	Last_delivered	 = 0;
+        if ( Conf_debug_initial_sequence() ) {
+            Last_token->aru	 = INITIAL_SEQUENCE_NEAR_WRAP;
+            Highest_seq 	 = INITIAL_SEQUENCE_NEAR_WRAP;
+            Highest_fifo_seq     = INITIAL_SEQUENCE_NEAR_WRAP;
+            My_aru	    	 = INITIAL_SEQUENCE_NEAR_WRAP;
+            Aru		         = INITIAL_SEQUENCE_NEAR_WRAP;
+            Set_aru		 = INITIAL_SEQUENCE_NEAR_WRAP -1;
+            Last_discarded	 = INITIAL_SEQUENCE_NEAR_WRAP;
+            Last_delivered	 = INITIAL_SEQUENCE_NEAR_WRAP;
+        } else {
+            Last_token->aru	 = 0;
+            Highest_seq 	 = 0;
+            Highest_fifo_seq     = 0;
+            My_aru	    	 = 0;
+            Aru		         = 0;
+            Set_aru		 = -1;
+            Last_discarded	 = 0;
+            Last_delivered	 = 0;
+        }
+
 	GlobalStatus.my_aru	 = My_aru;
-
 	Token_counter 	= 0;
 
     }else{

Modified: trunk/daemon/spread_params.h
===================================================================
--- trunk/daemon/spread_params.h	2008-10-09 06:39:34 UTC (rev 399)
+++ trunk/daemon/spread_params.h	2008-10-11 15:01:29 UTC (rev 400)
@@ -87,4 +87,6 @@
 #define		MAX_SESSION_MESSAGES	1000
 #define         MAX_GROUPS_PER_MESSAGE  100     /* Each multicast can't send to more groups then this */
 
+#define         MAX_WRAP_SEQUENCE_VALUE (1<<30) /* Maximum value for token->seq before reseting to zero with membership */
+
 #endif /* INC_SPREAD_PARAMS */

Modified: trunk/docs/sample.spread.conf
===================================================================
--- trunk/docs/sample.spread.conf	2008-10-09 06:39:34 UTC (rev 399)
+++ trunk/docs/sample.spread.conf	2008-10-11 15:01:29 UTC (rev 400)
@@ -76,6 +76,17 @@
 # or
 #EventTimeStamp = "[%a %d %b %Y %H:%M:%S]"
 
+# Set to initialize daemon sequence numbers to a 'large' number for testing.
+# This is purely a debugging capability and should never be enabled on
+# production systems. (Note: one side effect of enabling this is that
+# your system will experience an extra daemon membership every few messages,
+# so you REALLY do not want this turned on.)
+# If you want to change the initial value the sequence number is set to,
+# you need to edit the #define INITIAL_SEQUENCE_NEAR_WRAP at the top
+# of configuration.h.
+
+#DebugInitialSequence
+
 #Set whether to allow dangerous monitor commands 
 # like "partition, flow_control, or kill"
 # Default setting is FALSE.




More information about the Spread-cvs mailing list