[Spread-cvs] commit: r660 - branches/spread_4_2_debug/daemon

jschultz at spread.org jschultz at spread.org
Mon Jan 20 19:34:17 EST 2014


Author: jschultz
Date: 2014-01-20 19:34:17 -0500 (Mon, 20 Jan 2014)
New Revision: 660

Modified:
   branches/spread_4_2_debug/daemon/flow_control.c
   branches/spread_4_2_debug/daemon/membership.c
   branches/spread_4_2_debug/daemon/membership.h
   branches/spread_4_2_debug/daemon/network.c
   branches/spread_4_2_debug/daemon/protocol.c
   branches/spread_4_2_debug/daemon/spread_params.h
Log:
Debugging and potential fixes for loss and membership problems


Modified: branches/spread_4_2_debug/daemon/flow_control.c
===================================================================
--- branches/spread_4_2_debug/daemon/flow_control.c	2014-01-19 17:07:44 UTC (rev 659)
+++ branches/spread_4_2_debug/daemon/flow_control.c	2014-01-21 00:34:17 UTC (rev 660)
@@ -45,8 +45,8 @@
 
 void	FC_init( )
 {
-	Window = 300;
-	Personal_window = 50;
+	Window = 100;
+	Personal_window = 20;
 
 	GlobalStatus.window = Window;
 	GlobalStatus.personal_window = Personal_window;

Modified: branches/spread_4_2_debug/daemon/membership.c
===================================================================
--- branches/spread_4_2_debug/daemon/membership.c	2014-01-19 17:07:44 UTC (rev 659)
+++ branches/spread_4_2_debug/daemon/membership.c	2014-01-21 00:34:17 UTC (rev 660)
@@ -79,6 +79,8 @@
 	int16		num_trans;
 } ring_info;
 
+bool                    Memb_Just_Installed = FALSE;  /* tracks if we just installed a reg memb due to last token we sent */
+
 static	configuration	Membership;
 static	membership_id	Membership_id;
 static	configuration	Future_membership;
@@ -835,6 +837,7 @@
     Shift_to_seg();
 
     Token_alive = 0;
+    Memb_Just_Installed = FALSE;
     E_dequeue( Memb_token_loss,    0, NULL );
     E_dequeue( Send_join,	   0, NULL );
     E_dequeue( Form_or_fail,	   0, NULL );
@@ -1364,7 +1367,7 @@
 	rg_info->aru		= Aru;
 	rg_info->highest_seq	= Highest_seq;
 
-	/*Alarmp(SPLOG_INFO, MEMB, "Create_form1: putting Aru = %d and Highest_Seq = %d on rg_info form1 token\n", Aru, Highest_seq);*/
+	Alarmp(SPLOG_INFO, MEMB, "Create_form1: putting Aru = %d and Highest_Seq = %d on rg_info form1 token\n", Aru, Highest_seq);
 
 	/* update holes */
 	rg_info->num_holes	= 0;
@@ -1422,14 +1425,16 @@
 	if( F_members.num_pending > 0 )
 	{
 		/* send to next member in pending list */
-		Net_ucast_token( F_members.members[F_members.num_members], 
-			   &send_scat );
-		Net_ucast_token( F_members.members[F_members.num_members], 
-			   &send_scat );
+		Net_ucast_token( F_members.members[F_members.num_members], &send_scat );
+		Net_ucast_token( F_members.members[F_members.num_members], &send_scat );
+		/*Net_ucast_token( F_members.members[F_members.num_members], &send_scat );*/
+
 	}else if( F_reps.rep_index < F_reps.num_reps){
 		/* send to next rep */
 		Net_ucast_token( F_reps.reps[F_reps.rep_index].proc_id, &send_scat );
 		Net_ucast_token( F_reps.reps[F_reps.rep_index].proc_id, &send_scat );
+		/*Net_ucast_token( F_reps.reps[F_reps.rep_index].proc_id, &send_scat );*/
+
 	}else{
 		/* singleton membership */
 		F_members.num_pending = 1;
@@ -1439,6 +1444,8 @@
 		form_token.rtr_len = send_scat.elements[1].len +
 			send_scat.elements[2].len + send_scat.elements[3].len;
 		Net_ucast_token( My.id, &send_scat );
+		Net_ucast_token( My.id, &send_scat );
+		/*Net_ucast_token( My.id, &send_scat );*/
 	}
 
 	E_dequeue( Send_join, 0, NULL );
@@ -1652,7 +1659,7 @@
 	new_rg_info->aru	 = Aru;
 	new_rg_info->highest_seq = Highest_seq;
 
-	/*Alarmp(SPLOG_INFO, MEMB, "Fill_form1: Putting Aru = %d and Highest_seq = %d on new_rg_info of form1 token\n", Aru, Highest_seq);*/
+	Alarmp(SPLOG_INFO, MEMB, "Fill_form1: Putting Aru = %d and Highest_seq = %d on new_rg_info of form1 token\n", Aru, Highest_seq);
 
 	if( my_rg_info == NULL )
 	{
@@ -1725,14 +1732,14 @@
 
 	    if( my_rg_info->aru         > Aru ) {
 		new_rg_info->aru 	= my_rg_info->aru;
-		/*Alarmp( SPLOG_INFO, MEMB, "my_rg_info->aru (%d) > Aru (%d) -> setting new_rg_info->aru to my_rg_info\n", 
-		  my_rg_info->aru, Aru );*/
+		Alarmp( SPLOG_INFO, MEMB, "my_rg_info->aru (%d) > Aru (%d) -> setting new_rg_info->aru to my_rg_info\n", 
+		  my_rg_info->aru, Aru );
 	    }
 
 	    if( my_rg_info->highest_seq > Highest_seq ) {
 		new_rg_info->highest_seq= my_rg_info->highest_seq;
-		/*Alarmp( SPLOG_INFO, MEMB, "my_rg_info->highest_seq (%d) > Highest_seq (%d) -> setting new_rg_info->highest_seq to my_rg_info\n", 
-		  my_rg_info->highest_seq, Highest_seq );*/
+		Alarmp( SPLOG_INFO, MEMB, "my_rg_info->highest_seq (%d) > Highest_seq (%d) -> setting new_rg_info->highest_seq to my_rg_info\n", 
+		  my_rg_info->highest_seq, Highest_seq );
 	    }
 
 	    for( i=0; i < my_rg_info->num_holes; i++ )
@@ -1824,11 +1831,13 @@
 		/* send to next member in pending list */
 		Net_ucast_token( m_info->members[m_info->num_members], &send_scat );
 		Net_ucast_token( m_info->members[m_info->num_members], &send_scat );
+		/*Net_ucast_token( m_info->members[m_info->num_members], &send_scat );*/
 
 	}else if( r_info->rep_index < r_info->num_reps){
 		/* send to next rep */
 		Net_ucast_token( r_info->reps[r_info->rep_index].proc_id, &send_scat );
 		Net_ucast_token( r_info->reps[r_info->rep_index].proc_id, &send_scat );
+		/*Net_ucast_token( r_info->reps[r_info->rep_index].proc_id, &send_scat );*/
 
 	}else{
 		/* prepare form2 token */
@@ -1842,6 +1851,7 @@
 				send_scat.elements[2].len + send_scat.elements[3].len;
 		Net_ucast_token( m_info->members[0], &send_scat );
 		Net_ucast_token( m_info->members[0], &send_scat );
+		/*Net_ucast_token( m_info->members[0], &send_scat );*/
 	}
 
 	E_dequeue( Send_join, 0, NULL );
@@ -1971,8 +1981,8 @@
                       *num_rings, num_bytes, Membership_id.proc_id, Membership_id.time); 
         }
 
-	/*Alarmp( SPLOG_INFO, MEMB, "Read_form2: updating Highest_seq %d -> %d; Aru %d -> %d\n", 
-	  Highest_seq, my_rg_info->highest_seq, Aru, my_rg_info->aru );*/
+	Alarmp( SPLOG_INFO, MEMB, "Read_form2: updating Highest_seq %d -> %d; Aru %d -> %d\n", 
+	  Highest_seq, my_rg_info->highest_seq, Aru, my_rg_info->aru );
 
 	Highest_seq = my_rg_info->highest_seq;
 	Aru	    = my_rg_info->aru;
@@ -2030,6 +2040,7 @@
 	{
 		Net_send_token( &send_scat );
 		Net_send_token( &send_scat );
+		/*Net_send_token( &send_scat );*/
 		Token_rounds = 0;
 
 	}else{
@@ -2038,12 +2049,14 @@
 
 		form_token->type = 0;
 		form_token->seq = 0;
-		form_token->aru = Last_seq;  /* TODO: is this right? if so, shouldn't we set Last_token this way too below? */
+		form_token->aru = INT32_MAX;  /* leader will lower to his My_aru */
+		form_token->aru_last_id = 0;  /* leader will set to My.id */
 		form_token->flow_control = 0;
 		form_token->rtr_len = 0;
 
 		Net_send_token( &send_scat );
 		Net_send_token( &send_scat );
+		/*Net_send_token( &send_scat );*/
 		Token_rounds = 1;
 	}
 	Token_alive = 1;
@@ -2053,6 +2066,9 @@
 	Last_token->seq  = 0;
 	Last_token->aru  = 0;
 
+	Aru = 0;
+	Alarmp( SPLOG_INFO, MEMB, "Read_form2: updating Aru to %d!\n", Aru );
+
 	State = EVS;
 	GlobalStatus.state = EVS;
 }
@@ -2215,8 +2231,8 @@
 	int	pack_entry;
 	int	i;
 
-	/*Alarmp( SPLOG_INFO, MEMB, "Backoff_membership entered: Last_discarded = %d; Highest_seq = %d; Aru = %d; My_aru = %d\n", 
-	  Last_discarded, Highest_seq, Aru, My_aru );*/
+	Alarmp( SPLOG_INFO, MEMB, "Backoff_membership entered: Last_discarded = %d; Highest_seq = %d; Aru = %d; My_aru = %d\n", 
+	  Last_discarded, Highest_seq, Aru, My_aru );
 
 	pack_entry=-1;
 	for( i=Last_discarded+1; i <= Highest_seq; i++ )
@@ -2240,8 +2256,8 @@
 		My_aru++;
 	}
 
-	/*Alarmp( SPLOG_INFO, MEMB, "Backoff_membership leaving: Last_discarded = %d; Highest_seq = %d; Aru = %d; My_aru = %d\n", 
-	  Last_discarded, Highest_seq, Aru, My_aru );*/
+	Alarmp( SPLOG_INFO, MEMB, "Backoff_membership leaving: Last_discarded = %d; Highest_seq = %d; Aru = %d; My_aru = %d\n", 
+	  Last_discarded, Highest_seq, Aru, My_aru );
 }
 
 void	Memb_commit()
@@ -2310,6 +2326,7 @@
 
 	Alarm( MEMB, "Memb_regular\n");
 	Transitional = 0;
+	Memb_Just_Installed = TRUE;
 
 	GlobalStatus.membership_changes++;
 
@@ -2332,6 +2349,7 @@
 	Foreign_found = 0;
 	if( Conf_leader( &Membership ) == My.id )
 		E_queue( Memb_lookup_new_members, 0, NULL, Lookup_timeout );
+
 	printf("Membership id is ( %d, %d)\n", Membership_id.proc_id, Membership_id.time );
 	printf("%c", Conf_print( &Membership ) );
 

Modified: branches/spread_4_2_debug/daemon/membership.h
===================================================================
--- branches/spread_4_2_debug/daemon/membership.h	2014-01-19 17:07:44 UTC (rev 659)
+++ branches/spread_4_2_debug/daemon/membership.h	2014-01-21 00:34:17 UTC (rev 660)
@@ -66,4 +66,6 @@
 void		Memb_regular();
 void	        Memb_print_form_token( sys_scatter *scat );
 
+extern bool     Memb_Just_Installed;
+
 #endif /* INC_MEMBERSHIP */

Modified: branches/spread_4_2_debug/daemon/network.c
===================================================================
--- branches/spread_4_2_debug/daemon/network.c	2014-01-19 17:07:44 UTC (rev 659)
+++ branches/spread_4_2_debug/daemon/network.c	2014-01-21 00:34:17 UTC (rev 660)
@@ -643,11 +643,9 @@
         token_ptr->conf_hash = Cn->hash_code;
 	token_ptr->transmiter_id = My.id;
 
-	Alarmp( SPLOG_INFO, NETWORK, 
-		"Net_send_token: type = 0x%08X; transmitter_id = 0x%08X; seq = %d; proc_id = 0x%08X\n"
-		"\taru = %d; aru_last_id = 0x%08X; Token_address = 0x%08X\n", 
-		token_ptr->type, token_ptr->transmiter_id, token_ptr->seq, token_ptr->proc_id, 
-		token_ptr->aru, token_ptr->aru_last_id, Token_address );
+	Alarmp( SPLOG_INFO, PROTOCOL, 
+		"Net_send_token: type = 0x%08X; transmitter_id = 0x%08X; seq = %d; proc_id = 0x%08X; aru = %d; aru_last_id = 0x%08X; Token_address = 0x%08X\n", 
+		token_ptr->type, token_ptr->transmiter_id, token_ptr->seq, token_ptr->proc_id, token_ptr->aru, token_ptr->aru_last_id, Token_address );
 
         if ( token_ptr->rtr_len > (MAX_PACKET_SIZE - sizeof(token_header) ) )
         {

Modified: branches/spread_4_2_debug/daemon/protocol.c
===================================================================
--- branches/spread_4_2_debug/daemon/protocol.c	2014-01-19 17:07:44 UTC (rev 659)
+++ branches/spread_4_2_debug/daemon/protocol.c	2014-01-21 00:34:17 UTC (rev 660)
@@ -55,7 +55,6 @@
 static	proc		My;
 static	int		My_index;
 
-static	int32		Set_aru;
 static	int		Token_counter;
 
 /* Used ONLY in Prot_handle_bcast, inited in Prot_init */
@@ -73,8 +72,6 @@
 /* Used to indicate a need to reload configuration at end of current membership */
 static  bool            Prot_Need_Conf_Reload  = FALSE;
 
-static  bool            Just_Installed = FALSE;  /* tracks if we just installed a reg memb due to last token we sent */
-
 /* ### Pack: 1 line */
 static	packet_info	Buffered_packets[ARCH_SCATTER_SIZE];
 
@@ -122,7 +119,6 @@
             Highest_fifo_seq     = INITIAL_SEQUENCE_NEAR_WRAP;
             My_aru	    	 = INITIAL_SEQUENCE_NEAR_WRAP;
             Aru		         = INITIAL_SEQUENCE_NEAR_WRAP;
-            Set_aru		 = INITIAL_SEQUENCE_NEAR_WRAP -1;
             Last_discarded	 = INITIAL_SEQUENCE_NEAR_WRAP;
             Last_delivered	 = INITIAL_SEQUENCE_NEAR_WRAP;
         } else {
@@ -130,7 +126,6 @@
             Highest_fifo_seq     = 0;
             My_aru	    	 = 0;
             Aru		         = 0;
-            Set_aru		 = -1;
             Last_discarded	 = 0;
             Last_delivered	 = 0;
         }
@@ -472,7 +467,7 @@
         }
 
         Alarmp( SPLOG_INFO, PROTOCOL, 
-	       "Prot_handle_token: type = 0x%08X; transmitter = 0x%08X; seq = %d; proc_id = 0x%08X; aru = %d; aru_last_id = 0x%08X;\n", 
+	       "\nProt_handle_token: type = 0x%08X; transmitter = 0x%08X; seq = %d; proc_id = 0x%08X; aru = %d; aru_last_id = 0x%08X;\n", 
 	       Token->type, Token->transmiter_id, Token->seq, Token->proc_id, Token->aru, Token->aru_last_id );
 
 	if( ret != sizeof(token_header) + Token->rtr_len )
@@ -502,10 +497,11 @@
 		    /* received double token - swallow it */
 		    goto END; 
 
-		} else if (Memb_state() == OP && Just_Installed) {
-		    Just_Installed  = FALSE;
-		    Last_token->aru = 0;      /* wipe out EVS aru on last token due to transitioning from EVS -> OP */
+		} else if (Memb_Just_Installed) {
+		    Memb_Just_Installed  = FALSE;
 		    /* NOTE: we do this now rather than just when we installed to allow proper token retransmissions */
+		    Last_token->aru = 0;  /* wipe out EVS aru on last token due to transitioning from EVS -> OP */
+		    Alarmp(SPLOG_INFO, PROTOCOL, "Prot_handle_token: leader just installed; setting Last_token->aru = 0!\n");
 		}
 
 	}else{
@@ -535,10 +531,11 @@
 
 		} else {
 
-			if (Memb_state() == OP && Just_Installed) {
-			  Just_Installed  = FALSE;
-			  Last_token->aru = 0;      /* wipe out EVS aru on last token due to transitioning from EVS -> OP */
+			if (Memb_Just_Installed) {
+			  Memb_Just_Installed  = FALSE;
 			  /* NOTE: we do this now rather than just when we installed to allow proper token retransmissions */
+			  Last_token->aru = 0;  /* wipe out EVS aru on last token due to transitioning from EVS -> OP */
+			  Alarmp(SPLOG_INFO, PROTOCOL, "Prot_handle_token: not-leader just installed; setting Last_token->aru = 0!\n");
 			}
 
                         if ( Get_retrans(Token->type) > 0 ) {
@@ -613,21 +610,19 @@
 	Token->rtr_len = new_ptr;
 	New_token.elements[1].len = new_ptr;
 
-	/* Calculating Token->aru and Set_aru */
-	if( ( Token->aru == Set_aru    ) ||
-            ( Token->aru_last_id == My.id ) ||
-	    ( Token->aru == Token->seq && Memb_state() != EVS ) )  /* NOTE: Token->seq is meaningless in EVS */
-	{
-		Token->aru = My_aru;
-                Token->aru_last_id = My.id;
-		if( My_aru < Highest_seq ) Set_aru = My_aru;
-		else Set_aru = -1;
-	}else if( Token->aru > My_aru ) {
-		Token->aru = My_aru;
-                Token->aru_last_id = My.id;
-		Set_aru    = My_aru;
-	}else{
-		Set_aru    = -1;
+	/* Calculating Token->aru and Token->aru_last_id */
+
+	Alarmp( SPLOG_INFO, PROTOCOL, "Prot_handle_token: calculating Token->aru and Token->aru_last_id: Token->aru = %d, My_aru = %d, Token->aru_last_id = 0x%08X, My.id = 0x%08X, Highest_seq = %d, Token->seq = %d, Memb_state() = %d\n",
+		Token->aru, My_aru, Token->aru_last_id, My.id, Highest_seq, Token->seq, Memb_state() );
+
+	if ( Token->aru > My_aru         ||                           /* this daemon may be missing packets: lower aru to My_aru */
+	     Token->aru_last_id == My.id ||                           /* this daemon last updated aru: try raising to My_aru */
+	     /*Token->aru == Highest_seq   ||*/                           /* everyone has everything so far: try raising to My_aru; NOTE: Highest_seq already incorporated Token->seq above */
+	     ( Token->aru == Token->seq && Memb_state() != EVS ) ) {  /* everyone has everything so far: try raising to My_aru; NOTE: Token->seq is meaningless in EVS */
+
+		Token->aru         = My_aru;
+		Token->aru_last_id = My.id;
+		Alarmp( SPLOG_INFO, PROTOCOL, "Prot_handle_token: setting Token->aru = %d, Token->aru_last_id = 0x%08X\n", Token->aru, Token->aru_last_id );
 	}
 	
 	Token->proc_id = My.id;
@@ -669,12 +664,26 @@
 
 	/* calculating Aru */
 	if( Token->aru > Last_token->aru ) {
-	        /*Alarmp( SPLOG_INFO, PROTOCOL, "Prot_handle_token: updating Aru from Last_token; Aru %d -> %d\n", Aru, Last_token->aru );*/
+
+		if ( Last_token->aru >= Aru ) {
+		        Alarmp( SPLOG_INFO, PROTOCOL, "Prot_handle_token: updating Aru from Last_token: Aru %d -> %d; (Token->aru = %d)\n", Aru, Last_token->aru, Token->aru );
+
+		} else {
+			Alarmp( SPLOG_FATAL, PROTOCOL, "Prot_handle_token: illegally lowering Aru from Last_token: Aru %d -> %d; (Token->aru = %d)\n", Aru, Last_token->aru, Token->aru );
+		}
+
 		Aru = Last_token->aru;
 	} else {
-	        /*Alarmp( SPLOG_INFO, PROTOCOL, "Prot_handle_token: updating Aru from Token; Aru %d -> %d\n", Aru, Token->aru );*/
+
+		if ( Token->aru >= Aru ) {
+		        Alarmp( SPLOG_INFO, PROTOCOL, "Prot_handle_token: updating Aru from Token: Aru %d -> %d; (Last_token->aru = %d)\n", Aru, Token->aru, Last_token->aru);
+		} else {
+		        Alarmp( SPLOG_FATAL, PROTOCOL, "Prot_handle_token: illegally lowering Aru from Token: Aru %d -> %d; (Last_token->aru = %d)\n", Aru, Token->aru, Last_token->aru);			
+		}
+
 		Aru = Token->aru;
 	}
+
 	if( Highest_seq == Aru ) Token_counter++;
 	else Token_counter = 0;
 
@@ -720,7 +729,7 @@
 	GlobalStatus.token_rounds = Token_rounds;
 
  END:
-	/*Alarmp( SPLOG_INFO, PROTOCOL, "Prot_handle_token: LEAVING!\n" )*/;
+	Alarmp( SPLOG_INFO, PROTOCOL, "Prot_handle_token: LEAVING!\n\n" );
 }
 
 /* Provide boolean result of whether the membership system needs to initiate a configuration reload
@@ -1280,7 +1289,6 @@
 
 	/* calculate and deliver regular membership */
 	Memb_regular();
-	Just_Installed = TRUE;
 
 	Log_membership();
 	reg_memb_id = Memb_id();
@@ -1307,7 +1315,6 @@
             Highest_fifo_seq     = INITIAL_SEQUENCE_NEAR_WRAP;
             My_aru	    	 = INITIAL_SEQUENCE_NEAR_WRAP;
             Aru		         = INITIAL_SEQUENCE_NEAR_WRAP;
-            Set_aru		 = INITIAL_SEQUENCE_NEAR_WRAP - 1;
             Last_discarded	 = INITIAL_SEQUENCE_NEAR_WRAP;
             Last_delivered	 = INITIAL_SEQUENCE_NEAR_WRAP;
         } else {
@@ -1315,12 +1322,11 @@
             Highest_fifo_seq     = 0;
             My_aru	    	 = 0;
             Aru		         = 0;
-            Set_aru		 = -1;
             Last_discarded	 = 0;
             Last_delivered	 = 0;
         }
 
-	/*Alarmp( SPLOG_INFO, PROTOCOL, "Discard_packets: Updated Aru to %d for next membership\n", Aru );*/
+	Alarmp( SPLOG_INFO, PROTOCOL, "Discard_packets: Updated Aru to %d for next membership\n", Aru );
 
 	GlobalStatus.my_aru	 = My_aru;
 	Token_counter 	= 0;

Modified: branches/spread_4_2_debug/daemon/spread_params.h
===================================================================
--- branches/spread_4_2_debug/daemon/spread_params.h	2014-01-19 17:07:44 UTC (rev 659)
+++ branches/spread_4_2_debug/daemon/spread_params.h	2014-01-21 00:34:17 UTC (rev 660)
@@ -71,7 +71,7 @@
 #define		MAX_PACKETS_IN_STRUCT 	8192
 #define		PACKET_MASK		0x00001fff
 
-#define		MAX_SEQ_GAP		1600	/* used in flow control to limit difference between highest_seq and aru */
+#define		MAX_SEQ_GAP		100	/* used in flow control to limit difference between highest_seq and aru */
 
 #define		MAX_EVS_ROUNDS		500 	/* used in EVS state to limit total # of rounds to complete EVS */
 




More information about the Spread-cvs mailing list