[Spread-cvs] commit: r402 - in trunk: daemon docs
jonathan at spread.org
jonathan at spread.org
Mon Oct 13 17:00:53 EDT 2008
Author: jonathan
Date: 2008-10-13 17:00:53 -0400 (Mon, 13 Oct 2008)
New Revision: 402
Modified:
trunk/daemon/Changelog
trunk/daemon/configuration.c
trunk/daemon/configuration.h
trunk/daemon/membership.c
trunk/daemon/network.c
trunk/daemon/prot_body.h
trunk/daemon/protocol.c
trunk/docs/DynamicConfiguration.txt
Log:
Fix for bug where Memb_token_loss() crashes with an assertion failure when a configuration
reload is triggered while another membership change is occurring. This bug was reported on
spread-users by at least Adam Grossman, Clotho Tsang, Adrian Revill and Mike Perik.
The fix involves delaying the execution of the configuration reload until after the current
membership change has completed and the daemon has moved back into OP state.
Modified: trunk/daemon/Changelog
===================================================================
--- trunk/daemon/Changelog 2008-10-13 20:19:18 UTC (rev 401)
+++ trunk/daemon/Changelog 2008-10-13 21:00:53 UTC (rev 402)
@@ -1,3 +1,25 @@
+Mon Oct 13 16:22:28 2008 Jonathan Stanton <jonathan at spreadconcepts.com>
+
+ * protocol.c (Prot_initiate_conf_reload, Prot_need_conf_reload):
+ Change Prot_handle_conf_reload() to only start the reload by calling
+ the new function Prot_initiate_conf_reload if in OP membership state.
+ Otherwise a flag is set (Prot_Need_Conf_Reload). The flag is accessed
+ through the Prot_need_conf_reload() function and cleared by the
+ Prot_clear_need_conf_reload() function.
+
+ This fixes a bug where if a reload is triggered by the spmonitor when
+ the daemon is already in a membership change it causes a crash because
+ of the asserts in Memb_token_loss(). Bug was reported by Adam Grossman,
+ Clotho Tsang, Adrian Revill, Mike Perik, and others.
+
+Mon Oct 13 16:21:03 2008 Jonathan Stanton <jonathan at spreadconcepts.com>
+
+ * membership.c (Shift_to_op): Add Shift_to_op function to capture
+ all cases when daemon membership algorithm changes state to OP.
+ This triggers a configuration reload if one has been queued up.
+
+
+
Mon Oct 13 16:16:45 2008 Jonathan Stanton <jonathan at spreadconcepts.com>
* session.c, groups.c (G_shift_to_GOP): Add G_shift_to_GOP function
Modified: trunk/daemon/configuration.c
===================================================================
--- trunk/daemon/configuration.c 2008-10-13 20:19:18 UTC (rev 401)
+++ trunk/daemon/configuration.c 2008-10-13 21:00:53 UTC (rev 402)
@@ -105,6 +105,7 @@
static bool Conf_Debug_Initial_Sequence = FALSE;
static bool Conf_Reload_State = FALSE;
+static bool Conf_Reload_Singleton_State = FALSE;
static configuration *Config_Previous;
static proc *Config_Previous_Procs;
static char Conf_FileName[80];
@@ -159,6 +160,23 @@
Conf_load_conf_file( file_name, my_name);
}
+bool Conf_in_reload_singleton_state(void)
+{
+ return(Conf_Reload_Singleton_State);
+}
+
+void Conf_reload_singleton_state_begin(void)
+{
+
+ Conf_Reload_Singleton_State = TRUE;
+}
+
+void Conf_reload_singleton_state_end(void)
+{
+
+ Conf_Reload_Singleton_State = FALSE;
+}
+
bool Conf_in_reload_state(void)
{
return(Conf_Reload_State);
Modified: trunk/daemon/configuration.h
===================================================================
--- trunk/daemon/configuration.h 2008-10-13 20:19:18 UTC (rev 401)
+++ trunk/daemon/configuration.h 2008-10-13 21:00:53 UTC (rev 402)
@@ -109,9 +109,13 @@
int Conf_num_procs_in_seg( configuration *config, int16 seg_index );
void Conf_id_to_str( int32u id, char *str );
char Conf_print(configuration *config);
+
bool Conf_in_reload_state(void);
void Conf_reload_state_begin(void);
void Conf_reload_state_end(void);
+bool Conf_in_reload_singleton_state(void);
+void Conf_reload_singleton_state_begin(void);
+void Conf_reload_singleton_state_end(void);
bool Conf_reload_initiate(void);
void Conf_set_debug_initial_sequence(void);
Modified: trunk/daemon/membership.c
===================================================================
--- trunk/daemon/membership.c 2008-10-13 20:19:18 UTC (rev 401)
+++ trunk/daemon/membership.c 2008-10-13 21:00:53 UTC (rev 402)
@@ -112,6 +112,7 @@
static void Memb_handle_foreign( sys_scatter *scat );
static void Memb_handle_form1 ( sys_scatter *scat );
static void Memb_handle_form2 ( sys_scatter *scat );
+static void Shift_to_op();
static void Shift_to_seg();
static void Gather_or_represented();
static void Shift_to_gather();
@@ -140,8 +141,7 @@
int32 current_subnet;
int i, num_seg;
- State = OP;
- GlobalStatus.state = OP;
+ Shift_to_op();
GlobalStatus.membership_changes = 0;
My = Conf_my();
@@ -851,6 +851,16 @@
Conf_append_id_to_seg(&Membership.segments[My.seg_index], My.id);
}
+static void Shift_to_op()
+{
+ State = OP;
+ GlobalStatus.state = OP;
+
+ if ( Prot_need_conf_reload() ) {
+ E_queue( Prot_initiate_conf_reload, 0, NULL, Zero_timeout );
+ }
+}
+
static void Shift_to_seg()
{
State = SEG;
@@ -919,8 +929,7 @@
/* clear everything and go back to op */
E_dequeue( Send_join, 0, NULL);
E_queue( Memb_lookup_new_members, 0, NULL, Lookup_timeout );
- State = OP;
- GlobalStatus.state = OP;
+ Shift_to_op();
}else{
/* create and send form token */
Create_form1();
@@ -932,8 +941,7 @@
Alarm( MEMB, "Form_or_fail:failed, return to OP\n");
E_dequeue( Send_join, 0, NULL );
E_queue( Memb_lookup_new_members, 0, NULL, Lookup_timeout );
- State = OP;
- GlobalStatus.state = OP;
+ Shift_to_op();
}else{
Alarm( MEMB, "Form_or_fail: failed to gather\n");
/* failed to gather again */
@@ -2276,9 +2284,9 @@
Alarm( MEMB, "Memb_regular\n");
Transitional = 0;
- State = OP;
- GlobalStatus.state = OP;
+
GlobalStatus.membership_changes++;
+
Membership = Future_membership;
Membership_id = Future_membership_id;
Reg_membership = Membership;
@@ -2300,6 +2308,8 @@
E_queue( Memb_lookup_new_members, 0, NULL, Lookup_timeout );
printf("Membership id is ( %d, %d)\n", Membership_id.proc_id, Membership_id.time );
printf("%c", Conf_print( &Membership ) );
+
+ Shift_to_op();
}
void Flip_members( members_info *members_ptr )
Modified: trunk/daemon/network.c
===================================================================
--- trunk/daemon/network.c 2008-10-13 20:19:18 UTC (rev 401)
+++ trunk/daemon/network.c 2008-10-13 21:00:53 UTC (rev 402)
@@ -746,7 +746,7 @@
{
int i;
- if ( Conf_in_reload_state() ) {
+ if ( Conf_in_reload_singleton_state() ) {
Alarmp(SPLOG_DEBUG, NETWORK, "Net_set_partition: Can not change partition since daemon configuration change in progress\n");
return;
}
Modified: trunk/daemon/prot_body.h
===================================================================
--- trunk/daemon/prot_body.h 2008-10-13 20:19:18 UTC (rev 401)
+++ trunk/daemon/prot_body.h 2008-10-13 21:00:53 UTC (rev 402)
@@ -99,5 +99,8 @@
void Prot_token_hurry();
void Discard_packets();
+void Prot_initiate_conf_reload( void );
+bool Prot_need_conf_reload( void );
+void Prot_clear_need_conf_reload( void );
#endif /* INC_PROT_BODY */
Modified: trunk/daemon/protocol.c
===================================================================
--- trunk/daemon/protocol.c 2008-10-13 20:19:18 UTC (rev 401)
+++ trunk/daemon/protocol.c 2008-10-13 21:00:53 UTC (rev 402)
@@ -70,6 +70,9 @@
static sys_scatter Hurry_pack;
static sp_time Zero_timeout = { 0, 0};
+/* Used to indicate a need to reload configuration at end of current membership */
+static bool Prot_Need_Conf_Reload = FALSE;
+
/* ### Pack: 1 line */
static packet_info Buffered_packets[ARCH_SCATTER_SIZE];
@@ -698,18 +701,54 @@
GlobalStatus.token_rounds = Token_rounds;
}
+/* Provide boolean result of whether the membership system needs to initiate a configuration reload
+ * because it was delayed by an ongoing membership change
+ */
+bool Prot_need_conf_reload( void )
+{
+ return( Prot_Need_Conf_Reload );
+}
+
+void Prot_clear_need_conf_reload( void )
+{
+ Prot_Need_Conf_Reload = FALSE;
+}
+
+/* If we are in OP state for the daemon membership, initiate conf reload to new configuration file and membership,
+ * If we are not, then a membership change is ongoing and we need to let that complete before starting a
+ * new one to load in the new configuration.
+ */
+static void Prot_handle_conf_reload(sys_scatter *scat)
+{
+ if ( Memb_state() == OP ) {
+ Prot_initiate_conf_reload();
+ } else {
+ Prot_Need_Conf_Reload = TRUE;
+ }
+}
+
/* Basic algorithm:
* 1) have configuration code load new conf file and check for modifications to conf.
* 2) If only add/sub of daemons, then initiate membership change with token_loss and return;
* 3) else, then set Conf_reload_state, create singleton partition, and schedule token_loss.
* 4) When membership completes in Discard_packets() cleanup partition and probe for new members.
*/
-static void Prot_handle_conf_reload(sys_scatter *scat)
+void Prot_initiate_conf_reload( void )
{
bool need_memb_partition;
int16 singleton_partition[MAX_PROCS_RING];
int i;
+ if (Memb_state() != OP ) {
+ /* This is a race condition, that the Prot_initiate_conf_reload was scheduled when OP state,
+ * but another membership occured before it was executed.
+ * The membership system will requeue this function when it reaches OP state again.
+ */
+ return;
+ }
+ /* Disable queueing of this function when OP state reached in membership */
+ Prot_clear_need_conf_reload();
+
need_memb_partition = Conf_reload_initiate();
/* Signal all subsystems to update Conf and My strucures */
@@ -729,7 +768,7 @@
}
Net_set_partition(singleton_partition);
- Conf_reload_state_begin();
+ Conf_reload_singleton_state_begin();
}
E_queue( Memb_token_loss, 0, NULL, Zero_timeout );
}
@@ -1233,14 +1272,14 @@
* Remove partition
* Initiate Memb_lookup() to find other daemons
*/
- if( Conf_in_reload_state() ) {
+ if( Conf_in_reload_singleton_state() ) {
/* GOP state equals value 1, but is private declaration in groups.c */
if ( (GlobalStatus.gstate != 1 ) || ( Conf_num_procs( &Reg_membership ) != 1 ) ) {
Alarmp( SPLOG_FATAL, MEMB, "Discard_packets: Failed to reload configuration - gstate: %d and num_procs in membership: %d\n", GlobalStatus.gstate, Conf_num_procs( &Reg_membership) );
}
Net_clear_partition();
E_queue( Memb_lookup_new_members, 0, NULL, Zero_timeout);
- Conf_reload_state_end();
+ Conf_reload_singleton_state_end();
}
/* set variables for next membership */
Modified: trunk/docs/DynamicConfiguration.txt
===================================================================
--- trunk/docs/DynamicConfiguration.txt 2008-10-13 20:19:18 UTC (rev 401)
+++ trunk/docs/DynamicConfiguration.txt 2008-10-13 21:00:53 UTC (rev 402)
@@ -131,9 +131,12 @@
Daemon Algorithm Details:
-When a spmonitor command to reload configuration is received, the
-daemon executes the following steps. (See function
-Prot_handle_conf_reload() for the code).
+When a spmonitor command to reload configuration is received, if
+the daemon is in OP state, the function Prot_initiate_conf_reload()
+executes the following steps to reload the configuration. If the
+daemon is not in OP state (i.e., it is already working on a membership), a
+flag is set and when the membership completes (by moving into OP state)
+the Prot_initiate_conf_reload() function is then executed.
1) Call function in configuration code to load new configuration file
and examine the type of changes to the configuration. If the
More information about the Spread-cvs
mailing list