[Spread-cvs] cvs commit: spread/daemon groups.c groups.h membership.c membership.h protocol.c sess_body.h session.c session.h
jonathan at spread.org
jonathan at spread.org
Fri Oct 24 17:47:38 EDT 2003
jonathan 03/10/24 17:47:38
Modified: daemon groups.c groups.h membership.c membership.h
protocol.c sess_body.h session.c session.h
Log:
Apply patch to fix bug in group_ids that causes incorrect vs_sets to be
delivered.
Patch written by Ryan Caudy from CNDS.
An example from the original diagnosis email:
Daemons A and B are together, and C and D are together, each with at
least one member of group g. Daemons A and B partition away from each
other, as do C and D, but A is merged with C and B is merged with D.
Because of the way group ids are assigned in this situation (see lines
416 and 417 of groups.c, revision 1.10, which this patch removes), the
vs_set delivered to the members of g on A and C will be {members from A,
members from C}, and the vs_set delivered to the members of g on B and D
will be {members from B, members from D}. By the definition of the
vs_set, this is clearly incorrect: A and C (and likewise B and D) were
not together in the previous membership, so their members should not
appear in the same vs_set.
Revision Changes Path
1.11 +74 -29 spread/daemon/groups.c
Index: groups.c
===================================================================
RCS file: /storage/cvsroot/spread/daemon/groups.c,v
retrieving revision 1.10
retrieving revision 1.11
diff -u -r1.10 -r1.11
--- groups.c 17 Jun 2003 01:05:48 -0000 1.10
+++ groups.c 24 Oct 2003 21:47:37 -0000 1.11
@@ -86,6 +86,7 @@
static int Gstate;
static configuration Trans_memb;
+static membership_id Trans_memb_id;
static configuration Reg_memb;
static membership_id Reg_memb_id;
static char Mess_buf[MAX_MESSAGE_BODY_LEN];
@@ -101,6 +102,7 @@
static int Groups_control_down_queue;
static int G_id_is_equal( group_id g1, group_id g2 );
+static void G_print_group_id( group_id g );
static group *G_get_group( char *group_name );
static member *G_get_member( group *grp, char *private_group_name );
static int G_build_memb_buf( group *grp, message_obj *msg,
@@ -320,7 +322,7 @@
for( ; grp != NULL ; grp = nextgroup )
{
nextgroup = sl_next( &GroupsList, &giter );
- if( grp->grp_id.index < 0 )
+ if( grp->changed )
{
/* The group has changed */
/* eliminating partitioned members */
@@ -343,8 +345,15 @@
Num_groups--;
GlobalStatus.num_groups = Num_groups;
}else{
+ Alarm( GROUPS, "G_handle_reg_memb: skipping state transfer for group %s.\n", grp->name );
+ Alarm( DEBUG, "G_handle_reg_memb: changing group_id from: " );
+ G_print_group_id( grp->grp_id );
grp->grp_id.memb_id = Reg_memb_id;
- grp->grp_id.index = 1;
+ grp->grp_id.index = 1;
+ grp->changed = 0;
+ Alarm( DEBUG, " to: " );
+ G_print_group_id( grp->grp_id );
+ Alarm( DEBUG, "\n" );
if( grp->num_local > 0 ){
/* send members regular membership */
@@ -389,7 +398,7 @@
for( ; grp != NULL ; grp = nextgroup )
{
nextgroup = sl_next( &GroupsList, &giter );
- if( grp->grp_id.index < 0 )
+ if( grp->changed )
{
/* The group has changed */
/* eliminating partitioned members */
@@ -411,10 +420,6 @@
sl_remove ( &GroupsList, grp->name, dispose);
Num_groups--;
GlobalStatus.num_groups = Num_groups;
-
- }else{
- grp->grp_id.memb_id = Reg_memb_id;
- grp->grp_id.index = -1;
}
}
}
@@ -520,7 +525,7 @@
}
}
-void G_handle_trans_memb( configuration trans_memb )
+void G_handle_trans_memb( configuration trans_memb, membership_id trans_memb_id )
{
group *grp, *nextgroup;
member *mbr, *nextmember;
@@ -546,7 +551,11 @@
*/
Alarm( GROUPS, "G_handle_trans_memb in GOP\n");
- Trans_memb = trans_memb;
+ Trans_memb = trans_memb;
+ Trans_memb_id = trans_memb_id;
+ Alarm( GROUPS, "G_handle_trans_memb: Received trans memb id of:"
+ " {proc_id: %d "
+ " time: %d}\n", Trans_memb_id.proc_id, Trans_memb_id.time );
giter = sl_getlist( &GroupsList );
grp = (giter)?(group *)giter->data:NULL;
@@ -581,7 +590,15 @@
}
if( !needed ) Sess_dispose_message( mess_link );
}
- grp->grp_id.index = -1;
+ Alarm( DEBUG, "G_handle_trans_memb: changed group %s in GOP, change"
+ " group id from: ", grp->name );
+ G_print_group_id( grp->grp_id );
+ grp->grp_id.memb_id = trans_memb_id;
+ grp->grp_id.index = 1; /* Not technically needed, but not bad, either. */
+ grp->changed = 1;
+ Alarm( DEBUG, " to: " );
+ G_print_group_id( grp->grp_id );
+ Alarm( DEBUG, "\n" );
}
}
@@ -611,7 +628,13 @@
*/
Alarm( GROUPS, "G_handle_trans_memb in GGATHER\n");
- Trans_memb = trans_memb;
+ Trans_memb = trans_memb;
+ Trans_memb_id = trans_memb_id; /* Need this because we deliver the transitional again if we complete
+ * the state exchange during GGT. */
+ Alarm( GROUPS, "G_handle_trans_memb: Received trans memb id of:"
+ " {proc_id: %d "
+ " time: %d}\n", Trans_memb_id.proc_id, Trans_memb_id.time );
+
giter = sl_getlist( &GroupsList );
grp = (giter)?(group *)giter->data:NULL;
for( ; grp != NULL ; grp = nextgroup )
@@ -640,7 +663,7 @@
GlobalStatus.num_groups = Num_groups;
}else if( group_changed ) {
- grp->grp_id.index = -1;
+ grp->changed = 1;
}
}
@@ -694,7 +717,7 @@
* Notify all local members of a regular membership caused by join
* else if group is changed and coming from alive daemon then
* Insert to group as new
- * Decrement Grp_id
+ * Increment Grp_id
* if there are local members then
* build a membership with all members, and vs set with all established members
* notify all local established members with that membership (caused by network)
@@ -704,7 +727,7 @@
* mark new member as established
* else (if new member is coming from a partitioned daemon then)
* Insert to group as partitioned
- * if Grp_id.index > 0 then Grp_id.index = -1 else Grp_id.index-- (marking group as changed)
+ * Increment Grp_id, and mark group as changed if not already done
* if there are local members then
* build a membership with all members and vs set with all established members
* notify all local members with that membership (caused by network)
@@ -733,10 +756,17 @@
sl_set_compare( &new_grp->MembersList,
G_member_recordcompare,
G_member_keycompare);
- new_grp->grp_id.memb_id = Reg_memb_id;
- new_grp->grp_id.index = 0;
- if( Gstate == GOP ) new_grp->grp_id.index = 0;
- else new_grp->grp_id.index = -1;
+ if( Gstate == GOP) {
+ new_grp->changed = 0;
+ new_grp->grp_id.memb_id = Reg_memb_id;
+ } else { /* Gtrans */
+ new_grp->changed = 1;
+ new_grp->grp_id.memb_id = Trans_memb_id;
+ }
+ new_grp->grp_id.index = 0; /* 0 because we will definitely increment it */
+ Alarm( DEBUG, "G_handle_join: New group added with group id: " );
+ G_print_group_id( new_grp->grp_id );
+ Alarm( DEBUG, "\n" );
new_grp->num_members = 0;
new_grp->num_local = 0;
@@ -778,11 +808,11 @@
if( Gstate == GOP || ( Conf_id_in_conf( &Trans_memb, new_p.id ) != -1 ) )
{
/* new member is coming from alive daemon */
- if( grp->grp_id.index >= 0 )
+ if( !grp->changed )
{
/* group is unchanged */
/* Increment group id */
- grp->grp_id.index++;
+ grp->grp_id.index++;
/* Notify local members */
if( grp->num_local > 0 )
@@ -824,8 +854,8 @@
/* mark new member as new */
new_mbr->status = NEW_MEMBER;
- /* Decrement group id */
- grp->grp_id.index--;
+ /* Increment group id */
+ grp->grp_id.index++;
if( grp->num_local > 0 )
{
@@ -899,11 +929,10 @@
/* mark new member as partitioned member */
new_mbr->status = PARTITIONED_MEMBER;
/*
- * if Grp_id.index > 0, (Grp_id.index = -1), else Grp_id.index--
* (marking group as changed - it might be already )
*/
- if( grp->grp_id.index > 0 ) grp->grp_id.index = -1;
- else grp->grp_id.index--;
+ if( !grp->changed ) grp->changed = 1;
+ grp->grp_id.index++;
if( grp->num_local > 0 )
{
/* build a membership with all members, and vs set with all non-partitioned members */
@@ -1092,7 +1121,7 @@
return;
}
- if( grp->grp_id.index < 0 )
+ if( grp->changed )
{
if( Gstate != GTRANS ) Alarm( EXIT, "G_handle_leave: changed group in GOP\n");
/*
@@ -1260,7 +1289,7 @@
continue;
}
- if( grp->grp_id.index < 0 )
+ if( grp->changed )
{
if( Gstate != GTRANS ) Alarm( EXIT, "G_handle_kill: changed group in GOP\n");
/*
@@ -1433,7 +1462,10 @@
GlobalStatus.gstate = Gstate;
}else{
Gstate = GOP;
- G_handle_trans_memb( Trans_memb );
+ /* We do want to deliver a transitional signal to any
+ * groups that are going to get a CAUSED_BY_NETWORK
+ * after our Reg_memb is delivered. */
+ G_handle_trans_memb( Trans_memb, Trans_memb_id );
}
Message_Dec_Refcount(msg);
@@ -1580,13 +1612,20 @@
/* now our orig_grp is almost updated */
grp = orig_grp;
- if( grp->grp_id.index == -1 ) changed = 1;
+ if( grp->changed ) changed = 1;
if( !changed ) continue;
/* the group has changed */
+ Alarm( GROUPS, "G_compute_and_notify: completed group %s.\n", grp->name );
+ Alarm( DEBUG, "G_compute_and_notify: changing group id from: " );
+ G_print_group_id( grp->grp_id );
grp->grp_id.memb_id = Reg_memb_id;
grp->grp_id.index = 1;
+ grp->changed = 0;
+ Alarm( DEBUG, " to: " );
+ G_print_group_id( grp->grp_id );
+ Alarm( DEBUG, "\n" );
if( grp->num_local > 0 )
{
@@ -2286,4 +2325,10 @@
group *grp = G_get_group( group_name );
if( grp == NULL ) return 0;
return grp->num_local;
+}
+
+static void G_print_group_id( group_id g )
+{
+ Alarm( DEBUG, "{Proc ID: %d, Time: %d, Index: %d}",
+ g.memb_id.proc_id, g.memb_id.time, g.index );
}
1.4 +2 -1 spread/daemon/groups.h
Index: groups.h
===================================================================
RCS file: /storage/cvsroot/spread/daemon/groups.h,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- groups.h 22 Sep 2002 02:56:52 -0000 1.3
+++ groups.h 24 Oct 2003 21:47:37 -0000 1.4
@@ -51,7 +51,8 @@
void G_init(void);
void G_handle_reg_memb( configuration reg_memb, membership_id reg_memb_id );
-void G_handle_trans_memb( configuration trans_memb );
+void G_handle_trans_memb( configuration trans_memb,
+ membership_id trans_memb_id );
void G_handle_join( char *private_group_name, char *group_name );
void G_handle_leave( char *private_group_name, char *group_name );
void G_handle_kill( char *private_group_name );
1.3 +54 -8 spread/daemon/membership.c
Index: membership.c
===================================================================
RCS file: /storage/cvsroot/spread/daemon/membership.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- membership.c 22 Sep 2002 02:56:52 -0000 1.2
+++ membership.c 24 Oct 2003 21:47:37 -0000 1.3
@@ -70,6 +70,7 @@
typedef struct dummy_ring_info{
membership_id memb_id;
+ int32 trans_time;
int32 aru;
int32 highest_seq;
int32 num_holes;
@@ -82,6 +83,10 @@
static configuration Future_membership;
static membership_id Future_membership_id;
+static membership_id Trans_memb_id;
+static int32 F_trans_memb_time;
+static int32 Last_time_used;
+
static int State;
static int Token_alive;
static proc My;
@@ -190,6 +195,7 @@
Conf_append_id_to_seg( &Membership.segments[My.seg_index], My.id);
Membership_id.proc_id = My.id;
Membership_id.time = -1;
+ Last_time_used = Membership_id.time;
Transitional = 0;
Reg_membership = Membership;
@@ -213,6 +219,11 @@
return( Membership_id );
}
+membership_id Memb_trans_id()
+{
+ return( Trans_memb_id );
+}
+
int Memb_is_equal( membership_id m1, membership_id m2 )
{
if( m1.proc_id == m2.proc_id && m1.time == m2.time )
@@ -1289,6 +1300,7 @@
holes_procs_ptr = (int32 *)&rg_info_buf[num_bytes];
rg_info->memb_id = Membership_id;
+ rg_info->trans_time = 0;
rg_info->aru = Aru;
rg_info->highest_seq = Highest_seq;
@@ -1566,12 +1578,13 @@
num_bytes += sizeof(ring_info);
(*new_num_rings)++;
- new_rg_info->memb_id = Membership_id;
- new_rg_info->num_holes = 0;
- new_holes_procs_ptr = (int32 *)&rg_info_buf[num_bytes];
+ new_rg_info->memb_id = Membership_id;
+ new_rg_info->trans_time = 0;
+ new_rg_info->num_holes = 0;
+ new_holes_procs_ptr = (int32 *)&rg_info_buf[num_bytes];
- new_rg_info->aru = Aru;
- new_rg_info->highest_seq = Highest_seq;
+ new_rg_info->aru = Aru;
+ new_rg_info->highest_seq = Highest_seq;
if( my_rg_info == 0 )
{
@@ -1760,6 +1773,7 @@
proc p;
int ret;
int i;
+ int32 memb_time = 0;
num_bytes = 0;
@@ -1785,7 +1799,6 @@
Flip_rings( rings_buf );
}
-
form_token->proc_id = My.id;
/* validity check */
@@ -1799,9 +1812,19 @@
if( m_info->num_members == 1 )
{
+ /* The time in memb_time is saved to use on one of the
+ * Trans_memb_ids (there is one per Trans_membership view).
+ * The time for the real membership id will be one past
+ * the one in memb_time, because I want time to go forward.
+ */
+ memb_time = E_get_time().sec;
+ if( memb_time <= Last_time_used )
+ memb_time = Last_time_used + 1;
+ Last_time_used = memb_time;
+
/* I am future leader, fill membership_id */
m_id_info->proc_id = My.id;
- m_id_info->time = E_get_time().sec;
+ m_id_info->time = ++Last_time_used;
}
/* build Future membership and Future membership id */
@@ -1840,7 +1863,8 @@
}
if (my_rg_info == NULL) {
- Alarm(EXIT, "Read_form2: num_rings = %d, num_bytes = %d, Memb_id = (%d %d)\n", *num_rings, num_bytes, Membership_id.proc_id, Membership_id.time);
+ Alarm(EXIT, "Read_form2: num_rings = %d, num_bytes = %d, Memb_id = (%d %d)\n",
+ *num_rings, num_bytes, Membership_id.proc_id, Membership_id.time);
}
Highest_seq = my_rg_info->highest_seq;
@@ -1868,6 +1892,24 @@
my_holes_procs_ptr++;
}
+ /* The token circulates in conf order, which also defines the order
+ * by which we choose "leaders." So, if noone else has set the id
+ * for my ring, I get to, and I'll be leader. */
+ if( !my_rg_info->trans_time )
+ {
+ /* memb_time could be already set, if this daemon is the
+ * future leader */
+ if( !memb_time )
+ {
+ memb_time = E_get_time().sec;
+ if( memb_time <= Last_time_used )
+ memb_time = Last_time_used + 1;
+ Last_time_used = memb_time;
+ }
+ my_rg_info->trans_time = memb_time;
+ }
+ F_trans_memb_time = my_rg_info->trans_time;
+
send_scat.num_elements = 2;
send_scat.elements[0].buf = scat->elements[0].buf;
send_scat.elements[0].len = sizeof(token_header);
@@ -1958,6 +2000,9 @@
}
}
+ Trans_memb_id.proc_id = Conf_leader( &Trans_membership );
+ Trans_memb_id.time = F_trans_memb_time;
+
Commit_membership.num_segments = Cn.num_segments;
for( i=0; i < Cn.num_segments; i++ )
{
@@ -2066,6 +2111,7 @@
ring_info_ptr->memb_id.proc_id = Flip_int32( ring_info_ptr->memb_id.proc_id );
ring_info_ptr->memb_id.time = Flip_int32( ring_info_ptr->memb_id.time );
+ ring_info_ptr->trans_time = Flip_int32( ring_info_ptr->trans_time );
ring_info_ptr->aru = Flip_int32( ring_info_ptr->aru );
ring_info_ptr->highest_seq = Flip_int32( ring_info_ptr->highest_seq );
ring_info_ptr->num_holes = Flip_int32( ring_info_ptr->num_holes );
1.3 +1 -0 spread/daemon/membership.h
Index: membership.h
===================================================================
RCS file: /storage/cvsroot/spread/daemon/membership.h,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- membership.h 22 Sep 2002 02:56:52 -0000 1.2
+++ membership.h 24 Oct 2003 21:47:37 -0000 1.3
@@ -50,6 +50,7 @@
void Memb_init();
configuration *Memb_active_ptr();
membership_id Memb_id();
+membership_id Memb_trans_id();
int Memb_is_equal( membership_id m1, membership_id m2 );
int32 Memb_state();
int Memb_token_alive();
1.6 +1 -1 spread/daemon/protocol.c
Index: protocol.c
===================================================================
RCS file: /storage/cvsroot/spread/daemon/protocol.c,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -r1.5 -r1.6
--- protocol.c 22 Sep 2002 02:56:52 -0000 1.5
+++ protocol.c 24 Oct 2003 21:47:37 -0000 1.6
@@ -1037,7 +1037,7 @@
/* calculate and deliver transitional membership */
Memb_transitional();
- Sess_deliver_trans_memb( Trans_membership );
+ Sess_deliver_trans_memb( Trans_membership, Memb_trans_id() );
/* deliver all packets */
found_hole = 0;
1.4 +2 -0 spread/daemon/sess_body.h
Index: sess_body.h
===================================================================
RCS file: /storage/cvsroot/spread/daemon/sess_body.h,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- sess_body.h 22 Sep 2002 02:56:52 -0000 1.3
+++ sess_body.h 24 Oct 2003 21:47:37 -0000 1.4
@@ -77,6 +77,8 @@
typedef struct dummy_group {
char name[MAX_GROUP_NAME];
group_id grp_id;
+ int changed; /* No longer use negative grp_id.index to flag
+ * groups that have changed. */
int num_members;
Skiplist MembersList;
int num_local;
1.15 +2 -2 spread/daemon/session.c
Index: session.c
===================================================================
RCS file: /storage/cvsroot/spread/daemon/session.c,v
retrieving revision 1.14
retrieving revision 1.15
diff -u -r1.14 -r1.15
--- session.c 17 Jun 2003 19:00:10 -0000 1.14
+++ session.c 24 Oct 2003 21:47:37 -0000 1.15
@@ -1908,9 +1908,9 @@
G_handle_reg_memb( reg_memb, reg_memb_id );
}
-void Sess_deliver_trans_memb( configuration trans_memb )
+void Sess_deliver_trans_memb( configuration trans_memb, membership_id trans_memb_id )
{
- G_handle_trans_memb( trans_memb );
+ G_handle_trans_memb( trans_memb, trans_memb_id );
}
static void Sess_handle_join( message_link *mess_link )
1.4 +1 -1 spread/daemon/session.h
Index: session.h
===================================================================
RCS file: /storage/cvsroot/spread/daemon/session.h,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- session.h 22 Sep 2002 02:56:52 -0000 1.3
+++ session.h 24 Oct 2003 21:47:37 -0000 1.4
@@ -82,7 +82,7 @@
void Sess_unblock_user(int xxx);
void Sess_deliver_message( message_link *mess_link );
void Sess_deliver_reg_memb( configuration reg_memb, membership_id reg_memb_id );
-void Sess_deliver_trans_memb( configuration trans_memb );
+void Sess_deliver_trans_memb( configuration trans_memb, membership_id trans_memb_id );
void Flip_mess( message_header *head_ptr );
void Sess_write_scat( int ses, message_link *mess_link, int *needed );
void Sess_write(int ses, message_link *mess_link, int *needed );
More information about the Spread-cvs
mailing list