Hello

I have the following problem : group the lines of a dataframe when no information change (Matricule, Nom, Sexe, DateNaissance, Contrat, Pays) and when the value of Debut of lines i = value Fin of lines i-1
I can obtain it with a do loop. Is it possible to avoid the loop ?

The dataframe initial is df1
dput(df1)
structure(list(Matricule = c(1L, 1L, 1L, 6L, 6L, 6L, 6L, 6L,
6L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 934L, 934L, 934L, 934L,
934L, 934L, 934L, 934L, 934L, 934L, 934L, 934L, 934L, 934L, 934L,
934L, 934L, 934L, 934L, 934L, 934L, 934L, 934L, 934L, 934L, 934L,
934L, 934L, 934L, 934L), Nom = c("VERON", "VERON", "VERON", "BENARD",
"BENARD", "BENARD", "BENARD", "BENARD", "BENARD", "DALNIC", "DALNIC",
"DALNIC", "DALNIC", "DALNIC", "DALNIC", "DALNIC", "DALNIC", "DALNIC",
"FORNI", "FORNI", "FORNI", "FORNI", "FORNI", "FORNI", "FORNI",
"FORNI", "FORNI", "FORNI", "FORNI", "FORNI", "FORNI", "FORNI",
"FORNI", "FORNI", "FORNI", "FORNI", "FORNI", "FORNI", "FORNI",
"FORNI", "FORNI", "FORNI", "FORNI", "FORNI", "FORNI", "FORNI",
"FORNI", "FORNI"), Sexe = c("Féminin", "Féminin", "Féminin",
"Masculin", "Masculin", "Masculin", "Masculin", "Masculin", "Masculin",
"Féminin", "Féminin", "Féminin", "Féminin", "Féminin", "Féminin",
"Féminin", "Féminin", "Féminin", "Masculin", "Masculin", "Masculin",
"Masculin", "Masculin", "Masculin", "Masculin", "Masculin", "Masculin",
"Masculin", "Masculin", "Masculin", "Masculin", "Masculin", "Masculin",
"Masculin", "Masculin", "Masculin", "Masculin", "Masculin", "Masculin",
"Masculin", "Masculin", "Masculin", "Masculin", "Masculin", "Masculin",
"Masculin", "Masculin", "Masculin"), DateNaissance = c("02/09/1935",
"02/09/1935", "02/09/1935", "01/04/1935", "01/04/1935", "01/04/1935",
"01/04/1935", "01/04/1935", "01/04/1935", "19/02/1940", "19/02/1940",
"19/02/1940", "19/02/1940", "19/02/1940", "19/02/1940", "19/02/1940",
"19/02/1940", "19/02/1940", "10/07/1961", "10/07/1961", "10/07/1961",
"10/07/1961", "10/07/1961", "10/07/1961", "10/07/1961", "10/07/1961",
"10/07/1961", "10/07/1961", "10/07/1961", "10/07/1961", "10/07/1961",
"10/07/1961", "10/07/1961", "10/07/1961", "10/07/1961", "10/07/1961",
"10/07/1961", "10/07/1961", "10/07/1961", "10/07/1961", "10/07/1961",
"10/07/1961", "10/07/1961", "10/07/1961", "10/07/1961", "10/07/1961",
"10/07/1961", "10/07/1961"), contrat = c("CDI commun", "CDI commun",
"CDI commun", "CDI commun", "CDI commun", "CDI commun", "CDI commun",
"CDI commun", "CDI commun", "CDI commun", "CDI commun", "CDI commun",
"CDI commun", "CDI commun", "CDI commun", "CDI commun", "CDI commun",
"CDI commun", "CDD détaché ext. Cirad", "CDD détaché ext. Cirad",
"CDD détaché ext. Cirad", "CDD détaché ext. Cirad", "CDD détaché ext. Cirad", "CDD détaché ext. Cirad", "CDD détaché ext. Cirad", "CDD détaché ext. Cirad",
"CDD détaché ext. Cirad", "CDI Détachés Autres", "CDI Détachés Autres",
"CDI Détachés Autres", "CDI Détachés Autres", "CDI Détachés Autres",
"CDI Détachés Autres", "CDI Détachés Autres", "CDI Détachés Autres",
"CDI Détachés Autres", "CDI Détachés Autres", "CDI Détachés Autres",
"CDI Détachés Autres", "CDI Détachés Autres", "CDI commun", "CDI commun",
"CDI commun", "CDI commun", "CDI commun", "CDI commun", "CDI commun",
"CDI commun"), Pays = c("France", "France", "France", "Philippines",
"Philippines", "Philippines", "France", "France", "France", "France",
"France", "Martinique", "Martinique", "Martinique", "Martinique",
"Martinique", "Martinique", "Martinique", "Cameroun", "Cameroun",
"Cameroun", "Cameroun", "Cameroun", "Cameroun", "Cameroun", "Cameroun",
"Cameroun", "France", "France", "France", "France", "France",
"France", "France", "Congo", "Congo", "Congo", "Congo", "Congo",
"Congo", "Gabon", "Gabon", "Gabon", "Gabon", "Gabon", "Gabon",
"Congo", "Congo"), Debut = c("24/01/1995", "01/05/1997", "31/12/1997",
"02/02/1995", "28/02/1995", "01/03/1995", "13/03/1995", "01/01/1996",
"31/01/1996", "24/01/1995", "01/07/1995", "01/09/1995", "01/07/1997",
"01/01/1998", "01/08/1998", "01/01/2000", "17/01/2000", "29/02/2000",
"26/01/1995", "01/07/1996", "16/09/1997", "01/01/1998", "01/07/1998",
"04/11/1999", "01/01/2001", "01/04/2001", "31/08/2001", "01/09/2001",
"02/09/2001", "01/12/2001", "01/02/2003", "01/04/2003", "01/01/2004",
"01/03/2004", "01/09/2004", "01/01/2005", "01/04/2005", "28/10/2006",
"01/01/2007", "01/04/2007", "01/09/2007", "01/01/2009", "01/04/2009",
"01/01/2010", "01/01/2011", "01/04/2011", "05/09/2012", "01/01/2013"
), Fin = c("30/04/1997", "30/12/1997", "31/12/1997", "27/02/1995",
"28/02/1995", "12/03/1995", "30/06/1995", "30/01/1996", "31/01/1996",
"30/06/1995", "31/08/1995", "30/06/1997", "31/12/1997", "31/07/1998",
"31/12/1999", "16/01/2000", "28/02/2000", "29/02/2000", "30/06/1996",
"15/09/1997", "31/12/1997", "30/06/1998", "03/11/1999", "31/12/2000",
"31/03/2001", "30/08/2001", "31/08/2001", "01/09/2001", "30/11/2001",
"31/01/2003", "31/03/2003", "31/12/2003", "29/02/2004", "31/08/2004",
"31/12/2004", "31/03/2005", "27/10/2006", "31/12/2006", "31/03/2007",
"31/08/2007", "31/12/2008", "31/03/2009", "31/12/2009", "31/12/2010",
"31/03/2011", "04/09/2012", "31/12/2012", "31/12/4712")), .Names = c("Matricule",
"Nom", "Sexe", "DateNaissance", "contrat", "Pays", "Debut", "Fin"
), class = "data.frame", row.names = c(NA, -48L))

The dataframe to be obtained is df2
dput(df2)
structure(list(Mat = c(1L, 6L, 6L, 6L, 8L, 8L, 934L, 934L, 934L,
934L, 934L), Nom = c("VERON", "BENARD", "BENARD", "BENARD", "DALNIC",
"DALNIC", "FORNI", "FORNI", "FORNI", "FORNI", "FORNI"), Sexe = c("Féminin",
"Masculin", "Masculin", "Masculin", "Féminin", "Féminin", "Masculin",
"Masculin", "Masculin", "Masculin", "Masculin"), DateNaissance = c("02/09/1935",
"01/04/1935", "01/04/1935", "01/04/1935", "19/02/1940", "19/02/1940",
"10/07/1961", "10/07/1961", "10/07/1961", "10/07/1961", "10/07/1961"
), contrat = c("CDI commun", "CDI commun", "CDI commun", "CDI commun",
"CDI commun", "CDI commun", "CDD détaché ext. Cirad", "CDI Détachés Autres",
"CDI Détachés Autres", "CDI commun", "CDI commun"), Pays = c("France",
"Philippines", "France", "France", "France", "Martinique", "Cameroun",
"France", "Congo", "Gabon", "Congo"), Debut = c("24/01/1995",
"02/02/1995", "13/03/1995", "01/01/1996", "24/01/1995", "01/09/1995",
"26/01/1995", "01/09/2001", "01/09/2004", "01/09/2007", "05/09/2012"
), Fin = c("31/12/1997", "12/03/1995", "30/06/1995", "31/01/1996",
"31/08/1995", "29/02/2000", "31/08/2001", "31/08/2004", "31/08/2007",
"04/09/2012", "31/12/4712")), .Names = c("Mat", "Nom", "Sexe",
"DateNaissance", "contrat", "Pays", "Debut", "Fin"), class = "data.frame", row.names = c(NA,
-11L))

Thank you for your help

--
Michel ARNAUD
Chargé de mission auprès du DRH
DGDRD-Drh - TA 174/04
Av Agropolis 34398 Montpellier cedex 5
tel : 04.67.61.75.38
fax : 04.67.61.57.87
port: 06.47.43.55.31

______________________________________________
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.

Reply via email to