Here is how you can do it with the 'data.table' package:

>  require(data.table)
>  df1 <- data.table(df1)
>  result <- df1[
+ , list(Debut = Debut[1L]  # first entry
+ , Fin = Fin[1L]
+ )
+ , keyby = c("Matricule", "Nom", "Sexe", "DateNaissance", "contrat",
"Pays")
+ ]
> result
    Matricule    Nom     Sexe DateNaissance                contrat
Pays      Debut        Fin
 1:         1  VERON  Féminin    02/09/1935             CDI commun
France 24/01/1995 30/04/1997
 2:         6 BENARD Masculin    01/04/1935             CDI commun
France 13/03/1995 30/06/1995
 3:         6 BENARD Masculin    01/04/1935             CDI commun
Philippines 02/02/1995 27/02/1995
 4:         8 DALNIC  Féminin    19/02/1940             CDI commun
France 24/01/1995 30/06/1995
 5:         8 DALNIC  Féminin    19/02/1940             CDI commun
Martinique 01/09/1995 30/06/1997
 6:       934  FORNI Masculin    10/07/1961 CDD détaché ext. Cirad
Cameroun 26/01/1995 30/06/1996
 7:       934  FORNI Masculin    10/07/1961    CDI Détachés Autres
Congo 01/09/2004 31/12/2004
 8:       934  FORNI Masculin    10/07/1961    CDI Détachés Autres
France 01/09/2001 01/09/2001
 9:       934  FORNI Masculin    10/07/1961             CDI commun
Congo 05/09/2012 31/12/2012
10:       934  FORNI Masculin    10/07/1961             CDI commun
Gabon 01/09/2007 31/12/2008
>
>


On Fri, Jul 12, 2013 at 3:45 PM, Arnaud Michel <michel.arn...@cirad.fr>wrote:

> Hello
>
> I have the following problem : group the lines of a dataframe when no
> information change (Matricule, Nom, Sexe, DateNaissance, Contrat, Pays) and
> when the value of Debut of lines i = value Fin of lines i-1
> I can obtain it with a do loop. Is it possible to avoid the loop ?
>
> The dataframe initial is df1
> dput(df1)
> structure(list(Matricule = c(1L, 1L, 1L, 6L, 6L, 6L, 6L, 6L,
> 6L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 934L, 934L, 934L, 934L,
> 934L, 934L, 934L, 934L, 934L, 934L, 934L, 934L, 934L, 934L, 934L,
> 934L, 934L, 934L, 934L, 934L, 934L, 934L, 934L, 934L, 934L, 934L,
> 934L, 934L, 934L, 934L), Nom = c("VERON", "VERON", "VERON", "BENARD",
> "BENARD", "BENARD", "BENARD", "BENARD", "BENARD", "DALNIC", "DALNIC",
> "DALNIC", "DALNIC", "DALNIC", "DALNIC", "DALNIC", "DALNIC", "DALNIC",
> "FORNI", "FORNI", "FORNI", "FORNI", "FORNI", "FORNI", "FORNI",
> "FORNI", "FORNI", "FORNI", "FORNI", "FORNI", "FORNI", "FORNI",
> "FORNI", "FORNI", "FORNI", "FORNI", "FORNI", "FORNI", "FORNI",
> "FORNI", "FORNI", "FORNI", "FORNI", "FORNI", "FORNI", "FORNI",
> "FORNI", "FORNI"), Sexe = c("Féminin", "Féminin", "Féminin",
> "Masculin", "Masculin", "Masculin", "Masculin", "Masculin", "Masculin",
> "Féminin", "Féminin", "Féminin", "Féminin", "Féminin", "Féminin",
> "Féminin", "Féminin", "Féminin", "Masculin", "Masculin", "Masculin",
> "Masculin", "Masculin", "Masculin", "Masculin", "Masculin", "Masculin",
> "Masculin", "Masculin", "Masculin", "Masculin", "Masculin", "Masculin",
> "Masculin", "Masculin", "Masculin", "Masculin", "Masculin", "Masculin",
> "Masculin", "Masculin", "Masculin", "Masculin", "Masculin", "Masculin",
> "Masculin", "Masculin", "Masculin"), DateNaissance = c("02/09/1935",
> "02/09/1935", "02/09/1935", "01/04/1935", "01/04/1935", "01/04/1935",
> "01/04/1935", "01/04/1935", "01/04/1935", "19/02/1940", "19/02/1940",
> "19/02/1940", "19/02/1940", "19/02/1940", "19/02/1940", "19/02/1940",
> "19/02/1940", "19/02/1940", "10/07/1961", "10/07/1961", "10/07/1961",
> "10/07/1961", "10/07/1961", "10/07/1961", "10/07/1961", "10/07/1961",
> "10/07/1961", "10/07/1961", "10/07/1961", "10/07/1961", "10/07/1961",
> "10/07/1961", "10/07/1961", "10/07/1961", "10/07/1961", "10/07/1961",
> "10/07/1961", "10/07/1961", "10/07/1961", "10/07/1961", "10/07/1961",
> "10/07/1961", "10/07/1961", "10/07/1961", "10/07/1961", "10/07/1961",
> "10/07/1961", "10/07/1961"), contrat = c("CDI commun", "CDI commun",
> "CDI commun", "CDI commun", "CDI commun", "CDI commun", "CDI commun",
> "CDI commun", "CDI commun", "CDI commun", "CDI commun", "CDI commun",
> "CDI commun", "CDI commun", "CDI commun", "CDI commun", "CDI commun",
> "CDI commun", "CDD détaché ext. Cirad", "CDD détaché ext. Cirad",
> "CDD détaché ext. Cirad", "CDD détaché ext. Cirad", "CDD détaché ext.
> Cirad",
> "CDD détaché ext. Cirad", "CDD détaché ext. Cirad", "CDD détaché ext.
> Cirad",
> "CDD détaché ext. Cirad", "CDI Détachés Autres", "CDI Détachés Autres",
> "CDI Détachés Autres", "CDI Détachés Autres", "CDI Détachés Autres",
> "CDI Détachés Autres", "CDI Détachés Autres", "CDI Détachés Autres",
> "CDI Détachés Autres", "CDI Détachés Autres", "CDI Détachés Autres",
> "CDI Détachés Autres", "CDI Détachés Autres", "CDI commun", "CDI commun",
> "CDI commun", "CDI commun", "CDI commun", "CDI commun", "CDI commun",
> "CDI commun"), Pays = c("France", "France", "France", "Philippines",
> "Philippines", "Philippines", "France", "France", "France", "France",
> "France", "Martinique", "Martinique", "Martinique", "Martinique",
> "Martinique", "Martinique", "Martinique", "Cameroun", "Cameroun",
> "Cameroun", "Cameroun", "Cameroun", "Cameroun", "Cameroun", "Cameroun",
> "Cameroun", "France", "France", "France", "France", "France",
> "France", "France", "Congo", "Congo", "Congo", "Congo", "Congo",
> "Congo", "Gabon", "Gabon", "Gabon", "Gabon", "Gabon", "Gabon",
> "Congo", "Congo"), Debut = c("24/01/1995", "01/05/1997", "31/12/1997",
> "02/02/1995", "28/02/1995", "01/03/1995", "13/03/1995", "01/01/1996",
> "31/01/1996", "24/01/1995", "01/07/1995", "01/09/1995", "01/07/1997",
> "01/01/1998", "01/08/1998", "01/01/2000", "17/01/2000", "29/02/2000",
> "26/01/1995", "01/07/1996", "16/09/1997", "01/01/1998", "01/07/1998",
> "04/11/1999", "01/01/2001", "01/04/2001", "31/08/2001", "01/09/2001",
> "02/09/2001", "01/12/2001", "01/02/2003", "01/04/2003", "01/01/2004",
> "01/03/2004", "01/09/2004", "01/01/2005", "01/04/2005", "28/10/2006",
> "01/01/2007", "01/04/2007", "01/09/2007", "01/01/2009", "01/04/2009",
> "01/01/2010", "01/01/2011", "01/04/2011", "05/09/2012", "01/01/2013"
> ), Fin = c("30/04/1997", "30/12/1997", "31/12/1997", "27/02/1995",
> "28/02/1995", "12/03/1995", "30/06/1995", "30/01/1996", "31/01/1996",
> "30/06/1995", "31/08/1995", "30/06/1997", "31/12/1997", "31/07/1998",
> "31/12/1999", "16/01/2000", "28/02/2000", "29/02/2000", "30/06/1996",
> "15/09/1997", "31/12/1997", "30/06/1998", "03/11/1999", "31/12/2000",
> "31/03/2001", "30/08/2001", "31/08/2001", "01/09/2001", "30/11/2001",
> "31/01/2003", "31/03/2003", "31/12/2003", "29/02/2004", "31/08/2004",
> "31/12/2004", "31/03/2005", "27/10/2006", "31/12/2006", "31/03/2007",
> "31/08/2007", "31/12/2008", "31/03/2009", "31/12/2009", "31/12/2010",
> "31/03/2011", "04/09/2012", "31/12/2012", "31/12/4712")), .Names =
> c("Matricule",
> "Nom", "Sexe", "DateNaissance", "contrat", "Pays", "Debut", "Fin"
> ), class = "data.frame", row.names = c(NA, -48L))
>
> The dataframe to be obtained is df2
> dput(df2)
> structure(list(Mat = c(1L, 6L, 6L, 6L, 8L, 8L, 934L, 934L, 934L,
> 934L, 934L), Nom = c("VERON", "BENARD", "BENARD", "BENARD", "DALNIC",
> "DALNIC", "FORNI", "FORNI", "FORNI", "FORNI", "FORNI"), Sexe = c("Féminin",
> "Masculin", "Masculin", "Masculin", "Féminin", "Féminin", "Masculin",
> "Masculin", "Masculin", "Masculin", "Masculin"), DateNaissance =
> c("02/09/1935",
> "01/04/1935", "01/04/1935", "01/04/1935", "19/02/1940", "19/02/1940",
> "10/07/1961", "10/07/1961", "10/07/1961", "10/07/1961", "10/07/1961"
> ), contrat = c("CDI commun", "CDI commun", "CDI commun", "CDI commun",
> "CDI commun", "CDI commun", "CDD détaché ext. Cirad", "CDI Détachés
> Autres",
> "CDI Détachés Autres", "CDI commun", "CDI commun"), Pays = c("France",
> "Philippines", "France", "France", "France", "Martinique", "Cameroun",
> "France", "Congo", "Gabon", "Congo"), Debut = c("24/01/1995",
> "02/02/1995", "13/03/1995", "01/01/1996", "24/01/1995", "01/09/1995",
> "26/01/1995", "01/09/2001", "01/09/2004", "01/09/2007", "05/09/2012"
> ), Fin = c("31/12/1997", "12/03/1995", "30/06/1995", "31/01/1996",
> "31/08/1995", "29/02/2000", "31/08/2001", "31/08/2004", "31/08/2007",
> "04/09/2012", "31/12/4712")), .Names = c("Mat", "Nom", "Sexe",
> "DateNaissance", "contrat", "Pays", "Debut", "Fin"), class = "data.frame",
> row.names = c(NA,
> -11L))
>
> Thank you for your help
>
> --
> Michel ARNAUD
> Chargé de mission auprès du DRH
> DGDRD-Drh - TA 174/04
> Av Agropolis 34398 Montpellier cedex 5
> tel : 04.67.61.75.38
> fax : 04.67.61.57.87
> port: 06.47.43.55.31
>
> ______________________________**________________
> R-help@r-project.org mailing list
> https://stat.ethz.ch/mailman/**listinfo/r-help<https://stat.ethz.ch/mailman/listinfo/r-help>
> PLEASE do read the posting guide http://www.R-project.org/**
> posting-guide.html <http://www.R-project.org/posting-guide.html>
> and provide commented, minimal, self-contained, reproducible code.
>



-- 
Jim Holtman
Data Munger Guru

What is the problem that you are trying to solve?
Tell me what you want to do, not how you want to do it.

        [[alternative HTML version deleted]]

______________________________________________
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.

Reply via email to