Georg, You are very close. Try this ...
# make Debitor a character variable in the data frame ds_example$Debitor <- as.character(ds_example$Debitor) duplicates <- duplicated(ds_example$Debitor) duplicated_debitors <- unique(ds_example$Debitor[duplicates]) ds_duplicates <- ds_example[ds_example$Debitor %in% duplicated_debitors, ] Jean On Mon, May 23, 2016 at 8:28 AM, <g.maub...@weinwolf.de> wrote: > # Hi All, > # > # I have the following data frame (example): > > Debitor <- c("968691", "968691", "968691", > "A04046", "A04046", > "L0006", "L0006", "L0006", > "L0023", "L0023", > "L0056", "L0056", > "L0094", "L0094", "L0094", > "L0124", "L0124", > "L0143", > "L0170", > "13459", > "473908", > "394704", > "4711", > "4712", > "4713") > Debitor <- as.character(Debitor) > var1 <- c(11, 12, 13, > 14, 14, > 12, 13, 14, > 10, 11, > 12, 12, > 12, 12, 12, > 15, 17, > 11, > 14, > 12, > 17, > 13, > 15, > 16, > 11) > ds_example <- data.frame(Debitor, var1) > ds_example$case_id <- 1:nrow(ds_example) > ds_example <- ds_example[, sort(colnames(ds_example))] > ds_example > > # I would like to generate a data frame that contains the duplicates AND > the > # corresponding non-duplicates to the duplicates. > # For example, finding the duplicates will deliver cases 2 and 3 but the > list > # should also contain case 1 because case 1 is the corresponding case to > the > # duplicate cases 2 and 3.
> # For the whole example dataset that would be: > needed <- c(1, 1, 1, > 1, 1, > 1, 1, 1, > 1, 1, > 1, 1, > 1, 1, 1, > 1, 1, > 0, 0, 0, 0, 0, 0, 0, 0) > needed <- as.logical(needed) > ds_example <- data.frame(ds_example, needed) > ds_example > > # To find the duplicates and the corresponding non-duplicates > duplicates <- duplicated(ds_example$Debitor) > > list_of_duplicated_debitors <- as.character(ds_example[duplicates, > "Debitor"]) > > filter_variable <- unique(list_of_duplicated_debitors) > > ds_duplicates <- ds_example["Debitor" == filter_variable] # Result: > dataset with 0 columns > > ds_duplicates <- ds_example["Debitor"] %in% filter_variable # Result: > FALSE > > # How can I create a dataset like this > > ds_example <- ds_example[needed, ] > ds_example > > # using the Debitor IDs? > > Kind regards > > Georg Maubach > > ______________________________________________ > R-help@r-project.org mailing list -- To UNSUBSCRIBE and more, see > https://stat.ethz.ch/mailman/listinfo/r-help > PLEASE do read the posting guide > http://www.R-project.org/posting-guide.html > and provide commented, minimal, self-contained, reproducible code. > [[alternative HTML version deleted]] ______________________________________________ R-help@r-project.org mailing list -- To UNSUBSCRIBE and more, see https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.