> > Hello. > > Now i tried to do what you told me. > I used the str(fuction), and data$date1 and data3$date1 where both listed
You have no date1 only date. Therefore result<- merge(data, data3, by=c("date", "name"), all=T) takes all values from both data frames > dim(data) [1] 20 15 > dim(data3) [1] 20 9 alltogether 24 columns from which 4 are date and name columns therefore 20 columns contain data. > dim(result) [1] 40 22 So the result has all 20 columns from both data frames plus one name and one date column and all rows from both data frames = 40. Those two sets are disjoint. If you had some common date and name in both data frames these rows would be merged on the same row in result. Let us try this. > data3$name[1:5] <- data$name[1:5] > data3$date[3:5] <- data$date[3:5] result<- merge(data, data3, by=c("date", "name"), all=T) dim(result) [1] 37 22 Regards Petr > "character". I changed "name" to character but it did not work either. > I also changed all variables to character, with no positive result. > > str(data) > 'data.frame': 14446 obs. of 15 variables: > $ id : chr "1" "1" "1" "1" ... > $ compid : chr "2514" "2514" "2514" "2514" ... > $ secid : chr "15856" "15856" "15856" "15856" ... > $ name : chr "A-pressen" "A-pressen" "A-pressen" "A-pressen" ... > $ period : chr "1" "2" "3" "4" ... > $ date : chr "17.05.1980" "17.05.1981" "17.05.1982" "17.05.1983" ... > $ enddate: chr "17.05.1981" "17.05.1982" "17.05.1983" "17.05.1984" ... > $ div : chr NA NA NA NA ... > $ ndivs : chr NA NA NA NA ... > $ posdiv : chr NA NA NA NA ... > $ ddiv2 : chr NA NA NA NA ... > $ ddiv3 : chr NA NA NA NA ... > $ ddiv4 : chr NA NA NA NA ... > $ ddiv5 : chr NA NA NA NA ... > $ ddiv6 : chr NA NA NA NA ... > > str(data3) > 'data.frame': 812354 obs. of 9 variables: > $ date : chr "02.01.1996" "03.01.1996" "04.01.1996" > "05.01.1996" ... > $ Securityid : chr "6001" "6001" "6001" "6001" ... > $ Symbol : chr "AAV" "AAV" "AAV" "AAV" ... > $ name : chr "Adresseavisen" "Adresseavisen" > "Adresseavisen" "Adresseavisen" ... > $ Securitytype : chr "Ordinary Shares" "Ordinary Shares" > "Ordinary Shares" "Ordinary Shares" ... > $ Unadjusted : chr "200" "200" "200" "200" ... > $ Event.adjusted : chr "200" "200" "200" "200" ... > $ Div.and.Event.adjusted: chr "109,7595375" "109,7595375" "109,7595375" > "109,7595375" ... > $ Sharesissued : chr "1901646" "1901646" "1901646" "1901646" ... > > Here is some suitable data for "data" > > > dput(data[1:20,]) > > structure(list(id = c("1", "1", "1", "1", "1", "1", "1", "1", > "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1"), > compid = c("2514", "2514", "2514", "2514", "2514", "2514", > "2514", "2514", "2514", "2514", "2514", "2514", "2514", "2514", > "2514", "2514", "2514", "2514", "2514", "2514"), secid = c("15856", > "15856", "15856", "15856", "15856", "15856", "15856", "15856", > "15856", "15856", "15856", "15856", "15856", "15856", "15856", > "15856", "15856", "15856", "15856", "15856"), name = c("A-pressen", > "A-pressen", "A-pressen", "A-pressen", "A-pressen", "A-pressen", > "A-pressen", "A-pressen", "A-pressen", "A-pressen", "A-pressen", > "A-pressen", "A-pressen", "A-pressen", "A-pressen", "A-pressen", > "A-pressen", "A-pressen", "A-pressen", "A-pressen"), period = c("1", > "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", > "13", "14", "15", "16", "17", "18", "19", "20"), date = c("17.05.1980", > "17.05.1981", "17.05.1982", "17.05.1983", "17.05.1984", "17.05.1985", > "17.05.1986", "17.05.1987", "17.05.1988", "17.05.1989", "17.05.1990", > "17.05.1991", "17.05.1992", "17.05.1993", "17.05.1994", "17.05.1995", > "17.05.1996", "17.05.1997", "17.05.1998", "17.05.1999"), > enddate = c("17.05.1981", "17.05.1982", "17.05.1983", "17.05.1984", > "17.05.1985", "17.05.1986", "17.05.1987", "17.05.1988", "17.05.1989", > "17.05.1990", "17.05.1991", "17.05.1992", "17.05.1993", "17.05.1994", > "17.05.1995", "17.05.1996", "17.05.1997", "17.05.1998", "17.05.1999", > "17.05.2000"), div = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, > "0", "0", "0", "0", "0", "5", "0", "1.1", "1.2", "1", "0" > ), ndivs = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, "0", "0", > "0", "0", "0", "1", "0", "1", "1", "1", "0"), posdiv = c(NA, > NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "1", > NA, "1", "1", "1", NA), ddiv2 = c(NA, NA, NA, NA, NA, NA, > NA, NA, NA, NA, "0", "0", "0", "0", "0", NA, "0", "1", NA, > NA), ddiv3 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, > "0", "0", "0", "0", "0", "0", "0", "0", "-1"), ddiv4 = c(NA, > NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "0", "0", "0", > "0", "0", "0", "0", "0"), ddiv5 = c(NA, NA, NA, NA, NA, NA, > NA, NA, NA, NA, NA, NA, NA, "0", "0", "0", "0", "0", "0", > "0"), ddiv6 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, > NA, NA, NA, "0", "0", "0", "0", "0", "0")), .Names = c("id", > "compid", "secid", "name", "period", "date", "enddate", "div", > "ndivs", "posdiv", "ddiv2", "ddiv3", "ddiv4", "ddiv5", "ddiv6" > ), row.names = c(NA, 20L), class = "data.frame") > > > > > Here is some suitable data for "data3": > > > dput(data3[1:20,]) > > structure(list(date = c("02.01.1996", "03.01.1996", "04.01.1996", > "05.01.1996", "08.01.1996", "09.01.1996", "10.01.1996", "11.01.1996", > "12.01.1996", "15.01.1996", "16.01.1996", "17.01.1996", "18.01.1996", > "19.01.1996", "22.01.1996", "23.01.1996", "24.01.1996", "25.01.1996", > "26.01.1996", "29.01.1996"), Securityid = c("6001", "6001", "6001", > "6001", "6001", "6001", "6001", "6001", "6001", "6001", "6001", > "6001", "6001", "6001", "6001", "6001", "6001", "6001", "6001", > "6001"), Symbol = c("AAV", "AAV", "AAV", "AAV", "AAV", "AAV", > "AAV", "AAV", "AAV", "AAV", "AAV", "AAV", "AAV", "AAV", "AAV", > "AAV", "AAV", "AAV", "AAV", "AAV"), name = c("Adresseavisen", > "Adresseavisen", "Adresseavisen", "Adresseavisen", "Adresseavisen", > "Adresseavisen", "Adresseavisen", "Adresseavisen", "Adresseavisen", > "Adresseavisen", "Adresseavisen", "Adresseavisen", "Adresseavisen", > "Adresseavisen", "Adresseavisen", "Adresseavisen", "Adresseavisen", > "Adresseavisen", "Adresseavisen", "Adresseavisen"), Securitytype = > c("Ordinary Shares", > "Ordinary Shares", "Ordinary Shares", "Ordinary Shares", "Ordinary Shares", > "Ordinary Shares", "Ordinary Shares", "Ordinary Shares", "Ordinary Shares", > "Ordinary Shares", "Ordinary Shares", "Ordinary Shares", "Ordinary Shares", > "Ordinary Shares", "Ordinary Shares", "Ordinary Shares", "Ordinary Shares", > "Ordinary Shares", "Ordinary Shares", "Ordinary Shares"), Unadjusted = > c("200", > "200", "200", "200", "200", "200", "200", "200", "200", "200", > "200", "200", "200", "200", "200", "200", "200", "200", "200", > "200"), Event.adjusted = c("200", "200", "200", "200", "200", > "200", "200", "200", "200", "200", "200", "200", "200", "200", > "200", "200", "200", "200", "200", "200"), Div.and.Event.adjusted = > c("109,7595375", > "109,7595375", "109,7595375", "109,7595375", "109,7595375", "109,7595375", > "109,7595375", "109,7595375", "109,7595375", "109,7595375", "109,7595375", > "109,7595375", "109,7595375", "109,7595375", "109,7595375", "109,7595375", > "109,7595375", "109,7595375", "109,7595375", "109,7595375"), > Sharesissued = c("1901646", "1901646", "1901646", "1901646", > "1901646", "1901646", "1901646", "1901646", "1901646", "1901646", > "1901646", "1901646", "1901646", "1901646", "1901646", "1901646", > "1901646", "1901646", "1901646", "1901646")), .Names = c("date", > "Securityid", "Symbol", "name", "Securitytype", "Unadjusted", > "Event.adjusted", "Div.and.Event.adjusted", "Sharesissued"), row.names = > c(NA, > 20L), class = "data.frame") > > > > When I run the function: > > data4<-merge(data,data3, by=c("name","date1"), all=T) > > > dput(data4[1:20,]) > > structure(list(name = c("A-pressen", "A-pressen", "A-pressen", > "A-pressen", "A-pressen", "A-pressen", "A-pressen", "A-pressen", > "A-pressen", "A-pressen", "A-pressen", "A-pressen", "A-pressen", > "A-pressen", "A-pressen", "A-pressen", "A-pressen", "A-pressen", > "A-pressen", "A-pressen"), date = c("01.02.1999", "01.02.2000", > "01.02.2001", "01.02.2002", "01.03.1999", "01.03.2000", "01.03.2001", > "01.03.2002", "01.04.2003", "01.06.1999", "01.06.2001", "01.07.1999", > "01.07.2002", "01.07.2003", "01.08.2000", "01.08.2001", "01.08.2002", > "01.08.2003", "01.09.1999", "01.09.2000"), id = c(NA_character_, > NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, > NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, > NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, > NA_character_, NA_character_, NA_character_, NA_character_), > compid = c(NA_character_, NA_character_, NA_character_, NA_character_, > NA_character_, NA_character_, NA_character_, NA_character_, > NA_character_, NA_character_, NA_character_, NA_character_, > NA_character_, NA_character_, NA_character_, NA_character_, > NA_character_, NA_character_, NA_character_, NA_character_ > ), secid = c(NA_character_, NA_character_, NA_character_, > NA_character_, NA_character_, NA_character_, NA_character_, > NA_character_, NA_character_, NA_character_, NA_character_, > NA_character_, NA_character_, NA_character_, NA_character_, > NA_character_, NA_character_, NA_character_, NA_character_, > NA_character_), period = c(NA_character_, NA_character_, > NA_character_, NA_character_, NA_character_, NA_character_, > NA_character_, NA_character_, NA_character_, NA_character_, > NA_character_, NA_character_, NA_character_, NA_character_, > NA_character_, NA_character_, NA_character_, NA_character_, > NA_character_, NA_character_), enddate = c(NA_character_, > NA_character_, NA_character_, NA_character_, NA_character_, > NA_character_, NA_character_, NA_character_, NA_character_, > NA_character_, NA_character_, NA_character_, NA_character_, > NA_character_, NA_character_, NA_character_, NA_character_, > NA_character_, NA_character_, NA_character_), div = c(NA_character_, > NA_character_, NA_character_, NA_character_, NA_character_, > NA_character_, NA_character_, NA_character_, NA_character_, > NA_character_, NA_character_, NA_character_, NA_character_, > NA_character_, NA_character_, NA_character_, NA_character_, > NA_character_, NA_character_, NA_character_), ndivs = c(NA_character_, > NA_character_, NA_character_, NA_character_, NA_character_, > NA_character_, NA_character_, NA_character_, NA_character_, > NA_character_, NA_character_, NA_character_, NA_character_, > NA_character_, NA_character_, NA_character_, NA_character_, > NA_character_, NA_character_, NA_character_), posdiv = c(NA_character_, > NA_character_, NA_character_, NA_character_, NA_character_, > NA_character_, NA_character_, NA_character_, NA_character_, > NA_character_, NA_character_, NA_character_, NA_character_, > NA_character_, NA_character_, NA_character_, NA_character_, > NA_character_, NA_character_, NA_character_), ddiv2 = c(NA_character_, > NA_character_, NA_character_, NA_character_, NA_character_, > NA_character_, NA_character_, NA_character_, NA_character_, > NA_character_, NA_character_, NA_character_, NA_character_, > NA_character_, NA_character_, NA_character_, NA_character_, > NA_character_, NA_character_, NA_character_), ddiv3 = c(NA_character_, > NA_character_, NA_character_, NA_character_, NA_character_, > NA_character_, NA_character_, NA_character_, NA_character_, > NA_character_, NA_character_, NA_character_, NA_character_, > NA_character_, NA_character_, NA_character_, NA_character_, > NA_character_, NA_character_, NA_character_), ddiv4 = c(NA_character_, > NA_character_, NA_character_, NA_character_, NA_character_, > NA_character_, NA_character_, NA_character_, NA_character_, > NA_character_, NA_character_, NA_character_, NA_character_, > NA_character_, NA_character_, NA_character_, NA_character_, > NA_character_, NA_character_, NA_character_), ddiv5 = c(NA_character_, > NA_character_, NA_character_, NA_character_, NA_character_, > NA_character_, NA_character_, NA_character_, NA_character_, > NA_character_, NA_character_, NA_character_, NA_character_, > NA_character_, NA_character_, NA_character_, NA_character_, > NA_character_, NA_character_, NA_character_), ddiv6 = c(NA_character_, > NA_character_, NA_character_, NA_character_, NA_character_, > NA_character_, NA_character_, NA_character_, NA_character_, > NA_character_, NA_character_, NA_character_, NA_character_, > NA_character_, NA_character_, NA_character_, NA_character_, > NA_character_, NA_character_, NA_character_), Securityid = c("15856", > "15856", "15856", "15856", "15856", "15856", "15856", "15856", > "15856", "15856", "15856", "15856", "15856", "15856", "15856", > "15856", "15856", "15856", "15856", "15856"), Symbol = c("APR", > "APR", "APR", "APR", "APR", "APR", "APR", "APR", "APR", "APR", > "APR", "APR", "APR", "APR", "APR", "APR", "APR", "APR", "APR", > "APR"), Securitytype = c("Ordinary Shares", "Ordinary Shares", > "Ordinary Shares", "Ordinary Shares", "Ordinary Shares", > "Ordinary Shares", "Ordinary Shares", "Ordinary Shares", > "Ordinary Shares", "Ordinary Shares", "Ordinary Shares", > "Ordinary Shares", "Ordinary Shares", "Ordinary Shares", > "Ordinary Shares", "Ordinary Shares", "Ordinary Shares", > "Ordinary Shares", "Ordinary Shares", "Ordinary Shares"), > Unadjusted = c("120", "140", "160", "105", "110", "190", > "160", "112", "115", "120", "150", "127", "106,5", "154", > "155", "160", "111", "155", "127", "205"), Event.adjusted = c("120", > "140", "160", "105", "110", "190", "160", "112", "115", "120", > "150", "127", "106,5", "154", "155", "160", "111", "155", > "127", "205"), Div.and.Event.adjusted = c("111,4092308", > "129,9774359", "148,545641", "100,1538462", "102,1251282", > "176,3979487", "148,545641", "106,8307692", "109,6923077", > "111,4092308", "143,0769231", "117,9081026", "101,5846154", > "154", "143,9035897", "152,6153846", "105,8769231", "155", > "117,9081026", "190,3241026"), Sharesissued = c("8839643", > "8839643", "8854307", "8866191", "8839643", "8839643", "8854307", > "8867791", "8885537", "8839643", "8866191", "8839643", "8885537", > "8903842", "8854307", "8866191", "8885537", "8903842", "8839643", > "8854307")), .Names = c("name", "date", "id", "compid", "secid", > "period", "enddate", "div", "ndivs", "posdiv", "ddiv2", "ddiv3", > "ddiv4", "ddiv5", "ddiv6", "Securityid", "Symbol", "Securitytype", > "Unadjusted", "Event.adjusted", "Div.and.Event.adjusted", "Sharesissued" > ), row.names = c(NA, 20L), class = "data.frame") > > > If I want to just keep the observations from "data" (14446 observations) > merged with "data3". Is it correct to use: > data4<-merge(data,data3, by=c("name","date1"), all=T) > or should I use: > data4<-merge(data,data3, by=c("name","date1"), all.x=F)? > (Then I get ca 14000 obs, but "NA" in all variables from "data3". > > In advance, thank you. > > > > -- > View this message in context: http://r.789695.n4.nabble.com/merging-two- > dataframes-tp3932869p3940157.html > Sent from the R help mailing list archive at Nabble.com. > > ______________________________________________ > R-help@r-project.org mailing list > https://stat.ethz.ch/mailman/listinfo/r-help > PLEASE do read the posting guide http://www.R-project.org/posting-guide.html > and provide commented, minimal, self-contained, reproducible code. ______________________________________________ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.