That's fine. Am I correct that this is the format you want for the output file? nams <- c("fldsampid", "CLP_ID", "sacode", "matrix", "etc.", "CL", "PO4", "SO4AG", "AL", "AS", "B", "V", "Zn", "etc.", "ALK", "ALKB", "ALKC", "SOLID", "DOC", "TOC", "NO3")
It seems a bit suspicious as you have etc. in there twice. In any case if I understand what you want all you need to do is arrange those names in the order you want and put them in a vector, call it bb. Then you simply say newxx <- xx[,bb] et voilà! You may want to have a look at indexing in the Introduction to R to get a feeling for what's happening here. Below is a small example. However I don't think your data.frame is quite what you think it is. When I do str(xx) to look at the structure all your variables are being read in as factors, which I suspect is not what you want. R tries to recognize what type of variable is being read in and often seems to decide a character or even a numeric variable is a factor. You may want to run the command options(stringsAsFactors = FALSE) before you load the data into the data.frame. I hope this is of some help. #=================================================================== df1 <- structure(list(site = c(1, 1, 4, 4, 1, 4), id = structure(c(1L, 2L, 2L, 3L, 1L, 2L), .Label = c("a", "b", "c"), class = "factor"), cata = c(1, 1, 6, 1, 1, NA), catb = c(1, 2, 3, 4, 5, 6), doga = c(3, 5, 3, 6, 4, 0), dogb = c(2, 4, 6, 8, 10, 12), rata = c(NA, 9, 9, 8, 9, 8), ratb = c(1, 2, 3, 4, 5, 6), bata = c(12, 42, NA, 45, 32, 54), batb = c(13, 15, 17, 19, 21, 23)), .Names = c("site", "id", "cata", "catb", "doga", "dogb", "rata", "ratb", "bata", "batb"), row.names = c("aa", "bb", "cc", "dd", "ee", "ff"), class = "data.frame") df1 bb <- c("dogb", "rata", "ratb", "bata", "batb", "site", "id", "cata", "catb", "doga") newdf <- df1[,bb] #================================================================== --- On Mon, 1/10/11, Guy Jett <gj...@itsi.com> wrote: > From: Guy Jett <gj...@itsi.com> > Subject: RE: [R] Help with Data Transformation > To: "John Kane" <jrkrid...@yahoo.ca>, "r-help@r-project.org" > <r-help@r-project.org> > Received: Monday, January 10, 2011, 5:20 PM > Thank you John, > I have put a copy of the dput file below my signature > 
block. I hope that is what you need as I am unfamiliar > with that function. Note that empty cells need to > become "NA". > The "=" character is part of the "PARVALUE" column. > Yours, > Guy > gj...@itsi.com > > structure(list(X = c(2268L, 2269L, 2270L, 2272L, 2273L, > 2274L, > 2275L, 2276L, 2289L, 2290L, 2291L, 2292L, 2293L, 2294L, > 2295L, > 2296L, 2326L, 2327L, 2328L, 2329L, 2330L, 2331L, 2332L, > 2346L, > 2349L, NA, NA, NA, NA, NA), fldsampid = structure(c(3L, 3L, > 3L, > 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, > 4L, > 4L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 2L, 3L, 4L), .Label = c("", > "fldsampid", > "LHR020GW-01E2", "LHR020SD-00E2"), class = "factor"), > CLP_ID = structure(c(1L, > 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, > 1L, > 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 2L, 1L, 3L), .Label > = c("", > "CLP_ID", "MY77J8"), class = "factor"), sacode = > structure(c(2L, > 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, > 2L, > 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 3L, 2L, 2L), .Label > = c("", > "N", "sacode"), class = "factor"), matrix = structure(c(4L, > 4L, > 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, > 3L, > 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 2L, 4L, 3L), .Label = > c("", > "matrix", "SE", "WG"), class = "factor"), etc. 
= > structure(c(1L, > 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, > 1L, > 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L), .Label > = c("", > "etc."), class = "factor"), prccode = structure(c(4L, 4L, > 4L, > 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 7L, 8L, 5L, 6L, > 7L, > 5L, 5L, 5L, 5L, 5L, 6L, 1L, 1L, 3L, 2L, NA), .Label = c("", > "<value>", > "CL", "INO", "MET", "MI", "ORG", "SN"), class = "factor"), > Lab = structure(c(4L, > 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, > 4L, > 4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 5L, 2L, NA), .Label > = c("", > "<value>", "A4SW", "BRLS", "PO4"), class = "factor"), > EXMCODE = structure(c(5L, > 5L, 5L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 5L, 5L, 5L, 5L, 5L, > 4L, > 4L, 5L, 4L, 4L, 4L, 4L, 7L, 4L, 1L, 1L, 6L, 2L, NA), .Label > = c("", > "<value>", "FLDFLT", "METHOD", "NONE", "SO4", > "SW3050B"), class = "factor"), > Analysis = structure(c(13L, 13L, 13L, 10L, > 11L, 11L, 11L, > 11L, 11L, 11L, 11L, 3L, 3L, 3L, 5L, 13L, 9L, > 8L, 15L, 7L, > 12L, 12L, 12L, 14L, 4L, 1L, 1L, 6L, 2L, 2L), > .Label = c("", > "<value>", "A2320", "A2540G", "A5310B", > "AG", "C245.5", "E160.3", > "E1630", "E1631", "E1638", "E200.8", "E300", > "SW6010B", "SW9060" > ), class = "factor"), PARLABEL = > structure(c(10L, 16L, 17L, > 12L, 3L, 4L, 8L, 9L, 20L, 21L, 15L, 5L, 6L, > 7L, 11L, 14L, > 13L, 18L, 19L, 12L, 3L, 8L, 9L, 20L, 18L, 1L, > 1L, 4L, 2L, > 2L), .Label = c("", "<value>", "AG", > "AL", "ALK", "ALKB", > "ALKC", "AS", "B", "CL", "DOC", "HG", "MEHG", > "NO3", "PB", > "PO4", "SO4", "SOLID", "TOC", "V", "Zn"), > class = "factor"), > PARVQ = structure(c(3L, 5L, 3L, 3L, 3L, 3L, > 3L, 3L, 3L, 3L, > 5L, 3L, 3L, 5L, 3L, 3L, 3L, 3L, 3L, 3L, 5L, > 3L, 3L, 3L, 3L, > 1L, 1L, 4L, 2L, 2L), .Label = c("", > "<value>", "=", "AS", > "ND"), class = "factor"), Result = > structure(c(12L, 20L, > 11L, 3L, 10L, 8L, 14L, 26L, 9L, 5L, 4L, 25L, > 25L, 13L, 19L, > 24L, 15L, 18L, 16L, 21L, 6L, 22L, 7L, 23L, > 17L, 1L, 1L, 27L, > 2L, 2L), 
.Label = c("", "<value>", > "0.00171", "0.008", "1.76", > "1050", "11400", "122", "131", "2.57", > "22460", "23590.9", > "2500", "317", "4.28", "4.823", "47.7", > "48.45", "49330", > "50", "5100", "5500", "56900", "792", > "807000", "9970", "B" > ), class = "factor"), X.1 = structure(c(1L, > 1L, 1L, 1L, 1L, > 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, > 1L, 1L, 1L, 1L, > 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 2L, 2L), > .Label = c("", "<value>", > "V"), class = "factor"), X.2 = > structure(c(1L, 1L, 1L, 1L, > 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, > 1L, 1L, 1L, 1L, > 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 2L, NA), > .Label = c("", > "<value>", "Zn"), class = "factor"), > X.3 = structure(c(1L, > 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, > 1L, 1L, 1L, 1L, > 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, > 3L, 2L, 2L), .Label = c("", > "<value>", "etc."), class = "factor"), > X.4 = structure(c(1L, > 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, > 1L, 1L, 1L, 1L, > 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, > 3L, 2L, NA), .Label = c("", > "<value>", "ALK"), class = "factor"), > X.5 = structure(c(1L, > 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, > 1L, 1L, 1L, 1L, > 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, > 3L, 2L, NA), .Label = c("", > "<value>", "ALKB"), class = "factor"), > X.6 = structure(c(1L, > 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, > 1L, 1L, 1L, 1L, > 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, > 3L, 2L, NA), .Label = c("", > "<value>", "ALKC"), class = "factor"), > X.7 = structure(c(1L, > 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, > 1L, 1L, 1L, 1L, > 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, > 3L, 2L, 2L), .Label = c("", > "<value>", "SOLID"), class = "factor"), > X.8 = structure(c(1L, > 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, > 1L, 1L, 1L, 1L, > 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, > 3L, 2L, NA), .Label = c("", > "<value>", "DOC"), class = "factor"), > X.9 = structure(c(1L, > 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, > 1L, 1L, 1L, 1L, > 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, > 3L, 2L, NA), .Label = c("", > "<value>", "TOC"), class = "factor"), > X.10 = structure(c(1L, > 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, > 1L, 1L, 1L, 1L, > 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, > 3L, 2L, NA), .Label = c("", > "<value>", "NO3"), class = "factor")), > .Names = c("X", "fldsampid", > "CLP_ID", "sacode", "matrix", "etc.", "prccode", "Lab", > "EXMCODE", > "Analysis", "PARLABEL", "PARVQ", "Result", "X.1", "X.2", > "X.3", > "X.4", "X.5", "X.6", "X.7", "X.8", "X.9", "X.10"), class = > "data.frame", row.names = c(NA, > -30L)) > > > -----Original Message----- > From: John Kane [mailto:jrkrid...@yahoo.ca] > > Sent: Monday, January 10, 2011 1:43 PM > To: r-help@r-project.org; > Guy Jett > Subject: Re: [R] Help with Data Transformation > > That sample data set is really hard to read. Could > you resent it after having used dput on it? > > A file output with dput is easily read into R and makes > seeing what you need much easier. BTW what are the = > doing? > > Thanks > > > > --- On Mon, 1/10/11, Guy Jett <gj...@itsi.com> > wrote: > > > From: Guy Jett <gj...@itsi.com> > > Subject: [R] Help with Data Transformation > > To: "r-help@r-project.org" > <r-help@r-project.org> > > Received: Monday, January 10, 2011, 3:59 PM Greetings, > I am new to R > > and am having trouble with parsing a file with the > following > > characteristics: > > > > * Individual results > > for a single sample are written to multiple lines. > > > > * First 16 columns > > are constant from sample to sample. > > > > * Remaining 10 need > > to be matched up (cross-tabbed?) > > > > o (the exact contents for the remaining 10 vary > from sample to > > sample, as indicated in the extract > > below) > > > > * Ultimate goal is to > > run various comparisons between the variable columns, > compare samples > > from separate populations, and graph samples from the > separate > > populations. 
> > > > * (An extract is > > provided below) > > > > The data is initially extracted from an SQL database > into Excel, then > > saved as a tab-delimited text file for use in R. > > I have been successful in using subset() to extract > specific sample > > types, but have not yet been able to transform the > data so that all > > the data needed is on a single line. I have looked > at several R > > manuals, read through 'R in a Nutshell', prowled the > help resources (R > > Site Search and the Google link), tried stack(), > subset(), reshape(), > > and several other functions, to no avail. > > > > Thank you very much for your help. This seems like a > wonderful > > community, Guy Jett, R.G. > > Project Geologist > > gj...@itsi.com<mailto:gj...@itsi.com> > > > > Example Data Input (subset): > > > > fldsampid > > CLP_ID sacode > > matrix etc... > > prccode > > Lab > > EXMCODE > > Analysis > > PARLABEL > > PARVQ Result > > 2268 LHR020GW-01E2 > > > > N > > WG > > > > INO > > BRLS NONE E300 > > CL > > = > > 23590.9 > > 2269 LHR020GW-01E2 > > > > N > > WG > > > > INO > > BRLS NONE E300 > > PO4 ND > > 50 > > 2270 LHR020GW-01E2 > > > > N > > WG > > > > INO > > BRLS NONE E300 > > SO4 = > > 22460 > > 2272 LHR020GW-01E2 > > > > N > > WG > > > > MET > > BRLS FLDFLT > > E1631 HG > > = > > 0.00171 > > 2273 LHR020GW-01E2 > > > > N > > WG > > > > MET > > BRLS FLDFLT > > E1638 AG > > = 2.57 > > 2274 LHR020GW-01E2 > > > > N > > WG > > > > MET > > BRLS FLDFLT > > E1638 AL > > = > > 122 > > 2275 LHR020GW-01E2 > > > > N > > WG > > > > MET > > BRLS FLDFLT > > E1638 AS > > = > > 317 > > 2276 LHR020GW-01E2 > > > > N > > WG > > > > MET > > BRLS FLDFLT > > E1638 B > > = > > 9970 > > 2289 LHR020GW-01E2 > > > > N > > WG > > > > MET > > BRLS FLDFLT > > E1638 V > > = > > 131 > > 2290 LHR020GW-01E2 > > > > N > > WG > > > > MET > > BRLS FLDFLT > > E1638 Zn > > = > > 1.76 > > 2291 LHR020GW-01E2 > > > > N > > WG > > > > MET > > BRLS METHOD > > E1638 > > PB ND > > 0.008 > > 2292 LHR020GW-01E2 > > > > 
N > > WG > > > > MI > > BRLS NONE A2320 > > ALK = > > 807000 > > 2293 LHR020GW-01E2 > > > > N > > WG > > > > MI > > BRLS NONE A2320 > > ALKB = > > 807000 > > 2294 LHR020GW-01E2 > > > > N > > WG > > > > MI > > BRLS NONE A2320 > > ALKC ND > > 2500 > > 2295 LHR020GW-01E2 > > > > N > > WG > > > > ORG > > BRLS NONE > > A5310B DOC = > > 49330 > > 2296 LHR020GW-01E2 > > > > N > > WG > > > > SN > > BRLS NONE E300 > > NO3 = > > 792 > > 2326 LHR020SD-00E2 > > > > N > > SE > > > > MET > > BRLS METHOD > > E1630 > > MEHG = > > 4.28 > > 2327 LHR020SD-00E2 > > > > N > > SE > > > > MI > > BRLS METHOD > > E160.3 SOLID > > = > > 48.45 > > 2328 LHR020SD-00E2 > > > > N > > SE > > > > ORG > > BRLS NONE > > SW9060 > > TOC = > > 4.823 > > 2329 LHR020SD-00E2 MY77J8 > > N > > SE > > > > MET > > A4SW METHOD > > > > C245.5 HG > > = 5100 > > 2330 LHR020SD-00E2 MY77J8 > > N > > SE > > > > MET > > A4SW METHOD > > > > E200.8 AG > > ND 1050 > > 2331 LHR020SD-00E2 MY77J8 > > N > > SE > > > > MET > > A4SW METHOD > > > > E200.8 AS > > = > > 5500 > > 2332 LHR020SD-00E2 MY77J8 > > N > > SE > > > > MET > > A4SW METHOD > > > > E200.8 B > > = > > 11400 > > 2346 LHR020SD-00E2 MY77J8 > > N > > SE > > > > MET > > A4SW SW3050B > > SW6010B > > V > > = > > 56900 > > 2349 LHR020SD-00E2 MY77J8 > > N > > SE > > > > MI > > A4SW METHOD > > A2540G > > SOLID = > > 47.7 > > > > Desired output: > > > > fldsampid > > CLP_ID sacode > > matrix etc... CL > > PO4 > > SO4 AG > > AL > > AS > > B > > V > > Zn > > etc... 
ALK > > ALKB ALKC > > SOLID DOC > > TOC NO3 > > > > LHR020GW-01E2 > > > > N > > WG > > > > <value> > > <value> > > <value> > > > > <value> > > <value> > > <value> > > > > <value> > > <value> > > <value> > > > > <value> > > <value> > > <value> > > > > <value> > > <value> > > <value> > > > > <value> > > <value> > > > > LHR020SD-00E2 MY77J8 N > > SE > > > > NA > > NA NA > > <value> > > <value> > > > > <value> > > <value> > > <value> > > NA > > <value> > > NA > > NA NA > > <value> > > NA > > NA > > NA > > > > [[alternative HTML version deleted]] > > > > ______________________________________________ > > R-help@r-project.org > > mailing list > > https://stat.ethz.ch/mailman/listinfo/r-help > > PLEASE do read the posting guide http://www.R-project.org/posting-guide.html > > and provide commented, minimal, self-contained, > > reproducible code. > > > > > ______________________________________________ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.