Thanks - I checked through and it looks as if all of the geneids are 
formatted similarly, so I don't know which one would be causing an error.
Interestingly, your sapply method works on the same data.  So I'm happy 
although still confused, because the strsplit method worked the other 
day with a similarly generated dataset.

I dumped my entire data frame below, in case anyone wants to investigate.

Alison

Rumino_Reps_agreeWalign$geneid.prefix <- sapply(gene.list, "[", 1)
Rumino_Reps_agreeWalign$geneid.suffix <- sapply(gene.list, "[", 2)
 > dput(Rumino_Reps_agreeWalign)
structure(list(geneid = c("657313.locus_tag:RTO_08940", "457412.251848018",
"657314.locus_tag:CK5_20630", "657323.locus_tag:CK1_33060", 
"657313.locus_tag:RTO_09690",
"471875.197297106", "411470.DS231493.G14", "411459.149830627",
"657313.locus_tag:RTO_09720", "411460.145845997", "411459.149831369",
"657321.locus_tag:RBR_01830", "411460.145846414", "457412.251848805",
"657321.locus_tag:RBR_08030", "471875.197296907", "457412.251847995",
"657314.locus_tag:CK5_20840", "411460.145846423", 
"657314.locus_tag:CK5_25030",
"457412.251847990", "471875.197297117", "471875.197299322", 
"411459.149831093",
"411459.149831815", "411460.145846434", "213810.locus_tag:RUM_09700",
"657314.locus_tag:CK5_09460", "657323.locus_tag:CK1_18840", 
"471875.197297108",
"411460.145846680", "411459.149831368", "657314.locus_tag:CK5_19120",
"657321.locus_tag:RBR_09560", "411460.145846435", 
"657323.locus_tag:CK1_11530",
"457412.251850723", "213810.locus_tag:RUM_12960", 
"213810.locus_tag:RUM_14740",
"213810.locus_tag:RUM_07030", "471875.197296936", "411459.149831092",
"471875.197297110", "471875.197298135", "411460.145846430", 
"657314.locus_tag:CK5_20370",
"657313.locus_tag:RTO_09790", "657323.locus_tag:CK1_33050", 
"411460.145846407",
"457412.251849909", "411460.145846340", "657313.locus_tag:RTO_14810",
"457412.251848010", "457412.251850599", "657323.locus_tag:CK1_33200",
"657323.locus_tag:CK1_33190", "213810.locus_tag:RUM_03050", 
"657314.locus_tag:CK5_09880",
"213810.locus_tag:RUM_15180", "657313.locus_tag:RTO_14610", 
"657313.locus_tag:RTO_23930",
"411459.149830473", "657313.locus_tag:RTO_18090", 
"657323.locus_tag:CK1_27940",
"657314.locus_tag:CK5_20720", "411459.149831855", "471875.197297691",
"411459.149833320", "457412.251849358", "657321.locus_tag:RBR_13130",
"411459.149831077", "471875.197297272", "657314.locus_tag:CK5_09370",
"457412.251847994", "411459.149831080", "657314.locus_tag:CK5_20730",
"457412.251850579", "213810.locus_tag:RUM_14870", 
"657321.locus_tag:RBR_01750",
"657313.locus_tag:RTO_09660", "657314.locus_tag:CK5_28910", 
"411460.145846907",
"657313.locus_tag:RTO_09860", "457412.251847996", 
"657323.locus_tag:CK1_38480",
"411460.145846417", "471875.197297592", "411459.149831814", 
"457412.251848016",
"411459.149831804", "657323.locus_tag:CK1_32880", 
"657321.locus_tag:RBR_08130",
"411460.145846429", "657313.locus_tag:RTO_09880", 
"213810.locus_tag:RUM_03410",
"657313.locus_tag:RTO_09740", "657313.locus_tag:RTO_09840", 
"457412.251848009",
"657323.locus_tag:CK1_33090", "657323.locus_tag:CK1_25000", 
"411459.149831095",
"411459.149830934", "457412.251847970", "457412.251848000", 
"657314.locus_tag:CK5_20680",
"411459.149831088", "657323.locus_tag:CK1_19350", 
"657321.locus_tag:RBR_08670",
"471875.197299547", "411459.149831081", "657323.locus_tag:CK1_32550",
"411459.149831091", "657313.locus_tag:RTO_24580", "457412.251848004",
"471875.197297195", "411460.145846602", "657321.locus_tag:RBR_06200",
"213810.locus_tag:RUM_19570", "411460.145846361", "411459.149833804",
"657323.locus_tag:CK1_32930", "471875.197296906", "411459.149831078",
"657321.locus_tag:RBR_09900", "411460.145846496", 
"657321.locus_tag:RBR_08260",
"411459.149833021", "657313.locus_tag:RTO_02600", 
"657323.locus_tag:CK1_33030",
"657313.locus_tag:RTO_09750", "213810.locus_tag:RUM_14790", 
"457412.251848017",
"457412.251848806", "457412.251847640", "657314.locus_tag:CK5_20620",
"411459.149830474", "657323.locus_tag:CK1_11750", 
"213810.locus_tag:RUM_09690",
"457412.251847999", "657321.locus_tag:RBR_05870", "411460.145846409",
"657313.locus_tag:RTO_16220", "657321.locus_tag:RBR_10630", 
"411459.149833026",
"457412.251847997", "657313.locus_tag:RTO_09650", "471875.197297129",
"471875.197297112", "213810.locus_tag:RUM_14720", "457412.251847991",
"657313.locus_tag:RTO_09730", "471875.197297132", 
"657313.locus_tag:RTO_14650",
"411470.DS231491.G186", "457412.251849520", "657323.locus_tag:CK1_04710",
"657323.locus_tag:CK1_04510", "411460.145846182", "411460.145846883",
"657321.locus_tag:RBR_08040", "411459.149833983", "457412.251849519",
"471875.197297124", "457412.251849906", "657321.locus_tag:RBR_08010",
"657321.locus_tag:RBR_03380", "657323.locus_tag:CK1_20230", 
"471875.197297115",
"657323.locus_tag:CK1_13100", "657323.locus_tag:CK1_32950", 
"411460.145846428",
"471875.197297120", "213810.locus_tag:RUM_13040", 
"657314.locus_tag:CK5_25080",
"411459.149831096", "411459.149831090", "411459.149833331", 
"411459.149831370",
"657313.locus_tag:RTO_26330", "411459.149833340", 
"657314.locus_tag:CK5_20590",
"411460.145846458", "471875.197297290", "657313.locus_tag:RTO_09850",
"213810.locus_tag:RUM_12130", "657323.locus_tag:CK1_32910", 
"213810.locus_tag:RUM_09770",
"657313.locus_tag:RTO_09640", "657313.locus_tag:RTO_09830", 
"457412.251849013",
"411460.145847544", "657323.locus_tag:CK1_33040", 
"213810.locus_tag:RUM_23250",
"657314.locus_tag:CK5_20580", "411459.149831082", "471875.197297125",
"657314.locus_tag:CK5_14780", "657321.locus_tag:RBR_03820", 
"213810.locus_tag:RUM_06600",
"657314.locus_tag:CK5_20610", "657321.locus_tag:RBR_08120", 
"657314.locus_tag:CK5_20770",
"471875.197297119", "657313.locus_tag:RTO_10610", 
"657321.locus_tag:RBR_08270",
"657323.locus_tag:CK1_32920", "457412.251849800", "411460.145846603",
"411459.149830653", "411459.149833020", "411459.149831085", 
"411459.149833803",
"657323.locus_tag:CK1_32990", "471875.197297121", "411459.149833164",
"657313.locus_tag:RTO_14600", "457412.251848005", 
"657314.locus_tag:CK5_10670",
"213810.locus_tag:RUM_14730", "411459.149831367", 
"657314.locus_tag:CK5_20640",
"657321.locus_tag:RBR_15140", "411460.145847269", 
"657323.locus_tag:CK1_04820",
"457412.251848673", "471875.197296932", "411459.149831083", 
"657323.locus_tag:CK1_33080",
"657321.locus_tag:RBR_08170", "657314.locus_tag:CK5_20740", 
"657321.locus_tag:RBR_08100",
"657314.locus_tag:CK5_20850", "471875.197297111", 
"657313.locus_tag:RTO_17750",
"471875.197297308", "657314.locus_tag:CK5_00900", 
"657313.locus_tag:RTO_03810",
"471875.197297779", "411460.145848384", "657313.locus_tag:RTO_29320",
"657314.locus_tag:CK5_20780", "471875.197299321", "411460.145846431",
"471875.197298831", "471875.197297131", "657323.locus_tag:CK1_30770",
"457412.251848007", "657314.locus_tag:CK5_25320", 
"213810.locus_tag:RUM_03700",
"657313.locus_tag:RTO_19560", "411460.145846432", 
"657314.locus_tag:CK5_29790",
"411460.145848483", "657323.locus_tag:CK1_32890", "411460.145846406",
"657321.locus_tag:RBR_15270", "657321.locus_tag:RBR_18050", 
"657314.locus_tag:CK5_20650",
"657323.locus_tag:CK1_33210", "411459.149831086", "457412.251847993",
"411459.149831051", "411460.145846418", "657321.locus_tag:RBR_07990",
"411459.149830912", "471875.197298686", "457412.251850588", 
"457412.251848006",
"657314.locus_tag:CK5_17510", "657313.locus_tag:RTO_05370", 
"457412.251849359",
"471875.197297105", "657313.locus_tag:RTO_09820", 
"657323.locus_tag:CK1_25830",
"471875.197297130", "657314.locus_tag:CK5_09290", "457412.251848019",
"471875.197297928", "657314.locus_tag:CK5_14710", "411460.145847612",
"457412.251849367", "657314.locus_tag:CK5_20860", "471875.197297907",
"657321.locus_tag:RBR_07980"), count_Conser = c(7L, 1L, 2L, 1L,
3L, 0L, 1L, 0L, 4L, 0L, 3L, 4L, 1L, 3L, 0L, 5L, 2L, 2L, 1L, 0L,
0L, 2L, 3L, 0L, 2L, 1L, 1L, 4L, 0L, 0L, 0L, 1L, 1L, 5L, 0L, 0L,
2L, 0L, 1L, 1L, 2L, 0L, 1L, 1L, 1L, 3L, 1L, 2L, 0L, 0L, 0L, 1L,
0L, 0L, 2L, 1L, 1L, 0L, 1L, 4L, 0L, 1L, 1L, 4L, 0L, 7L, 0L, 4L,
1L, 1L, 2L, 0L, 1L, 0L, 0L, 2L, 3L, 0L, 4L, 0L, 1L, 0L, 1L, 4L,
1L, 0L, 5L, 4L, 0L, 6L, 2L, 1L, 3L, 1L, 0L, 2L, 3L, 0L, 1L, 12L,
1L, 1L, 2L, 0L, 0L, 2L, 1L, 2L, 1L, 3L, 2L, 0L, 2L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 3L, 0L, 2L, 0L, 1L, 0L, 2L, 1L, 1L, 1L, 1L,
0L, 2L, 0L, 2L, 2L, 5L, 2L, 18L, 0L, 4L, 2L, 0L, 3L, 0L, 1L,
0L, 1L, 1L, 1L, 3L, 3L, 1L, 1L, 2L, 0L, 1L, 0L, 1L, 0L, 2L, 0L,
0L, 1L, 1L, 2L, 1L, 0L, 1L, 2L, 1L, 0L, 1L, 1L, 2L, 3L, 2L, 0L,
0L, 0L, 3L, 3L, 1L, 1L, 0L, 0L, 3L, 1L, 1L, 0L, 0L, 1L, 0L, 6L,
0L, 3L, 8L, 1L, 3L, 0L, 0L, 3L, 5L, 0L, 1L, 0L, 0L, 1L, 0L, 4L,
3L, 1L, 2L, 0L, 0L, 0L, 4L, 0L, 6L, 6L, 0L, 1L, 2L, 0L, 2L, 3L,
1L, 3L, 0L, 2L, 4L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 2L, 2L, 2L,
0L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 4L, 0L,
0L, 3L, 3L, 1L, 0L, 1L, 1L, 2L, 0L, 0L, 1L, 3L, 0L, 2L, 5L, 0L,
0L, 1L, 0L, 8L, 1L, 8L, 2L, 0L, 1L), count_NonCons = c(5L, 4L,
4L, 0L, 0L, 2L, 0L, 2L, 0L, 2L, 4L, 0L, 0L, 2L, 1L, 1L, 2L, 0L,
0L, 0L, 3L, 1L, 1L, 2L, 1L, 0L, 0L, 4L, 1L, 0L, 4L, 2L, 2L, 15L,
2L, 0L, 2L, 0L, 1L, 0L, 1L, 0L, 3L, 0L, 0L, 8L, 0L, 0L, 0L, 0L,
1L, 2L, 4L, 0L, 0L, 0L, 1L, 3L, 5L, 2L, 0L, 0L, 6L, 0L, 2L, 1L,
1L, 4L, 1L, 4L, 1L, 8L, 5L, 1L, 6L, 1L, 5L, 0L, 11L, 0L, 0L,
0L, 2L, 1L, 0L, 0L, 6L, 1L, 0L, 10L, 2L, 1L, 0L, 1L, 1L, 3L,
2L, 1L, 3L, 4L, 1L, 0L, 12L, 0L, 0L, 1L, 3L, 15L, 9L, 4L, 12L,
2L, 4L, 2L, 0L, 0L, 0L, 2L, 2L, 3L, 1L, 1L, 1L, 0L, 0L, 1L, 0L,
5L, 0L, 0L, 1L, 0L, 3L, 4L, 1L, 1L, 2L, 0L, 0L, 0L, 1L, 3L, 9L,
1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 10L, 2L, 0L, 12L, 0L, 1L,
1L, 2L, 0L, 1L, 1L, 3L, 3L, 1L, 4L, 0L, 2L, 1L, 1L, 4L, 0L, 2L,
5L, 5L, 4L, 0L, 0L, 0L, 2L, 0L, 3L, 0L, 2L, 3L, 2L, 3L, 1L, 4L,
2L, 2L, 0L, 6L, 2L, 1L, 2L, 3L, 0L, 7L, 0L, 0L, 6L, 2L, 2L, 1L,
2L, 0L, 6L, 0L, 0L, 3L, 0L, 0L, 0L, 2L, 2L, 1L, 0L, 2L, 2L, 0L,
0L, 4L, 0L, 2L, 1L, 3L, 2L, 0L, 1L, 0L, 1L, 0L, 6L, 1L, 1L, 1L,
2L, 2L, 4L, 1L, 0L, 0L, 2L, 3L, 2L, 0L, 1L, 0L, 0L, 0L, 1L, 2L,
1L, 0L, 16L, 1L, 3L, 0L, 5L, 10L, 1L, 2L, 4L, 0L, 6L, 0L, 0L,
0L, 1L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 11L, 1L, 4L, 5L, 1L, 1L),
     count_ConsSubst = c(5, 3, 1, 1, 3, 1, 0, 1, 1, 0, 0, 2, 0,
     0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 3, 0, 1, 0, 0,
     0, 6, 1, 1, 1, 0, 0, 0, 1, 2, 1, 0, 0, 4, 0, 0, 1, 0, 0,
     4, 1, 0, 0, 0, 0, 1, 0, 3, 0, 1, 0, 2, 1, 3, 0, 3, 0, 3,
     2, 0, 1, 1, 3, 4, 2, 0, 9, 0, 1, 1, 1, 0, 2, 0, 1, 1, 0,
     1, 1, 3, 0, 2, 0, 1, 0, 2, 2, 1, 3, 0, 6, 0, 0, 0, 2, 7,
     3, 1, 5, 1, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 5, 0, 0, 1, 0,
     0, 0, 1, 0, 0, 3, 1, 0, 1, 1, 2, 0, 2, 0, 5, 2, 0, 0, 0,
     0, 2, 0, 2, 0, 0, 3, 0, 0, 2, 0, 2, 0, 2, 1, 1, 0, 2, 1,
     1, 1, 0, 0, 1, 1, 4, 0, 1, 0, 1, 5, 0, 0, 0, 5, 2, 1, 0,
     0, 1, 0, 0, 0, 4, 0, 2, 1, 1, 1, 2, 1, 1, 1, 4, 1, 2, 1,
     1, 2, 0, 0, 0, 1, 0, 1, 0, 0, 2, 0, 0, 1, 1, 0, 3, 1, 1,
     2, 2, 1, 1, 1, 1, 0, 2, 1, 1, 0, 0, 0, 1, 0, 0, 0, 3, 2,
     0, 1, 1, 0, 0, 0, 0, 2, 1, 1, 0, 0, 0, 0, 0, 3, 1, 0, 0,
     3, 4, 0, 5, 1, 0, 4, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 4,
     1, 4, 0, 0, 0), count_NCSubst = c(1, 0, 0, 0, 1, 1, 0, 0,
     0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1,
     0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0,
     0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 2, 0, 0, 1, 0, 0, 1, 0, 0,
     0, 1, 1, 1, 0, 0, 1, 3, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0,
     1, 0, 1, 0, 5, 0, 0, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0,
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0,
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 2, 0, 0,
     0, 1, 1, 1, 0, 2, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0,
     0, 1, 0, 0, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2,
     1, 0, 1, 0, 0, 0, 1, 0, 2, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0,
     0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0,
     0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1,
     0, 0, 0, 0, 0, 1, 1, 0, 0, 0)), .Names = c("geneid", "count_Conser",
"count_NonCons", "count_ConsSubst", "count_NCSubst"), class = 
"data.frame", row.names = c(NA,
-284L))

On 04/11/2012 08:01 PM, Jean V Adams wrote:
>
> Alison,
>
> Your code works fine on the first six lines of the data that you 
> provided.
>
> Rumino_Reps_agreeWalign <- data.frame(
>         geneid = c("657313.locus_tag:RTO_08940",
>                 "457412.251848018",
>                 "657314.locus_tag:CK5_20630",
>                 "657323.locus_tag:CK1_33060",
>                 "657313.locus_tag:RTO_09690",
>                 "471875.197297106"),
>         count_Conser = c(7, 1, 2, 1, 3, 0),
>         count_NonCons = c(5, 4, 4, 0, 0, 2),
>         count_ConsSubst = c(5, 3, 1, 1, 3, 1),
>         count_NCSubst = c(1, 0, 0, 0, 1, 1))
> gene.list <- strsplit(as.character(Rumino_Reps_agreeWalign$geneid), 
> "\\.")
> Rumino_Reps_agreeWalignTR <- transform(Rumino_Reps_agreeWalign,
>         taxid=do.call(rbind, gene.list))
>
> Perhaps in later rows of the data there are cases where there is no 
> "." in geneid?  If not, can you provide a subset of your data that 
> results in the warning?  Use the dput() function.
>
> It's not a good idea to create an object named "strsplit".  That will 
> only mask the function strsplit() in later runs.
>
> If time is an issue, a slightly faster way to do this, after the 
> strsplit() function is:
> Rumino_Reps_agreeWalign$geneid.prefix <- sapply(gene.list, "[", 1)
> Rumino_Reps_agreeWalign$geneid.suffix <- sapply(gene.list, "[", 2)
>
> Jean
>
>
> alison waller wrote on 04/11/2012 08:23:29 AM:
>
> > Dear all,
> >
> > I want to use string split to parse column names, however, I am having
> > some errors that I don't understand.
> > I see a problem when I try to rbind the output from strsplit.
> >
> > please let me know if I'm missing something obvious,
> >
> > thanks,
> > alison
> >
> > here are my commands:
> > >strsplit<-strsplit(as.character(Rumino_Reps_agreeWalign$geneid),"\\.")
> > >
> > Rumino_Reps_agreeWalignTR<-transform
> > (Rumino_Reps_agreeWalign,taxid=do.call(rbind,
> > strsplit))
> > Warning message:
> > In function (..., deparse.level = 1)  :
> >    number of columns of result is not a multiple of vector length 
> (arg 1)
> >
> >
> > here is my data:
> >
> > > head(Rumino_Reps_agreeWalign)
> >                        geneid count_Conser count_NonCons count_ConsSubst
> > 1 657313.locus_tag:RTO_08940            7             5               5
> > 2           457412.251848018            1             4               3
> > 3 657314.locus_tag:CK5_20630            2             4               1
> > 4 657323.locus_tag:CK1_33060            1             0               1
> > 5 657313.locus_tag:RTO_09690            3             0               3
> > 6           471875.197297106            0             2               1
> >    count_NCSubst
> > 1             1
> > 2             0
> > 3             0
> > 4             0
> > 5             1
> > 6             1
> >
> > here are the results from strsplit:
> > > head(strsplit)
> > [[1]]
> > [1] "657313"              "locus_tag:RTO_08940"
> >
> > [[2]]
> > [1] "457412"    "251848018"
> >
> > [[3]]
> > [1] "657314"              "locus_tag:CK5_20630"
> >
> > [[4]]
> > [1] "657323"              "locus_tag:CK1_33060"
> >
> > [[5]]
> > [1] "657313"              "locus_tag:RTO_09690"
> >
> > [[6]]
> > [1] "471875"    "197297106"


        [[alternative HTML version deleted]]

______________________________________________
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.

Reply via email to