Hello,
I am trying to read in elements out of a very large binary file ... the total file is 4 gigs. I want to select rows out of the file, and the current procedure I run works but is prohibitively slow (takes more than a day to run and still won't complete). Is there any faster way to accomplish this? My current procedure looks like this: readHH <- function(file_name, hhid_list) { incon=file(file_name, open="rb") result=data.frame() tran=list() byte_mark=0 last_1M_mod=0 file_size=file.info(file_name)$size write.table(paste("Data pulled from", file_name, sep=" "), file="readHH_output.txt", sep=",", row.names=FALSE, col.names=FALSE, append=TRUE) while (TRUE) { tran$hh_id <- readBin(incon,integer(),1,size=4) if(is.element(tran$hh_id, hhid_list)) { tran$prov_id <- readBin(incon,integer(),1,size=2) tran$txn_dn <- readBin(incon,integer(),1,size=2) tran$total_dollars <- readBin(incon,integer(),1,size=4) tran$total_items <- readBin(incon,integer(),1,size=4) tran$order_id <- readBin(incon,integer(),1,size=4) tran$txn_type <- readChar(incon,1) tran$gender <- readChar(incon,1) tran$zip_code <- readChar(incon,5) tran$region_code <- readChar(incon,1) tran$county_code <- readChar(incon,1) tran$state_abbrev <- readChar(incon,2) tran$channel_code <- readChar(incon,1) tran$source_code <- readChar(incon,20) tran$payment_type <- readChar(incon,1) tran$credit_card <- readChar(incon,1) tran$promo_type <- readChar(incon,1) tran$flags <- readChar(incon,1) write.table(data.frame(tran), file="readHH_output", sep=",", row.names=FALSE, col.names=FALSE, append=TRUE) result <- rbind(result,data.frame(tran)) } else { byte_mark=byte_mark+42 if (byte_mark>=file_size) {break} else {seek(incon, where=byte_mark)} } } return(result) } Thanks Matt Matt Anthony | Senior Statistician| 303.327.1761 | [EMAIL PROTECTED] 10155 Westmoor Drive | Westminster, CO 80021 | FAX 303.327.1650 [[alternative HTML version deleted]] ______________________________________________ R-help@stat.math.ethz.ch mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.