hi all, especially dear steve and martin: thx for your kindly help, that lead me finish the first stage of my pipeline; such is coding:
fastqfile="query.fastq" library(ShortRead) reads <- readFastq(fastqfile); ids<- id(reads); seqs <- sread(reads); # first report the data quality qa <- qa(dirPath=".", pattern="query.fastq", type=c("fastq")) # read in fastq file report(qa, dest="qcReport1", type="html") #this is not necessary nCount<-alphabetFrequency(seqs)[,"N"] nDist<- table(nCount) #replaced all the nucleotide residue whose quality score below the cutoff by "N" qualityCutoff <- 20 qual <- PhredQuality(quality(quality(reads))) # get quality score list as PhredQuality myqual_mat <- matrix(charToRaw(as.character(unlist(qual))), nrow=length(qual), byrow=TRUE) # convert quality score to matrix at <- myqual_mat < charToRaw(as.character(PhredQuality(as.integer(qualityCutoff)))) letter_subject <- DNAString(paste(rep.int("N", width(seqs)[1]), collapse="")) letter <- as(Views(letter_subject, start=1, end=rowSums(at)), "DNAStringSet") injectedseqs <- replaceLetterAt(seqs, at, letter) #remove all the "N" at 5' and 3' end, and see the "N" distribution in reads seqsWithoutNend <-trimLRPatterns(Rpattern = letter_subject, Lpattern = letter_subject,subject = injectedseqs) trimCoords<-trimLRPatterns(Rpattern = letter_subject, Lpattern = letter_subject,subject = injectedseqs,ranges=T) nCount<-alphabetFrequency(seqsWithoutNend)[,"N"] nDist<- table(nCount) #from the "N" distribution, decide a cutoff to remove some reads, here the cutoff is set 2 nCutoff=2 middleN <-nCount < nCutoff reads<-reads[middleN] seqs <- seqs[middleN] qual <- qual[middleN] trimCoords<-trimCoords[middleN] seqs <- DNAStringSet(seqs, start=start(trimCoords), end=end(trimCoords)) qual1 <- BStringSet(qual, start=start(trimCoords), end=end(trimCoords)) qual <- SFastqQuality(qual1) # reapply quality score type trimmed <- ShortReadQ(sread=seqs, quality=qual, id=id(reads)) table(width(trimmed)) at last, we get all the high quality reads without "N" at 5' and 3' end [[alternative HTML version deleted]] _______________________________________________ Bioc-sig-sequencing mailing list Bioc-sig-sequencing@r-project.org https://stat.ethz.ch/mailman/listinfo/bioc-sig-sequencing