Good Afternoon Jia:

Please note the sequence of code generated by this script attached below.

--Hiram

Zeng, Jia wrote:
> Hi Katrina,
> 
> Thank you very much for your helpful response. I did set up the kent source 
> tree and located the perl script. 
> I also looked up the file /kent/src/hg/makeDb/doc/hg19.txt to grep the 
> particular command you guys used for 
> generating the rbest chain:
> 
> /cluster/bin/scripts/doRecipBest.pl -workhorse=`uname -n` -stop=recipBest 
> -buildDir=/hive/data/genomes/hg19/bed/lastz.$db hg19 $db >& $db.recipBest.log
> 
> I scanned the perl script briefly and thought that I roughly understood the 
> basic usage of the tool. Then I tried
> to do the following to construct a rbest chain of panTro3 vs rheMac2:
> 
> ./doRecipBest.pl -stop=recipBest -buildDir=~/data/panTro3 ~/data/panTro3 
> ~/data/rheMac2
> 
> where the directory ~/data/panTro3 contains the file: 
> panTro3.rheMac2.all.chain and the directory ~/data/rheMac2 contains the file: 
> rheMac2.panTro3.all.chain
> Both are downloaded from the UCSC golden path repository. 
> 
> But somehow I think providing the chain files only is not sufficient for the 
> doRecipBest script to work out. 
> Could you please let me know what files are assumed to exist in these 
> directories? And what is the buildDir in relation to the tDb and gDb?
> 
> Thank you.
>  
> Best,
> - Jia

#!/bin/csh -efx
# This script nets in both directions to get reciprocal best chains and nets.
# This script will fail if any of its commands fail.

cd /hive/data/genomes/hg19/bed/lastzPanTro3.2011-02-22/axtChain

# Swap hg19-best chains to be panTro3-referenced:
chainStitchId hg19.panTro3.over.chain.gz stdout \
| chainSwap stdin stdout \
| chainSort stdin panTro3.hg19.tBest.chain

# Net those on panTro3 to get panTro3-ref'd reciprocal best net:
chainPreNet panTro3.hg19.tBest.chain \
   /hive/data/genomes/{panTro3,hg19}/chrom.sizes stdout \
| chainNet -minSpace=1 -minScore=0 \
   stdin /hive/data/genomes/{panTro3,hg19}/chrom.sizes stdout /dev/null \
| netSyntenic stdin stdout \
| gzip -c > panTro3.hg19.rbest.net.gz

# Extract panTro3-ref'd reciprocal best chain:
netChainSubset panTro3.hg19.rbest.net.gz panTro3.hg19.tBest.chain stdout \
| chainStitchId stdin stdout \
| gzip -c > panTro3.hg19.rbest.chain.gz

# Swap to get hg19-ref'd reciprocal best chain:
chainSwap panTro3.hg19.rbest.chain.gz stdout \
| chainSort stdin stdout \
| gzip -c > hg19.panTro3.rbest.chain.gz

# Net those on hg19 to get hg19-ref'd reciprocal best net:
chainPreNet hg19.panTro3.rbest.chain.gz \
   /hive/data/genomes/{hg19,panTro3}/chrom.sizes stdout \
| chainNet -minSpace=1 -minScore=0 \
   stdin /hive/data/genomes/{hg19,panTro3}/chrom.sizes stdout /dev/null \
| netSyntenic stdin stdout \
| gzip -c > hg19.panTro3.rbest.net.gz

# Clean up the one temp file and make md5sum:
rm panTro3.hg19.tBest.chain
md5sum *.rbest.*.gz > md5sum.rbest.txt

# Create files for testing coverage of *.rbest.*.
netToBed -maxGap=1 panTro3.hg19.rbest.net.gz panTro3.hg19.rbest.net.bed
netToBed -maxGap=1 hg19.panTro3.rbest.net.gz hg19.panTro3.rbest.net.bed
chainToPsl panTro3.hg19.rbest.chain.gz \
   /hive/data/genomes/{panTro3,hg19}/chrom.sizes \
   /hive/data/genomes/panTro3/panTro3.2bit /hive/data/genomes/hg19/hg19.2bit \
   panTro3.hg19.rbest.chain.psl
chainToPsl hg19.panTro3.rbest.chain.gz \
   /hive/data/genomes/{hg19,panTro3}/chrom.sizes \
   /hive/data/genomes/hg19/hg19.2bit /hive/data/genomes/panTro3/panTro3.2bit \
   hg19.panTro3.rbest.chain.psl

# Verify that all coverage figures are equal:
set tChCov = `awk '{print $19;}' hg19.panTro3.rbest.chain.psl | sed -e 
's/,/\n/g' | awk 'BEGIN {N = 0;} {N += $1;} END {printf 
"%d\n", N;}'`
set qChCov = `awk '{print $19;}' panTro3.hg19.rbest.chain.psl | sed -e 
's/,/\n/g' | awk 'BEGIN {N = 0;} {N += $1;} END {printf 
"%d\n", N;}'`
set tNetCov = `awk 'BEGIN {N = 0;} {N += ($3 - $2);} END {printf "%d\n", N;}' 
hg19.panTro3.rbest.net.bed`
set qNetCov = `awk 'BEGIN {N = 0;} {N += ($3 - $2);} END {printf "%d\n", N;}' 
panTro3.hg19.rbest.net.bed`
if ($tChCov != $qChCov) then
   echo "Warning: hg19 rbest chain coverage $tChCov != panTro3 $qChCov"
endif
if ($tNetCov != $qNetCov) then
   echo "Warning: hg19 rbest net coverage $tNetCov != panTro3 $qNetCov"
endif
if ($tChCov != $tNetCov) then
   echo "Warning: hg19 rbest chain coverage $tChCov != net cov $tNetCov"
endif

mkdir experiments
mv *.bed *.psl experiments
# Make rbest net axt's download: one .axt per hg19 seq.
netSplit hg19.panTro3.rbest.net.gz rBestNet
chainSplit rBestChain hg19.panTro3.rbest.chain.gz
cd ..
mkdir axtRBestNet
foreach f (axtChain/rBestNet/*.net)
     netToAxt $f axtChain/rBestChain/$f:t:r.chain \
     /hive/data/genomes/hg19/hg19.2bit /hive/data/genomes/panTro3/panTro3.2bit 
stdout \
     | axtSort stdin stdout \
     | gzip -c > axtRBestNet/$f:t:r.hg19.panTro3.net.axt.gz
   end

# Make rbest mafNet for multiz: one .maf per hg19 seq.
mkdir mafRBestNet
foreach f (axtRBestNet/*.hg19.panTro3.net.axt.gz)
     axtToMaf -tPrefix=hg19. -qPrefix=panTro3. $f \
         /hive/data/genomes/{hg19,panTro3}/chrom.sizes \
             stdout \
       | gzip -c > mafRBestNet/$f:t:r:r:r:r:r.maf.gz
end

_______________________________________________
Genome maillist  -  [email protected]
https://lists.soe.ucsc.edu/mailman/listinfo/genome

Reply via email to