[gentoo-commits] proj/sci:master commit in: sci-biology/SEECER/files/, sci-biology/SEECER/

Martin Mokrejs Tue, 21 Nov 2017 08:12:06 -0800

commit:     aab87747b89b64107677056a3d4874d8f5ee7bbf
Author:     Martin Mokrejš <mmokrejs <AT> fold <DOT> natur <DOT> cuni <DOT> cz>
AuthorDate: Tue Nov 21 16:11:28 2017 +0000
Commit:     Martin Mokrejs <mmokrejs <AT> fold <DOT> natur <DOT> cuni <DOT> cz>
CommitDate: Tue Nov 21 16:11:28 2017 +0000
URL:        https://gitweb.gentoo.org/proj/sci.git/commit/?id=aab87747


sci-biology/SEECER: execute jellyfish1 instead of jellyfish

Also add a few cleanup patches that expose a THREADS
variable and clean up the code.

Package-Manager: Portage-2.3.14, Repoman-2.3.6

 sci-biology/SEECER/SEECER-0.1.3-r2.ebuild          |  7 ++-
 .../SEECER/files/rename_jellyfish_binary.patch     | 11 ++++
 sci-biology/SEECER/files/run_jellyfish.sh.patch    | 72 ++++++++++++++++++++++
 sci-biology/SEECER/files/run_seecer.sh.patch       | 42 +++++++++++++
 4 files changed, 130 insertions(+), 2 deletions(-)

diff --git a/sci-biology/SEECER/SEECER-0.1.3-r2.ebuild b/sci-biology/SEECER/SEECER-0.1.3-r2.ebuild
index 60862d8c8..0b7ec3bfa 100644
--- a/sci-biology/SEECER/SEECER-0.1.3-r2.ebuild
+++ b/sci-biology/SEECER/SEECER-0.1.3-r2.ebuild
@@ -5,7 +5,7 @@ EAPI=6
 
 inherit eutils
 
-DESCRIPTION="SEquencing Error Corrector for RNA-Seq reads"
+DESCRIPTION="SEquence Error Corrector for RNA-Seq reads"
 HOMEPAGE="http://sb.cs.cmu.edu/seecer/"
 SRC_URI="
        http://sb.cs.cmu.edu/seecer/downloads/"${P}".tar.gz
@@ -22,12 +22,15 @@ DEPEND="
        sci-libs/gsl:0=
        sci-biology/seqan:0="
 RDEPEND="${DEPEND}
-       =sci-biology/jellyfish-1.1.11"
+       =sci-biology/jellyfish-1.1.11-r1"
 
 S="${S}"/SEECER
 
 PATCHES=(
        "${FILESDIR}"/remove-hardcoded-paths.patch
+       "${FILESDIR}"/run_seecer.sh.patch
+       "${FILESDIR}"/run_jellyfish.sh.patch
+       "${FILESDIR}"/rename_jellyfish_binary.patch
 )
 
 src_prepare(){

diff --git a/sci-biology/SEECER/files/rename_jellyfish_binary.patch b/sci-biology/SEECER/files/rename_jellyfish_binary.patch
new file mode 100644
index 000000000..c6548cee1
--- /dev/null
+++ b/sci-biology/SEECER/files/rename_jellyfish_binary.patch
@@ -0,0 +1,11 @@
+--- SEECER/bin/run_seecer.sh.ori       2017-11-21 16:56:28.808767468 +0100
++++ SEECER/bin/run_seecer.sh   2017-11-21 16:57:07.469835728 +0100
+@@ -26,7 +26,7 @@
+ 
+ 
+ BINDIR='' #this can be hardcoded to /absolute/path/to/SEECER/bin/
+-JF="jellyfish"    #this may be hardcoded to /absolute/path/to/jellyfish/bin/
++JF="jellyfish1"    #this may be hardcoded to /absolute/path/to/jellyfish/bin/jellyfish
+ 
+ K=17
+ SEECER_PARAMS=""

diff --git a/sci-biology/SEECER/files/run_jellyfish.sh.patch b/sci-biology/SEECER/files/run_jellyfish.sh.patch
new file mode 100644
index 000000000..7631f5a4c
--- /dev/null
+++ b/sci-biology/SEECER/files/run_jellyfish.sh.patch
@@ -0,0 +1,72 @@
+--- SEECER-0.1.3/bin/run_jellyfish.sh.ori      2017-11-21 16:41:54.164599838 +0100
++++ SEECER-0.1.3/bin/run_jellyfish.sh  2017-11-21 16:46:28.022166903 +0100
+@@ -1,18 +1,45 @@
+ #!/bin/bash
++
++# Usage: run_jellyfish.sh jellyfish_binpath tempfile_prefix kmersize mincount tmpdir infile1 [infile2] threads
+ JF=$1
+ LCOUNT=$4
+ TMPDIR=$5
++THREADS=${8:-32}
+ 
+ if [ -z "$JF" ]; then
+     echo "No path to jellyfish binary provided, exiting.";
+     exit 255;
+ fi
+ 
++# Usage: jellyfish count [options] file:path+
++#
++# Count k-mers or qmers in fasta or fastq files
++# 
++# Options (default value in (), *required):
++#  -m, --mer-len=uint32                    *Length of mer
++#  -s, --size=uint64                       *Hash size
++#  -t, --threads=uint32                     Number of threads (1)
++#  -o, --output=string                      Output prefix (mer_counts)
++#  -c, --counter-len=Length in bits         Length of counting field (7)
++#      --out-counter-len=Length in bytes    Length of counter field in output (4)
++#  -C, --both-strands                       Count both strand, canonical representation (false)
++#  -p, --reprobes=uint32                    Maximum number of reprobes (62)
++#  -r, --raw                                Write raw database (false)
++#  -q, --quake                              Quake compatibility mode (false)
++#      --quality-start=uint32               Starting ASCII for quality values (64)
++#      --min-quality=uint32                 Minimum quality. A base with lesser quality becomes an N (0)
++#  -L, --lower-count=uint64                 Don't output k-mer with count < lower-count
++#  -U, --upper-count=uint64                 Don't output k-mer with count > upper-count
++#      --invalid-char=warn|ignore|error     How to treat invalid characters. The char is changed to a N. (warn)
++#      --matrix=Matrix file                 Hash function binary matrix
++#      --timing=Timing file                 Print timing information
++#      --stats=Stats file                   Print stats
++#
+ if [ "$#" -eq "4" ];
+ then
+-$JF count -m $3 -o $TMPDIR/jf_tmp -c 3 -s 10000000 -t 32 --both-strands $6 || exit 255
++$JF count -m $3 -o $TMPDIR/jf_tmp -c 3 -s 10000000 -t $THREADS --both-strands $6 || exit 255
+ else
+-$JF count -m $3 -o $TMPDIR/jf_tmp -c 3 -s 10000000 -t 32 --both-strands $6 $7 || exit 255
++$JF count -m $3 -o $TMPDIR/jf_tmp -c 3 -s 10000000 -t $THREADS --both-strands $6 $7 || exit 255
+ fi;
+ 
+ # merge
+@@ -25,5 +52,21 @@
+     rm $TMPDIR/jf_tmp_*
+ fi
+ 
++#
++# Usage: jellyfish dump [options] db:path
++# 
++# Dump k-mer counts
++# 
++# By default, dump in a fasta format where the header is the count and
++# the sequence is the sequence of the k-mer. The column format is a 2
++# column output: k-mer count.
++# 
++# Options (default value in (), *required):
++#  -c, --column                             Column format (false)
++#  -t, --tab                                Tab separator (false)
++#  -L, --lower-count=uint64                 Don't output k-mer with count < lower-count
++#  -U, --upper-count=uint64                 Don't output k-mer with count > upper-count
++#  -o, --output=string                      Output file
++#
+ $JF dump --lower-count=$LCOUNT -o $2 -c $TMPDIR/jf_merged_$3 || exit 255
+ rm $TMPDIR/jf_merged_$3

diff --git a/sci-biology/SEECER/files/run_seecer.sh.patch b/sci-biology/SEECER/files/run_seecer.sh.patch
new file mode 100644
index 000000000..a20c7917f
--- /dev/null
+++ b/sci-biology/SEECER/files/run_seecer.sh.patch
@@ -0,0 +1,42 @@
+--- SEECER/bin/run_seecer.sh.old       2013-10-02 18:55:24.000000000 +0200
++++ SEECER/bin/run_seecer.sh   2017-11-21 16:24:24.065584149 +0100
+@@ -33,6 +33,7 @@
+ SeecerStep=1
+ LCOUNT=3
+ TMPDIR=''
++THREADS=32
+ 
+ usage=$(cat << EOF
+    # This script runs the SEECER pipeline of 4 steps:
+@@ -54,11 +55,12 @@
+       -j <v> : specify the location of JELLYFISH binary (default = $JF).
+       -p <v> : specify extra SEECER parameters (default = '').
+       -s <v> : specify the starting step ( default = 1). Values = 1,2,3,4.
++      -c <v> : number of threads (default = 32).
+       -h : help message
+ EOF
+ );
+ 
+-while getopts ":j:p:k:s:t:h" opt; do
++while getopts ":j:p:k:s:t:c:h" opt; do
+   case $opt in
+     t)
+       TMPDIR=$OPTARG
+@@ -75,6 +77,8 @@
+     s)
+       SeecerStep=$OPTARG
+       ;;
++    c)
++      THREADS=$OPTARG
+     \?)
+       echo "Invalid option: -$OPTARG" >&2
+       echo "$usage"
+@@ -170,7 +177,7 @@
+ then
+     echo "++ Step 2: Running JELLYFISH to count kmers ..."
+     echo
+-    bash "${BINDIR}"run_jellyfish.sh $JF $TMPDIR/counts_${K}_${LCOUNT} $K $LCOUNT $TMPDIR $Read1_N $Read2_N || exit 255
++    bash "${BINDIR}"run_jellyfish.sh $JF $TMPDIR/counts_${K}_${LCOUNT} $K $LCOUNT $TMPDIR $Read1_N $Read2_N $THREADS || exit 255
+ fi;
+ 
+ if [ ! -r $TMPDIR/counts_${K}_${LCOUNT} ];

[gentoo-commits] proj/sci:master commit in: sci-biology/SEECER/files/, sci-biology/SEECER/

Reply via email to