This is an automated email from the git hooks/post-receive script. satta pushed a commit to branch master in repository fastaq.
commit 11f33d4fe0209ef719ed2e95748b130362a517df Author: Sascha Steinbiss <sa...@debian.org> Date: Sun Jun 18 17:21:40 2017 +0000 Revert "Revert "Merge tag 'upstream/3.15.0'"" This reverts commit fbcbefdb8d1c46dc72186a52466ecc15216dd975. --- README.md | 1 + pyfastaq/runners/make_random_contigs.py | 2 +- pyfastaq/runners/sort_by_name.py | 14 ++++++++++++++ pyfastaq/tasks.py | 12 ++++++++++++ pyfastaq/tests/data/tasks_test_sort_by_name.in.fa | 16 ++++++++++++++++ pyfastaq/tests/data/tasks_test_sort_by_name.out.fa | 16 ++++++++++++++++ pyfastaq/tests/tasks_test.py | 9 +++++++++ scripts/fastaq | 3 ++- setup.py | 2 +- 9 files changed, 72 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index c17c54f..675cb2f 100644 --- a/README.md +++ b/README.md @@ -79,6 +79,7 @@ Available commands | scaffolds_to_contigs | Creates a file of contigs from a file of scaffolds | | search_for_seq | Find all exact matches to a string (and its reverse complement) | | sequence_trim | Trim exact matches to a given string off the start of every sequence | +| sort_by_name | Sorts sequences in lexographical (name) order | | sort_by_size | Sorts sequences in length order | | split_by_base_count | Split multi sequence file into separate files | | strip_illumina_suffix | Strips /1 or /2 off the end of every read name | diff --git a/pyfastaq/runners/make_random_contigs.py b/pyfastaq/runners/make_random_contigs.py index 5337120..6b5febb 100644 --- a/pyfastaq/runners/make_random_contigs.py +++ b/pyfastaq/runners/make_random_contigs.py @@ -9,7 +9,7 @@ def run(description): parser.add_argument('--name_by_letters', action='store_true', help='Name the contigs A,B,C,... will start at A again if you get to Z') parser.add_argument('--prefix', help='Prefix to add to start of every sequence name', default='') parser.add_argument('--seed', type=int, help='Seed for random number generator. Default is to use python\'s default', default=None) - parser.add_argument('contigs', type=int, help='Nunber of contigs to make') + parser.add_argument('contigs', type=int, help='Number of contigs to make') parser.add_argument('length', type=int, help='Length of each contig') parser.add_argument('outfile', help='Name of output file') options = parser.parse_args() diff --git a/pyfastaq/runners/sort_by_name.py b/pyfastaq/runners/sort_by_name.py new file mode 100644 index 0000000..f57911f --- /dev/null +++ b/pyfastaq/runners/sort_by_name.py @@ -0,0 +1,14 @@ +import argparse +from pyfastaq import tasks + +def run(description): + parser = argparse.ArgumentParser( + description = description, + usage = 'fastaq sort_by_name <infile> <outfile>') + parser.add_argument('infile', help='Name of input file') + parser.add_argument('outfile', help='Name of output file') + options = parser.parse_args() + tasks.sort_by_name( + options.infile, + options.outfile + ) diff --git a/pyfastaq/tasks.py b/pyfastaq/tasks.py index 3107672..b788672 100644 --- a/pyfastaq/tasks.py +++ b/pyfastaq/tasks.py @@ -556,6 +556,18 @@ def sort_by_size(infile, outfile, smallest_first=False): utils.close(fout) +def sort_by_name(infile, outfile): + '''Sorts input sequence file by sort -d -k1,1, writes sorted output file.''' + seqs = {} + file_to_dict(infile, seqs) + #seqs = list(seqs.values()) + #seqs.sort() + fout = utils.open_file_write(outfile) + for name in sorted(seqs): + print(seqs[name], file=fout) + utils.close(fout) + + def to_fastg(infile, outfile, circular=None): '''Writes a FASTG file in SPAdes format from input file. Currently only whether or not a sequence is circular is supported. Put circular=set of ids, or circular=filename to make those sequences circular in the output. Puts coverage=1 on all contigs''' if circular is None: diff --git a/pyfastaq/tests/data/tasks_test_sort_by_name.in.fa b/pyfastaq/tests/data/tasks_test_sort_by_name.in.fa new file mode 100644 index 0000000..26c1d8f --- /dev/null +++ b/pyfastaq/tests/data/tasks_test_sort_by_name.in.fa @@ -0,0 +1,16 @@ +>scaffold1 +AGTCA +>scaffold2 +ACGTTT +>scaffold10 +A +>scaffold12 +ACG +>contig1 +AGTCA +>contig2 +ACGTTT +>contig10 +A +>contig12 +ACG \ No newline at end of file diff --git a/pyfastaq/tests/data/tasks_test_sort_by_name.out.fa b/pyfastaq/tests/data/tasks_test_sort_by_name.out.fa new file mode 100644 index 0000000..662b583 --- /dev/null +++ b/pyfastaq/tests/data/tasks_test_sort_by_name.out.fa @@ -0,0 +1,16 @@ +>contig1 +AGTCA +>contig10 +A +>contig12 +ACG +>contig2 +ACGTTT +>scaffold1 +AGTCA +>scaffold10 +A +>scaffold12 +ACG +>scaffold2 +ACGTTT diff --git a/pyfastaq/tests/tasks_test.py b/pyfastaq/tests/tasks_test.py index b77dbf8..5db41d4 100644 --- a/pyfastaq/tests/tasks_test.py +++ b/pyfastaq/tests/tasks_test.py @@ -595,6 +595,15 @@ class TestSortBySize(unittest.TestCase): self.assertTrue(filecmp.cmp(os.path.join(data_dir, 'tasks_test_sort_by_size.out.rev.fa'), tmpfile, shallow=False)) os.unlink(tmpfile) +class TestSortByName(unittest.TestCase): + def test_sort_by_name(self): + '''Test sort_by_name''' + infile = os.path.join(data_dir, 'tasks_test_sort_by_name.in.fa') + tmpfile = 'tmp.sort_by_name.fa' + tasks.sort_by_name(infile, tmpfile) + self.assertTrue(filecmp.cmp(os.path.join(data_dir, 'tasks_test_sort_by_name.out.fa'), tmpfile, shallow=False)) + os.unlink(tmpfile) + class TestStripIlluminaSuffix(unittest.TestCase): def test_strip_illumina_suffix(self): diff --git a/scripts/fastaq b/scripts/fastaq index e0c470a..881af29 100755 --- a/scripts/fastaq +++ b/scripts/fastaq @@ -25,8 +25,9 @@ tasks = { 'scaffolds_to_contigs': 'Creates a file of contigs from a file of scaffolds', 'search_for_seq': 'Find all exact matches to a string (and its reverse complement)', 'sequence_trim': 'Trim exact matches to a given string off the start of every sequence', + 'sort_by_name': 'Sorts sequences in lexographical (name) order', + 'sort_by_size': 'Sorts sequences in length order', 'split_by_base_count': 'Split multi sequence file into separate files', - 'sort_by_size': 'Sorts sequences in length order', 'strip_illumina_suffix': 'Strips /1 or /2 off the end of every read name', 'to_boulderio': 'Converts to Boulder-IO format, used by primer3', 'to_fasta': 'Converts a variety of input formats to nicely formatted FASTA format', diff --git a/setup.py b/setup.py index f9a6ed2..46f813f 100644 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ from setuptools import setup, find_packages setup( name='pyfastaq', - version='3.14.0', + version='3.15.0', description='Script to manipulate FASTA and FASTQ files, plus API for developers', packages = find_packages(), author='Martin Hunt', -- Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/fastaq.git _______________________________________________ debian-med-commit mailing list debian-med-commit@lists.alioth.debian.org http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/debian-med-commit