Source code for tentacle.mappers.usearch

#!/usr/bin/env python
# coding: UTF-8
# Fredrik Boulund 2013
# Anders Sjögren 2013
#  Copyright (C) 2014  Fredrik Boulund and Anders Sjögren
#
#  This program is free software: you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation, either version 3 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program.  If not, see <http://www.gnu.org/licenses/>.
# 

from subprocess import PIPE#, Popen
from gevent.subprocess import Popen
from mapper import Mapper

from ..utils import resolve_executable
from ..utils import mapping_utils
from ..parsers import blast8

__all__ = ["Usearch"]

class Usearch(Mapper):
    """
    Mapper implementation that builds and configures USEARCH invocations.

    Instances expect reads in FASTA format (``input_reads_format``) and
    expose ``blast8.parse_blast8`` as the parser for the mapper output
    (``output_parser``).
    """

    def __init__(self, logger, mapper_name):
        """
        Store the logger and resolve the mapper executable.

        :param logger: logger object, handed on to helper functions.
        :param mapper_name: name of the USEARCH executable; it is passed
            through ``resolve_executable`` and the result is stored in
            ``self.mapper``.
        """
        self.logger = logger
        self.mapper_string = mapper_name
        self.mapper = resolve_executable(mapper_name)
        self.options = {}
        self.input_reads_format = "FASTA"
        self.output_parser = blast8.parse_blast8

    @staticmethod
    def create_argparser():
        """
        Creates a parser for mapping options.

        :returns: an ``argparse.ArgumentParser`` (created with
            ``add_help=False``) holding the "USEARCH mapping options"
            argument group.
        """
        import argparse

        parser = argparse.ArgumentParser(add_help=False)
        mapping_group = parser.add_argument_group("USEARCH mapping options")
        # NOTE(review): the help text below mentions usearch_global, while
        # construct_mapper_call invokes -usearch_local -- confirm which one
        # is intended.
        mapping_group.add_argument(
            "--usearchID", dest="usearchID",
            type=float, default="0.9", metavar="I",
            help="usearch: Sequence similarity for usearch_global [default: %(default)s]")
        mapping_group.add_argument(
            "--usearchQueryCov", dest="usearchQueryCov",
            type=str, default="1.0", metavar="C",
            help="usearch: Query coverage in range 0.0-1.0.")
        mapping_group.add_argument(
            "--usearchDBName", dest="usearchDBName",
            type=str, default="", metavar="DBNAME", required=True,
            help="usearch: Name of the FASTA file in the database tarball (i.e. including .fasta extension). It must share basename with the .udb in the tarball.")
        mapping_group.add_argument(
            "--usearchStrand", dest="usearchStrand",
            type=str, default="both", metavar="S",
            help="usearch: If searching nucleotide sequences, specify either 'both' or 'plus' [default: %(default)s]")
        mapping_group.add_argument(
            "--usearchOther", dest="usearchOther",
            type=str, default="", metavar="S",
            help=("usearch: Quoted string containing usearch arguments. Warning: No checks of"
                  " validity are made."))
        return parser

    def prepare_references(self, remote_files, local_files, options, rebase_to_local_tmp=None):
        """
        Transfers and prepares reference DB for usearch.

        :param remote_files: object whose ``contigs`` attribute names the
            remote reference database.
        :param local_files: object whose ``contigs`` attribute names the
            local destination; must provide ``_replace``.
        :param options: parsed options; ``usearchDBName`` is read.
        :param rebase_to_local_tmp: callable applied to
            ``options.usearchDBName``. Although it defaults to None it is
            called unconditionally, so callers have to supply it.
        :returns: copy of ``local_files`` with ``contigs`` replaced by the
            rebased database name.
        """
        mapping_utils.copy_untar_ref_db(remote_files.contigs,
                                        local_files.contigs,
                                        self.logger)
        rebased_db = rebase_to_local_tmp(options.usearchDBName)
        return local_files._replace(contigs=rebased_db)

    def construct_mapper_call(self, local_files, options):
        """
        Parses options and creates a mapper call (python list) that can be
        used with Popen.

        :param local_files: object whose ``reads`` attribute is the path to
            the reads file.
        :param options: parsed options; the ``usearch*`` attributes defined
            in ``create_argparser`` are read.
        :returns: tuple ``(mapper_call, output_filename)`` where
            ``mapper_call`` is a list of command line tokens and
            ``output_filename`` is the reads path with ".mapped" appended.
        """
        output_filename = local_files.reads + ".mapped"
        # The database name is everything before the first "." of the FASTA
        # name, with ".udb" appended.
        # NOTE(review): names containing additional dots are truncated at
        # the first one -- confirm this is intended.
        database = options.usearchDBName.split(".", 1)[0] + ".udb"
        mapper_call = [self.mapper,
                       "-usearch_local", str(local_files.reads),
                       "-db", database,
                       "-id", str(options.usearchID),
                       "-blast6out", output_filename]
        # -query_cov is added exactly once and only when a value is set; it
        # was previously also hard-coded in the base call, which put the
        # option on the command line twice.
        if options.usearchQueryCov:
            mapper_call.extend(["-query_cov", str(options.usearchQueryCov)])
        if options.usearchStrand:
            mapper_call.extend(["-strand", str(options.usearchStrand)])
        if options.usearchOther:
            import shlex
            # Split the quoted string into separate tokens; they are passed
            # on without any validation.
            mapper_call.extend(shlex.split(options.usearchOther))
        return mapper_call, output_filename

    def assert_mapping_results(self, output_filename):
        """
        Makes a quick check that the mapping appears successful.

        Currently a no-op; ``output_filename`` is not inspected.
        """
        pass  # TODO: Assert mapping results