strange output when using flags in python

2018-06-14 22:32:48

I'm currently writing a Python script that takes a number of command-line flags. This is my first attempt at such a program, and I am getting some output in the bash shell that I don't quite understand. For example, when I run the script from the bash shell:

$ python my_script.py -f <input_file.txt> -k <test_string> -c <user_input>

I get this output before my script's output:

usage: rm [-f | -i] [-dPRrvW] file ...
       unlink file

I can't seem to get rid of this, which is frustrating for the prettiness of the output. Any help would be great!

The code I'm using:

import sys, getopt, re, subprocess, collections, itertools

def find_kmers( arguments=sys.argv[1:] ):
    """Find a k-mer on both strands of one FASTA record and write the hits to answer.gff3.

    Expects ``-f <filename> -k <kmer> -c <chromosome_name>`` in *arguments*
    (by default the value of ``sys.argv[1:]`` captured when the function is
    defined).

    Returns a message string: a usage message when a required flag is
    missing, a "chromosome not found" message, or ``'percent-GC = ...'``
    computed from the selected sequence.
    """

    required_opts = ['-f','-c','-k']



    # Each of the three flags takes a value (the trailing ':' in 'f:k:c:').
    opts, args = getopt.getopt(arguments,'f:k:c:')



    # Map flag -> value, e.g. {'-f': 'input.txt', ...}.
    opt_dic = dict(opts)



    for opt in required_opts:

        if opt not in opt_dic:

            return "incorrect arguments, please format as: python_script.py -f <filename> -k <kmer> -c <chromosome_name>"



    def rev_comp(sequence):
        # Reverse complement; raises KeyError for any character other than A, C, G or T.

        reversed_dic = {'A':'T','T':'A','C':'G','G':'C'}

        return ''.join(reversed_dic[_] for _ in sequence[::-1])



    # NOTE(review): `kmer` is assigned but never read; the code below uses opt_dic['-k'] directly.
    kmer = opt_dic['-k']

    # Dump every line containing '>' from the hard-coded S288C_R64.fasta
    # (not the -f file) into a temporary file.
    subprocess.call(['bash','-c',"grep '>' S288C_R64.fasta > grep.tmp"])

    # Drop the first character of each line and strip whitespace.
    # The file object is never explicitly closed.
    chromosomes = [_[1:].strip() for _ in open('grep.tmp')]

    # NOTE(review): `bash -c` takes only the next argument ('rm') as the
    # command string; 'grep.tmp' becomes $0 of that shell. rm therefore runs
    # with no operand, prints its usage text, and grep.tmp is left behind.
    subprocess.call(['bash','-c','rm','grep.tmp'])

    found = False

    if any(opt_dic['-c']==_ for _ in chromosomes):

        found = True



    def get_sequence(file):
        # Concatenate the stripped lines read from `file` up to (not
        # including) the next line starting with '>', upper-cased.

        sequence = ''

        for line in file:

            if line.startswith('>'): break

            sequence += line.strip()

        return sequence.upper()



    # Opened without a context manager and never closed in this function.
    ofile = open(opt_dic['-f'])

    if found == True:

        # Scan for the first header that equals the requested chromosome,
        # then read that record's sequence from the same file iterator.
        # NOTE(review): `found` was derived from S288C_R64.fasta, so if the
        # -f file lacks this header, `sequence` is never bound.
        for line in ofile:

            if line.startswith('>'):

                if line[1:].strip() == opt_dic['-c']:

                    sequence = get_sequence(ofile)

                    break



    else:

        # NOTE(review): the lone ' n ' looks like a newline escape whose
        # backslash was lost — confirm against the author's file.
        return 'chromosome not found in %s. n chromosomes in file are:%s'%(opt_dic['-f'],', '.join(str(_) for _ in chromosomes))





    # The zero-width lookahead (?=...) lets overlapping occurrences match.
    # The k-mer is interpolated into the pattern unescaped.
    kmer_matches1 = re.finditer('(?=%s)'%opt_dic['-k'],sequence)

    kmer_matches2 = re.finditer('(?=%s)'%opt_dic['-k'],rev_comp(sequence))







    def print_statement(start,strand):
        # Format one output record: start, end = start + len(kmer) - 1, strand.
        # NOTE(review): the 't' / 'n' characters between fields look like tab
        # and newline escapes whose backslashes were lost — confirm.



        return '%sthw1_scripttkmer=%st%st%st.t%st.tID=S288C;Name=S288Cn'%(opt_dic['-c'],opt_dic['-k'],start,start+len(opt_dic['-k'])-1,strand)



    pos_strand = collections.deque()

    neg_strand = collections.deque()

    # NOTE(review): izip stops at the shorter iterator, so when the two
    # strands have different match counts the surplus matches are dropped.
    for match1,match2 in itertools.izip(kmer_matches1,kmer_matches2):

        # +1 turns the 0-based match offset into a 1-based position.
        pos_strand.append(match1.start()+1)

        neg_strand.append(match2.start()+1)



    wfile = open('answer.gff3','w')

    # Merge the two ascending queues by start position; on a tie the
    # negative-strand entry is written first.
    while len(pos_strand)>0 and len(neg_strand)>0:

        if pos_strand[0]<neg_strand[0]:

            start = pos_strand.popleft()

            wfile.write(print_statement(start,'+'))

        else:

            start = neg_strand.popleft()

            wfile.write(print_statement(start,'-'))



    # Flush whatever remains on either strand.
    while len(pos_strand)>0:

        start = pos_strand.popleft()

        wfile.write(print_statement(start,'+'))



    while len(neg_strand)>0:

        start = neg_strand.popleft()

        wfile.write(print_statement(start,'-'))



    wfile.close()



    # The value is the G+C count divided by the sequence length (a fraction,
    # not multiplied by 100); an empty sequence raises ZeroDivisionError.
    return 'percent-GC = %s'%str(sum(sequence.count(gc) for gc in ["G","C"])/float(len(sequence)))



if __name__ == '__main__':
    # Parenthesised single-argument form: prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print(find_kmers())

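Invoking a bash one-liner with -c requires that the whole command be a single string. In ['bash','-c','rm','grep.tmp'] only 'rm' is the command; 'grep.tmp' becomes $0 of that shell, so rm runs with no operands and prints its usage message (and grep.tmp is never deleted). Change: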

subprocess.call(['bash','-c','rm','grep.tmp'])

to:

subprocess.call(['bash', '-c', 'rm grep.tmp'])

Or, more reasonably, don't use a subprocess for this at all; after adding import os at the top of the script, just do:

os.unlink('grep.tmp')  # Or os.remove; same thing, different names

which is much faster and less error prone.

In fact, all of your subprocess usage could be replaced with real Python code, and it would improve it substantially (and much of the Python code simplifies too):

def find_kmers(arguments=sys.argv[1:]):
    """Report every occurrence of a k-mer on both strands of one FASTA record.

    Parses ``-f <filename> -k <kmer> -c <chromosome_name>`` from *arguments*,
    selects the first record of the FASTA file whose header line (minus the
    leading ``>`` and surrounding whitespace) equals the chromosome name, and
    writes one tab-separated GFF3-style line per (possibly overlapping) match
    to ``answer.gff3`` in the current working directory.

    Args:
        arguments: Command-line style argument list. The default is the
            value of ``sys.argv[1:]`` captured when the function is defined.

    Returns:
        A message string: the usage text when the options are malformed,
        incomplete or the k-mer is empty; a "chromosome not found" message
        listing the headers of the input file; or ``'percent-GC = <value>'``
        where the value is the G+C fraction (0.0-1.0) of the sequence.

    Raises:
        IOError: if the input file cannot be read or the output written.
        KeyError: if the sequence holds a base other than A, C, G or T.
        re.error: if the k-mer is not a valid regular expression (it is
            interpolated into the search pattern unescaped).
    """
    usage = ("incorrect arguments, please format as: python_script.py "
             "-f <filename> -k <kmer> -c <chromosome_name>")

    # Unknown flags or a flag without its value get the same usage message
    # as a missing flag instead of an uncaught GetoptError.
    try:
        opts, _ = getopt.getopt(arguments, 'f:k:c:')
    except getopt.GetoptError:
        return usage

    opt_dic = dict(opts)
    if any(flag not in opt_dic for flag in ('-f', '-c', '-k')):
        return usage

    fasta_path = opt_dic['-f']
    kmer = opt_dic['-k']
    chromosome = opt_dic['-c']
    if not kmer:
        # An empty pattern would "match" at every position.
        return usage

    def rev_comp(sequence):
        """Return the reverse complement of an upper-case A/C/G/T string."""
        complement = {'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C'}
        return ''.join(complement[base] for base in reversed(sequence))

    def print_statement(start, strand):
        """Format one output row for a match starting at 1-based *start*."""
        return '%s\thw1_script\tkmer=%s\t%s\t%s\t.\t%s\t.\tID=S288C;Name=S288C\n' % (
            chromosome, kmer, start, start + len(kmer) - 1, strand)

    # Single pass over the file named by -f: collect every header (for the
    # error message) and the lines of the first record whose header matches.
    # Reading the headers from the same file that is searched keeps the
    # "not found" check and the sequence lookup consistent.
    chromosomes = []
    chunks = None  # stays None until the requested header has been seen
    in_target = False
    with open(fasta_path) as fasta:
        for line in fasta:
            if line.startswith('>'):
                name = line[1:].strip()
                chromosomes.append(name)
                in_target = chunks is None and name == chromosome
                if in_target:
                    chunks = []
            elif in_target:
                chunks.append(line.strip())

    if chunks is None:
        return 'chromosome not found in %s. \n chromosomes in file are:%s' % (
            fasta_path, ', '.join(chromosomes))

    sequence = ''.join(chunks).upper()

    # Zero-width lookahead so overlapping occurrences are all reported.
    pattern = re.compile('(?=%s)' % kmer)

    # Each strand is collected independently; pairing the two iterators
    # would truncate to the strand with fewer matches.
    # NOTE(review): negative-strand starts are offsets within the reverse
    # complement, as in the original; they are not mapped back to forward
    # coordinates.
    pos_strand = collections.deque(
        match.start() + 1 for match in pattern.finditer(sequence))
    neg_strand = collections.deque(
        match.start() + 1 for match in pattern.finditer(rev_comp(sequence)))

    with open('answer.gff3', 'w') as wfile:
        # Merge the two ascending queues; on a tie the '-' row goes first.
        while pos_strand and neg_strand:
            if pos_strand[0] < neg_strand[0]:
                wfile.write(print_statement(pos_strand.popleft(), '+'))
            else:
                wfile.write(print_statement(neg_strand.popleft(), '-'))

        for start in pos_strand:
            wfile.write(print_statement(start, '+'))
        for start in neg_strand:
            wfile.write(print_statement(start, '-'))

    # Guard the division so an empty record reports 0.0 instead of raising.
    gc_count = sequence.count('G') + sequence.count('C')
    gc_fraction = gc_count / float(len(sequence)) if sequence else 0.0
    return 'percent-GC = %s' % str(gc_fraction)

链接地址: http://www.djcxy.com/p/42430.html

上一篇: 在Python代码中删除文件时出错

下一篇: 在Python中使用标志时奇怪的输出