strange output when using flags in python
I'm currently writing a script in python that takes a number of flags. This is my first attempt at such a program, and I am getting an output from the bash script that I don't quite understand. For example when I run the script in the bash shell:
$ python my_script.py -f <input_file.txt> -k <test_string> -c <user_input>
I get this output before my script's output:
usage: rm [-f | -i] [-dPRrvW] file ...
unlink file
I can't seem to get rid of this, which is frustrating for the prettiness of the output. Any help would be great!
The code I'm using:
import sys, getopt, re, subprocess, collections, itertools
def find_kmers( arguments=sys.argv[1:] ):
required_opts = ['-f','-c','-k']
opts, args = getopt.getopt(arguments,'f:k:c:')
opt_dic = dict(opts)
for opt in required_opts:
if opt not in opt_dic:
return "incorrect arguments, please format as: python_script.py -f <filename> -k <kmer> -c <chromosome_name>"
def rev_comp(sequence):
reversed_dic = {'A':'T','T':'A','C':'G','G':'C'}
return ''.join(reversed_dic[_] for _ in sequence[::-1])
kmer = opt_dic['-k']
subprocess.call(['bash','-c',"grep '>' S288C_R64.fasta > grep.tmp"])
chromosomes = [_[1:].strip() for _ in open('grep.tmp')]
subprocess.call(['bash','-c','rm','grep.tmp'])
found = False
if any(opt_dic['-c']==_ for _ in chromosomes):
found = True
def get_sequence(file):
sequence = ''
for line in file:
if line.startswith('>'): break
sequence += line.strip()
return sequence.upper()
ofile = open(opt_dic['-f'])
if found == True:
for line in ofile:
if line.startswith('>'):
if line[1:].strip() == opt_dic['-c']:
sequence = get_sequence(ofile)
break
else:
return 'chromosome not found in %s. n chromosomes in file are:%s'%(opt_dic['-f'],', '.join(str(_) for _ in chromosomes))
kmer_matches1 = re.finditer('(?=%s)'%opt_dic['-k'],sequence)
kmer_matches2 = re.finditer('(?=%s)'%opt_dic['-k'],rev_comp(sequence))
def print_statement(start,strand):
return '%sthw1_scripttkmer=%st%st%st.t%st.tID=S288C;Name=S288Cn'%(opt_dic['-c'],opt_dic['-k'],start,start+len(opt_dic['-k'])-1,strand)
pos_strand = collections.deque()
neg_strand = collections.deque()
for match1,match2 in itertools.izip(kmer_matches1,kmer_matches2):
pos_strand.append(match1.start()+1)
neg_strand.append(match2.start()+1)
wfile = open('answer.gff3','w')
while len(pos_strand)>0 and len(neg_strand)>0:
if pos_strand[0]<neg_strand[0]:
start = pos_strand.popleft()
wfile.write(print_statement(start,'+'))
else:
start = neg_strand.popleft()
wfile.write(print_statement(start,'-'))
while len(pos_strand)>0:
start = pos_strand.popleft()
wfile.write(print_statement(start,'+'))
while len(neg_strand)>0:
start = neg_strand.popleft()
wfile.write(print_statement(start,'-'))
wfile.close()
return 'percent-GC = %s'%str(sum(sequence.count(gc) for gc in ["G","C"])/float(len(sequence)))
if __name__ == '__main__':
print find_kmers()
Invoking bash
one-liners requires that the bash commands be a single string. Change:
subprocess.call(['bash','-c','rm','grep.tmp'])
to:
subprocess.call(['bash', '-c', 'rm grep.tmp'])
Or, more reasonably, don't use subprocesses for this, just do:
os.unlink('grep.tmp') # Or os.remove; same thing, different names
which is much faster and less error prone.
In fact, all of your subprocess usage could be replaced with real Python code, and it would improve it substantially (and much of the Python code simplifies too):
def find_kmers( arguments=sys.argv[1:] ):
required_opts = ['-f','-c','-k']
opts, args = getopt.getopt(arguments,'f:k:c:')
opt_dic = dict(opts)
for opt in required_opts:
if opt not in opt_dic:
return "incorrect arguments, please format as: python_script.py -f <filename> -k <kmer> -c <chromosome_name>"
def rev_comp(sequence):
reversed_dic = {'A':'T','T':'A','C':'G','G':'C'}
return ''.join(reversed_dic[_] for _ in sequence[::-1])
kmer = opt_dic['-k']
# Replaces grep with temp file with trivial Python equivalent
with open('S288C_R64.fasta') as f:
chromosomes = [line[1:].strip() for line in f if '>' in line]
# No need for any loop when just checking for exact value
if opt_dic['-c'] not in chromosomes:
return 'chromosome not found in %s. n chromosomes in file are:%s'%(opt_dic['-f'],', '.join(str(_) for _ in chromosomes))
def get_sequence(file):
sequence = ''
for line in file:
if line.startswith('>'): break
sequence += line.strip()
return sequence.upper()
with open(opt_dic['-f']) as ofile:
for line in ofile:
if line.startswith('>'):
if line[1:].strip() == opt_dic['-c']:
sequence = get_sequence(ofile)
break
kmer_matches1 = re.finditer('(?=%s)'%opt_dic['-k'],sequence)
kmer_matches2 = re.finditer('(?=%s)'%opt_dic['-k'],rev_comp(sequence))
def print_statement(start,strand):
return '%sthw1_scripttkmer=%st%st%st.t%st.tID=S288C;Name=S288Cn'%(opt_dic['-c'],opt_dic['-k'],start,start+len(opt_dic['-k'])-1,strand)
pos_strand = collections.deque()
neg_strand = collections.deque()
for match1,match2 in itertools.izip(kmer_matches1,kmer_matches2):
pos_strand.append(match1.start()+1)
neg_strand.append(match2.start()+1)
with open('answer.gff3','w') as wfile:
while pos_strand and neg_strand:
if pos_strand[0]<neg_strand[0]:
start = pos_strand.popleft()
wfile.write(print_statement(start,'+'))
else:
start = neg_strand.popleft()
wfile.write(print_statement(start,'-'))
for start in pos_strand:
wfile.write(print_statement(start,'+'))
for start in neg_strand:
wfile.write(print_statement(start,'-'))
return 'percent-GC = %s'%str(sum(sequence.count(gc) for gc in ["G","C"])/float(len(sequence)))
链接地址: http://www.djcxy.com/p/42430.html
上一篇: 在Python代码中删除文件时出错
下一篇: 在Python中使用标志时奇怪的输出