2015-12-03 77 views
0

我有一个脚本,它从输入文件中读取给定的URL,并收集每个URL的响应头(response header)信息。目前输入文件是作为命令行参数从外部传入的。执行方式: python collect.py <Input.txt>(原标题:Python:命令行参数问题)

输入文件:

1,http://www.example.com 
2,http://www.blahblah.com 
3,...... 

现在,我希望把ID和URL一起作为单个命令行参数传入,例如:

python collect.py 1,http://www.example.com 

然后执行脚本,并将结果写入输出文件。

#!/usr/bin/python 
import subprocess 
import json 
import sys 
import httplib 
import urlparse 
import pickle 
import sys 



class HeaderFetcher:
    """Collect HTTP response headers for a URL, following 301/302 redirects.

    After ``fetch()`` returns, ``report`` maps every URL that was requested
    (the initial URL plus each redirect target) to the list of
    ``(name, value)`` header pairs returned by ``getheaders()``.
    Fetching is best effort: a URL that cannot be parsed or retrieved is
    simply left out of ``report``.
    """

    # Headers sent when the caller does not supply any.
    DEFAULT_HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3'}
    # Upper bound on the number of URLs recorded for one redirect chain.
    MAX_URLS = 40
    # Status codes whose ``Location`` header is followed.
    REDIRECT_STATUSES = (301, 302)

    def __init__(self, url, headers=None):
        """Remember the start URL and the request headers to send.

        ``headers`` defaults to a fresh copy of ``DEFAULT_HEADERS`` so that
        instances never share one mutable dict.
        """
        self.report = {}
        self.initial_url = url
        if headers is None:
            headers = dict(self.DEFAULT_HEADERS)
        self.request_headers = headers

    def fetch(self):
        """Fetch headers for the initial URL and any redirects it yields."""
        self.fetchheaders(self.initial_url, self.request_headers)

    def fetchheaders(self, url, req_headers):
        """Record the response headers of ``url`` and follow its redirect.

        Only ``http`` and ``https`` URLs are requested; anything else is
        ignored. Errors are swallowed on purpose (best effort), but
        ``KeyboardInterrupt`` and ``SystemExit`` still propagate.
        """
        redirect_url = None
        try:
            parts = urlparse.urlparse(url)
            if parts.scheme == 'http':
                connection_class = httplib.HTTPConnection
            elif parts.scheme == 'https':
                connection_class = httplib.HTTPSConnection
            else:
                return
            con = connection_class(parts.hostname, parts.port, timeout=10)
            try:
                con.request("GET", url, None, req_headers)
                res = con.getresponse()
                self.report[url] = res.getheaders()
                if res.status in self.REDIRECT_STATUSES:
                    location = res.getheader('Location')
                    if location:
                        # Location may be relative; resolve it against the
                        # URL that produced the redirect.
                        redirect_url = urlparse.urljoin(url, location)
            finally:
                # Release the socket before recursing into the redirect.
                con.close()
        except Exception:
            # Unreachable host, timeout, malformed URL, ...: skip this URL.
            return

        if redirect_url is None or redirect_url in self.report:
            return
        if len(self.report) < self.MAX_URLS:
            self.fetchheaders(redirect_url, req_headers)

def process(infile='Input.txt'):
    """Fetch response headers for every ``ID,URL`` line in ``infile``.

    For each record the headers of the URL (and of every redirect hop
    recorded by ``HeaderFetcher``) are merged into one dict; later hops
    overwrite earlier ones on a name clash. One line per ID is then written
    to ``Headers_Final_List.txt`` in the form ``ID,name1,name2,...`` listing
    which of the server-identifying header names were present.

    Blank lines and lines without a comma are skipped. Everything after the
    first comma is treated as the URL, so URLs may themselves contain commas.
    """
    user_agent = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3'
    wanted = ('x-powered-by', 'server', 'x-aspnet-version', 'x-aspnetmvc-version')

    results = {}
    with open(infile, 'r') as source:
        for line in source:
            record = line.strip()
            if not record:
                continue
            app_id, sep, app_url = record.partition(',')
            if not sep or not app_url:
                # No comma / no URL: not a usable record.
                continue
            fetcher = HeaderFetcher(app_url, {'User-Agent': user_agent})
            fetcher.fetch()
            merged = {}
            for hop_headers in fetcher.report.values():
                for name, value in hop_headers:
                    merged[name] = value
            results[app_id] = merged

    # 'wb' keeps the '\n' line endings byte-for-byte on every platform
    # under Python 2, which this script targets.
    with open("Headers_Final_List.txt", 'wb') as out:
        for app_id, headers in results.items():
            present = [h for h in headers if h in wanted]
            out.write('{},{}'.format(app_id, ','.join(present)) + '\n')

if __name__ == '__main__':
    # Without an argument sys.argv[-1] would be this script's own path,
    # so insist on an explicit input file instead.
    if len(sys.argv) < 2:
        sys.exit('Usage: python collect.py <Input.txt>')
    process(sys.argv[1])

关于如何从命令行解析这样的单个参数,有什么建议吗?

+0

你能解释一下哪些不适用于你当前的代码? – Stuart

+0

在将参数传递给'open'之前,在参数上做一个'split'。 –

回答

1

sys.argv中的参数不是一个文件,但你却试图像读取文件一样去读取它。

这样的:

def process(infile='Input.txt'): 
     #f = open('Input.txt','r') 
     f = open(sys.argv[1],"r") 
     agents= {'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3'} 
     finalJson = {} 
     for line in f.readlines(): 

应该更像:

def process():
    """Parse the single ``ID,URL`` command-line argument.

    Returns:
        A ``(ID, URL)`` tuple of strings. Only the first comma separates
        the two fields, so the URL may itself contain commas.

    Raises:
        ValueError: if no argument was given or it contains no comma.
    """
    if len(sys.argv) < 2:
        raise ValueError('expected one argument of the form ID,URL')
    ID, sep, URL = sys.argv[1].partition(',')
    if not sep:
        raise ValueError('argument must look like ID,URL: %r' % (sys.argv[1],))
    return ID, URL

然后你就可以在其他方法中使用这个ID和URL了。

0
#!/usr/bin/python 

import sys 

# Echo the raw argument vector, then split the last argument on commas.
# Single-argument print(...) calls produce the same output on Python 2 and 3.
print('Number of arguments: {} arguments.'.format(len(sys.argv)))
print('Argument List: ' + str(sys.argv))
n = len(sys.argv) - 1
# sys.argv[n] always exists (index 0 is the script name) and split() always
# returns at least one item, so this cannot raise IndexError.
args = sys.argv[n].split(',')
for i in args:
    print(i)

python arg.py 1,abc.txt

Number of arguments: 2 arguments. 
Argument List: ['arg.py', '1,abc.txt'] 
1 
abc.txt 

即使程序运行时没有带参数,或者参数中不含逗号,此方法也不会因“IndexError: list index out of range”(列表索引超出范围)而崩溃。

0
# Split on the first comma only so a URL containing commas stays intact;
# named app_id rather than id to avoid shadowing the builtin.
app_id, url = sys.argv[1].split(',', 1)