2013-08-27 126 views
0

I am using this Python script to make HTTP requests:

from twisted.internet import reactor, threads 
from urlparse import urlparse 
import httplib 
import itertools 


concurrent = 200 
finished=itertools.count(1) 
reactor.suggestThreadPoolSize(concurrent) 

def getStatus(ourl): 
    url = urlparse(ourl) 
    conn = httplib.HTTPConnection(url.netloc) 
    conn.request("HEAD", url.path) 
    res = conn.getresponse() 
    return res.status 

def processResponse(response,url): 
    print response, url 
    processedOne() 

def processError(error,url): 
    print "error", url#, error 
    processedOne() 

def processedOne():
    if finished.next() == added:
        reactor.stop()

def addTask(url): 
    req = threads.deferToThread(getStatus, url) 
    req.addCallback(processResponse, url) 
    req.addErrback(processError, url) 

added=0 
for url in open('urllist.txt'): 
    added+=1 
    addTask(url.strip()) 

try: 
    reactor.run() 
except KeyboardInterrupt: 
    reactor.stop() 

When I try to run the script with `$ python test.py`,

it just prints the URLs without doing a curl or sending any HTTP requests.

How can I send a request for each one?

Thanks

+0

Where do you add your functions to the `reactor`? – dg123

+0

@user1436026 Can you explain a bit more, please? – SimpleojbC

+1

Why are you using `httplib` here instead of Twisted's HTTP code? Or, if you want to use `httplib` and one thread per connection, why are you using Twisted and starting a reactor at all? – abarnert
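For illustration, here is a minimal sketch of the all-Twisted approach this comment hints at, using `twisted.web.client.Agent`. This is an assumption about how one might wire it up, not code from the thread; it reuses the question's urllist.txt and expects URLs that include the http:// scheme:

from twisted.internet import reactor, defer
from twisted.web.client import Agent

agent = Agent(reactor)

def report(response, url):
    # Agent fires the Deferred with a Response once the headers arrive
    print response.code, url

def reportError(failure, url):
    print "error", url, failure.getErrorMessage()

deferreds = []
for url in open('urllist.txt'):
    url = url.strip()
    d = agent.request('HEAD', url)
    d.addCallbacks(report, reportError, callbackArgs=(url,), errbackArgs=(url,))
    deferreds.append(d)

# stop the reactor once every request has resolved one way or the other
defer.gatherResults(deferreds, consumeErrors=True).addBoth(lambda _: reactor.stop())
reactor.run()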

Answers

0

This should work if your URLs do not contain the 'http://' prefix. If they do contain 'http://', see the note in the comments:

import httplib

def requester(url):
    host = url.split('/')[0]
    # if urls do contain 'http://' --> host = url.split('/')[2]
    req = url[url.find(host)+len(host):]
    conn = httplib.HTTPConnection(host)
    conn.request("HEAD", "/" + req.lstrip("/"))
    response = conn.getresponse()
    print response.status, response.reason

    # if you want the response body...
    # data = response.read()
    # print data

for url in open('urls.txt'):
    try:
        requester(url.strip())
    except Exception, e:
        print e

That should give you a working solution. I'd also suggest reading through the httplib documentation.
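For reference, a small sketch (an addition building on the question's `urlparse` import, not part of the original answer) showing how both URL forms can be normalized the same way:

from urlparse import urlparse

def split_url(url):
    # urlparse only fills in netloc when a scheme is present,
    # so prepend one for scheme-less urls
    if '://' not in url:
        url = 'http://' + url
    parts = urlparse(url)
    return parts.netloc, parts.path or '/'

print split_url('example.com/index.html')        # ('example.com', '/index.html')
print split_url('http://example.com/index.html') # ('example.com', '/index.html')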

+0

That's exactly what I was looking for, thanks a lot – SimpleojbC

0

Tested code. Rather than spawning an HTTP or curl process, this uses `inlineCallbacks` and `deferToThread`. It also uses `defer.gatherResults` to know when all of the deferreds have been handled (instead of the OP's counter approach):

from twisted.internet import reactor, defer
from twisted.internet.threads import deferToThread
from urlparse import urlparse
import httplib

# bind deferToThread as a decorator: calling the decorated function
# runs the original in the thread pool and returns a Deferred
threadDeferred = deferToThread.__get__

@threadDeferred 
def get_url_head(url_arg): 
    url = urlparse(url_arg) 
    conn = httplib.HTTPConnection(url.netloc) 
    conn.request("HEAD", url.path) 
    res = conn.getresponse() 
    conn.close() 
    return res.status 

@defer.inlineCallbacks
def check_url(sem, url_arg):
    yield sem.acquire()
    try:
        result = yield get_url_head(url_arg)
        defer.returnValue(result)
    finally:
        sem.release()

@defer.inlineCallbacks
def run(reactor, SEMAPHORE_SIZE=10):
    sem = defer.DeferredSemaphore(SEMAPHORE_SIZE)
    deferreds = []
    failed_urls = []
    responded_urls = []
    with open('urllist.txt', 'r') as f:
        for line in f:
            url_arg = line.strip()
            d = check_url(sem, url_arg)
            d.addCallback(processResult, url_arg, responded_urls).addErrback(processErr, url_arg, failed_urls)
            deferreds.append(d)
    res = yield defer.gatherResults(deferreds)
    # Do something else with failed_urls and responded_urls
    reactor.callLater(0, reactor.stop)

def main(): 
    from twisted.internet import reactor 
    reactor.callWhenRunning(run,reactor) 
    reactor.run() 

def processResult(result, url_arg, responded_urls):
    print "Response %s from %s" % (result, url_arg)
    responded_urls.append((url_arg, result))

def processErr(err,url_arg,failed_urls): 
    print "Error checking %s: %s" % (url_arg,repr(err.value)) 
    failed_urls.append((url_arg,err.value)) 

if __name__ == '__main__': 
    main() 
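
As a side note (an addition, not part of the original answer): `DeferredSemaphore` also provides a `run` method that wraps the acquire/try/finally/release dance, so `check_url` above could be reduced to:

def check_url(sem, url_arg):
    # sem.run acquires the semaphore, calls get_url_head(url_arg), and
    # releases the semaphore when the resulting Deferred fires
    return sem.run(get_url_head, url_arg)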
+0

I used it and got results quickly, thank you very much – SimpleojbC