
Fetching stock data with urllib.request

I wrote the code below to retrieve stock data for the S&P 500. The code works, but it is very slow because of the number of urlopen requests. What strategies can I use to speed this up?

from urllib.request import urlopen 
import csv 


class StockQuote:
    """gets stock data from Yahoo Finance"""

    def __init__(self, quote):
        self.quote = quote

    def lastPrice(self):
        url = 'http://finance.yahoo.com/d/quotes.csv?s={ticker}&f=l1'.format(ticker=self.quote)
        return bytes.decode(urlopen(url).read().strip())

    def volume(self):
        url = 'http://finance.yahoo.com/d/quotes.csv?s={ticker}&f=v0'.format(ticker=self.quote)
        return bytes.decode(urlopen(url).read().strip())

    def yearrange(self):
        url = 'http://finance.yahoo.com/d/quotes.csv?s={ticker}&f=w0'.format(ticker=self.quote)
        return bytes.decode(urlopen(url).read().strip())

    def PEratio(self):
        url = 'http://finance.yahoo.com/d/quotes.csv?s={ticker}&f=r0'.format(ticker=self.quote)
        return bytes.decode(urlopen(url).read().strip())

    def bookValue(self):
        url = 'http://finance.yahoo.com/d/quotes.csv?s={ticker}&f=b4'.format(ticker=self.quote)
        return bytes.decode(urlopen(url).read().strip())

    def EBITDA(self):
        url = 'http://finance.yahoo.com/d/quotes.csv?s={ticker}&f=j4'.format(ticker=self.quote)
        return bytes.decode(urlopen(url).read().strip())

    def PEGRatio(self):
        url = 'http://finance.yahoo.com/d/quotes.csv?s={ticker}&f=r5'.format(ticker=self.quote)
        return bytes.decode(urlopen(url).read().strip())

    def ticker(self):
        url = 'http://finance.yahoo.com/d/quotes.csv?s={ticker}&f=s0'.format(ticker=self.quote)
        return bytes.decode(urlopen(url).read().strip())


def openSP500file():
    with open(r'C:\Users\dev\Desktop\SP500.csv', 'r') as f:
        SP500 = csv.reader(f, delimiter=',')
        for x in SP500:
            indStk = x[0]
            printdata(indStk)

def printdata(stk):
    stkObj = StockQuote(stk)
    stkdata = {}
    stkdata['Ticker'] = stkObj.ticker()
    stkdata['Price'] = stkObj.lastPrice()
    stkdata['PE Ratio'] = stkObj.PEratio()
    stkdata['Volume'] = stkObj.volume()
    stkdata['Year Range'] = stkObj.yearrange()
    stkdata['Book Value per Share'] = stkObj.bookValue()
    stkdata['EBITDA'] = stkObj.EBITDA()
    stkdata['PEG Ratio'] = stkObj.PEGRatio()
    print(stkdata)

def main(): 
    openSP500file() 


if __name__ == '__main__': 
    main() 

Thanks!

Answers


You can use the threading or multiprocessing module to fetch all of those URLs at the same time. Since the fetches are completely independent of one another, this can save you a lot of time.
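
Here is a minimal sketch of that idea, assuming Python 3.2+ and using concurrent.futures.ThreadPoolExecutor, which is a thin convenience layer over the threading module. The ticker list and the l1 field code are only placeholders; the URL format is the one from the question.

from concurrent.futures import ThreadPoolExecutor
from urllib.request import urlopen

def fetch_last_price(ticker):
    # same request as StockQuote.lastPrice(), one URL per ticker
    url = 'http://finance.yahoo.com/d/quotes.csv?s={0}&f=l1'.format(ticker)
    return ticker, urlopen(url).read().strip().decode()

tickers = ['YHOO', 'GOOG', 'MSFT']   # in practice, read these from SP500.csv

with ThreadPoolExecutor(max_workers=10) as pool:
    # map() runs fetch_last_price concurrently and yields results in input order
    for ticker, price in pool.map(fetch_last_price, tickers):
        print(ticker, price)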


Thanks! I've never used either module, but I'll take a crack at it and see whether I can implement it. – 2011-12-18 00:47:33


If all of your requests go to the same domain, I would suggest using urllib3. It is not part of the standard Python installation, but it implements connection pooling, so all of the individual requests are faster.
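
A minimal sketch with urllib3 (installed separately, e.g. via pip): a single PoolManager keeps connections to finance.yahoo.com open, so repeated requests skip the connection-setup cost. The field code below is just an example.

import urllib3

http = urllib3.PoolManager()

def get_quote(ticker, fields='l1'):
    url = 'http://finance.yahoo.com/d/quotes.csv?s={0}&f={1}'.format(ticker, fields)
    r = http.request('GET', url)   # the connection is pooled and reused
    return r.data.strip().decode()

for ticker in ('YHOO', 'GOOG', 'MSFT'):
    print(ticker, get_quote(ticker))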


Thanks! I'll check out that library once I've figured out Python's threading module. – 2011-12-18 00:47:04


You can request the information for multiple stocks with a single call to request.urlopen:

import urllib.request as request 
import urllib.parse as parse 
import csv 
import codecs 
import pprint 

def printdata(stks):
    params = parse.urlencode((('s', '+'.join(stks)), ('f', 'sl1rvwb4j4r5')))
    url = 'http://finance.yahoo.com/d/quotes.csv'
    url = '?'.join((url, params))
    req = request.urlopen(url)
    f = codecs.getreader('utf8')(req)
    fields = '''Ticker Price PE_Ratio Volume Year_Range Book_Value_per_Share
                EBITDA PEG_Ratio'''.split()
    for row in csv.reader(f):
        stkdata = dict(zip(fields, row))
        pprint.pprint(stkdata)

printdata('YHOO GOOG MSFT'.split()) 

Output:

{'Book_Value_per_Share': '10.051', 
'EBITDA': '1.406B', 
'PEG_Ratio': '1.47', 
'PE_Ratio': '18.56', 
'Price': '14.96', 
'Ticker': 'YHOO', 
'Volume': '32625192', 
'Year_Range': '11.09 - 18.84'} 
{'Book_Value_per_Share': '169.355', 
'EBITDA': '13.446B', 
'PEG_Ratio': '0.89', 
'PE_Ratio': '21.12', 
'Price': '625.96', 
'Ticker': 'GOOG', 
'Volume': '4459782', 
'Year_Range': '473.02 - 642.96'} 
{'Book_Value_per_Share': '7.062', 
'EBITDA': '30.146B', 
'PEG_Ratio': '0.98', 
'PE_Ratio': '9.29', 
'Price': '26.00', 
'Ticker': 'MSFT', 
'Volume': '101410080', 
'Year_Range': '23.65 - 29.46'}
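
To cover the full S&P 500, you would feed this printdata() the tickers from SP500.csv. A URL with 500 symbols gets very long, so one option is to send them in chunks; this is only a sketch, and the batch size of 50 is an arbitrary assumption rather than a documented Yahoo limit.

import csv

def openSP500file():
    # read every ticker from the same SP500.csv layout used in the question
    with open(r'C:\Users\dev\Desktop\SP500.csv', newline='') as f:
        tickers = [row[0] for row in csv.reader(f)]
    # one request per batch of 50 symbols instead of 8 requests per symbol
    for i in range(0, len(tickers), 50):
        printdata(tickers[i:i + 50])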