import httplib2
import logging
import os
import re
import sys
import base.logger
import default_stock_list
class KWebCrawler():
    """Crawl Yahoo! Taiwan stock pages and derive a "K" figure per stock.

    For every stock symbol the crawler downloads the quote page (close
    price) and the company page (book value and ROE) and combines them as
    ``K = ROE * 4 * BookValue / ClosePrice``.

    Crawling is best effort: a value that cannot be downloaded or parsed is
    reported as ``None`` (and a warning is logged) instead of raising.
    """

    _COMPANY_URL = 'http://tw.stock.yahoo.com/d/s/company_%s.html'
    _QUOTE_URL = 'http://tw.stock.yahoo.com/q/ts?s=%s'

    # The pages are decoded as Big5, line by line.
    _PAGE_ENCODING = 'big5'

    # Text that immediately precedes the book value on the company page; the
    # value itself runs up to the following unit character.
    _BOOK_VALUE_MARKER = '每股淨值: \u3000\u3000'
    _BOOK_VALUE_UNIT = '元'

    # Label of the ROE row; the figure is read from the line after the label.
    _ROE_MARKER = '股東權益報酬率'

    # Opening tag that identifies the line holding the quote table.
    _QUOTE_TABLE_MARKER = (
        '<table border="0" cellpadding="4" cellspacing="1" width="100%">')

    # The close price is the text that follows the 40th '>' on the quote
    # table line (zero-based index 39).
    # NOTE(review): this offset is tied to the page layout -- confirm it
    # still matches the live markup.
    _CLOSE_PRICE_TAG_INDEX = 39

    def __init__(self, stock_symbols):
        """Remember the symbols to crawl.

        Args:
            stock_symbols: iterable of stock symbols; each one is
                substituted into the page URLs.
        """
        self._logger = logging.getLogger()
        self._stock_symbols = stock_symbols

    def Crawl(self):
        """Crawl every configured symbol.

        Returns:
            A list with one result dict per symbol (see ``_CrawlStock``), in
            the same order as the symbols passed to the constructor.
        """
        return [self._CrawlStock(symbol) for symbol in self._stock_symbols]

    def _CrawlStock(self, stock_symbol):
        """Collect all figures for one stock.

        Returns:
            A dict with the keys ``StockSymbol``, ``ClosePrice``, ``ROE``,
            ``BookValue`` and ``K``. Every value except ``StockSymbol`` is
            ``None`` when it could not be obtained.
        """
        rv = {
            'StockSymbol': stock_symbol,
            'ClosePrice': self._CrawlClosePrice(stock_symbol),
        }
        company = self._CrawlCompany(stock_symbol)
        rv['ROE'] = company['ROE']
        rv['BookValue'] = company['BookValue']
        rv['K'] = self._GetK(rv)
        self._logger.debug(rv)
        return rv

    def _CrawlCompany(self, stock_symbol):
        """Download the company page and extract book value and ROE.

        Returns:
            A dict with the keys ``BookValue`` and ``ROE``; a value is
            ``None`` when it was not found, and both are ``None`` when the
            page could not be downloaded.
        """
        try:
            url = self._COMPANY_URL % stock_symbol
            _, content = httplib2.Http().request(url)
            return self._ParseCompany(content)
        except Exception as err:
            # Best effort: one failing stock must not stop the whole crawl.
            # Unlike a bare ``except`` this lets KeyboardInterrupt and
            # SystemExit through, so the crawl can still be interrupted.
            self._logger.warning(
                'Failed to crawl company page for %s: %r', stock_symbol, err)
            return {'BookValue': None, 'ROE': None}

    def _CrawlClosePrice(self, stock_symbol):
        """Download the quote page and extract the close price.

        Returns:
            The price as text, or ``None`` when the page could not be
            downloaded or the price could not be located.
        """
        try:
            url = self._QUOTE_URL % stock_symbol
            _, content = httplib2.Http().request(url)
            return self._ParseClosePrice(content)
        except Exception as err:
            # Best effort, see _CrawlCompany.
            self._logger.warning(
                'Failed to crawl close price for %s: %r', stock_symbol, err)
            return None

    def _DecodeLines(self, content):
        """Split raw page bytes into decoded text lines."""
        # errors='replace' keeps a single undecodable byte from aborting the
        # whole page; the delimiters searched for are ASCII or valid Big5.
        return [raw_line.decode(self._PAGE_ENCODING, errors='replace')
                for raw_line in content.splitlines()]

    def _ParseCompany(self, content):
        """Extract book value and ROE from the raw company page bytes."""
        company = {'BookValue': None, 'ROE': None}
        lines = self._DecodeLines(content)
        for index, line in enumerate(lines):
            if self._BOOK_VALUE_MARKER in line:
                begin = (line.index(self._BOOK_VALUE_MARKER)
                         + len(self._BOOK_VALUE_MARKER))
                end = line.find(self._BOOK_VALUE_UNIT, begin)
                if end != -1:
                    company['BookValue'] = line[begin:end]
            # The ROE figure sits on the line after its label, so the label
            # on the very last line has nothing to read.
            if self._ROE_MARKER in line and index + 1 < len(lines):
                following = lines[index + 1]
                begin = following.find('>') + 1
                end = following.find('%<', begin)
                if begin > 0 and end != -1:
                    company['ROE'] = following[begin:end]
        return company

    def _ParseClosePrice(self, content):
        """Extract the close price from the raw quote page bytes."""
        for line in self._DecodeLines(content):
            if self._QUOTE_TABLE_MARKER not in line:
                continue
            tag_ends = [found.start() for found in re.finditer('>', line)]
            if len(tag_ends) <= self._CLOSE_PRICE_TAG_INDEX:
                return None
            begin = tag_ends[self._CLOSE_PRICE_TAG_INDEX] + 1
            end = line.find('<', begin)
            if end == -1:
                return None
            return line[begin:end]
        return None

    def _GetK(self, basic_info):
        """Compute ``ROE * 4 * BookValue / ClosePrice``.

        Args:
            basic_info: dict holding ``ROE``, ``ClosePrice`` and
                ``BookValue`` as numbers or numeric text.

        Returns:
            The result formatted with two decimals, or ``None`` when an
            input is missing, not numeric, or the close price is zero.
        """
        try:
            # NOTE(review): the factor 4 presumably annualises a quarterly
            # ROE -- confirm against the data source.
            scaled_roe = float(basic_info['ROE']) * 4
            price = float(basic_info['ClosePrice'])
            book_value = float(basic_info['BookValue'])
            return '%.2f' % (scaled_roe * book_value / price)
        except (KeyError, TypeError, ValueError, ZeroDivisionError,
                OverflowError):
            return None
def main():
    """Set up root logging, then run every crawl in a fixed order.

    Calls ``base.logger.config_root`` with ``level=logging.INFO`` and then
    invokes each ``crawl_*`` function once. Returns ``None``.
    """
    base.logger.config_root(level=logging.INFO)
    crawl_jobs = (
        crawl_msci_taiwan,
        crawl_taiwan_50,
        crawl_taiwan_100,
        crawl_taiwan_titc,
        crawl_taiwan_e,
        crawl_taiwan_divid,
    )
    for crawl_job in crawl_jobs:
        crawl_job()
def crawl_msci_taiwan():
    """Crawl the symbols from ``GetMsciTaiwanList`` and print each result."""
    symbols = default_stock_list.DefaultStockList().GetMsciTaiwanList()
    for result in KWebCrawler(symbols).Crawl():
        print(result)
def crawl_taiwan_50():
    """Crawl the symbols from ``GetTaiwan50List`` and print each result."""
    symbols = default_stock_list.DefaultStockList().GetTaiwan50List()
    for result in KWebCrawler(symbols).Crawl():
        print(result)
def crawl_taiwan_100():
    """Crawl the symbols from ``GetTaiwan100List`` and print each result.

    This function used to be defined twice in a row with identical bodies;
    the second definition merely rebound the name, so a single definition
    is kept.
    """
    stock_list = default_stock_list.DefaultStockList()
    crawler = KWebCrawler(stock_list.GetTaiwan100List())
    for rv in crawler.Crawl():
        print(rv)
def crawl_taiwan_titc():
    """Crawl the symbols from ``GetTaiwanTitcList`` and print each result."""
    symbols = default_stock_list.DefaultStockList().GetTaiwanTitcList()
    for result in KWebCrawler(symbols).Crawl():
        print(result)
def crawl_taiwan_e():
    """Crawl the symbols from ``GetTaiwanEList`` and print each result."""
    symbols = default_stock_list.DefaultStockList().GetTaiwanEList()
    for result in KWebCrawler(symbols).Crawl():
        print(result)
def crawl_taiwan_divid():
    """Crawl the symbols from ``GetTaiwanDividList`` and print each result."""
    symbols = default_stock_list.DefaultStockList().GetTaiwanDividList()
    for result in KWebCrawler(symbols).Crawl():
        print(result)
if __name__ == '__main__':
    # Script entry point: whatever main() returns is handed to sys.exit().
    exit_status = main()
    sys.exit(exit_status)
# ---- File: default_stock_list.py ----
import logging
import os
import sys
import base.logger
class DefaultStockList():
    """Built-in stock symbol lists, one getter per index.

    The symbols are kept in class-level tuples; every getter hands out a
    fresh ``list`` copy, so callers may modify what they receive.
    """

    # Source: http://www.msci.com/eqb/custom_indices/tw_performance.html
    _MSCI_TAIWAN = (
        '2330', '2317', '2454', '2412', '1301', '2002', '1303', '1326', '2357', '2882',
        '1216', '2382', '2498', '3045', '2308', '2891', '2886', '2881', '2303', '2311',
        '2105', '4904', '2892', '2885', '2324', '1101', '2325', '6505', '1402', '2912',
        '2883', '2354', '2474', '2347', '2880', '3673', '5880', '2887', '2301', '2890',
        '3231', '2409', '2801', '1102', '2353', '2884', '9904', '3008', '4938', '6176',
        '1722', '2207', '3697', '3702', '3034', '6121', '3481', '2888', '2823', '2201',
        '2448', '9921', '3037', '6239', '2049', '2103', '1504', '1314', '9933', '1605',
        '2610', '2385', '9945', '2395', '2337', '1802', '8299', '2903', '3044', '2618',
        '2915', '2603', '2384', '2834', '2379', '8069', '6286', '3189', '2392', '2356',
        '1434', '1227', '2101', '2006', '2542', '2606', '2015', '5522', '1704', '2362',
        '1717', '2615', '2609', '6005', '5483', '4958', '2393', '2204', '2451', '1590',
        '2707', '3474', '8046', '6244',
    )

    # Source: http://www.twse.com.tw/ch/trading/indices/twco/tai50i.php
    _TAIWAN_50 = (
        '3673', '3697', '3481', '2330', '2303', '2882', '2357', '1303', '2883', '1301',
        '2002', '2311', '2317', '1402', '2324', '2892', '2880', '2801', '1216', '1101',
        '1102', '2201', '2382', '2308', '1326', '2886', '2891', '2325', '2353', '1722',
        '2105', '2412', '2409', '2207', '2301', '2912', '2354', '2347', '2474', '3045',
        '2454', '2881', '4904', '2885', '3008', '2498', '2890', '3231', '6505', '5880',
    )

    # Source: http://www.twse.com.tw/ch/trading/indices/tmcc/tai100i.php
    _TAIWAN_100 = (
        '3406', '3474', '8046', '2049', '4938', '8422', '1789', '2723', '1590', '2727',
        '5871', '4958', '4725', '3149', '2344', '2371', '2356', '2888', '2609', '2337',
        '2388', '1504', '1507', '2603', '2501', '2610', '2204', '1605', '1314', '1434',
        '2809', '2812', '1440', '1907', '1802', '2379', '2393', '2834', '2707', '1723',
        '3702', '2384', '2385', '2542', '2545', '2392', '2823', '2395', '2362', '2360',
        '9933', '1717', '2607', '2845', '2903', '2015', '9921', '2504', '2106', '1704',
        '9914', '2101', '1710', '9904', '2511', '9945', '2915', '1227', '2103', '9917',
        '9907', '1319', '2006', '2606', '1304', '2615', '2327', '3037', '2855', '6005',
        '2618', '9940', '5522', '2548', '3034', '8008', '2887', '3044', '2451', '2448',
        '2450', '6176', '2884', '2889', '6239', '6285', '6269', '6286', '8078', '3189',
    )

    # Source: http://www.twse.com.tw/ch/trading/indices/titc/taititc.php
    _TAIWAN_TITC = (
        '3474', '4938', '3697', '3481', '2330', '2303', '2357', '2344', '2311', '2324',
        '2371', '2356', '2382', '2337', '2388', '2325', '2353', '2379', '3702', '2385',
        '2395', '2362', '2409', '2347', '3034', '2454', '8008', '2451', '2448', '2450',
        '2498', '6239', '3231', '6285', '6286', '8078', '3189',
    )

    # Source: http://www.twse.com.tw/ch/trading/indices/twei/taiei.php
    _TAIWAN_E = (
        '3406', '8046', '2049', '8422', '1789', '3673', '2723', '1590', '2727', '4958',
        '4725', '3149', '1303', '1301', '2002', '2317', '1402', '1216', '1101', '1102',
        '2201', '2609', '2308', '1326', '1504', '1507', '2603', '2610', '2204', '1605',
        '1314', '1434', '1440', '1907', '1802', '2393', '2707', '1722', '1723', '2384',
        '2545', '2392', '2105', '2360', '9933', '2412', '1717', '2607', '2903', '2015',
        '9921', '2504', '2207', '2106', '1704', '2301', '9914', '2101', '1710', '9904',
        '2912', '9945', '2915', '2354', '1227', '2103', '9917', '9907', '1319', '2006',
        '2606', '1304', '2615', '2327', '3037', '2618', '2474', '3045', '3044', '4904',
        '3008', '6176', '6269', '6505',
    )

    # Source: http://www.twse.com.tw/ch/trading/indices/twdp/taidividi.php
    _TAIWAN_DIVID = (
        '8422', '4725', '2303', '1303', '1301', '2324', '1101', '2382', '1326', '2886',
        '2325', '2379', '1723', '2385', '2542', '2412', '2301', '1710', '2103', '2606',
        '1304', '5522', '2548', '3045', '3034', '2454', '2451', '4904', '3231', '6285',
    )

    def __init__(self):
        self._logger = logging.getLogger()

    def GetMsciTaiwanList(self):
        """Return a new list of the MSCI Taiwan symbols.

        Source: http://www.msci.com/eqb/custom_indices/tw_performance.html
        """
        return list(self._MSCI_TAIWAN)

    def GetTaiwan50List(self):
        """Return a new list of the Taiwan 50 symbols.

        Source: http://www.twse.com.tw/ch/trading/indices/twco/tai50i.php
        """
        return list(self._TAIWAN_50)

    def GetTaiwan100List(self):
        """Return a new list of the Taiwan 100 symbols.

        Source: http://www.twse.com.tw/ch/trading/indices/tmcc/tai100i.php
        """
        return list(self._TAIWAN_100)

    def GetTaiwanTitcList(self):
        """Return a new list of the Taiwan TITC symbols.

        Source: http://www.twse.com.tw/ch/trading/indices/titc/taititc.php
        """
        return list(self._TAIWAN_TITC)

    def GetTaiwanEList(self):
        """Return a new list of the Taiwan E symbols.

        Source: http://www.twse.com.tw/ch/trading/indices/twei/taiei.php
        """
        return list(self._TAIWAN_E)

    def GetTaiwanDividList(self):
        """Return a new list of the Taiwan Divid symbols.

        Source: http://www.twse.com.tw/ch/trading/indices/twdp/taidividi.php
        """
        return list(self._TAIWAN_DIVID)
def main():
    """Print each built-in stock list, one list per ``print`` call.

    Calls ``base.logger.config_root`` with ``level=logging.DEBUG`` first.
    Returns ``None``.
    """
    base.logger.config_root(level=logging.DEBUG)
    stock_list = DefaultStockList()
    list_getters = (
        stock_list.GetMsciTaiwanList,
        stock_list.GetTaiwan50List,
        stock_list.GetTaiwan100List,
        stock_list.GetTaiwanTitcList,
        stock_list.GetTaiwanEList,
        stock_list.GetTaiwanDividList,
    )
    for list_getter in list_getters:
        print(list_getter())
if __name__ == '__main__':
    # Script entry point: whatever main() returns is handed to sys.exit().
    exit_status = main()
    sys.exit(exit_status)
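# Blog page footer carried over with the paste (not part of the source):
# 沒有留言: / 張貼留言 ("No comments:" / "Post a comment")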