2012年10月15日 星期一
《西遊記》前五十回
怪越打越強。
王下七武海鷹眼說,王者最厲害的功夫不是武力,而是化敵為友的能力,這個才可怕。魯夫連打都不用打,女帝就變自己人了 ((羞))。孫悟空也是。以現在的術語來說,就是 CALL OUT KING,唐三藏是談判籌碼,豬八戒是個丑角搞笑用的,沙悟淨是萬年留守人員。當然啦,孫悟空對唐三藏也是有情有義。就是個搞笑小說,害我笑到快岔氣了。
略過不題。說點三大法人交易記錄。TWSE 對我很好心,每次抓檔都不用故意等幾秒鐘,因此我一下就抓完了。
Schema:
-- Daily trading summary: one row per (trading_date, item) pair.
-- Re-inserting an existing pair is silently ignored by the UNIQUE clause below.
CREATE TABLE IF NOT EXISTS TradingSummary
(
    creation_dt  datetime DEFAULT CURRENT_TIMESTAMP,  -- filled in automatically at insert time
    trading_date datetime NOT NULL,                   -- loader stores the CSV file name here (YYYY-MM-DD)
    item         text     NOT NULL,                   -- first CSV column (presumably the investor category)
    buy          real,                                -- second CSV column, thousands separators removed
    sell         real,                                -- third CSV column, thousands separators removed
    diff         real,                                -- fourth CSV column, thousands separators removed
    UNIQUE (trading_date, item) ON CONFLICT IGNORE
);
Source code:
import csv
import logging
import os
import sqlite3
from ..common import logger
class Sourcing:
    """Download TWSE daily trading-summary CSV files and load them into SQLite.

    The workflow is date driven: ``init_dates`` expands a date range into
    ``self.DATES``; every other step iterates over that list and derives the
    per-day CSV file name (``YYYY-MM-DD``) from it.
    """

    def __init__(self):
        # Root logger; handlers/levels are expected to be configured elsewhere.
        self.LOGGER = logging.getLogger()
        # ``%s`` is replaced by the trading date formatted as YYYYMMDD.
        self.URL_TEMPLATE = '''http://www.twse.com.tw/ch/trading/fund/BFI82U/BFI82U_print.php?begin_date=%s&end_date=&report_type=day&language=ch&save=csv'''
        self.DATES = []
        self.CSV_DIR = '''./dataset/trading_summary/csv/'''
        self.DB_FILE = './db/stocktotal.db'
        self.SQL_INSERT = (
            'insert or ignore into\n'
            'TradingSummary(trading_date, item, buy, sell, diff) '
            'values(?, ?, ?, ?, ?)'
        )

    def source(self, begin_date, end_date):
        """Load the CSV files for ``begin_date``..``end_date`` into the database.

        Both dates are ``'YYYY-MM-DD'`` strings and the range is inclusive.
        """
        self.init_dates(begin_date, end_date)
        # Downloading is intentionally disabled here; the CSV files are expected
        # to be in CSV_DIR already. Re-enable the next line to fetch them first.
        #self.source_url_to_csv(self.CSV_DIR)
        self.source_csv_to_sqlite(self.CSV_DIR, self.DB_FILE, self.SQL_INSERT)

    def init_dates(self, begin_date, end_date):
        """Populate ``self.DATES`` with every day from begin to end, inclusive.

        Both arguments are ``'YYYY-MM-DD'`` strings. If ``end_date`` is earlier
        than ``begin_date`` the resulting list is empty.

        Raises:
            ValueError: if either string does not match ``%Y-%m-%d``.
        """
        from datetime import datetime
        from datetime import timedelta

        begin = datetime.strptime(begin_date, '%Y-%m-%d')
        end = datetime.strptime(end_date, '%Y-%m-%d')
        day_count = (end - begin).days + 1
        self.DATES = [begin + timedelta(n) for n in range(day_count)]

    def source_url_to_csv(self, dest_dir):
        """Download one CSV per date in ``self.DATES`` into ``dest_dir``.

        ``dest_dir`` is created if it does not exist. Each file is named after
        its date (``YYYY-MM-DD``, no extension).
        """
        if not os.path.exists(dest_dir):
            os.makedirs(dest_dir)
        for day in self.DATES:
            url = self.URL_TEMPLATE % day.strftime('%Y%m%d')
            dest_file = os.path.join(dest_dir, day.strftime('%Y-%m-%d'))
            self.__wget(url, dest_file)

    def source_csv_to_sqlite(self, src_dir, dest_db, sql_insert):
        """Load the CSV of every date in ``self.DATES`` from ``src_dir``.

        ``dest_db`` must already exist (the schema is not created here).
        """
        assert os.path.isfile(dest_db)
        for day in self.DATES:
            src_file = os.path.join(src_dir, day.strftime('%Y-%m-%d'))
            self.source_csv_to_sqlite_single(src_file, dest_db, sql_insert)

    def source_csv_to_sqlite_single(self, src_file, dest_db, sql_insert):
        """Insert the data rows of one CSV file into ``dest_db``.

        A file with exactly one row is logged as "No record" and skipped; any
        row count other than six is logged as "Error" and skipped. Otherwise
        rows 2..5 (zero-based) are inserted using ``sql_insert``; rows 0 and 1
        are not read (presumably title and column header -- confirm against a
        sample download).
        """
        self.LOGGER.debug('%s => %s', src_file, dest_db)

        # Default text encoding is kept on purpose so behaviour matches the
        # environment the files were downloaded in; newline='' is what the csv
        # module requires for correct handling of embedded line breaks.
        with open(src_file, 'r', newline='') as csv_file:
            rows = list(csv.reader(csv_file))

        # Compare by value: ``is`` on ints only works by accident of caching.
        if len(rows) == 1:
            self.LOGGER.info('%s => No record', src_file)
            return
        if len(rows) != 6:
            self.LOGGER.info('%s => Error', src_file)
            return

        records = [self.__build_db_record(src_file, row) for row in rows[2:6]]

        conn = sqlite3.connect(dest_db)
        try:
            # ``with conn`` commits on success and rolls back on error, so a
            # file is loaded completely or not at all.
            with conn:
                conn.executemany(sql_insert, records)
        finally:
            conn.close()

        for record in records:
            self.LOGGER.debug(record)

    def __wget(self, url, dest_file):
        """Fetch ``url`` into ``dest_file`` using the bundled wget executable."""
        import subprocess

        wget = os.path.abspath('./src/thirdparty/wget/wget.exe')
        assert os.path.isfile(wget)
        # An argument list avoids shell quoting problems with '&' in the URL
        # and with spaces or quotes in the paths.
        subprocess.call([wget, '-N', url, '--waitretry=3', '-O', dest_file])

    def __build_db_record(self, src_file, row):
        """Build the parameter list for ``SQL_INSERT`` from one CSV row.

        The trading date is taken from the CSV file name; thousands separators
        are stripped from the three numeric columns.

        Returns:
            ``[trading_date, item, buy, sell, diff]`` as strings.
        """
        trading_date = os.path.basename(src_file)
        item = row[0]
        buy, sell, diff = (value.replace(',', '') for value in row[1:4])
        return [trading_date, item, buy, sell, diff]
訂閱:
張貼留言 (Atom)
沒有留言:
張貼留言