Files
NewStock/main/data/update/update_money_flow.ipynb

287 lines
10 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "b94bb1f2-5332-485e-ae1b-eea01f938106",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:57:40.184418Z",
"start_time": "2025-04-09T14:57:39.137312Z"
}
},
"outputs": [],
"source": [
"import tushare as ts\n",
"\n",
"ts.set_token('3a0741c702ee7e5e5f2bf1f0846bafaafe4e320833240b2a7e4a685f')\n",
"pro = ts.pro_api()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "742c29d453b9bb38",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:58:10.515830Z",
"start_time": "2025-04-09T14:57:40.190466Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 9413748 entries, 0 to 25875\n",
"Data columns (total 2 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 ts_code object\n",
" 1 trade_date object\n",
"dtypes: object(2)\n",
"memory usage: 215.5+ MB\n",
"None\n",
"20260206\n",
"start_date: 20260209\n"
]
}
],
"source": [
"import pandas as pd\n",
"import time\n",
"\n",
"h5_filename = '/mnt/d/PyProject/NewStock/data/money_flow.h5'\n",
"key = '/money_flow'\n",
"max_date = None\n",
"with pd.HDFStore(h5_filename, mode='r') as store:\n",
" df = store[key][['ts_code', 'trade_date']]\n",
" print(df.info())\n",
" max_date = df['trade_date'].max()\n",
"\n",
"print(max_date)\n",
"trade_cal = pro.trade_cal(exchange='', start_date='20170101', end_date='20260310')\n",
"trade_cal = trade_cal[trade_cal['is_open'] == 1] # 只保留交易日\n",
"trade_dates = trade_cal[trade_cal['cal_date'] > max_date]['cal_date'].tolist()\n",
"start_date = min(trade_dates)\n",
"print(f'start_date: {start_date}')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "679ce40e-8d62-4887-970c-e1d8cbdeee6b",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:58:17.197319Z",
"start_time": "2025-04-09T14:58:10.724923Z"
},
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"任务 20260310 完成\n",
"任务 20260309 完成\n",
"任务 20260306 完成\n",
"任务 20260305 完成\n",
"任务 20260304 完成\n",
"任务 20260303 完成\n",
"任务 20260302 完成\n",
"任务 20260227 完成\n",
"任务 20260226 完成\n",
"任务 20260225 完成\n",
"任务 20260224 完成\n",
"任务 20260213 完成\n",
"任务 20260212 完成\n",
"任务 20260211 完成\n",
"任务 20260210 完成\n",
"任务 20260209 完成\n"
]
}
],
"source": [
"from concurrent.futures import ThreadPoolExecutor, as_completed\n",
"\n",
"all_daily_data = []\n",
"\n",
"# API 调用计数和时间控制变量\n",
"api_call_count = 0\n",
"batch_start_time = time.time()\n",
"\n",
"\n",
"def get_data(trade_date):\n",
" time.sleep(0.1)\n",
" money_flow_data = pro.moneyflow(trade_date=trade_date)\n",
" if money_flow_data is not None and not money_flow_data.empty:\n",
" return money_flow_data\n",
"\n",
"\n",
"with ThreadPoolExecutor(max_workers=2) as executor:\n",
" future_to_date = {executor.submit(get_data, td): td for td in trade_dates}\n",
"\n",
" for future in as_completed(future_to_date):\n",
" trade_date = future_to_date[future] # 获取对应的交易日期\n",
" try:\n",
" result = future.result() # 获取任务执行的结果\n",
" all_daily_data.append(result)\n",
" print(f\"任务 {trade_date} 完成\")\n",
" except Exception as e:\n",
" print(f\"获取 {trade_date} 数据时出错: {e}\")\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "9af80516849d4e80",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:58:17.214168Z",
"start_time": "2025-04-09T14:58:17.210734Z"
}
},
"outputs": [],
"source": [
"all_daily_data_df = pd.concat(all_daily_data, ignore_index=True)\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "a2b05187-437f-4053-bc43-bd80d4cf8b0e",
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-09T14:58:19.633456Z",
"start_time": "2025-04-09T14:58:17.229837Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"所有每日基础数据获取并保存完毕!\n"
]
}
],
"source": [
"\n",
"# 将所有数据合并为一个 DataFrame\n",
"\n",
"# 将数据保存为 HDF5 文件table 格式)\n",
"all_daily_data_df.to_hdf(h5_filename, key='money_flow', mode='a', format='table', append=True, data_columns=True)\n",
"\n",
"print(\"所有每日基础数据获取并保存完毕!\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "e6f2a2fe",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ts_code trade_date buy_sm_vol buy_sm_amount sell_sm_vol \\\n",
"0 300587.SZ 20260213 154110 9661.44 160598 \n",
"1 601000.SH 20260213 150959 6301.54 197344 \n",
"2 002338.SZ 20260213 9215 5012.78 8260 \n",
"3 688373.SH 20260213 29166 1845.78 30329 \n",
"4 002226.SZ 20260213 101435 6137.98 79302 \n",
"... ... ... ... ... ... \n",
"25887 603713.SH 20260209 9898 6481.74 10208 \n",
"25888 300004.SZ 20260209 41923 5934.14 50255 \n",
"25889 300975.SZ 20260209 198244 30367.70 159191 \n",
"25890 603381.SH 20260209 85934 22581.16 95505 \n",
"25891 002836.SZ 20260209 27160 4238.25 22047 \n",
"\n",
" sell_sm_amount buy_md_vol buy_md_amount sell_md_vol sell_md_amount \\\n",
"0 10061.76 183752 11498.51 186971 11701.15 \n",
"1 8233.85 84549 3527.41 71932 3003.18 \n",
"2 4495.09 9488 5163.47 9035 4920.33 \n",
"3 1918.49 10043 635.49 10005 633.64 \n",
"4 4796.49 104000 6296.98 78239 4741.21 \n",
"... ... ... ... ... ... \n",
"25887 6683.01 6735 4413.49 7402 4854.70 \n",
"25888 7133.88 53004 7510.03 58384 8274.31 \n",
"25889 24423.94 164520 25222.01 193044 29612.42 \n",
"25890 25109.67 65977 17316.88 71008 18669.88 \n",
"25891 3438.16 21123 3293.94 20955 3271.07 \n",
"\n",
" buy_lg_vol buy_lg_amount sell_lg_vol sell_lg_amount buy_elg_vol \\\n",
"0 139274 8711.00 168037 10519.91 60907 \n",
"1 57471 2398.28 37552 1567.23 21177 \n",
"2 5873 3195.64 5312 2890.59 514 \n",
"3 8238 521.88 7113 451.02 0 \n",
"4 50394 3048.72 69021 4177.39 9335 \n",
"... ... ... ... ... ... \n",
"25887 3515 2307.87 4056 2660.17 2867 \n",
"25888 49767 7060.10 39566 5596.12 8820 \n",
"25889 116306 17856.14 144536 22152.02 51550 \n",
"25890 46270 12156.66 38176 10028.63 11944 \n",
"25891 9676 1508.43 11383 1772.18 1000 \n",
"\n",
" buy_elg_amount sell_elg_vol sell_elg_amount net_mf_vol \\\n",
"0 3816.45 22437 1404.58 142435 \n",
"1 882.70 7328 305.66 -48700 \n",
"2 280.52 2483 1346.40 -483 \n",
"3 0.00 0 0.00 982 \n",
"4 565.27 38602 2333.86 -103058 \n",
"... ... ... ... ... \n",
"25887 1885.53 1349 890.75 1237 \n",
"25888 1251.47 5309 751.42 8180 \n",
"25889 7924.73 33850 5182.21 -41375 \n",
"25890 3157.32 5436 1403.84 -11696 \n",
"25891 155.40 4574 714.61 1257 \n",
"\n",
" net_mf_amount \n",
"0 8918.59 \n",
"1 -2025.73 \n",
"2 -250.58 \n",
"3 64.60 \n",
"4 -6231.26 \n",
"... ... \n",
"25887 814.61 \n",
"25888 1173.63 \n",
"25889 -6267.77 \n",
"25890 -3055.51 \n",
"25891 191.86 \n",
"\n",
"[25892 rows x 20 columns]\n"
]
}
],
"source": [
"print(all_daily_data_df)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "stock",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
}