Python投資信託データ取得・可視化ボットの実装

環境構築

開発環境は以下の通り構築する：

OS: Linux、Mac、Windowsいずれも対応
Python 3.7以上
必要なライブラリ: pandas、requests、akshare、matplotlib、dataframe-image、flask（画像配信サーバー用）、lxml（pandas.read_htmlによるHTML解析用）

投資信託データの取得方法

投資信託のデータは金融サイトから取得可能。ここではakshareライブラリを使用した方法と、Web APIを直接呼び出す方法の2つを紹介する。

akshareを利用したデータ取得

akshareを使用すると、投資信託の履歴データを簡単に取得できる。ただし、期間指定には対応していないため、全データを取得した後にフィルタリングする必要がある。

import akshare as ak

def fetch_fund_history(fund_code):
    """Fetch the full unit-NAV history of one fund through akshare.

    Args:
        fund_code: Fund code, forwarded to akshare as ``fund``.

    Returns:
        The object returned by ``ak.fund_em_open_fund_info`` for the
        '单位净值走势' indicator. No date filtering is applied here.
    """
    return ak.fund_em_open_fund_info(
        fund=fund_code,
        indicator='单位净值走势',
    )

Web APIを直接呼び出す方法

東方財富のAPIを利用して、指定期間のデータを直接取得することも可能。

import requests
import pandas as pd
import re

class FundDataFetcher:
    """Download a fund's NAV history for a date range from the Eastmoney API.

    The endpoint answers with a JavaScript-like payload that embeds an HTML
    table (``content:"<table...</table>"``) and paging fields such as
    ``pages:N``. This class requests each page, extracts the table and
    concatenates all pages into one DataFrame.
    """

    BASE_URL = 'http://fund.eastmoney.com/f10/F10DataApi.aspx'
    # Seconds to wait for the server. Without a timeout, requests.get can
    # block indefinitely on a stalled connection.
    REQUEST_TIMEOUT = 10

    def __init__(self, fund_code, start, end):
        """Remember the fund code and the requested date range.

        Args:
            fund_code: Fund code sent as the ``code`` query parameter.
            start: Start date sent as ``sdate``.
            end: End date sent as ``edate``.
        """
        self.code = fund_code
        self.start_date = start
        self.end_date = end

    def fetch_page(self, page_num=1, per_page=40):
        """Request a single result page and return the raw response text.

        Args:
            page_num: 1-based page number.
            per_page: Number of rows requested per page.

        Returns:
            The response body as text.

        Raises:
            requests.RequestException: On timeout, connection failure or an
                HTTP error status.
        """
        params = {
            'type': 'lsjz',
            'code': self.code,
            'page': page_num,
            'sdate': self.start_date,
            'edate': self.end_date,
            'per': per_page
        }
        response = requests.get(
            self.BASE_URL, params=params, timeout=self.REQUEST_TIMEOUT
        )
        # Fail loudly on 4xx/5xx instead of trying to parse an error page.
        response.raise_for_status()
        return response.text

    def extract_table(self, raw_html):
        """Extract the embedded HTML table from a raw response.

        Args:
            raw_html: Response text as returned by ``fetch_page``.

        Returns:
            The first table parsed into a DataFrame, or ``None`` when the
            response contains no ``content:"<table...</table>"`` section.
        """
        from io import StringIO

        # DOTALL keeps the match working if the table spans several lines.
        match = re.search(r'content:"<table(.*)</table>",', raw_html, re.DOTALL)
        if not match:
            return None
        table_html = '<table' + match.group(1) + '</table>'
        # Passing literal HTML to read_html is deprecated in recent pandas;
        # a file-like object works on old and new versions alike.
        return pd.read_html(StringIO(table_html))[0]

    def get_total_pages(self, raw_html):
        """Read the total page count from a raw response.

        Args:
            raw_html: Response text as returned by ``fetch_page``.

        Returns:
            The integer following ``pages:``, or 0 when it is absent.
        """
        # Capture digits only: a greedy ``(.*),`` swallows every following
        # field up to the last comma and then fails in int().
        match = re.search(r'pages:\s*(\d+)', raw_html)
        if match:
            return int(match.group(1))
        return 0

    def fetch_all_data(self):
        """Fetch every page for the configured range and merge the tables.

        Returns:
            One DataFrame with the rows of all pages (index reset), or an
            empty DataFrame when no page contained a table.
        """
        first_response = self.fetch_page()
        total_pages = self.get_total_pages(first_response)

        frames = []
        first_table = self.extract_table(first_response)
        if first_table is not None:
            frames.append(first_table)

        for p in range(2, total_pages + 1):
            table = self.extract_table(self.fetch_page(page_num=p))
            if table is not None:
                frames.append(table)

        # pd.concat raises when it has nothing to concatenate.
        if not frames:
            return pd.DataFrame()
        return pd.concat(frames, ignore_index=True)

定期データ更新の実装

毎日自動的にデータを更新するため、定期実行タスクを設定する。

import time
import random
import os

class FundDataScheduler:
    """Refresh the locally stored CSV history of a list of funds."""

    def __init__(self, data_dir='./fund_data'):
        """Remember the storage directory and create it if needed.

        Args:
            data_dir: Directory that receives one ``<code>.csv`` per fund.
        """
        self.data_dir = data_dir
        os.makedirs(data_dir, exist_ok=True)

    def load_fund_list(self, filepath):
        """Read the fund codes to track, one per line.

        Surrounding whitespace is stripped and blank lines are skipped, so a
        trailing or separating empty line does not turn into an empty fund
        code that would later be fetched and reported as an error.

        Args:
            filepath: Path of the text file holding the codes.

        Returns:
            List of non-empty fund code strings in file order.
        """
        with open(filepath, 'r') as f:
            stripped = (line.strip() for line in f)
            return [code for code in stripped if code]

    def update_all_funds(self, fund_list_file):
        """Download and store the history of every listed fund.

        Each fund is fetched with ``fetch_fund_history``, sorted by
        '净值日期' in descending order and written to ``<code>.csv`` in the
        data directory. A failure for one fund is recorded and does not stop
        the remaining funds.

        Args:
            fund_list_file: Path of the fund code list file.

        Returns:
            Newline-joined status report with one line per fund.
        """
        fund_codes = self.load_fund_list(fund_list_file)
        results = []

        for code in fund_codes:
            try:
                data = fetch_fund_history(code)
                data = data.sort_values('净值日期', ascending=False)
                save_path = os.path.join(self.data_dir, f'{code}.csv')
                data.to_csv(save_path, index=False)
            except Exception as err:
                # Best effort: report the failure and continue.
                results.append(f'{code}: エラー - {err}')
            else:
                results.append(f'{code}: 更新成功')
                # Random pause between successful downloads to avoid
                # hitting the data source in a tight loop.
                time.sleep(random.randint(1, 5))

        return '\n'.join(results)

def filter_by_date(fund_code, start, end, data_dir='./fund_data'):
    """Return the stored rows of a fund whose date lies within a range.

    The bounds are compared against the '净值日期' column exactly as it is
    read from the CSV, and both ends are inclusive. The comparison uses a
    boolean mask rather than a query string built from ``start`` and
    ``end``, so quotes or expressions in those values cannot break or
    alter the filter.

    Args:
        fund_code: Fund code; the data is read from ``<fund_code>.csv``.
        start: Lower bound of '净值日期' (inclusive).
        end: Upper bound of '净值日期' (inclusive).
        data_dir: Directory containing the per-fund CSV files.

    Returns:
        DataFrame holding only the rows inside the range.
    """
    file_path = os.path.join(data_dir, f'{fund_code}.csv')
    df = pd.read_csv(file_path)
    in_range = df['净值日期'].between(start, end)
    return df[in_range]

データの可視化

取得したデータをユーザーにわかりやすく提示するため、データ量に応じて表示方法を切り替える。30件以下の場合はテーブル画像を、30件を超える場合はトレンドグラフを生成する。

データテーブルの画像化

import dataframe_image as dfi

def create_table_image(df, fund_code, start, end, output_dir='./images'):
    """Export a DataFrame as a PNG table and return the image path.

    Frames with more than one row are styled first: the highest and lowest
    '单位净值' cells are highlighted and '日增长率' is formatted as a
    percentage with two decimals. Frames with zero or one row are exported
    without styling.

    Args:
        df: Data to render.
        fund_code: Fund code, used in the file name.
        start: Range start, used in the file name.
        end: Range end, used in the file name.
        output_dir: Directory for the image; created when missing.

    Returns:
        Path of the written PNG file.
    """
    os.makedirs(output_dir, exist_ok=True)
    target = os.path.join(output_dir, f'{fund_code}_{start}_{end}.png')

    if len(df) > 1:
        # Highlighting extremes only makes sense with at least two rows.
        exportable = (
            df.style
            .highlight_max(subset=['单位净值'], color='#ff6b6b')
            .highlight_min(subset=['单位净值'], color='#51cf66')
            .format({'日增长率': '{:.2f}%'})
        )
    else:
        exportable = df

    dfi.export(exportable, target)
    return target

トレンドグラフの作成

import matplotlib.pyplot as plt
import matplotlib

# Select the non-interactive Agg backend: charts are only written to files.
matplotlib.use('Agg')
# Use SimHei for sans-serif text and disable the Unicode minus sign for
# negative tick labels.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})

def create_trend_chart(df, fund_code, start, end, output_dir='./images'):
    """Draw the unit-NAV trend of a fund as a line chart and save it as PNG.

    The highest and lowest '单位净值' points are annotated. Dates are
    parsed to datetimes before plotting so the x axis is a real time axis
    with automatically spaced ticks; plotting the raw date strings would
    create one tick label per row.

    Args:
        df: Data with '净值日期' and '单位净值' columns, ordered as it
            should be drawn.
        fund_code: Fund code, used in the file name.
        start: Range start, used in the file name.
        end: Range end, used in the file name.
        output_dir: Directory for the image; created when missing.

    Returns:
        Path of the written PNG file.

    Raises:
        ValueError: If ``df`` is empty or a date cannot be parsed.
    """
    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, f'{fund_code}_{start}_{end}.png')

    # Prepare all data before creating the figure, so bad input cannot
    # leave a half-built figure behind.
    dates = pd.to_datetime(df['净值日期'])
    values = df['单位净值']
    peak = values.idxmax()
    trough = values.idxmin()

    fig, ax = plt.subplots(figsize=(10, 5))
    try:
        # Frame: hide the top/right spines, keep a light horizontal grid.
        ax.spines['right'].set_visible(False)
        ax.spines['top'].set_visible(False)
        ax.grid(axis='y', linestyle='--', alpha=0.7)

        ax.plot(dates, values, color='#339af0', linewidth=2)

        # Mark the extremes of the series.
        ax.annotate(f'最高: {values.loc[peak]}',
                    xy=(dates.loc[peak], values.loc[peak]),
                    color='red', fontsize=9)
        ax.annotate(f'最低: {values.loc[trough]}',
                    xy=(dates.loc[trough], values.loc[trough]),
                    color='green', fontsize=9)

        ax.set_title('投資信託単位净值推移', fontsize=14)
        ax.set_xlabel('日付')
        ax.set_ylabel('単位净值')
        ax.tick_params(axis='x', labelrotation=45)
        fig.tight_layout()
        fig.savefig(output_path, dpi=150)
    finally:
        # Always release the figure, even when drawing or saving fails;
        # pyplot otherwise keeps it alive for the life of the process.
        plt.close(fig)
    return output_path

出力処理の統合

def generate_output(fund_code, start, end, data_dir='./fund_data', img_dir='./images'):
    """Produce the image for a fund and date range, reusing an existing one.

    If an image for the same fund and range already exists in ``img_dir``
    it is returned as is. Otherwise the stored data is filtered to the
    range: up to 30 rows are rendered as a table image, more than 30 rows
    are sorted by '净值日期' and rendered as a trend chart.

    Args:
        fund_code: Fund code.
        start: Range start.
        end: Range end.
        data_dir: Directory containing the per-fund CSV files.
        img_dir: Directory where images are stored.

    Returns:
        Path of the existing or newly created PNG file.
    """
    cached = os.path.join(img_dir, f'{fund_code}_{start}_{end}.png')
    if os.path.exists(cached):
        return cached

    rows = filter_by_date(fund_code, start, end, data_dir)

    if len(rows) > 30:
        ordered = rows.sort_values('净值日期')
        return create_trend_chart(ordered, fund_code, start, end, img_dir)
    return create_table_image(rows, fund_code, start, end, img_dir)

画像配信サーバーの構築

生成した画像を配信するため、Flaskで簡易サーバーを構築する。

from flask import Flask

# Flask application whose static file handler exposes the ./images folder
# under the /static URL prefix.
# NOTE(review): a relative static_folder is presumably resolved against the
# application's root path rather than the current working directory; confirm
# it points at the same ./images directory the image generators write to.
app = Flask(__name__, static_folder='./images', static_url_path='/static')

# Images are reachable at http://<server-ip>:<port>/static/<filename>

トラブルシューティング

Linux環境でのdataframe-imageエラー

Linux環境でdataframe-imageを使用する際、Chromeのサンドボックス問題が発生する場合がある。解決策として、root以外のユーザーで実行するか、chromiumの引数に--no-sandboxを追加する。

# 新規ユーザーの作成
useradd devuser
chown -R devuser /project_dir
su - devuser

matplotlibの日本語表示問題

グラフで日本語を正しく表示するには、日本語フォントをインストールし、matplotlibの設定を更新する必要がある。

# フォントディレクトリの作成
mkdir -p /usr/share/fonts/custom

# フォントファイルをコピー後、キャッシュを更新
fc-cache -fv

# インストール確認
fc-list :lang=ja

運用例

実装したシステムは以下の形式でクエリを受け付ける：

指定期間照会: F [ファンドコード] [開始日] [終了日]
例: F005827 2024-01-01 2024-03-31
簡易照会: F [ファンドコード]（直近10日間のデータを表示）
例: F005827

データ件数が30件以下の場合はテーブル形式で、30件を超える場合は推移グラフで表示される。

タグ: Python 投資信託データ可視化 flask matplotlib

6月23日 19:28 投稿

異端開発室