Resolve issue where backtest results were incorrect when a spread stopped out, and run backtests in parallel using multiprocessing

This commit is contained in:
moshferatu 2024-01-30 08:49:18 -08:00
parent f940607dee
commit f223238bd5

View File

@ -4,6 +4,7 @@ import os
import pandas as pd import pandas as pd
import plotly.express as px import plotly.express as px
from concurrent.futures import ProcessPoolExecutor
from dataclasses import dataclass from dataclasses import dataclass
from datetime import datetime from datetime import datetime
from dotenv import load_dotenv from dotenv import load_dotenv
@ -149,13 +150,20 @@ def get_spread_history(historical_option_data: pd.DataFrame, option_strat: Delta
return spread_history return spread_history
def _backtest_iron_condor( def _backtest_iron_condor(
historical_option_data: pd.DataFrame, historical_data_file: str,
call_spread_strategy: OptionSpreadStrategy, call_spread_strategy: OptionSpreadStrategy,
put_spread_strategy: OptionSpreadStrategy put_spread_strategy: OptionSpreadStrategy
) -> BacktestResult: ) -> BacktestResult:
call_spread_history = get_spread_history(historical_option_data, call_spread_strategy) print('Processing File:', historical_data_file)
put_spread_history = get_spread_history(historical_option_data, put_spread_strategy) historical_option_data = pd.read_csv(historical_data_file)
if isinstance(call_spread_strategy, CreditTargetStrategy):
call_spread_history = get_spread_history_credit(historical_option_data, call_spread_strategy)
put_spread_history = get_spread_history_credit(historical_option_data, put_spread_strategy)
else:
call_spread_history = get_spread_history(historical_option_data, call_spread_strategy)
put_spread_history = get_spread_history(historical_option_data, put_spread_strategy)
current_date = call_spread_history.iloc[0].name[:10] current_date = call_spread_history.iloc[0].name[:10]
@ -246,7 +254,6 @@ def _backtest_iron_condor(
call_spread_stopped_out = True call_spread_stopped_out = True
exit_time = call_spread.name[-8:] exit_time = call_spread.name[-8:]
logging.info('Call Spread Stopped Out') logging.info('Call Spread Stopped Out')
break
if not put_spread_stopped_out: if not put_spread_stopped_out:
if current_put_spread_price >= ((put_spread_strategy.stop_loss_multiple + 1) * original_put_spread_price): if current_put_spread_price >= ((put_spread_strategy.stop_loss_multiple + 1) * original_put_spread_price):
@ -256,7 +263,6 @@ def _backtest_iron_condor(
put_spread_stopped_out = True put_spread_stopped_out = True
exit_time = call_spread.name[-8:] exit_time = call_spread.name[-8:]
logging.info('Put Spread Stopped Out') logging.info('Put Spread Stopped Out')
break
if not (call_spread_stopped_out and put_spread_stopped_out): if not (call_spread_stopped_out and put_spread_stopped_out):
if current_call_spread_price > current_put_spread_price: if current_call_spread_price > current_put_spread_price:
@ -339,34 +345,36 @@ def backtest_iron_condor(
total_trades = 0.0 total_trades = 0.0
total_wins = 0.0 total_wins = 0.0
result_dates = []
result_pnl = []
backtest_results = []
start_year = start_date.year start_year = start_date.year
end_year = end_date.year end_year = end_date.year
for year in range(start_year, end_year + 1):
year_directory = os.path.join(OPTION_DATA_DIRECTORY, str(year))
for file in os.listdir(year_directory): futures = []
historical_data_file = os.path.join(year_directory, file) with ProcessPoolExecutor(max_workers = 10) as executor:
if os.path.isdir(historical_data_file) or not file.endswith('.csv'): for year in range(start_year, end_year + 1):
continue year_directory = os.path.join(OPTION_DATA_DIRECTORY, str(year))
for file in os.listdir(year_directory):
historical_data_file = os.path.join(year_directory, file)
if os.path.isdir(historical_data_file) or not file.endswith('.csv'):
continue
# Assuming file format 'YYYY-MM-DD.csv'. # Assuming file format 'YYYY-MM-DD.csv'.
file_date_str = os.path.splitext(file)[0] file_date_str = os.path.splitext(file)[0]
file_date = datetime.strptime(file_date_str, '%Y-%m-%d') file_date = datetime.strptime(file_date_str, '%Y-%m-%d')
# TODO: This doesn't work as expected when the start date is not set to midnight. # TODO: This doesn't work as expected when the start date is not set to midnight.
if file_date < start_date or file_date > end_date: if file_date < start_date or file_date > end_date:
continue continue
print('Processing File:', historical_data_file) logging.info('Processing File: %s', historical_data_file)
logging.info('Processing File: %s', historical_data_file)
historical_option_data = pd.read_csv(historical_data_file) future = executor.submit(_backtest_iron_condor,
backtest_result = _backtest_iron_condor(historical_option_data, call_spread_strategy, put_spread_strategy) historical_data_file, call_spread_strategy, put_spread_strategy)
futures.append(future)
backtest_results = []
for future in futures:
backtest_result = future.result()
if backtest_result:
total_premium_received += backtest_result.trade_pnl total_premium_received += backtest_result.trade_pnl
backtest_result.profit = total_premium_received backtest_result.profit = total_premium_received
backtest_results.append(backtest_result) backtest_results.append(backtest_result)
@ -375,15 +383,11 @@ def backtest_iron_condor(
total_trades += 1 total_trades += 1
if backtest_result.trade_pnl > 0.0: if backtest_result.trade_pnl > 0.0:
total_wins += 1 total_wins += 1
logging.info('Overall PnL: %f', total_premium_received) logging.info('Overall PnL: %f', total_premium_received)
logging.info('Win Rate: %f', (total_wins / total_trades) if total_trades > 0 else 0.0) logging.info('Win Rate: %f', (total_wins / total_trades) if total_trades > 0 else 0.0)
logging.info('Average Premium Received: %f', (total_premium_received / total_trades) if total_trades > 0 else 0.0) logging.info('Average Premium Received: %f', (total_premium_received / total_trades) if total_trades > 0 else 0.0)
current_date = historical_option_data.iloc[0]['quote_datetime'][:10]
result_dates.append(current_date)
result_pnl.append(total_premium_received)
# TODO: Either look up the symbol in the historical option data or have the client provide it. # TODO: Either look up the symbol in the historical option data or have the client provide it.
backtest_results = pd.DataFrame([{ backtest_results = pd.DataFrame([{
'Date': result.date, 'Date': result.date,