Fix incorrect backtest results when a spread is stopped out, and run backtests in parallel using multiprocessing

2024-01-30 08:49:18 -08:00 · 2024-01-30 08:49:18 -08:00 · f223238bd5
commit f223238bd5
parent f940607dee
1 changed files with 36 additions and 32 deletions
--- a/backtesting/backtest_iron_condor.py
+++ b/backtesting/backtest_iron_condor.py
@@ -4,6 +4,7 @@ import os
 import pandas as pd
 import plotly.express as px

+from concurrent.futures import ProcessPoolExecutor
 from dataclasses import dataclass
 from datetime import datetime
 from dotenv import load_dotenv
@@ -149,13 +150,20 @@ def get_spread_history(historical_option_data: pd.DataFrame, option_strat: Delta
        return spread_history

 def _backtest_iron_condor(
-        historical_option_data: pd.DataFrame, 
+        historical_data_file: str, 
        call_spread_strategy: OptionSpreadStrategy, 
        put_spread_strategy: OptionSpreadStrategy
    ) -> BacktestResult:

-    call_spread_history = get_spread_history(historical_option_data, call_spread_strategy)
-    put_spread_history = get_spread_history(historical_option_data, put_spread_strategy)
+    print('Processing File:', historical_data_file)
+    historical_option_data = pd.read_csv(historical_data_file)
+
+    if isinstance(call_spread_strategy, CreditTargetStrategy):
+        call_spread_history = get_spread_history_credit(historical_option_data, call_spread_strategy)
+        put_spread_history = get_spread_history_credit(historical_option_data, put_spread_strategy)
+    else:
+        call_spread_history = get_spread_history(historical_option_data, call_spread_strategy)
+        put_spread_history = get_spread_history(historical_option_data, put_spread_strategy)

    current_date = call_spread_history.iloc[0].name[:10]

@@ -246,7 +254,6 @@ def _backtest_iron_condor(
                call_spread_stopped_out = True
                exit_time = call_spread.name[-8:]
                logging.info('Call Spread Stopped Out')
-                break
        
        if not put_spread_stopped_out:
            if current_put_spread_price >= ((put_spread_strategy.stop_loss_multiple + 1) * original_put_spread_price):
@@ -256,7 +263,6 @@ def _backtest_iron_condor(
                put_spread_stopped_out = True
                exit_time = call_spread.name[-8:]
                logging.info('Put Spread Stopped Out')
-                break

        if not (call_spread_stopped_out and put_spread_stopped_out):
            if current_call_spread_price > current_put_spread_price:
@@ -339,34 +345,36 @@ def backtest_iron_condor(
    total_trades = 0.0
    total_wins = 0.0

-    result_dates = []
-    result_pnl = []
-
-    backtest_results = []
-
    start_year = start_date.year
    end_year = end_date.year
-    for year in range(start_year, end_year + 1):
-        year_directory = os.path.join(OPTION_DATA_DIRECTORY, str(year))

-        for file in os.listdir(year_directory):
-            historical_data_file = os.path.join(year_directory, file)
-            if os.path.isdir(historical_data_file) or not file.endswith('.csv'):
-                continue
+    futures = []
+    with ProcessPoolExecutor(max_workers = 10) as executor:
+        for year in range(start_year, end_year + 1):
+            year_directory = os.path.join(OPTION_DATA_DIRECTORY, str(year))
+            for file in os.listdir(year_directory):
+                historical_data_file = os.path.join(year_directory, file)
+                if os.path.isdir(historical_data_file) or not file.endswith('.csv'):
+                    continue

-            # Assuming file format 'YYYY-MM-DD.csv'.
-            file_date_str = os.path.splitext(file)[0]
-            file_date = datetime.strptime(file_date_str, '%Y-%m-%d')
+                # Assuming file format 'YYYY-MM-DD.csv'.
+                file_date_str = os.path.splitext(file)[0]
+                file_date = datetime.strptime(file_date_str, '%Y-%m-%d')

-            # TODO: This doesn't work as expected when the start date is not set to midnight.
-            if file_date < start_date or file_date > end_date:
-                continue
+                # TODO: This doesn't work as expected when the start date is not set to midnight.
+                if file_date < start_date or file_date > end_date:
+                    continue

-            print('Processing File:', historical_data_file)
-            logging.info('Processing File: %s', historical_data_file)
+                logging.info('Processing File: %s', historical_data_file)

-            historical_option_data = pd.read_csv(historical_data_file)
-            backtest_result = _backtest_iron_condor(historical_option_data, call_spread_strategy, put_spread_strategy)
+                future = executor.submit(_backtest_iron_condor, 
+                    historical_data_file, call_spread_strategy, put_spread_strategy)
+                futures.append(future)
+
+    backtest_results = []
+    for future in futures:
+        backtest_result = future.result()
+        if backtest_result:
            total_premium_received += backtest_result.trade_pnl
            backtest_result.profit = total_premium_received
            backtest_results.append(backtest_result)
@@ -380,10 +388,6 @@ def backtest_iron_condor(
            logging.info('Win Rate: %f', (total_wins / total_trades) if total_trades > 0 else 0.0)
            logging.info('Average Premium Received: %f', (total_premium_received / total_trades) if total_trades > 0 else 0.0)

-            current_date = historical_option_data.iloc[0]['quote_datetime'][:10]
-            result_dates.append(current_date)
-            result_pnl.append(total_premium_received)
-
    # TODO: Either look up the symbol in the historical option data or have the client provide it.
    backtest_results = pd.DataFrame([{
        'Date': result.date,