Last active
December 4, 2025 15:27
-
-
Save databento-bot/4792f9cc35c35987c9ba18e31cb3c631 to your computer and use it in GitHub Desktop.
Get BBO of ATM options for an underlying stock using Databento
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| # -*- coding: utf-8 -*- | |
| """ | |
| get_atm_options_bbo.py | |
| ============== | |
| Get BBO of ATM options for an underlying stock using Databento. | |
| This example uses the "cbbo-1s" schema for 1-second granularity, but you can substitute | |
| with "cmbp-1" or "cbbo-1m" for higher/lower granularity. | |
| See: https://databento.com/docs/schemas-and-data-formats | |
| """ | |
| import pandas as pd | |
| import numpy as np | |
| import databento as db | |
| def fetch_underlying_and_symbols(start_date="2025-11-18", end_date="2025-11-19"): | |
| """ | |
| Fetch underlying price data and options symbol definitions from Databento. | |
| Parameters: | |
| ----------- | |
| start_date : str | |
| Start date in YYYY-MM-DD format | |
| end_date : str | |
| End date in YYYY-MM-DD format | |
| Returns: | |
| -------- | |
| tuple | |
| (df_underlying, df_symbols) - underlying prices and option definitions | |
| """ | |
| client = db.Historical() | |
| # Fetch options symbol definitions | |
| print("Fetching options definitions...") | |
| data = client.timeseries.get_range( | |
| dataset="OPRA.PILLAR", | |
| schema="definition", | |
| stype_in="parent", | |
| symbols=["BAC.OPT"], | |
| start=start_date, | |
| end=end_date, | |
| ) | |
| df_symbols = data.to_df() | |
| df_symbols = df_symbols[['symbol', 'instrument_id', 'strike_price', 'instrument_class']] | |
| print(f" Fetched {len(df_symbols)} option definitions") | |
| # Fetch underlying price data | |
| print("\nFetching underlying data...") | |
| data = client.timeseries.get_range( | |
| dataset="XNAS.BASIC", | |
| schema="cbbo-1s", # Use "cmbp-1" or "cbbo-1m" for higher/lower granularity | |
| symbols=["BAC"], | |
| start=start_date, | |
| end=end_date, | |
| ) | |
| df_underlying = data.to_df() | |
| df_underlying["midprice"] = (df_underlying["bid_px_00"] + df_underlying["ask_px_00"]) / 2 | |
| print(f" Fetched {len(df_underlying)} underlying price records") | |
| return df_underlying, df_symbols | |
| def get_potential_atm_symbols(df_underlying, df_symbols): | |
| """ | |
| Get all put and call symbols with strikes between min and max of underlying midprice. | |
| Parameters: | |
| ----------- | |
| df_underlying : pd.DataFrame | |
| DataFrame with 'midprice' column | |
| df_symbols : pd.DataFrame | |
| DataFrame with 'strike_price', 'symbol', 'instrument_class' columns | |
| Returns: | |
| -------- | |
| list | |
| List of all symbols (puts and calls) that could be ATM during the day | |
| """ | |
| print("\nDetermining symbols for potential ATM strikes...") | |
| # Get min and max midprice | |
| min_price = df_underlying['midprice'].min() | |
| max_price = df_underlying['midprice'].max() | |
| print(f" Underlying price range: ${min_price:.2f} - ${max_price:.2f}") | |
| # Get all available strikes | |
| all_strikes = sorted(df_symbols['strike_price'].unique()) | |
| # Find closest strikes to min and max | |
| closest_min_strike = min(all_strikes, key=lambda x: abs(x - min_price)) | |
| closest_max_strike = min(all_strikes, key=lambda x: abs(x - max_price)) | |
| print(f" Strike range: ${closest_min_strike:.2f} - ${closest_max_strike:.2f}") | |
| # Get all symbols with strikes in this range | |
| atm_range_symbols = df_symbols[ | |
| (df_symbols['strike_price'] >= closest_min_strike) & | |
| (df_symbols['strike_price'] <= closest_max_strike) | |
| ] | |
| # Get unique symbols | |
| unique_symbols = atm_range_symbols['symbol'].unique().tolist() | |
| print(f" Found {len(unique_symbols)} potential ATM symbols") | |
| return unique_symbols | |
| def fetch_options_data(symbols_list, start_date="2025-11-18", end_date="2025-11-19"): | |
| """ | |
| Fetch options bid/ask data from Databento. | |
| Parameters: | |
| ----------- | |
| symbols_list : list | |
| List of option symbols to fetch | |
| start_date : str | |
| Start date in YYYY-MM-DD format | |
| end_date : str | |
| End date in YYYY-MM-DD format | |
| Returns: | |
| -------- | |
| pd.DataFrame | |
| DataFrame with bid/ask data for all symbols | |
| """ | |
| client = db.Historical() | |
| print(f"\nFetching options data for {len(symbols_list)} symbols...") | |
| data = client.timeseries.get_range( | |
| dataset="OPRA.PILLAR", | |
| schema="cbbo-1s", # Use "cmbp-1" or "cbbo-1m" for higher/lower granularity | |
| symbols=symbols_list, | |
| start=start_date, | |
| end=end_date, | |
| ) | |
| df_options = data.to_df() | |
| print(f" Fetched {len(df_options)} option price records") | |
| return df_options | |
| def generate_atm_bidask_output(df_underlying, df_symbols, df_options): | |
| """ | |
| Generate the final output with ATM put/call bid/ask prices. | |
| Parameters: | |
| ----------- | |
| df_underlying : pd.DataFrame | |
| Underlying price data with 'midprice' and 'ts_recv' index | |
| df_symbols : pd.DataFrame | |
| Options symbol definitions | |
| df_options : pd.DataFrame | |
| Options bid/ask data | |
| Returns: | |
| -------- | |
| pd.DataFrame | |
| Final dataframe with ATM put/call bid/ask prices at each timestamp | |
| """ | |
| print("\nGenerating ATM bid/ask output...") | |
| # Get all available strikes as numpy array for vectorization | |
| all_strikes = np.array(sorted(df_symbols['strike_price'].unique())) | |
| # Vectorized closest strike calculation | |
| midprices = df_underlying['midprice'].values | |
| diffs = np.abs(midprices[:, np.newaxis] - all_strikes[np.newaxis, :]) | |
| closest_strike_indices = np.argmin(diffs, axis=1) | |
| atm_strikes = all_strikes[closest_strike_indices] | |
| # Create result dataframe | |
| result = df_underlying.copy() | |
| result['atm_strike'] = atm_strikes | |
| # Create a mapping from (strike, instrument_class) to symbol | |
| # Use the most recent symbol definition for each strike/class combination | |
| symbol_map = {} | |
| for strike in df_symbols['strike_price'].unique(): | |
| for inst_class in ['P', 'C']: | |
| matching = df_symbols[ | |
| (df_symbols['strike_price'] == strike) & | |
| (df_symbols['instrument_class'] == inst_class) | |
| ] | |
| if not matching.empty: | |
| # Take the most recent symbol | |
| symbol_map[(strike, inst_class)] = matching.iloc[-1]['symbol'] | |
| # Map ATM strikes to put and call symbols | |
| result['atm_put_symbol'] = result['atm_strike'].map( | |
| lambda x: symbol_map.get((x, 'P'), None) | |
| ) | |
| result['atm_call_symbol'] = result['atm_strike'].map( | |
| lambda x: symbol_map.get((x, 'C'), None) | |
| ) | |
| # Ensure df_options has symbol column available | |
| if 'symbol' not in df_options.columns and df_options.index.name != 'symbol': | |
| df_options = df_options.reset_index() | |
| # Set ts_recv as index if not already | |
| if 'ts_recv' in df_options.columns: | |
| df_options = df_options.set_index('ts_recv') | |
| # Initialize bid/ask columns with atm_ prefix | |
| result['atm_put_px_bid_00'] = np.nan | |
| result['atm_put_px_ask_00'] = np.nan | |
| result['atm_call_px_bid_00'] = np.nan | |
| result['atm_call_px_ask_00'] = np.nan | |
| # Create sorted options data for efficient lookups | |
| df_options_sorted = df_options.sort_index() | |
| # For each symbol, create a lookup structure | |
| print(" Merging bid/ask data...") | |
| # Group options by symbol for faster access | |
| options_by_symbol = { | |
| symbol: group.sort_index() | |
| for symbol, group in df_options_sorted.groupby('symbol') | |
| } | |
| # Process each timestamp | |
| for idx, (ts, row) in enumerate(result.iterrows()): | |
| if idx % 10000 == 0: | |
| print(f" Processed {idx}/{len(result)} records...") | |
| put_sym = row['atm_put_symbol'] | |
| call_sym = row['atm_call_symbol'] | |
| # Get put bid/ask - use most recent data up to this timestamp | |
| if pd.notna(put_sym) and put_sym in options_by_symbol: | |
| put_data = options_by_symbol[put_sym] | |
| put_before = put_data[put_data.index <= ts] | |
| if not put_before.empty: | |
| latest_put = put_before.iloc[-1] | |
| result.loc[ts, 'atm_put_px_bid_00'] = latest_put.get('bid_px_00', np.nan) | |
| result.loc[ts, 'atm_put_px_ask_00'] = latest_put.get('ask_px_00', np.nan) | |
| # Get call bid/ask - use most recent data up to this timestamp | |
| if pd.notna(call_sym) and call_sym in options_by_symbol: | |
| call_data = options_by_symbol[call_sym] | |
| call_before = call_data[call_data.index <= ts] | |
| if not call_before.empty: | |
| latest_call = call_before.iloc[-1] | |
| result.loc[ts, 'atm_call_px_bid_00'] = latest_call.get('bid_px_00', np.nan) | |
| result.loc[ts, 'atm_call_px_ask_00'] = latest_call.get('ask_px_00', np.nan) | |
| # Forward fill bid/ask prices | |
| print(" Forward filling prices...") | |
| result['atm_put_px_bid_00'] = result['atm_put_px_bid_00'].ffill() | |
| result['atm_put_px_ask_00'] = result['atm_put_px_ask_00'].ffill() | |
| result['atm_call_px_bid_00'] = result['atm_call_px_bid_00'].ffill() | |
| result['atm_call_px_ask_00'] = result['atm_call_px_ask_00'].ffill() | |
| # Reset index to make ts_recv a column, then rename to ts_event | |
| result = result.reset_index() | |
| result = result.rename(columns={'ts_recv': 'ts_event'}) | |
| # Keep only the desired columns | |
| final_columns = [ | |
| 'ts_event', | |
| 'midprice', | |
| 'atm_strike', | |
| 'atm_put_symbol', | |
| 'atm_call_symbol', | |
| 'atm_put_px_bid_00', | |
| 'atm_put_px_ask_00', | |
| 'atm_call_px_bid_00', | |
| 'atm_call_px_ask_00' | |
| ] | |
| result = result[final_columns] | |
| print("Complete!") | |
| return result | |
| def main(): | |
| """ | |
| Main pipeline to get ATM options bid/ask data. | |
| Returns: | |
| -------- | |
| pd.DataFrame | |
| Final dataframe with ATM put/call bid/ask prices | |
| """ | |
| # Step 1: Fetch underlying and symbols | |
| df_underlying, df_symbols = fetch_underlying_and_symbols( | |
| start_date="2025-11-18", | |
| end_date="2025-11-19" | |
| ) | |
| # Step 2: Get potential ATM symbols based on price range | |
| potential_atm_symbols = get_potential_atm_symbols(df_underlying, df_symbols) | |
| # Step 3: Fetch options data for all potential ATM symbols | |
| df_options = fetch_options_data( | |
| potential_atm_symbols, | |
| start_date="2025-11-18", | |
| end_date="2025-11-19" | |
| ) | |
| # Step 4: Generate final output with ATM bid/ask | |
| final_df = generate_atm_bidask_output(df_underlying, df_symbols, df_options) | |
| return final_df | |
| if __name__ == "__main__": | |
| df = main() | |
| print(df) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ts_event ts_event midprice atm_strike atm_put_symbol atm_call_symbol atm_put_px_bid_00 atm_put_px_ask_00 atm_call_px_bid_00 atm_call_px_ask_00 | |
| 0 2025-11-18 09:00:01+00:00 NaT 52.005 52.0 BAC 251121P00052000 BAC 251121C00052000 NaN NaN NaN NaN | |
| 1 2025-11-18 09:00:02+00:00 NaT 51.190 51.0 BAC 251121P00051000 BAC 251128C00051000 NaN NaN NaN NaN | |
| 2 2025-11-18 09:00:04+00:00 NaT 51.195 51.0 BAC 251121P00051000 BAC 251128C00051000 NaN NaN NaN NaN | |
| 3 2025-11-18 09:00:52+00:00 NaT 51.220 51.0 BAC 251121P00051000 BAC 251128C00051000 NaN NaN NaN NaN | |
| 4 2025-11-18 09:00:56+00:00 NaT 51.195 51.0 BAC 251121P00051000 BAC 251128C00051000 NaN NaN NaN NaN | |
| ... ... ... ... ... ... ... ... ... ... ... | |
| 24153 2025-11-18 23:55:38+00:00 2025-11-18 23:55:37.339477049+00:00 51.920 52.0 BAC 251121P00052000 BAC 251121C00052000 0.77 0.81 0.47 0.49 | |
| 24154 2025-11-18 23:56:13+00:00 2025-11-18 23:56:12.157215212+00:00 51.920 52.0 BAC 251121P00052000 BAC 251121C00052000 0.77 0.81 0.47 0.49 | |
| 24155 2025-11-18 23:56:29+00:00 2025-11-18 23:56:28.431490620+00:00 51.920 52.0 BAC 251121P00052000 BAC 251121C00052000 0.77 0.81 0.47 0.49 | |
| 24156 2025-11-18 23:57:22+00:00 2025-11-18 23:57:21.027397618+00:00 51.920 52.0 BAC 251121P00052000 BAC 251121C00052000 0.77 0.81 0.47 0.49 | |
| 24157 2025-11-18 23:58:32+00:00 2025-11-18 23:58:31.992597063+00:00 51.920 52.0 BAC 251121P00052000 BAC 251121C00052000 0.77 0.81 0.47 0.49 | |
| [24158 rows x 10 columns] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment