Skip to content

Instantly share code, notes, and snippets.

@databento-bot
Last active December 4, 2025 15:27
Show Gist options
  • Select an option

  • Save databento-bot/4792f9cc35c35987c9ba18e31cb3c631 to your computer and use it in GitHub Desktop.

Select an option

Save databento-bot/4792f9cc35c35987c9ba18e31cb3c631 to your computer and use it in GitHub Desktop.
Get BBO of ATM options for an underlying stock using Databento
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
get_atm_options_bbo.py
==============
Get BBO of ATM options for an underlying stock using Databento.
This example uses the "cbbo-1s" schema for 1-second granularity, but you can substitute
with "cmbp-1" or "cbbo-1m" for higher/lower granularity.
See: https://databento.com/docs/schemas-and-data-formats
"""
import pandas as pd
import numpy as np
import databento as db
def fetch_underlying_and_symbols(start_date="2025-11-18", end_date="2025-11-19"):
"""
Fetch underlying price data and options symbol definitions from Databento.
Parameters:
-----------
start_date : str
Start date in YYYY-MM-DD format
end_date : str
End date in YYYY-MM-DD format
Returns:
--------
tuple
(df_underlying, df_symbols) - underlying prices and option definitions
"""
client = db.Historical()
# Fetch options symbol definitions
print("Fetching options definitions...")
data = client.timeseries.get_range(
dataset="OPRA.PILLAR",
schema="definition",
stype_in="parent",
symbols=["BAC.OPT"],
start=start_date,
end=end_date,
)
df_symbols = data.to_df()
df_symbols = df_symbols[['symbol', 'instrument_id', 'strike_price', 'instrument_class']]
print(f" Fetched {len(df_symbols)} option definitions")
# Fetch underlying price data
print("\nFetching underlying data...")
data = client.timeseries.get_range(
dataset="XNAS.BASIC",
schema="cbbo-1s", # Use "cmbp-1" or "cbbo-1m" for higher/lower granularity
symbols=["BAC"],
start=start_date,
end=end_date,
)
df_underlying = data.to_df()
df_underlying["midprice"] = (df_underlying["bid_px_00"] + df_underlying["ask_px_00"]) / 2
print(f" Fetched {len(df_underlying)} underlying price records")
return df_underlying, df_symbols
def get_potential_atm_symbols(df_underlying, df_symbols):
"""
Get all put and call symbols with strikes between min and max of underlying midprice.
Parameters:
-----------
df_underlying : pd.DataFrame
DataFrame with 'midprice' column
df_symbols : pd.DataFrame
DataFrame with 'strike_price', 'symbol', 'instrument_class' columns
Returns:
--------
list
List of all symbols (puts and calls) that could be ATM during the day
"""
print("\nDetermining symbols for potential ATM strikes...")
# Get min and max midprice
min_price = df_underlying['midprice'].min()
max_price = df_underlying['midprice'].max()
print(f" Underlying price range: ${min_price:.2f} - ${max_price:.2f}")
# Get all available strikes
all_strikes = sorted(df_symbols['strike_price'].unique())
# Find closest strikes to min and max
closest_min_strike = min(all_strikes, key=lambda x: abs(x - min_price))
closest_max_strike = min(all_strikes, key=lambda x: abs(x - max_price))
print(f" Strike range: ${closest_min_strike:.2f} - ${closest_max_strike:.2f}")
# Get all symbols with strikes in this range
atm_range_symbols = df_symbols[
(df_symbols['strike_price'] >= closest_min_strike) &
(df_symbols['strike_price'] <= closest_max_strike)
]
# Get unique symbols
unique_symbols = atm_range_symbols['symbol'].unique().tolist()
print(f" Found {len(unique_symbols)} potential ATM symbols")
return unique_symbols
def fetch_options_data(symbols_list, start_date="2025-11-18", end_date="2025-11-19"):
"""
Fetch options bid/ask data from Databento.
Parameters:
-----------
symbols_list : list
List of option symbols to fetch
start_date : str
Start date in YYYY-MM-DD format
end_date : str
End date in YYYY-MM-DD format
Returns:
--------
pd.DataFrame
DataFrame with bid/ask data for all symbols
"""
client = db.Historical()
print(f"\nFetching options data for {len(symbols_list)} symbols...")
data = client.timeseries.get_range(
dataset="OPRA.PILLAR",
schema="cbbo-1s", # Use "cmbp-1" or "cbbo-1m" for higher/lower granularity
symbols=symbols_list,
start=start_date,
end=end_date,
)
df_options = data.to_df()
print(f" Fetched {len(df_options)} option price records")
return df_options
def generate_atm_bidask_output(df_underlying, df_symbols, df_options):
"""
Generate the final output with ATM put/call bid/ask prices.
Parameters:
-----------
df_underlying : pd.DataFrame
Underlying price data with 'midprice' and 'ts_recv' index
df_symbols : pd.DataFrame
Options symbol definitions
df_options : pd.DataFrame
Options bid/ask data
Returns:
--------
pd.DataFrame
Final dataframe with ATM put/call bid/ask prices at each timestamp
"""
print("\nGenerating ATM bid/ask output...")
# Get all available strikes as numpy array for vectorization
all_strikes = np.array(sorted(df_symbols['strike_price'].unique()))
# Vectorized closest strike calculation
midprices = df_underlying['midprice'].values
diffs = np.abs(midprices[:, np.newaxis] - all_strikes[np.newaxis, :])
closest_strike_indices = np.argmin(diffs, axis=1)
atm_strikes = all_strikes[closest_strike_indices]
# Create result dataframe
result = df_underlying.copy()
result['atm_strike'] = atm_strikes
# Create a mapping from (strike, instrument_class) to symbol
# Use the most recent symbol definition for each strike/class combination
symbol_map = {}
for strike in df_symbols['strike_price'].unique():
for inst_class in ['P', 'C']:
matching = df_symbols[
(df_symbols['strike_price'] == strike) &
(df_symbols['instrument_class'] == inst_class)
]
if not matching.empty:
# Take the most recent symbol
symbol_map[(strike, inst_class)] = matching.iloc[-1]['symbol']
# Map ATM strikes to put and call symbols
result['atm_put_symbol'] = result['atm_strike'].map(
lambda x: symbol_map.get((x, 'P'), None)
)
result['atm_call_symbol'] = result['atm_strike'].map(
lambda x: symbol_map.get((x, 'C'), None)
)
# Ensure df_options has symbol column available
if 'symbol' not in df_options.columns and df_options.index.name != 'symbol':
df_options = df_options.reset_index()
# Set ts_recv as index if not already
if 'ts_recv' in df_options.columns:
df_options = df_options.set_index('ts_recv')
# Initialize bid/ask columns with atm_ prefix
result['atm_put_px_bid_00'] = np.nan
result['atm_put_px_ask_00'] = np.nan
result['atm_call_px_bid_00'] = np.nan
result['atm_call_px_ask_00'] = np.nan
# Create sorted options data for efficient lookups
df_options_sorted = df_options.sort_index()
# For each symbol, create a lookup structure
print(" Merging bid/ask data...")
# Group options by symbol for faster access
options_by_symbol = {
symbol: group.sort_index()
for symbol, group in df_options_sorted.groupby('symbol')
}
# Process each timestamp
for idx, (ts, row) in enumerate(result.iterrows()):
if idx % 10000 == 0:
print(f" Processed {idx}/{len(result)} records...")
put_sym = row['atm_put_symbol']
call_sym = row['atm_call_symbol']
# Get put bid/ask - use most recent data up to this timestamp
if pd.notna(put_sym) and put_sym in options_by_symbol:
put_data = options_by_symbol[put_sym]
put_before = put_data[put_data.index <= ts]
if not put_before.empty:
latest_put = put_before.iloc[-1]
result.loc[ts, 'atm_put_px_bid_00'] = latest_put.get('bid_px_00', np.nan)
result.loc[ts, 'atm_put_px_ask_00'] = latest_put.get('ask_px_00', np.nan)
# Get call bid/ask - use most recent data up to this timestamp
if pd.notna(call_sym) and call_sym in options_by_symbol:
call_data = options_by_symbol[call_sym]
call_before = call_data[call_data.index <= ts]
if not call_before.empty:
latest_call = call_before.iloc[-1]
result.loc[ts, 'atm_call_px_bid_00'] = latest_call.get('bid_px_00', np.nan)
result.loc[ts, 'atm_call_px_ask_00'] = latest_call.get('ask_px_00', np.nan)
# Forward fill bid/ask prices
print(" Forward filling prices...")
result['atm_put_px_bid_00'] = result['atm_put_px_bid_00'].ffill()
result['atm_put_px_ask_00'] = result['atm_put_px_ask_00'].ffill()
result['atm_call_px_bid_00'] = result['atm_call_px_bid_00'].ffill()
result['atm_call_px_ask_00'] = result['atm_call_px_ask_00'].ffill()
# Reset index to make ts_recv a column, then rename to ts_event
result = result.reset_index()
result = result.rename(columns={'ts_recv': 'ts_event'})
# Keep only the desired columns
final_columns = [
'ts_event',
'midprice',
'atm_strike',
'atm_put_symbol',
'atm_call_symbol',
'atm_put_px_bid_00',
'atm_put_px_ask_00',
'atm_call_px_bid_00',
'atm_call_px_ask_00'
]
result = result[final_columns]
print("Complete!")
return result
def main():
"""
Main pipeline to get ATM options bid/ask data.
Returns:
--------
pd.DataFrame
Final dataframe with ATM put/call bid/ask prices
"""
# Step 1: Fetch underlying and symbols
df_underlying, df_symbols = fetch_underlying_and_symbols(
start_date="2025-11-18",
end_date="2025-11-19"
)
# Step 2: Get potential ATM symbols based on price range
potential_atm_symbols = get_potential_atm_symbols(df_underlying, df_symbols)
# Step 3: Fetch options data for all potential ATM symbols
df_options = fetch_options_data(
potential_atm_symbols,
start_date="2025-11-18",
end_date="2025-11-19"
)
# Step 4: Generate final output with ATM bid/ask
final_df = generate_atm_bidask_output(df_underlying, df_symbols, df_options)
return final_df
if __name__ == "__main__":
df = main()
print(df)
ts_event ts_event midprice atm_strike atm_put_symbol atm_call_symbol atm_put_px_bid_00 atm_put_px_ask_00 atm_call_px_bid_00 atm_call_px_ask_00
0 2025-11-18 09:00:01+00:00 NaT 52.005 52.0 BAC 251121P00052000 BAC 251121C00052000 NaN NaN NaN NaN
1 2025-11-18 09:00:02+00:00 NaT 51.190 51.0 BAC 251121P00051000 BAC 251128C00051000 NaN NaN NaN NaN
2 2025-11-18 09:00:04+00:00 NaT 51.195 51.0 BAC 251121P00051000 BAC 251128C00051000 NaN NaN NaN NaN
3 2025-11-18 09:00:52+00:00 NaT 51.220 51.0 BAC 251121P00051000 BAC 251128C00051000 NaN NaN NaN NaN
4 2025-11-18 09:00:56+00:00 NaT 51.195 51.0 BAC 251121P00051000 BAC 251128C00051000 NaN NaN NaN NaN
... ... ... ... ... ... ... ... ... ... ...
24153 2025-11-18 23:55:38+00:00 2025-11-18 23:55:37.339477049+00:00 51.920 52.0 BAC 251121P00052000 BAC 251121C00052000 0.77 0.81 0.47 0.49
24154 2025-11-18 23:56:13+00:00 2025-11-18 23:56:12.157215212+00:00 51.920 52.0 BAC 251121P00052000 BAC 251121C00052000 0.77 0.81 0.47 0.49
24155 2025-11-18 23:56:29+00:00 2025-11-18 23:56:28.431490620+00:00 51.920 52.0 BAC 251121P00052000 BAC 251121C00052000 0.77 0.81 0.47 0.49
24156 2025-11-18 23:57:22+00:00 2025-11-18 23:57:21.027397618+00:00 51.920 52.0 BAC 251121P00052000 BAC 251121C00052000 0.77 0.81 0.47 0.49
24157 2025-11-18 23:58:32+00:00 2025-11-18 23:58:31.992597063+00:00 51.920 52.0 BAC 251121P00052000 BAC 251121C00052000 0.77 0.81 0.47 0.49
[24158 rows x 10 columns]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment