from __future__ import annotations
import warnings
warnings.simplefilter("error", FutureWarning)
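# Note: escalating FutureWarning to an error makes deprecations in dependencies fail fast.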
from pathlib import Path
from typing import Any
import pandas as pd
from tabarena.benchmark.experiment import AGModelBagExperiment, ExperimentBatchRunner
from tabarena.nips2025_utils.end_to_end import EndToEnd
from tabarena.nips2025_utils.tabarena_context import TabArenaContext
from tabarena.tabarena.website_format import format_leaderboard

if __name__ == '__main__':
    # Folder location to save all experiment artifacts
    expname = str(Path(__file__).parent / "experiments" / "quickstart")
    eval_dir = Path(__file__).parent / "eval" / "quickstart"
    # Set to True to overwrite existing caches and re-run experiments from scratch
    ignore_cache = True

    tabarena_context = TabArenaContext()
    task_metadata = tabarena_context.task_metadata

    # Sample a few datasets for a quick demo; use `datasets = list(task_metadata["name"])` for the full benchmark
    datasets = ["anneal", "credit-g", "diabetes"]
    folds = [0]
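    # `folds` selects which outer splits of each task to run; fold 0 alone keeps
    # the demo fast, and adding more indices averages results over more splits.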

    # Import your model classes
    # from autogluon.tabular.models import LinearModel
    from autogluon.tabular.models import RFModel
    # This list of methods will be fit sequentially on each task (dataset x fold)
    methods = [
        # This will be a `config` in EvaluationRepository, because it computes
        # out-of-fold predictions and thus can be used for post-hoc ensembling.
        AGModelBagExperiment(  # Wrapper for fitting a single bagged model via AutoGluon
            # The name you want the config to have
            name="test_rf_model",  # e.g. "LightGBM_c1_BAG_L1_Reproduced"
            # The class of the model. Can also be a string if AutoGluon recognizes it, such as `"GBM"`.
            # Supports any model that inherits from `autogluon.core.models.AbstractModel`.
            model_cls=RFModel,  # e.g. LGBModel
            # The non-default model hyperparameters
            model_hyperparameters={
                "random_state": None,
                "ag.ens.use_child_oof": False,
                # "ag_args_ensemble": {"fold_fitting_strategy": "sequential_local"},  # uncomment to fit folds sequentially, allowing for use of a debugger
            },
            num_bag_folds=8,  # num_bag_folds=8 was used in the TabArena 2025 paper
            time_limit=300,  # time_limit=3600 was used in the TabArena 2025 paper
        ),
    ]
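
    # A hedged sketch of adding a second config (illustrative only; the name and
    # empty hyperparameters below are assumptions, not settings from the paper):
    # methods.append(
    #     AGModelBagExperiment(
    #         name="LightGBM_demo",
    #         model_cls="GBM",  # string keys work when AutoGluon recognizes them
    #         model_hyperparameters={},
    #         num_bag_folds=8,
    #         time_limit=300,
    #     )
    # )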

    exp_batch_runner = ExperimentBatchRunner(expname=expname, task_metadata=task_metadata)

    # Get the run artifacts.
    # Fits each method on each task (datasets x folds).
    results_lst: list[dict[str, Any]] = exp_batch_runner.run(
        datasets=datasets,
        folds=folds,
        methods=methods,
        ignore_cache=ignore_cache,
    )
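    # With `ignore_cache=False`, a re-run should load the cached artifacts from
    # `expname` instead of refitting (an assumption based on the flag's description above).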

    # Compute results
    end_to_end = EndToEnd.from_raw(results_lst=results_lst, task_metadata=task_metadata, cache=False, cache_raw=False)
    end_to_end_results = end_to_end.to_results()

    print(f"New Configs Hyperparameters: {end_to_end.configs_hyperparameters()}")
    with pd.option_context("display.max_rows", None, "display.max_columns", None, "display.width", 1000):
        print(f"Results:\n{end_to_end_results.model_results.head(100)}")

    leaderboard: pd.DataFrame = end_to_end_results.compare_on_tabarena(
        output_dir=eval_dir,
        only_valid_tasks=True,  # True: only compare on tasks run in `results_lst`
        use_model_results=True,  # If False: will instead use the ensemble/HPO results
        new_result_prefix="Demo_",
    )
    leaderboard_website = format_leaderboard(df_leaderboard=leaderboard)
    print(leaderboard_website.to_markdown(index=False))
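
    # Optional: persist the website-formatted table. Writing it to `eval_dir` is a
    # workflow assumption, not part of the quickstart:
    # (eval_dir / "leaderboard.md").write_text(leaderboard_website.to_markdown(index=False))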