Skip to content

Instantly share code, notes, and snippets.

@robintux
Created February 15, 2026 22:19
Show Gist options
  • Select an option

  • Save robintux/7b408e7e8a810d5ce209e62ff1b91668 to your computer and use it in GitHub Desktop.

Select an option

Save robintux/7b408e7e8a810d5ce209e62ff1b91668 to your computer and use it in GitHub Desktop.
def temporal_train_test_split(df, predictors, target, test_size=0.2):
    """Split a DataFrame into train and test sets along the ``Year`` column.

    Rows are sorted by ``Year`` and cut at the ``1 - test_size`` quantile of
    that column: rows at or before the cut-off year form the training set and
    rows strictly after it form the test set. Every row of a given year lands
    on the same side of the cut, so the resulting test fraction is only
    approximately ``test_size``.

    The number of countries present on both sides is printed for information
    only; nothing is enforced about country membership or balance.

    NOTE: rows whose ``Year`` is missing satisfy neither comparison and are
    therefore left out of both sets.

    Args:
        df: DataFrame containing ``Year`` and ``Country`` columns plus the
            columns named by ``predictors`` and ``target``.
        predictors: Column label(s) used as model inputs.
        target: Column label of the response variable.
        test_size: Approximate fraction of rows assigned to the test set;
            must lie within [0, 1].

    Returns:
        A tuple ``(X_train, y_train, X_test, y_test, train, test)``. The first
        four items are the ``.values`` arrays of the predictor/target columns;
        the last two are the corresponding DataFrame subsets.

    Raises:
        ValueError: If ``test_size`` is outside [0, 1].
    """
    if not 0 <= test_size <= 1:
        raise ValueError(
            f"test_size must be within [0, 1], got {test_size!r}"
        )

    # A stable sort keeps the caller's row order inside each year, so the
    # returned arrays are reproducible from run to run.
    df_sorted = df.sort_values('Year', kind='mergesort').reset_index(drop=True)

    # 'lower' snaps the quantile to a year that actually occurs in the data.
    # It selects exactly the same rows as the default linear interpolation,
    # but the reported cut-off is the real last training year instead of a
    # fractional value that may round up to the first test year.
    cutoff_year = df_sorted['Year'].quantile(
        1 - test_size, interpolation='lower'
    )

    train = df_sorted[df_sorted['Year'] <= cutoff_year]
    test = df_sorted[df_sorted['Year'] > cutoff_year]

    # In a purely temporal split the same countries normally appear on both
    # sides; the count is reported, not enforced.
    countries_train = set(train['Country'].unique())
    countries_test = set(test['Country'].unique())
    countries_overlap = countries_train & countries_test

    print("División temporal:")
    print(f" - Año de corte: {cutoff_year:.0f}")
    print(f" - Train: {len(train)} obs ({train['Year'].min()}-{train['Year'].max()})")
    print(f" - Test: {len(test)} obs ({test['Year'].min()}-{test['Year'].max()})")
    print(f" - Países en ambos sets: {len(countries_overlap)} (evitar overfitting)")

    return (
        train[predictors].values,
        train[target].values,
        test[predictors].values,
        test[target].values,
        train,
        test,
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment