Last active
May 29, 2025 02:12
-
-
Save gbaeza2002/5379cae9a5c374f1c6310cb783fac893 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # IMPLEMENTACIÓN DE MODELOS PREDICTIVOS. CASO PREDICTIVO DE MERMAS | |
| import pandas as pd | |
| import numpy as np | |
| import matplotlib.pyplot as plt | |
| import seaborn as sns | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.preprocessing import StandardScaler, OneHotEncoder | |
| from sklearn.compose import ColumnTransformer | |
| from sklearn.pipeline import Pipeline | |
| from sklearn.linear_model import LinearRegression | |
| from sklearn.ensemble import RandomForestRegressor | |
| from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error | |
| import datetime as dt | |
| from sqlalchemy import create_engine | |
| import pymysql | |
| from xgboost import XGBRegressor | |
| print("*IMPLEMENTACIÓN DE MODELOS PREDICTIVOS. CASO PREDICTIVO DE MERMAS*") | |
| # Conexión a MySQL | |
| try: | |
| engine = create_engine('mysql+pymysql://graciany:Informatica2022@localhost:3306/mermas_productos') | |
| print("Conexión exitosa a la base de datos.") | |
| except Exception as e: | |
| print(f"Error al conectar a la base de datos: {e}") | |
| # Consulta SQL | |
| query = """ | |
| SELECT | |
| fm.unidad_merma, | |
| fm.monto_merma, | |
| dp.marca, | |
| dp.linea, | |
| dt.fecha, | |
| dt.año, | |
| dl.comuna, | |
| dl.region, | |
| dm.motivo, | |
| dpr.pais_origen | |
| FROM fact_Mermas fm | |
| JOIN dim_Productos dp ON fm.producto_id = dp.id | |
| JOIN dim_Tiempo dt ON fm.tiempo_id = dt.id | |
| JOIN dim_Localizacion dl ON fm.localizacion_id = dl.id | |
| JOIN dim_Motivos dm ON fm.motivo_id = dm.id | |
| JOIN dim_Proveedor dpr ON fm.proveedor_id = dpr.id; | |
| """ | |
| # Cargar los datos | |
| data = pd.read_sql(query, engine) | |
| # Filtrar mermas razonables | |
| data = data[(data['unidad_merma'] >= 0.01) & (data['unidad_merma'] <= 110)] | |
| # Características seleccionadas | |
| features = ["marca", "linea", "fecha", "año", "comuna", "region", "motivo", "pais_origen"] | |
| X = data[features] | |
| y = data['unidad_merma'] | |
| # Dividir los datos | |
| X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) | |
| # Variables categóricas y numéricas | |
| categorical_features = ['marca', 'linea', 'comuna', 'region', 'motivo', 'pais_origen'] | |
| numeric_features = [] | |
| # Preprocesador | |
| preprocessor = ColumnTransformer( | |
| transformers=[ | |
| ('num', StandardScaler(), numeric_features), | |
| ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features) | |
| ]) | |
| # Modelo 1: Regresión Lineal | |
| pipeline_lr = Pipeline(steps=[ | |
| ('preprocessor', preprocessor), | |
| ('regressor', LinearRegression()) | |
| ]) | |
| # Modelo 2: Random Forest | |
| pipeline_rf = Pipeline(steps=[ | |
| ('preprocessor', preprocessor), | |
| ('regressor', RandomForestRegressor(n_estimators=100, random_state=42)) | |
| ]) | |
| # Modelo 3: XGBoost Regressor | |
| pipeline_xgb = Pipeline(steps=[ | |
| ('preprocessor', preprocessor), | |
| ('regressor', XGBRegressor(n_estimators=100, random_state=42)) | |
| ]) | |
| # Entrenamiento | |
| print("Entrenando Regresión Lineal...") | |
| pipeline_lr.fit(X_train, y_train) | |
| print("Entrenando Random Forest...") | |
| pipeline_rf.fit(X_train, y_train) | |
| print("Modelos entrenados correctamente") | |
| print("Entrenando XGBoost...") | |
| pipeline_xgb.fit(X_train, y_train) | |
| print("Modelo XGBoost entrenado correctamente") | |
| # Predicciones | |
| y_pred_lr = pipeline_lr.predict(X_test) | |
| y_pred_rf = pipeline_rf.predict(X_test) | |
| y_pred_xgb = pipeline_xgb.predict(X_test) | |
| # Métricas | |
| mse_lr = mean_squared_error(y_test, y_pred_lr) | |
| mse_rf = mean_squared_error(y_test, y_pred_rf) | |
| rmse_lr = np.sqrt(mse_lr) | |
| rmse_rf = np.sqrt(mse_rf) | |
| mae_lr = mean_absolute_error(y_test, y_pred_lr) | |
| mae_rf = mean_absolute_error(y_test, y_pred_rf) | |
| r2_lr = r2_score(y_test, y_pred_lr) | |
| r2_rf = r2_score(y_test, y_pred_rf) | |
| mse_xgb = mean_squared_error(y_test, y_pred_xgb) | |
| rmse_xgb = np.sqrt(mse_xgb) | |
| mae_xgb = mean_absolute_error(y_test, y_pred_xgb) | |
| r2_xgb = r2_score(y_test, y_pred_xgb) | |
| # Resultados | |
| results_df = pd.DataFrame({ | |
| 'Valor_Real': y_test, | |
| 'Prediccion_LR': y_pred_lr, | |
| 'Prediccion_RF': y_pred_rf, | |
| 'Error_LR': y_test - y_pred_lr, | |
| 'Error_RF': y_test - y_pred_rf, | |
| 'Error_Porcentual_LR': ((y_test - y_pred_lr) / y_test) * 100, | |
| 'Error_Porcentual_RF': ((y_test - y_pred_rf) / y_test) * 100 | |
| }).reset_index(drop=True) | |
| results_df['Prediccion_XGB'] = y_pred_xgb | |
| results_df['Error_XGB'] = y_test.values - y_pred_xgb | |
| results_df['Error_Porcentual_XGB'] = ((y_test.values - y_pred_xgb) / y_test.values) * 100 | |
| X_test_reset = X_test.reset_index(drop=True) | |
| for feature in X_test.columns: | |
| results_df[feature] = X_test_reset[feature] | |
| results_df = results_df.sort_values('Valor_Real', ascending=False) | |
| # Guardar archivo markdown: Regresión Lineal | |
| with open('prediccion_lr.md', 'w') as f: | |
| f.write('# Resultados de Predicción: Regresión Lineal\n\n') | |
| f.write('## Resumen de Métricas\n\n') | |
| f.write(f'- **R²**: {r2_lr:.4f}\n') | |
| f.write(f'- **RMSE**: {rmse_lr:.2f}\n') | |
| f.write(f'- **MAE**: {mae_lr:.2f}\n\n') | |
| f.write('## Interpretación\n\n') | |
| f.write(f'Este modelo explica aproximadamente el {r2_lr*100:.1f}% de la variabilidad en las mermas.\n\n') | |
| f.write('## Muestra de Predicciones (Top 10)\n\n') | |
| f.write('| # | Valor Real | Predicción | Error | Error % | Marca | Región |\n') | |
| f.write('|---|------------|------------|-------|---------|--------|--------|\n') | |
| for i, row in results_df.head(10).iterrows(): | |
| f.write(f"| {i} | {row['Valor_Real']:.2f} | {row['Prediccion_LR']:.2f} | {row['Error_LR']:.2f} | {row['Error_Porcentual_LR']:.1f}% | {row['marca']} | {row['region']} |\n") | |
| # Guardar archivo markdown: Random Forest | |
| with open('prediccion_rf.md', 'w') as f: | |
| f.write('# Resultados de Predicción: Random Forest\n\n') | |
| f.write('## Resumen de Métricas\n\n') | |
| f.write(f'- **R²**: {r2_rf:.4f}\n') | |
| f.write(f'- **RMSE**: {rmse_rf:.2f}\n') | |
| f.write(f'- **MAE**: {mae_rf:.2f}\n\n') | |
| f.write('## Interpretación\n\n') | |
| f.write(f'Este modelo explica aproximadamente el {r2_rf*100:.1f}% de la variabilidad en las mermas.\n\n') | |
| f.write('## Muestra de Predicciones (Top 10)\n\n') | |
| f.write('| # | Valor Real | Predicción | Error | Error % | Marca | Región |\n') | |
| f.write('|---|------------|------------|-------|---------|--------|--------|\n') | |
| for i, row in results_df.head(10).iterrows(): | |
| f.write(f"| {i} | {row['Valor_Real']:.2f} | {row['Prediccion_RF']:.2f} | {row['Error_RF']:.2f} | {row['Error_Porcentual_RF']:.1f}% | {row['marca']} | {row['region']} |\n") | |
| with open('prediccion_xgb.md', 'w') as f: | |
| f.write('# Resultados de Predicción: XGBoost\n\n') | |
| f.write('## Resumen de Métricas\n\n') | |
| f.write(f'- **R²**: {r2_xgb:.4f}\n') | |
| f.write(f'- **RMSE**: {rmse_xgb:.2f}\n') | |
| f.write(f'- **MAE**: {mae_xgb:.2f}\n\n') | |
| f.write('## Interpretación\n\n') | |
| f.write(f'Este modelo explica aproximadamente el {r2_xgb*100:.1f}% de la variabilidad en las mermas.\n\n') | |
| f.write('## Muestra de Predicciones (Top 10)\n\n') | |
| f.write('| # | Valor Real | Predicción | Error | Error % | Marca | Región |\n') | |
| f.write('|---|------------|------------|-------|---------|--------|--------|\n') | |
| for i, row in results_df.head(10).iterrows(): | |
| f.write(f"| {i} | {row['Valor_Real']:.2f} | {row['Prediccion_XGB']:.2f} | {row['Error_XGB']:.2f} | {row['Error_Porcentual_XGB']:.1f}% | {row['marca']} | {row['region']} |\n") | |
| print("Resultados exportados correctamente a 'prediccion_lr.md' y 'prediccion_rf.md'") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment