Created
January 12, 2026 07:34
-
-
Save thinkphp/fc5d18c3c0c3812d9a9787cc41276264 to your computer and use it in GitHub Desktop.
cod complet apps
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import pandas as pd | |
| import numpy as np | |
| import matplotlib.pyplot as plt | |
| import seaborn as sns | |
| from sklearn.linear_model import LinearRegression | |
| from sklearn.cluster import KMeans | |
| from sklearn.preprocessing import PolynomialFeatures | |
| from sklearn.impute import SimpleImputer | |
| from sklearn.pipeline import make_pipeline | |
| from sklearn.metrics import r2_score | |
# ============================================================================
# Sanity check: inspect the structure of nba2021.csv before the exercises
# ============================================================================
banner = "=" * 70
print(banner)
print("VERIFICARE STRUCTURĂ DATE NBA2021.CSV")
print(banner)

df = pd.read_csv('nba2021.csv')
print(f"Shape: {df.shape}")

# Show each column label next to its positional index; the exercises below
# fall back to positional indices when a label is not present.
print(f"\nColoane disponibile cu indici:")
for position, column_name in enumerate(df.columns):
    print(f" [{position:2d}]: {column_name}")

# Preview of the first rows and the per-column count of missing values.
first_rows = df.head(3)
missing_per_column = df.isnull().sum()
print(f"\nPrimele 3 linii:\n{first_rows}")
print(f"\nValori lipsă:\n{missing_per_column}")
# ============================================================================
# EXERCISE 1: Linear regression - MP vs FGA
# ============================================================================
print("\n" + "="*70)
print("EXERCIȚIUL 1: Regresie Liniară MP vs FGA")
print("="*70)

df = pd.read_csv('nba2021.csv')
print(f"Shape: {df.shape}")

# NOTE: dropna() with default arguments removes a row when ANY column is
# missing, not only MP/FGA.
df_clean = df.dropna()
print(f"Shape după ștergere NaN: {df_clean.shape}")

# Select predictor (MP) and target (FGA) by label when both exist; otherwise
# fall back to fixed positional indices.
if {'MP', 'FGA'}.issubset(df_clean.columns):
    X = df_clean[['MP']].to_numpy()
    y = df_clean['FGA'].to_numpy()
    print("✓ Folosim coloanele 'MP' și 'FGA'")
else:
    print("⚠ Coloanele 'MP' și 'FGA' nu există, folosim indici")
    X = df_clean.iloc[:, [7]].to_numpy()
    y = df_clean.iloc[:, 9].to_numpy()
print(f"Shape X: {X.shape}, y: {y.shape}")

# Scatter plot of the raw training data.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(X, y, alpha=0.5, edgecolors='k', linewidths=0.5)
ax.set_xlabel('MP (Minutes Played)')
ax.set_ylabel('FGA (Field Goals Attempted)')
ax.set_title('Date antrenare: MP vs FGA')
ax.grid(True, alpha=0.3)
fig.tight_layout()
plt.show()

# Fit the model and report R² on the training data.
model = LinearRegression()
model.fit(X, y)
r2 = model.score(X, y)
print(f"Scor R²: {r2:.4f}")

# Predict on 100 evenly spaced values spanning the observed range of X.
X_new = np.linspace(X.min(), X.max(), 100)[:, np.newaxis]
y_pred = model.predict(X_new)

# Training data together with the fitted line.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(X, y, alpha=0.5, label='Date antrenare', edgecolors='k', linewidths=0.5)
ax.plot(X_new, y_pred, 'r-', linewidth=2, label='Predicție')
ax.set_xlabel('MP (Minutes Played)')
ax.set_ylabel('FGA (Field Goals Attempted)')
ax.set_title('Regresie Liniară: MP vs FGA')
ax.legend()
ax.grid(True, alpha=0.3)
fig.tight_layout()
plt.show()

# Fitted equation, printed next to the reference answer for comparison.
intercept, slope = model.intercept_, model.coef_[0]
print(f"Ecuația: FGA = {intercept:.2f} + {slope:.2f}*MP")
print("(Trebuie: FGA = -1.95 + 0.45*MP)")

# Inverse problem: swap the roles and regress MP on FGA.
X_inv = y.reshape(-1, 1)
y_inv = X.ravel()
model_inv = LinearRegression()
model_inv.fit(X_inv, y_inv)
inv_intercept, inv_slope = model_inv.intercept_, model_inv.coef_[0]
print(f"Inversă: MP = {inv_intercept:.2f} + {inv_slope:.2f}*FGA")
# ============================================================================
# EXERCISE 2: Linear regression with imputation - FG vs 2P
# ============================================================================
print("\n" + "="*70)
print("EXERCIȚIUL 2: Regresie Liniară FG vs 2P (cu imputare)")
print("="*70)

df = pd.read_csv('nba2021.csv')

# Select predictor (FG) and target (2P) by label when both exist; otherwise
# fall back to positional indices on the raw array.
if {'FG', '2P'}.issubset(df.columns):
    X = df['FG'].to_numpy()
    y = df['2P'].to_numpy()
    print("✓ Folosim coloanele 'FG' și '2P'")
else:
    print("⚠ Folosim indici - verificați că sunt corecți!")
    data = df.to_numpy()
    # Positional guess. Usual column order:
    # FG, FGA, FG%, 3P, 3PA, 3P%, 2P, 2PA, 2P%
    X = data[:, 10]  # adjust to the actual position of FG
    y = data[:, 12]  # adjust to the actual position of 2P

# Replace missing values with the column mean. The imputer needs 2-D input,
# so each vector is reshaped to a single column first; ravel() then flattens
# y back to 1-D, e.g. np.array([[1, 2], [3, 4]]).ravel() -> [1 2 3 4].
imputer = SimpleImputer(strategy='mean')
X = imputer.fit_transform(X.reshape(-1, 1))
y = imputer.fit_transform(y.reshape(-1, 1)).ravel()
print(f"Shape după imputare - X: {X.shape}, y: {y.shape}")

# Scatter plot of the raw training data.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(X, y, alpha=0.5, edgecolors='k', linewidths=0.5)
ax.set_xlabel('FG (Field Goals)')
ax.set_ylabel('2P (2-Point Field Goals)')
ax.set_title('Date antrenare: FG vs 2P')
ax.grid(True, alpha=0.3)
fig.tight_layout()
plt.show()

# Fit the model and report R² on the training data.
model = LinearRegression()
model.fit(X, y)
r2 = model.score(X, y)
print(f"Scor R²: {r2:.4f}")

# Predict on 80 evenly spaced values spanning the observed range of X.
X_new = np.linspace(X.min(), X.max(), 80)[:, np.newaxis]
y_pred = model.predict(X_new)

# Training data together with the fitted line.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(X, y, alpha=0.5, label='Date antrenare', edgecolors='k', linewidths=0.5)
ax.plot(X_new, y_pred, 'r-', linewidth=2, label='Predicție')
ax.set_xlabel('FG')
ax.set_ylabel('2P')
ax.set_title('Regresie Liniară: FG vs 2P')
ax.legend()
ax.grid(True, alpha=0.3)
fig.tight_layout()
plt.show()

# Fitted equation, printed next to the reference answer for comparison.
intercept, slope = model.intercept_, model.coef_[0]
print(f"Ecuația: 2P = {intercept:.2f} + {slope:.2f}*FG")
print("(Trebuie: 2P = -0.13 + 0.74*FG)")

# Inverse problem: swap the roles and regress FG on 2P.
X_inv = y.reshape(-1, 1)
y_inv = X.ravel()
model_inv = LinearRegression()
model_inv.fit(X_inv, y_inv)
inv_intercept, inv_slope = model_inv.intercept_, model_inv.coef_[0]
print(f"Inversă: FG = {inv_intercept:.2f} + {inv_slope:.2f}*2P")
# ============================================================================
# EXERCISE 3: Linear regression - 2P vs 2PA
# ============================================================================
print("\n" + "="*70)
print("EXERCIȚIUL 3: Regresie Liniară 2P vs 2PA")
print("="*70)

df = pd.read_csv('nba2021.csv')

# Select predictor (2P) and target (2PA) by label when both exist.
if '2P' in df.columns and '2PA' in df.columns:
    X = df['2P'].values.reshape(-1, 1)
    y = df['2PA'].values
    print("✓ Folosim coloanele '2P' și '2PA'")
else:
    print("⚠ Folosim indici")
    # Extract only the two needed columns by position and coerce them to
    # float. Mean-imputing the whole frame instead would raise as soon as the
    # CSV contains a text column, because strategy='mean' accepts numeric
    # data only. Missing values are filled by the shared step below.
    X = df.iloc[:, 12].to_numpy(dtype=float).reshape(-1, 1)
    y = df.iloc[:, 13].to_numpy(dtype=float)

# Replace missing values with the column mean (the imputer needs 2-D input).
imputer = SimpleImputer(strategy='mean')
X = imputer.fit_transform(X)
y = imputer.fit_transform(y.reshape(-1, 1)).ravel()

# Fit the model.
model = LinearRegression()
model.fit(X, y)

# Predict on 100 evenly spaced values spanning the observed range of X.
X_new = np.linspace(X.min(), X.max(), 100).reshape(-1, 1)
y_pred = model.predict(X_new)

# Training data together with the fitted line.
plt.figure(figsize=(10, 6))
plt.scatter(X, y, alpha=0.5, label='Date antrenare', edgecolors='k', linewidths=0.5)
plt.plot(X_new, y_pred, 'r-', linewidth=2, label='Predicție')
plt.xlabel('2P (2-Point Field Goals Made)')
plt.ylabel('2PA (2-Point Field Goals Attempted)')
plt.title('Regresie Liniară: 2P vs 2PA')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

# R² on the training data and the fitted equation.
r2 = model.score(X, y)
print(f"R²: {r2:.4f}")
print(f"Ecuația: 2PA = {model.intercept_:.2f} + {model.coef_[0]:.2f}*2P")

# Inverse problem: swap the roles and regress 2P on 2PA.
X_inv = y.reshape(-1, 1)
y_inv = X.ravel()
model_inv = LinearRegression()
model_inv.fit(X_inv, y_inv)
print(f"Inversă: 2P = {model_inv.intercept_:.2f} + {model_inv.coef_[0]:.2f}*2PA")
# ============================================================================
# EXERCISE 4: Polynomial regression - 2P vs FT
# ============================================================================
print("\n" + "="*70)
print("EXERCIȚIUL 4: Regresie Polinomială 2P vs FT")
print("="*70)

df = pd.read_csv('nba2021.csv')

# Select predictor (2P) and target (FT) by label when both exist.
if '2P' in df.columns and 'FT' in df.columns:
    X = df['2P'].values.reshape(-1, 1)
    y = df['FT'].values
    # Replace missing values with the column mean.
    imputer = SimpleImputer(strategy='mean')
    X = imputer.fit_transform(X)
    y = imputer.fit_transform(y.reshape(-1, 1)).ravel()
    print("✓ Folosim coloanele '2P' și 'FT'")
else:
    print("⚠ Folosim indici - selectăm doar coloane numerice")
    # Keep numeric columns only, so mean imputation does not hit text data.
    df_numeric = df.select_dtypes(include=[np.number])
    data = df_numeric.to_numpy()
    imputer = SimpleImputer(strategy='mean')
    data_imputed = imputer.fit_transform(data)
    numeric_cols = df_numeric.columns.tolist()
    print(f"Coloane numerice: {numeric_cols}")
    # Reaching this branch means at least one of '2P'/'FT' is absent from
    # df.columns, hence also from its numeric subset. A second lookup by
    # label can never succeed, so go straight to the positional guess.
    # NOTE(review): by default SimpleImputer discards columns that are
    # entirely NaN, which would shift these positions - confirm on the data.
    X = data_imputed[:, 10].reshape(-1, 1)  # approximate position of 2P
    y = data_imputed[:, 15]  # approximate position of FT
    print("⚠ Folosim indici aproximativi - verificați rezultatele")

# Degree-2 polynomial expansion followed by ordinary least squares.
pipeline = make_pipeline(
    PolynomialFeatures(degree=2),
    LinearRegression()
)
pipeline.fit(X, y)

# Predict on 100 evenly spaced values spanning the observed range of X.
X_new = np.linspace(X.min(), X.max(), 100).reshape(-1, 1)
y_pred = pipeline.predict(X_new)

# Training data together with the fitted curve.
plt.figure(figsize=(10, 6))
plt.scatter(X, y, alpha=0.5, label='Date antrenare', edgecolors='k', linewidths=0.5)
plt.plot(X_new, y_pred, 'r-', linewidth=2, label='Predicție polinomială (grad 2)')
plt.xlabel('2P (2-Point Field Goals)')
plt.ylabel('FT (Free Throws)')
plt.title('Regresie Polinomială: 2P vs FT')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

# make_pipeline names each step after its lowercased class name.
model_poly = pipeline['linearregression']
print(f"Coeficienți: {model_poly.coef_}")
print(f"Intercept: {model_poly.intercept_:.4f}")

# R² computed two ways: the pipeline's own score() and r2_score on the
# training predictions.
r2_1 = pipeline.score(X, y)
y_pred_train = pipeline.predict(X)
r2_2 = r2_score(y, y_pred_train)
print(f"R² (metoda 1 - pipeline.score): {r2_1:.4f}")
print(f"R² (metoda 2 - r2_score): {r2_2:.4f}")

# Inverse problem: swap the roles and model 2P as a function of FT.
print("\n--- Problema inversă: FT -> 2P ---")
X_inv = y.reshape(-1, 1)
y_inv = X.ravel()
pipeline_inv = make_pipeline(PolynomialFeatures(degree=2), LinearRegression())
pipeline_inv.fit(X_inv, y_inv)
print(f"R²: {pipeline_inv.score(X_inv, y_inv):.4f}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment