Last active
August 24, 2022 10:49
-
-
Save erdogant/769314eed45d1a175b0241760099f00d to your computer and use it in GitHub Desktop.
hgboost
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#######################################################
# Titanic example: load the data and prepare X / y
#######################################################
import numpy as np

# NOTE(review): `hgb` is not created in this snippet; presumably it is an
# already-initialized hgboost object from an earlier step -- confirm.

# Load the Titanic example data.
df = hgb.import_example(data='titanic')
print(df)
# PassengerId Survived Pclass ... Fare Cabin Embarked
# 0 1 0 3 ... 7.2500 NaN S
# 1 2 1 1 ... 71.2833 C85 C
# 2 3 1 3 ... 7.9250 NaN S
# 3 4 1 1 ... 53.1000 C123 S
# 4 5 0 3 ... 8.0500 NaN S
# .. ... ... ... ... ... ... ...
# 886 887 0 2 ... 13.0000 NaN S
# 887 888 1 1 ... 30.0000 B42 S
# 888 889 0 3 ... 23.4500 NaN S
# 889 890 1 1 ... 30.0000 C148 C
# 890 891 0 3 ... 7.7500 NaN Q
# [891 rows x 12 columns]

#######################################################
# Target: the 'Survived' column. It must be extracted before the column
# is dropped from df below.
y = df['Survived'].values
print(y)
# array([0, 1, 1, 1, 0, 0, 0, ..., 0, 1, 1, 1, 1, 0, 0])

#######################################################
# Drop the target and two columns that are not used as features.
# The drop is in place, so df itself is modified.
df.drop(columns=['Survived', 'PassengerId', 'Name'], inplace=True)

#######################################################
# One-hot encode the remaining columns.
X = hgb.preprocessing(df)

#######################################################
# Keep only the rows whose target value is not NaN, applying the same
# mask to X and y so they stay aligned.
has_target = np.logical_not(np.isnan(y))
X = X.loc[has_target]
y = y[has_target]

#######################################################
print(X)
# Pclass_1.0 Pclass_2.0 Pclass_3.0 ... Embarked_None Embarked_Q Embarked_S
# 0 False False True ... False False True
# 1 True False False ... False False False
# 2 False False True ... False False True
# 3 True False False ... False False True
# 4 False False True ... False False True
# .. ... ... ... ... ... ... ...
# 886 False True False ... False False True
# 887 True False False ... False False True
# 888 False False True ... False False True
# 889 True False False ... False False False
# 890 False False True ... False True False
# [891 rows x 203 columns]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment