Last active
August 26, 2022 08:03
-
-
Save erdogant/d8650ebc8385f889b46e9034d0ac78fc to your computer and use it in GitHub Desktop.
hgboost
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
##################################################################
# Import the Titanic dataset and preprocess it for a regression
# task in which the passenger's Age is the target.
##################################################################
import numpy as np

# NOTE(review): `hgb` is not defined in this snippet; presumably it is an
# initialized hgboost model object created earlier -- confirm.
df = hgb.import_example(data='titanic')
print(df)
# PassengerId Survived Pclass ... Fare Cabin Embarked
# 0 1 0 3 ... 7.2500 NaN S
# 1 2 1 1 ... 71.2833 C85 C
# 2 3 1 3 ... 7.9250 NaN S
# 3 4 1 1 ... 53.1000 C123 S
# 4 5 0 3 ... 8.0500 NaN S
# .. ... ... ... ... ... ... ...
# 886 887 0 2 ... 13.0000 NaN S
# 887 888 1 1 ... 30.0000 B42 S
# 888 889 0 3 ... 23.4500 NaN S
# 889 890 1 1 ... 30.0000 C148 C
# 890 891 0 3 ... 7.7500 NaN Q
# [891 rows x 12 columns]

##################################################################
# Set target value: Age
# The values are extracted before the column is dropped below.
y = df['Age'].values
print(y)
# [22. 38. 26. 35. 35. 54. 2. 27. 14. ... ]

##################################################################
# Remove features: the target itself plus the PassengerId and Name columns.
df = df.drop(columns=['Age', 'PassengerId', 'Name'])

##################################################################
# One-hot encoding
X = hgb.preprocessing(df, verbose=0)

##################################################################
# Remove missing values: keep only the samples whose target (Age) is known.
# The mask is computed once and applied to both X and y so that the rows
# of X stay paired with the entries of y.
has_age = ~np.isnan(y)
X = X.loc[has_age, :]
y = y[has_age]

##################################################################
print(X)
# Survived_1.0 Pclass_1.0 ... Embarked_Q Embarked_S
# 0 False False ... False True
# 1 True True ... False False
# 2 True False ... False True
# 3 True True ... False True
# 4 False False ... False True
# .. ... ... ... ... ...
# 885 False False ... True False
# 886 False False ... False True
# 887 True True ... False True
# 889 True True ... False False
# 890 False False ... True False
# [714 rows x 203 columns]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment