Lucas Astorian (@lucasastorian)

import os
import json
import openai
openai.api_key = os.environ['OPENAI_API_KEY']
# My Details
NAME = "Lucas Astorian"
FIRST_NAME = "Lucas"
EMAIL = "lucas@distcorp.com"
import matplotlib.pyplot as plt

# Plot the mean importance of each feature with its standard error as error bars
plt.figure(figsize=(10, feature_importance.shape[0] / 5))
feature_importance.sort_values('mean', ascending=True, inplace=True)
feature_importance['mean'].plot(kind='barh', color='b', alpha=0.25, xerr=feature_importance['std'], error_kw={'ecolor': 'r'}, label="Neg. Log Loss Importance")
import pandas as pd

feature_importances = pd.DataFrame(columns=['mean', 'std'])
for feature in features:
    # Calculate an array of OOS CV scores (placeholder function)
    cv_scores = get_cv_scores(rf, X[feature], y, sample_weights)
    # Record the mean and the standard error of the OOS scores for each feature
    feature_importances.loc[feature, 'mean'] = cv_scores.mean()
    feature_importances.loc[feature, 'std'] = cv_scores.std() * cv_scores.shape[0] ** -.5
import numpy as np
import pandas as pd
from sklearn.metrics import log_loss

baseline_scores = pd.Series(dtype=float)
permuted_scores = pd.DataFrame(columns=features)

for i, (train, val) in enumerate(cv.split(X)):
    X_train, y_train, w_train = X.iloc[train, :], y.iloc[train], sample_weights.iloc[train]
    X_val, y_val = X.iloc[val, :], y.iloc[val]
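    # Continuation sketch (not in the original gist): fit per fold, score a baseline,
    # then permute one column at a time on the validation fold. rf and features are
    # assumed to be the classifier and feature list used elsewhere in these snippets.
    rf.fit(X_train, y_train, sample_weight=w_train)
    prob = rf.predict_proba(X_val)
    baseline_scores.loc[i] = -log_loss(y_val, prob, labels=rf.classes_)

    for feature in features:
        X_perm = X_val.copy()
        X_perm[feature] = np.random.permutation(X_perm[feature].values)
        prob = rf.predict_proba(X_perm)
        permuted_scores.loc[i, feature] = -log_loss(y_val, prob, labels=rf.classes_)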
import pandas as pd
import numpy as np

total_mdi_scores = pd.DataFrame(columns=features)
for i, (train, test) in enumerate(cv.split(X)):
    rf.fit(X.iloc[train], y.iloc[train], sample_weight=weights.iloc[train])
    # Create a DataFrame with the impurity-based importance per feature (columns) per tree (rows)
    mdi_scores = pd.DataFrame({j: tree.feature_importances_ for j, tree in enumerate(rf.estimators_)},
                              index=features).T
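    # Continuation sketch (not in the original snippet): average the per-tree
    # importances for this fold into one row of the per-fold table
    total_mdi_scores.loc[i] = mdi_scores.mean()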
def calculate_vpin(buy_volume: pd.Series, sell_volume: pd.Series, window_size: int) -> pd.Series:
    """Calculates VPIN over a historical window.

    Args:
        buy_volume (pd.Series): A Series containing the classified Buy-Volume in each Volume-Bar
        sell_volume (pd.Series): A Series containing the classified Sell-Volume in each Volume-Bar
        window_size (int): The historical window over which to estimate VPIN

    Returns:
        VPIN (pd.Series): A Series containing the calculated VPIN values
    """
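    # Body not shown in the gist; a standard VPIN estimate (a sketch, not necessarily
    # the original implementation) is the rolling order-flow imbalance divided by the
    # rolling total volume over the window
    order_imbalance = (buy_volume - sell_volume).abs()
    total_volume = buy_volume + sell_volume
    return order_imbalance.rolling(window_size).sum() / total_volume.rolling(window_size).sum()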
@lucasastorian
lucasastorian / modeling_activations_with_lightgbm.py
Last active March 2, 2020 12:17
Use LightGBM to model the normalized activations of the Autoencoder
import lightgbm as lgb

params = {
    'max_depth': 7,
    'eta': 0.025,  # learning-rate alias
    'objective': 'multiclass',
    'num_class': 5,
    'metric': 'multi_logloss',
    'num_leaves': 45,
    'bagging_fraction': 0.5,
}
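A minimal usage sketch for training with these parameters, assuming the layer-wise activations have been stacked into 2-D matrices X_train_act / X_val_act with label arrays y_train / y_val (all names assumed, not from the gist):

train_set = lgb.Dataset(X_train_act, label=y_train)
val_set = lgb.Dataset(X_val_act, label=y_val, reference=train_set)
booster = lgb.train(params, train_set, num_boost_round=1000, valid_sets=[val_set])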
@lucasastorian
lucasastorian / extract_autoencoder_activations.py
Last active March 2, 2020 12:22
Extract new features from all the layer-wise activations of a trained autoencoder
import lightgbm as lgb
import tensorflow as tf
# compute list of activations for each hidden dense layer
layer_outputs = [layer.output for layer in autoencoder.layers if 'dense' in layer.name]
activation_model = tf.keras.models.Model(inputs=autoencoder.input, outputs=layer_outputs)
# compute training and validation activations
train_activations = activation_model.predict(X_train_standard)
val_activations = activation_model.predict(X_val_standard)
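Because the model has one output per dense layer, predict returns a list of arrays; a minimal sketch (not part of the gist) of stacking them into single 2-D feature matrices:

import numpy as np

# Each element of the list has shape (n_samples, layer_width); concatenate along columns
X_train_act = np.concatenate(train_activations, axis=1)
X_val_act = np.concatenate(val_activations, axis=1)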
@lucasastorian
lucasastorian / compile_and_train.py
Last active March 1, 2020 17:05
Compiling and Training the Autoencoder
from tensorflow.keras import optimizers
from tensorflow.keras.callbacks import EarlyStopping

# Exponentially decay the learning rate each epoch
def lr_scheduler1(epoch, lr):
    return lr * (0.995 ** epoch)

# Generator that applies swap-noise to the standardized inputs for the denoising autoencoder
training_generator = model.DAEGenerator(features=X_train_all_standard,
                                        batch_size=256,
                                        swap_noise_rate=0.15)

autoencoder.compile(loss='mean_squared_error', optimizer=optimizers.Adam(lr=0.0005))
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=20, min_delta=0.001)
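The fit call itself isn't included in the gist; a minimal sketch, assuming the generator yields (noisy_input, clean_target) batches and that X_val_standard serves as its own reconstruction target (epoch count assumed):

from tensorflow.keras.callbacks import LearningRateScheduler

history = autoencoder.fit(training_generator,
                          epochs=500,
                          validation_data=(X_val_standard, X_val_standard),
                          callbacks=[es, LearningRateScheduler(lr_scheduler1)])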
@lucasastorian
lucasastorian / self-normalizing-autoencoder.py
Last active February 26, 2020 11:36
A Self-Normalizing Denoising Autoencoder Architecture
from tensorflow.keras import layers

input_nodes = 350
code_nodes = 1500
hidden_nodes = 1500
dropout_rate = 0.25

# SELU activations with lecun_normal initialization keep the activations self-normalizing;
# AlphaDropout preserves that property where standard Dropout would not
input_layer = layers.Input(shape=(input_nodes,))
hidden_1 = layers.Dense(hidden_nodes, activation='selu', kernel_initializer='lecun_normal')(input_layer)
dropout_1 = layers.AlphaDropout(dropout_rate)(hidden_1)
code_layer = layers.Dense(code_nodes, activation='selu', kernel_initializer='lecun_normal')(dropout_1)
dropout_2 = layers.AlphaDropout(dropout_rate)(code_layer)
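The snippet stops at the code layer; a minimal sketch of how the decoder half and model assembly might look, mirroring the encoder (layer sizes and output activation assumed, not from the gist):

from tensorflow.keras import models

hidden_2 = layers.Dense(hidden_nodes, activation='selu', kernel_initializer='lecun_normal')(dropout_2)
dropout_3 = layers.AlphaDropout(dropout_rate)(hidden_2)
output_layer = layers.Dense(input_nodes, activation='linear')(dropout_3)

autoencoder = models.Model(inputs=input_layer, outputs=output_layer)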