-
-
Save emillykkejensen/aa7535c29538a956d5b9c41e31f731a1 to your computer and use it in GitHub Desktop.
| ####################################### | |
| ### -------- Load libraries ------- ### | |
| # Load Huggingface transformers | |
| from transformers import TFBertModel, BertConfig, BertTokenizerFast | |
| # Then what you need from tensorflow.keras | |
| from tensorflow.keras.layers import Input, Dropout, Dense | |
| from tensorflow.keras.models import Model | |
| from tensorflow.keras.optimizers import Adam | |
| from tensorflow.keras.callbacks import EarlyStopping | |
| from tensorflow.keras.initializers import TruncatedNormal | |
| from tensorflow.keras.losses import CategoricalCrossentropy | |
| from tensorflow.keras.metrics import CategoricalAccuracy | |
| from tensorflow.keras.utils import to_categorical | |
| # And pandas for data import + sklearn because you allways need sklearn | |
| import pandas as pd | |
| from sklearn.model_selection import train_test_split | |
#######################################
### --------- Import data --------- ###
# Read the complaints dataset from disk.
data = pd.read_csv('dev/Fun with BERT/complaints.csv')
# Keep only the text input and the two target columns.
data = data[['Consumer complaint narrative', 'Product', 'Issue']]
# Remove a row if any of the three remaining columns are missing.
data = data.dropna()
# A stratified split needs at least two samples per class, so drop
# classes that occur only once. The two filters can interact: removing
# rows for a rare Product may leave an Issue class with a single member
# (and vice versa), so repeat both filters until the frame is stable.
while True:
    rows_before = len(data)
    data = data.groupby('Issue').filter(lambda x: len(x) > 1)
    data = data.groupby('Product').filter(lambda x: len(x) > 1)
    if len(data) == rows_before:
        break
# Keep the categorical labels in new columns so the original class
# names survive the numeric encoding below.
data['Issue_label'] = pd.Categorical(data['Issue'])
data['Product_label'] = pd.Categorical(data['Product'])
# Replace the label text with integer category codes for the model.
data['Issue'] = data['Issue_label'].cat.codes
data['Product'] = data['Product_label'].cat.codes
# Hold out 20% for testing - stratify over Issue so every Issue class
# appears in both splits.
data, data_test = train_test_split(data, test_size=0.2, stratify=data[['Issue']])
#######################################
### --------- Setup BERT ---------- ###
# Pretrained checkpoint to pull from the Hugging Face hub.
model_name = 'bert-base-uncased'
# Every sequence is padded/truncated to this many tokens.
max_length = 100
# Fetch the checkpoint's config; only the final output is needed
# downstream, so don't return the per-layer hidden states.
config = BertConfig.from_pretrained(model_name)
config.output_hidden_states = False
# Fast (Rust-backed) tokenizer matching the checkpoint.
tokenizer = BertTokenizerFast.from_pretrained(pretrained_model_name_or_path=model_name, config=config)
# The TF/Keras flavour of the pretrained BERT encoder.
transformer_model = TFBertModel.from_pretrained(model_name, config=config)
#######################################
### ------- Build the model ------- ###
# TF Keras documentation: https://www.tensorflow.org/api_docs/python/tf/keras/Model
# The first layer of the TFBertModel wrapper is the BERT MainLayer.
bert = transformer_model.layers[0]
# Build your model input: token ids only. (To also feed an attention
# mask, add a second Input named 'attention_mask' to the dict below and
# feed the tokenizer's mask at fit/evaluate time.)
input_ids = Input(shape=(max_length,), name='input_ids', dtype='int32')
inputs = {'input_ids': input_ids}
# Index 1 of the BERT output is the pooled representation.
bert_model = bert(inputs)[1]
# Regularise the pooled output. BUGFIX: the original passed
# training=False here, which hard-codes the flag and disables dropout
# even during fit() - the layer was a permanent no-op. Without the
# override, Keras activates dropout in training and disables it in
# predict()/evaluate() automatically.
dropout = Dropout(config.hidden_dropout_prob, name='pooled_output')
pooled_output = dropout(bert_model)
# One linear (logit) head per task; no activation here because the
# losses are configured with from_logits=True.
issue = Dense(units=len(data.Issue_label.value_counts()),
              kernel_initializer=TruncatedNormal(stddev=config.initializer_range),
              name='issue')(pooled_output)
product = Dense(units=len(data.Product_label.value_counts()),
                kernel_initializer=TruncatedNormal(stddev=config.initializer_range),
                name='product')(pooled_output)
outputs = {'issue': issue, 'product': product}
# And combine it all in a model object.
model = Model(inputs=inputs, outputs=outputs, name='BERT_MultiLabel_MultiClass')
# Take a look at the model.
model.summary()
#######################################
### ------- Train the model ------- ###
# Optimizer with the small learning rate typical for BERT fine-tuning.
optimizer = Adam(
    learning_rate=5e-05,
    epsilon=1e-08,
    decay=0.01,
    clipnorm=1.0)
# One categorical cross-entropy per head; the Dense heads emit raw
# logits, hence from_logits=True.
loss = {'issue': CategoricalCrossentropy(from_logits=True),
        'product': CategoricalCrossentropy(from_logits=True)}
metric = {'issue': CategoricalAccuracy('accuracy'),
          'product': CategoricalAccuracy('accuracy')}
# Compile the model.
model.compile(
    optimizer=optimizer,
    loss=loss,
    metrics=metric)
# One-hot encode the integer label codes. BUGFIX: num_classes is passed
# explicitly - to_categorical otherwise infers the width as
# (max code present + 1), which mismatches the Dense head width when
# the highest-coded class is absent from this split (the split is
# stratified on Issue only, so Product classes in particular can be
# missing). The Categorical columns retain the full category list.
n_issue_classes = len(data['Issue_label'].cat.categories)
n_product_classes = len(data['Product_label'].cat.categories)
y_issue = to_categorical(data['Issue'], num_classes=n_issue_classes)
y_product = to_categorical(data['Product'], num_classes=n_product_classes)
# Tokenize the input (takes some time).
x = tokenizer(
    text=data['Consumer complaint narrative'].to_list(),
    add_special_tokens=True,
    max_length=max_length,
    truncation=True,
    padding=True,
    return_tensors='tf',
    return_token_type_ids=False,
    return_attention_mask=True,
    verbose=True)
# Fit on the token ids only; 20% of the training rows are held out for
# validation each epoch.
history = model.fit(
    x={'input_ids': x['input_ids']},
    y={'issue': y_issue, 'product': y_product},
    validation_split=0.2,
    batch_size=64,
    epochs=10)
#######################################
### ----- Evaluate the model ------ ###
# One-hot targets for the held-out split. BUGFIX: num_classes is passed
# explicitly - the test split is very likely to miss the highest label
# code, in which case to_categorical would infer a narrower one-hot
# width than the model's output heads and evaluate() would fail. The
# Categorical label columns retain the full category list.
test_y_issue = to_categorical(
    data_test['Issue'],
    num_classes=len(data_test['Issue_label'].cat.categories))
test_y_product = to_categorical(
    data_test['Product'],
    num_classes=len(data_test['Product_label'].cat.categories))
# Tokenize the test narratives with the SAME settings as training
# (including return_attention_mask=True for consistency; only the token
# ids are fed to the model either way).
test_x = tokenizer(
    text=data_test['Consumer complaint narrative'].to_list(),
    add_special_tokens=True,
    max_length=max_length,
    truncation=True,
    padding=True,
    return_tensors='tf',
    return_token_type_ids=False,
    return_attention_mask=True,
    verbose=True)
# Run evaluation: loss and accuracy per head on the test split.
model_eval = model.evaluate(
    x={'input_ids': test_x['input_ids']},
    y={'issue': test_y_issue, 'product': test_y_product}
)
Can you upload the model.predict() part? Or how you can apply model.predict() function for this model in a dataframe of Narratives ?
Predict is the same as evaluate, but without target data (y) - see: https://www.tensorflow.org/api_docs/python/tf/keras/Model#predict
I think your tutorial is great. But, similar to other people requesting you to post the prediction code on the towardsdatascience website,
I too could not get predictions, confusion matrix, and classification report working.
Below is my code. I appreciate any help. I am a beginner.
predicted_raw = model.predict({'input_ids':x_test['input_ids']})
y_predicted = numpy.argmax(predicted_raw, axis = 1)
The error is here: y_predicted = numpy.argmax(predicted_raw, axis = 1). The error message says "axis 1 is out of bounds for array of dimension 1" When I change axis to zero. The new error message is "Singleton array 0 cannot be considered a valid collection." I think what the axis=0 error says is that y_predicted is null. I double checked it with an if statement.
Haven't looked at this for a while, so it's not really fresh in memory, sorry. But maybe have a look at your data filtering process — it looks like you might still need to remove some missing values?