Goals: Add links to sound, clear explanations of how things work. No hype and, if possible, no vendor content. Practical first-hand accounts of models in production are eagerly sought.
| """ Enhancing Intent Classification with the Universal Sentence Encoder: | |
| https://medium.com/scalableminds/enhancing-intent-classification-with-the-universal-sentence-encoder-ecbcd7a3005c | |
| """ | |
| from rasa_nlu.featurizers import Featurizer | |
| import tensorflow_hub as hub | |
| import tensorflow as tf | |
| ''' Script for downloading all GLUE data. | |
| Note: for legal reasons, we are unable to host MRPC. | |
| You can either use the version hosted by the SentEval team, which is already tokenized, | |
| or you can download the original data from (https://download.microsoft.com/download/D/4/6/D46FF87A-F6B9-4252-AA8B-3604ED519838/MSRParaphraseCorpus.msi) and extract the data from it manually. | |
| For Windows users, you can run the .msi file. For Mac and Linux users, consider an external library such as 'cabextract' (see below for an example). | |
| You should then rename and place specific files in a folder (see below for an example). | |
| mkdir MRPC | |
| cabextract MSRParaphraseCorpus.msi -d MRPC |
| # https://nlpforhackers.io/named-entity-extraction/ | |
| import os | |
| import string | |
| import collections | |
| import pickle | |
| from collections import Iterable | |
| from nltk.tag import ClassifierBasedTagger | |
| from nltk.chunk import ChunkParserI, conlltags2tree, tree2conlltags |
| from __future__ import print_function | |
| import json | |
| import os | |
| import numpy as np | |
| from gensim.models import Word2Vec | |
| from gensim.utils import simple_preprocess | |
| from keras.engine import Input | |
| from keras.layers import Embedding, merge |