Created
September 14, 2017 15:21
-
-
Save nzw0301/b569c5f73616c58220176fe8576dd2fd to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 19, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "from gensim.models import word2vec\n", | |
| "from gensim.models.keyedvectors import KeyedVectors\n", | |
| "import numpy as np" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 20, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "word_vectors = KeyedVectors.load_word2vec_format('./src/main/resources/text8.vec', binary=False)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 21, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "# word_vectors = KeyedVectors.load_word2vec_format('./src/main/resources/enwiki9.txt.vec', binary=False)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 22, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "[('daughter', 0.5485425591468811),\n", | |
| " ('empress', 0.5350010991096497),\n", | |
| " ('prince', 0.5310357809066772),\n", | |
| " ('throne', 0.5217003226280212),\n", | |
| " ('son', 0.517214298248291),\n", | |
| " ('queen', 0.5135539770126343),\n", | |
| " ('minamoto', 0.506763756275177),\n", | |
| " ('viii', 0.5048807859420776),\n", | |
| " ('mary', 0.498077392578125),\n", | |
| " ('heir', 0.49674057960510254)]" | |
| ] | |
| }, | |
| "execution_count": 22, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "word_vectors.most_similar(positive=['woman', 'king'], negative=['man'], topn=10)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 23, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "[('daughter', 0.5485425591468811),\n", | |
| " ('empress', 0.5350010991096497),\n", | |
| " ('prince', 0.5310357809066772),\n", | |
| " ('throne', 0.5217003226280212),\n", | |
| " ('son', 0.517214298248291),\n", | |
| " ('queen', 0.5135539770126343),\n", | |
| " ('minamoto', 0.506763756275177),\n", | |
| " ('viii', 0.5048807859420776),\n", | |
| " ('mary', 0.498077392578125),\n", | |
| " ('heir', 0.49674057960510254)]" | |
| ] | |
| }, | |
| "execution_count": 23, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "word_vectors.most_similar(positive=['woman', 'king'], negative=['man'], topn=10)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 24, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "[('file', 0.67220139503479),\n", | |
| " ('compression', 0.6693793535232544),\n", | |
| " ('stored', 0.6680015921592712),\n", | |
| " ('files', 0.6367778778076172),\n", | |
| " ('routing', 0.6301351189613342),\n", | |
| " ('ip', 0.6274353265762329),\n", | |
| " ('format', 0.6236443519592285),\n", | |
| " ('packet', 0.6220601797103882),\n", | |
| " ('bits', 0.619074821472168),\n", | |
| " ('ipv', 0.6162351965904236)]" | |
| ] | |
| }, | |
| "execution_count": 24, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "word_vectors.most_similar(positive=['data'])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 30, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "[('elizabeth', 0.7907043695449829),\n", | |
| " ('prince', 0.6521976590156555),\n", | |
| " ('king', 0.636025607585907),\n", | |
| " ('princess', 0.6347065567970276),\n", | |
| " ('anne', 0.6185387969017029),\n", | |
| " ('crown', 0.6120067834854126),\n", | |
| " ('monarch', 0.6079266667366028),\n", | |
| " ('victoria', 0.6078702211380005),\n", | |
| " ('isabella', 0.6061561107635498),\n", | |
| " ('majesty', 0.6058646440505981)]" | |
| ] | |
| }, | |
| "execution_count": 30, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "word_vectors.most_similar(positive=['queen'])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 26, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "[('france', 0.9623157382011414),\n", | |
| " ('hirohito', 0.9062392115592957),\n", | |
| " ('imposition', 0.8845174312591553),\n", | |
| " ('mountbatten', 0.8710086941719055),\n", | |
| " ('feudal', 0.8627457022666931),\n", | |
| " ('formalised', 0.8624369502067566),\n", | |
| " ('discuss', 0.8617070913314819),\n", | |
| " ('revolutionary', 0.8540736436843872),\n", | |
| " ('overturn', 0.8520448207855225),\n", | |
| " ('carthaginian', 0.8508167266845703),\n", | |
| " ('sanction', 0.8474399447441101),\n", | |
| " ('germany', 0.8420551419258118),\n", | |
| " ('declared', 0.8412027359008789),\n", | |
| " ('italy', 0.8404319882392883),\n", | |
| " ('stalin', 0.837766170501709),\n", | |
| " ('tried', 0.83773273229599),\n", | |
| " ('napoleon', 0.8374594449996948),\n", | |
| " ('portugal', 0.8338634371757507),\n", | |
| " ('intervene', 0.8285016417503357),\n", | |
| " ('unification', 0.8258130550384521),\n", | |
| " ('informed', 0.8245867490768433),\n", | |
| " ('impetus', 0.8214332461357117),\n", | |
| " ('ambitions', 0.8208622336387634),\n", | |
| " ('miko', 0.8205797076225281),\n", | |
| " ('undertake', 0.8186885118484497),\n", | |
| " ('possessions', 0.8185335397720337),\n", | |
| " ('kimmei', 0.8183091878890991),\n", | |
| " ('warrant', 0.8182796239852905),\n", | |
| " ('nihonshoki', 0.8158888816833496),\n", | |
| " ('accession', 0.813630998134613)]" | |
| ] | |
| }, | |
| "execution_count": 26, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "word_vectors.most_similar_cosmul(positive=['paris', 'japan'], negative=['tokyo'], topn=30)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 29, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "[('france', 0.5960692167282104),\n", | |
| " ('italy', 0.4693019390106201),\n", | |
| " ('germany', 0.4654536843299866),\n", | |
| " ('portugal', 0.4551469683647156),\n", | |
| " ('hirohito', 0.4550001323223114),\n", | |
| " ('napoleon', 0.4326018989086151),\n", | |
| " ('mountbatten', 0.42282170057296753),\n", | |
| " ('china', 0.41148364543914795),\n", | |
| " ('ambitions', 0.40866583585739136),\n", | |
| " ('revolutionary', 0.40459465980529785),\n", | |
| " ('kimmei', 0.40294909477233887),\n", | |
| " ('vienna', 0.40291261672973633),\n", | |
| " ('ferdinand', 0.3996574580669403),\n", | |
| " ('feudal', 0.39800015091896057),\n", | |
| " ('miko', 0.3969685137271881),\n", | |
| " ('korea', 0.3895190358161926),\n", | |
| " ('kojiki', 0.3892173171043396),\n", | |
| " ('declared', 0.3865616023540497),\n", | |
| " ('nihonshoki', 0.3864176273345947),\n", | |
| " ('kun', 0.38556843996047974),\n", | |
| " ('discuss', 0.3840706944465637),\n", | |
| " ('netherlands', 0.3836057186126709),\n", | |
| " ('possessions', 0.38359445333480835),\n", | |
| " ('unification', 0.38304412364959717),\n", | |
| " ('stalin', 0.37952858209609985),\n", | |
| " ('jing', 0.37798911333084106),\n", | |
| " ('isabella', 0.37629127502441406),\n", | |
| " ('britain', 0.37476783990859985),\n", | |
| " ('habsburgs', 0.37454771995544434),\n", | |
| " ('treaty', 0.37411004304885864)]" | |
| ] | |
| }, | |
| "execution_count": 29, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "word_vectors.most_similar(positive=['paris', 'japan'], negative=['tokyo'], topn=30)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.6.2" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 2 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment