Skip to content

Instantly share code, notes, and snippets.

@olivx
Forked from rg3915/FromDict.ipynb
Created December 5, 2018 21:29
Show Gist options
  • Select an option

  • Save olivx/64d657ff67ecfcc38917f5f34cc1495b to your computer and use it in GitHub Desktop.

Select an option

Save olivx/64d657ff67ecfcc38917f5f34cc1495b to your computer and use it in GitHub Desktop.
Annotations of Pandas DataFrame
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import names\n",
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"def gen_names(max_value):\n",
"    \"\"\"Build `max_value` fake people as (full name, email) tuples.\n",
"\n",
"    Each person gets a first and a last name from the `names` package.\n",
"    The email is derived from the lowercased first name only, so people\n",
"    who share a first name also share an email address.\n",
"    \"\"\"\n",
"    def _make_person():\n",
"        given = names.get_first_name()\n",
"        family = names.get_last_name()\n",
"        address = '%s@email.com' % given.lower()\n",
"        return ('%s %s' % (given, family), address)\n",
"\n",
"    return [_make_person() for _ in range(max_value)]"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Do not bind this to `names`: that would replace the imported `names` module\n",
"# that gen_names() calls, so running gen_names() a second time would fail.\n",
"persons = gen_names(100)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# One row per generated person; the tuples are (full name, email).\n",
"df = pd.DataFrame(persons, columns=('NAME', 'EMAIL'))"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>NAME</th>\n",
" <th>EMAIL</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>68</th>\n",
" <td>Albert Cunningham</td>\n",
" <td>albert@email.com</td>\n",
" </tr>\n",
" <tr>\n",
" <th>72</th>\n",
" <td>Allen Martinez</td>\n",
" <td>allen@email.com</td>\n",
" </tr>\n",
" <tr>\n",
" <th>73</th>\n",
" <td>Amalia Mouret</td>\n",
" <td>amalia@email.com</td>\n",
" </tr>\n",
" <tr>\n",
" <th>62</th>\n",
" <td>Amanda Mcmahan</td>\n",
" <td>amanda@email.com</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>Ann Rountree</td>\n",
" <td>ann@email.com</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" NAME EMAIL\n",
"68 Albert Cunningham albert@email.com\n",
"72 Allen Martinez allen@email.com\n",
"73 Amalia Mouret amalia@email.com\n",
"62 Amanda Mcmahan amanda@email.com\n",
"28 Ann Rountree ann@email.com"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first five rows when ordered by e-mail address.\n",
"df.sort_values(by='EMAIL').head(n=5)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>NAME</th>\n",
" <th>EMAIL</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>Gregory Crittendon</td>\n",
" <td>gregory@email.com</td>\n",
" </tr>\n",
" <tr>\n",
" <th>54</th>\n",
" <td>Gregory Thomson</td>\n",
" <td>gregory@email.com</td>\n",
" </tr>\n",
" <tr>\n",
" <th>39</th>\n",
" <td>Juan Brown</td>\n",
" <td>juan@email.com</td>\n",
" </tr>\n",
" <tr>\n",
" <th>86</th>\n",
" <td>Juan May</td>\n",
" <td>juan@email.com</td>\n",
" </tr>\n",
" <tr>\n",
" <th>58</th>\n",
" <td>Kathleen Anderson</td>\n",
" <td>kathleen@email.com</td>\n",
" </tr>\n",
" <tr>\n",
" <th>74</th>\n",
" <td>Kathleen Webb</td>\n",
" <td>kathleen@email.com</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53</th>\n",
" <td>Paul Nelson</td>\n",
" <td>paul@email.com</td>\n",
" </tr>\n",
" <tr>\n",
" <th>69</th>\n",
" <td>Paul Morris</td>\n",
" <td>paul@email.com</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Rene Warthen</td>\n",
" <td>rene@email.com</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>Rene Kidd</td>\n",
" <td>rene@email.com</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>William Hunter</td>\n",
" <td>william@email.com</td>\n",
" </tr>\n",
" <tr>\n",
" <th>47</th>\n",
" <td>William Hayes</td>\n",
" <td>william@email.com</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" NAME EMAIL\n",
"16 Gregory Crittendon gregory@email.com\n",
"54 Gregory Thomson gregory@email.com\n",
"39 Juan Brown juan@email.com\n",
"86 Juan May juan@email.com\n",
"58 Kathleen Anderson kathleen@email.com\n",
"74 Kathleen Webb kathleen@email.com\n",
"53 Paul Nelson paul@email.com\n",
"69 Paul Morris paul@email.com\n",
"0 Rene Warthen rene@email.com\n",
"24 Rene Kidd rene@email.com\n",
"25 William Hunter william@email.com\n",
"47 William Hayes william@email.com"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"email = df['EMAIL']\n",
"# keep=False marks every member of a duplicate group (not only the repeats),\n",
"# so each shared address shows up with all the people that use it.\n",
"dfd = df[email.duplicated(keep=False)]\n",
"dfd.sort_values(by=['EMAIL'])"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# One row per e-mail address (the first occurrence wins), ordered by address.\n",
"dffinal = df.drop_duplicates(subset=['EMAIL'], keep='first').sort_values(by='EMAIL')"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>index</th>\n",
" <th>NAME</th>\n",
" <th>EMAIL</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>68</td>\n",
" <td>Albert Cunningham</td>\n",
" <td>albert@email.com</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>72</td>\n",
" <td>Allen Martinez</td>\n",
" <td>allen@email.com</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>73</td>\n",
" <td>Amalia Mouret</td>\n",
" <td>amalia@email.com</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>62</td>\n",
" <td>Amanda Mcmahan</td>\n",
" <td>amanda@email.com</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>28</td>\n",
" <td>Ann Rountree</td>\n",
" <td>ann@email.com</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" index NAME EMAIL\n",
"0 68 Albert Cunningham albert@email.com\n",
"1 72 Allen Martinez allen@email.com\n",
"2 73 Amalia Mouret amalia@email.com\n",
"3 62 Amanda Mcmahan amanda@email.com\n",
"4 28 Ann Rountree ann@email.com"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# reset_index() renumbers the rows from 0 and keeps the old labels in an `index` column.\n",
"dffinal.reset_index(drop=False).head(n=5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Django Shell-Plus",
"language": "python",
"name": "django_extensions"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.12"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment