Skip to content

Instantly share code, notes, and snippets.

@olivx
Forked from rg3915/FromDict.ipynb
Created December 5, 2018 21:29
Show Gist options
  • Select an option

  • Save olivx/64d657ff67ecfcc38917f5f34cc1495b to your computer and use it in GitHub Desktop.

Select an option

Save olivx/64d657ff67ecfcc38917f5f34cc1495b to your computer and use it in GitHub Desktop.
Annotations of Pandas DataFrame
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import timeit\n",
"from django.contrib.auth.models import User"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"max_value = 5"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"tic = timeit.default_timer()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"filename = '/tmp/example.xlsx'"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_excel(filename)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>NOME</th>\n",
" <th>EMAIL</th>\n",
" <th>CPF</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Alex Swartz Wertz</td>\n",
" <td>alex.swartz.wertz@email.com</td>\n",
" <td>00000000000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Valerie Berry Mattlin</td>\n",
" <td>valerie.berry.mattlin@email.com</td>\n",
" <td>00000000000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Brooke Coley Roy</td>\n",
" <td>brooke.coley.roy@email.com</td>\n",
" <td>00000000000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Joe Levin Collins</td>\n",
" <td>joe.levin.collins@email.com</td>\n",
" <td>00000000000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Lynn Alford Timmons</td>\n",
" <td>lynn.alford.timmons@email.com</td>\n",
" <td>00000000000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" NOME EMAIL CPF\n",
"0 Alex Swartz Wertz alex.swartz.wertz@email.com 00000000000000\n",
"1 Valerie Berry Mattlin valerie.berry.mattlin@email.com 00000000000000\n",
"2 Brooke Coley Roy brooke.coley.roy@email.com 00000000000000\n",
"3 Joe Levin Collins joe.levin.collins@email.com 00000000000000\n",
"4 Lynn Alford Timmons lynn.alford.timmons@email.com 00000000000000"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"email = df['EMAIL']\n",
"# Retornando os itens duplicados\n",
"# df[email.isin(email[email.duplicated()])]"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>index</th>\n",
" <th>NOME</th>\n",
" <th>EMAIL</th>\n",
" <th>CPF</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>49991</th>\n",
" <td>49995</td>\n",
" <td>Warren Stowell Young</td>\n",
" <td>warren.stowell.young@email.com</td>\n",
" <td>00000000000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>49992</th>\n",
" <td>49996</td>\n",
" <td>Patricia Cheek Mitchell</td>\n",
" <td>patricia.cheek.mitchell@email.com</td>\n",
" <td>00000000000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>49993</th>\n",
" <td>49997</td>\n",
" <td>Linda Denton Breton</td>\n",
" <td>linda.denton.breton@email.com</td>\n",
" <td>00000000000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>49994</th>\n",
" <td>49998</td>\n",
" <td>Clyde Lopes Leonard</td>\n",
" <td>clyde.lopes.leonard@email.com</td>\n",
" <td>00000000000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>49995</th>\n",
" <td>49999</td>\n",
" <td>Josefina Wilkinson Graves</td>\n",
" <td>josefina.wilkinson.graves@email.com</td>\n",
" <td>00000000000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" index NOME EMAIL \\\n",
"49991 49995 Warren Stowell Young warren.stowell.young@email.com \n",
"49992 49996 Patricia Cheek Mitchell patricia.cheek.mitchell@email.com \n",
"49993 49997 Linda Denton Breton linda.denton.breton@email.com \n",
"49994 49998 Clyde Lopes Leonard clyde.lopes.leonard@email.com \n",
"49995 49999 Josefina Wilkinson Graves josefina.wilkinson.graves@email.com \n",
"\n",
" CPF \n",
"49991 00000000000000 \n",
"49992 00000000000000 \n",
"49993 00000000000000 \n",
"49994 00000000000000 \n",
"49995 00000000000000 "
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Removendo os itens duplicados\n",
"dffinal = df.drop_duplicates('EMAIL').reset_index()\n",
"dffinal.tail()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"49996"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(dffinal)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"def create_user2(new_users):\n",
" # Separa full_name em first_name e last_name\n",
" users = []\n",
" for new_user in new_users:\n",
" full_name = new_user['full_name']\n",
" email = new_user['email']\n",
" first_name = full_name.split()[0]\n",
" last_name = full_name.split()[1:]\n",
" last_name = ' '.join(last_name)\n",
" username = email\n",
" sha_password = 'kkkkkkkkkkkkkkkkkkkk'\n",
" user = User(\n",
" first_name=first_name,\n",
" last_name=last_name,\n",
" email=email,\n",
" username=username,\n",
" password=sha_password\n",
" )\n",
" users.append(user)\n",
" return users"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"def import_users2(new_users):\n",
" User.objects.bulk_create(new_users)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"new_users = []\n",
"emails = []\n",
"emails = dffinal['EMAIL'].values.tolist()\n",
"exist_users = User.objects.filter(email__in=emails).values_list('email', flat=True)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"# for row in dffinal.head().itertuples():\n",
"# print(row.NOME, row.EMAIL, row.CPF)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"for row in dffinal.head(max_value).itertuples():\n",
" full_name = row.NOME\n",
" email = row.EMAIL\n",
" cpf = row.CPF\n",
" if email not in exist_users:\n",
" data = dict(full_name=full_name, email=email, cpf=cpf)\n",
" new_users.append(data)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"new_users_ = create_user2(new_users)\n",
"import_users2(new_users_)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(0, 'existentes')\n",
"(5, 'novos')\n"
]
}
],
"source": [
"print(len(exist_users), 'existentes')\n",
"print(len(new_users), 'novos')"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"toc = timeit.default_timer()"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"2.6345059871673584"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"toc - tic"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"users_query_list = User.objects.filter(email__in=new_users_)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"# users_query_list[:5]"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"5"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"simpleuser = Group.objects.get(name='simpleuser')\n",
"simpleuser.id"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"user_groups_list = []\n",
"# https://stackoverflow.com/a/34093544\n",
"for user in users_query_list:\n",
" user_groups = User.groups.through(user_id=user.pk, group_id=simpleuser.pk)\n",
" user_groups_list.append(user_groups)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[<User_groups: User_groups object>,\n",
" <User_groups: User_groups object>,\n",
" <User_groups: User_groups object>,\n",
" <User_groups: User_groups object>,\n",
" <User_groups: User_groups object>]"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"User.groups.through.objects.bulk_create(user_groups_list)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"toc = timeit.default_timer()"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"3.3119399547576904"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"toc - tic"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"# UserCPF\n",
"exist_usercpfs = UserCPF.objects.filter(user__email__in=emails).values_list('user__email', flat=True)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"alex.swartz.wertz@email.com\n",
"valerie.berry.mattlin@email.com\n",
"brooke.coley.roy@email.com\n",
"joe.levin.collins@email.com\n",
"lynn.alford.timmons@email.com\n"
]
}
],
"source": [
"new_usercpfs = []\n",
"for row in dffinal.head(max_value).itertuples():\n",
" email = row.EMAIL\n",
" cpf = row.CPF\n",
" # Verifica se UserCPF ja existe\n",
" if email not in exist_usercpfs:\n",
" print(email)\n",
" user = User.objects.get(email=email)\n",
" data = dict(user=user, cpf=cpf)\n",
" new_usercpfs.append(data)"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
"# função create_usercpf(new_usercpfs)\n",
"new_usercpfs2 = []\n",
"for user in new_usercpfs:\n",
" new_usercpfs2.append(UserCPF(user=user['user'], cpf=user['cpf']))"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[<UserCPF: Alex Swartz Wertz>,\n",
" <UserCPF: Valerie Berry Mattlin>,\n",
" <UserCPF: Brooke Coley Roy>,\n",
" <UserCPF: Joe Levin Collins>,\n",
" <UserCPF: Lynn Alford Timmons>]"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"UserCPF.objects.bulk_create(new_usercpfs2)"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [],
"source": [
"toc = timeit.default_timer()"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"4.112852096557617"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"toc - tic"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
"# Employee\n",
"firm = Company.objects.get(name='Empresa')\n",
"exist_employees = Employee.objects.filter(user__email__in=emails).values_list('user__email', flat=True)"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<Company: Empresa>"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"firm"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
"new_employees = []\n",
"for row in dffinal.head(max_value).itertuples():\n",
" email = row.EMAIL\n",
" if email not in exist_employees:\n",
" user = User.objects.get(email=email)\n",
" data = dict(user=user, firm=firm, department='14')\n",
" new_employees.append(data)"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'department': '14', 'firm': <Company: Empresa>, 'user': <User: alex.swartz.wertz@email.com>}\n",
"{'department': '14', 'firm': <Company: Empresa>, 'user': <User: valerie.berry.mattlin@email.com>}\n",
"{'department': '14', 'firm': <Company: Empresa>, 'user': <User: brooke.coley.roy@email.com>}\n",
"{'department': '14', 'firm': <Company: Empresa>, 'user': <User: joe.levin.collins@email.com>}\n",
"{'department': '14', 'firm': <Company: Empresa>, 'user': <User: lynn.alford.timmons@email.com>}\n"
]
}
],
"source": [
"# função create_employee(new_employees)\n",
"new_employees2 = []\n",
"for user in new_employees:\n",
" print(user)\n",
" new_employees2.append(Employee(user=user['user'], firm=user['firm'], department=user['department']))"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[<Employee: Alex Swartz Wertz>,\n",
" <Employee: Valerie Berry Mattlin>,\n",
" <Employee: Brooke Coley Roy>,\n",
" <Employee: Joe Levin Collins>,\n",
" <Employee: Lynn Alford Timmons>]"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Employee.objects.bulk_create(new_employees2)"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [],
"source": [
"# Permissões\n",
"def create_permissions():\n",
" # Criando permissão view_manage_career_pages\n",
" content_type = ContentType.objects.get_for_model(Company)\n",
" permission = Permission.objects.get(\n",
" content_type=content_type,\n",
" codename='view_manage_career_pages'\n",
" )\n",
" # Habilitando permissão para simpleuser\n",
" g = Group.objects.get(name='simpleuser')\n",
" g.permissions.add(permission)"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [],
"source": [
"def create_permissions_quiz():\n",
" # Criando permissão add_quiz\n",
" content_type = ContentType.objects.get_for_model(Quiz)\n",
" permission = Permission.objects.get(\n",
" content_type=content_type,\n",
" codename='add_quiz'\n",
" )\n",
" # Habilitando permissão para simpleuser\n",
" g = Group.objects.get(name='simpleuser')\n",
" g.permissions.add(permission)"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [],
"source": [
"create_permissions()"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [],
"source": [
"create_permissions_quiz()"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [],
"source": [
"toc = timeit.default_timer()"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"5.437417984008789"
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"toc - tic"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Django Shell-Plus",
"language": "python",
"name": "django_extensions"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.12"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment