-
-
Save rg3915/5fb3a2e7338115bc92e82b7a9a2b372b to your computer and use it in GitHub Desktop.
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import pandas as pd\n", | |
| "import timeit\n", | |
| "from django.contrib.auth.models import User" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "max_value = 5" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "tic = timeit.default_timer()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "filename = '/tmp/example.xlsx'" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "df = pd.read_excel(filename)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style>\n", | |
| " .dataframe thead tr:only-child th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: left;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>NOME</th>\n", | |
| " <th>EMAIL</th>\n", | |
| " <th>CPF</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>Alex Swartz Wertz</td>\n", | |
| " <td>alex.swartz.wertz@email.com</td>\n", | |
| " <td>00000000000000</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>Valerie Berry Mattlin</td>\n", | |
| " <td>valerie.berry.mattlin@email.com</td>\n", | |
| " <td>00000000000000</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>Brooke Coley Roy</td>\n", | |
| " <td>brooke.coley.roy@email.com</td>\n", | |
| " <td>00000000000000</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>Joe Levin Collins</td>\n", | |
| " <td>joe.levin.collins@email.com</td>\n", | |
| " <td>00000000000000</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>Lynn Alford Timmons</td>\n", | |
| " <td>lynn.alford.timmons@email.com</td>\n", | |
| " <td>00000000000000</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " NOME EMAIL CPF\n", | |
| "0 Alex Swartz Wertz alex.swartz.wertz@email.com 00000000000000\n", | |
| "1 Valerie Berry Mattlin valerie.berry.mattlin@email.com 00000000000000\n", | |
| "2 Brooke Coley Roy brooke.coley.roy@email.com 00000000000000\n", | |
| "3 Joe Levin Collins joe.levin.collins@email.com 00000000000000\n", | |
| "4 Lynn Alford Timmons lynn.alford.timmons@email.com 00000000000000" | |
| ] | |
| }, | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 7, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "email = df['EMAIL']\n", | |
| "# Retornando os itens duplicados\n", | |
| "# df[email.isin(email[email.duplicated()])]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 8, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style>\n", | |
| " .dataframe thead tr:only-child th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: left;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>index</th>\n", | |
| " <th>NOME</th>\n", | |
| " <th>EMAIL</th>\n", | |
| " <th>CPF</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>49991</th>\n", | |
| " <td>49995</td>\n", | |
| " <td>Warren Stowell Young</td>\n", | |
| " <td>warren.stowell.young@email.com</td>\n", | |
| " <td>00000000000000</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>49992</th>\n", | |
| " <td>49996</td>\n", | |
| " <td>Patricia Cheek Mitchell</td>\n", | |
| " <td>patricia.cheek.mitchell@email.com</td>\n", | |
| " <td>00000000000000</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>49993</th>\n", | |
| " <td>49997</td>\n", | |
| " <td>Linda Denton Breton</td>\n", | |
| " <td>linda.denton.breton@email.com</td>\n", | |
| " <td>00000000000000</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>49994</th>\n", | |
| " <td>49998</td>\n", | |
| " <td>Clyde Lopes Leonard</td>\n", | |
| " <td>clyde.lopes.leonard@email.com</td>\n", | |
| " <td>00000000000000</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>49995</th>\n", | |
| " <td>49999</td>\n", | |
| " <td>Josefina Wilkinson Graves</td>\n", | |
| " <td>josefina.wilkinson.graves@email.com</td>\n", | |
| " <td>00000000000000</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " index NOME EMAIL \\\n", | |
| "49991 49995 Warren Stowell Young warren.stowell.young@email.com \n", | |
| "49992 49996 Patricia Cheek Mitchell patricia.cheek.mitchell@email.com \n", | |
| "49993 49997 Linda Denton Breton linda.denton.breton@email.com \n", | |
| "49994 49998 Clyde Lopes Leonard clyde.lopes.leonard@email.com \n", | |
| "49995 49999 Josefina Wilkinson Graves josefina.wilkinson.graves@email.com \n", | |
| "\n", | |
| " CPF \n", | |
| "49991 00000000000000 \n", | |
| "49992 00000000000000 \n", | |
| "49993 00000000000000 \n", | |
| "49994 00000000000000 \n", | |
| "49995 00000000000000 " | |
| ] | |
| }, | |
| "execution_count": 8, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Removendo os itens duplicados\n", | |
| "dffinal = df.drop_duplicates('EMAIL').reset_index()\n", | |
| "dffinal.tail()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 9, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "49996" | |
| ] | |
| }, | |
| "execution_count": 9, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "len(dffinal)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 10, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "def create_user2(new_users):\n", | |
| " # Separa full_name em first_name e last_name\n", | |
| " users = []\n", | |
| " for new_user in new_users:\n", | |
| " full_name = new_user['full_name']\n", | |
| " email = new_user['email']\n", | |
| " first_name = full_name.split()[0]\n", | |
| " last_name = full_name.split()[1:]\n", | |
| " last_name = ' '.join(last_name)\n", | |
| " username = email\n", | |
| " sha_password = 'kkkkkkkkkkkkkkkkkkkk'\n", | |
| " user = User(\n", | |
| " first_name=first_name,\n", | |
| " last_name=last_name,\n", | |
| " email=email,\n", | |
| " username=username,\n", | |
| " password=sha_password\n", | |
| " )\n", | |
| " users.append(user)\n", | |
| " return users" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 11, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "def import_users2(new_users):\n", | |
| " User.objects.bulk_create(new_users)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 12, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "new_users = []\n", | |
| "emails = []\n", | |
| "emails = dffinal['EMAIL'].values.tolist()\n", | |
| "exist_users = User.objects.filter(email__in=emails).values_list('email', flat=True)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 13, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# for row in dffinal.head().itertuples():\n", | |
| "# print(row.NOME, row.EMAIL, row.CPF)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 14, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "for row in dffinal.head(max_value).itertuples():\n", | |
| " full_name = row.NOME\n", | |
| " email = row.EMAIL\n", | |
| " cpf = row.CPF\n", | |
| " if email not in exist_users:\n", | |
| " data = dict(full_name=full_name, email=email, cpf=cpf)\n", | |
| " new_users.append(data)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 15, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "new_users_ = create_user2(new_users)\n", | |
| "import_users2(new_users_)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 16, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "(0, 'existentes')\n", | |
| "(5, 'novos')\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "print(len(exist_users), 'existentes')\n", | |
| "print(len(new_users), 'novos')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 17, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "toc = timeit.default_timer()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 18, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "2.6345059871673584" | |
| ] | |
| }, | |
| "execution_count": 18, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "toc - tic" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 19, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "users_query_list = User.objects.filter(email__in=new_users_)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 20, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# users_query_list[:5]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 21, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "5" | |
| ] | |
| }, | |
| "execution_count": 21, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "simpleuser = Group.objects.get(name='simpleuser')\n", | |
| "simpleuser.id" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 22, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "user_groups_list = []\n", | |
| "# https://stackoverflow.com/a/34093544\n", | |
| "for user in users_query_list:\n", | |
| " user_groups = User.groups.through(user_id=user.pk, group_id=simpleuser.pk)\n", | |
| " user_groups_list.append(user_groups)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 23, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "[<User_groups: User_groups object>,\n", | |
| " <User_groups: User_groups object>,\n", | |
| " <User_groups: User_groups object>,\n", | |
| " <User_groups: User_groups object>,\n", | |
| " <User_groups: User_groups object>]" | |
| ] | |
| }, | |
| "execution_count": 23, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "User.groups.through.objects.bulk_create(user_groups_list)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 24, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "toc = timeit.default_timer()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 25, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "3.3119399547576904" | |
| ] | |
| }, | |
| "execution_count": 25, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "toc - tic" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 26, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# UserCPF\n", | |
| "exist_usercpfs = UserCPF.objects.filter(user__email__in=emails).values_list('user__email', flat=True)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 27, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "alex.swartz.wertz@email.com\n", | |
| "valerie.berry.mattlin@email.com\n", | |
| "brooke.coley.roy@email.com\n", | |
| "joe.levin.collins@email.com\n", | |
| "lynn.alford.timmons@email.com\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "new_usercpfs = []\n", | |
| "for row in dffinal.head(max_value).itertuples():\n", | |
| " email = row.EMAIL\n", | |
| " cpf = row.CPF\n", | |
| " # Verifica se UserCPF ja existe\n", | |
| " if email not in exist_usercpfs:\n", | |
| " print(email)\n", | |
| " user = User.objects.get(email=email)\n", | |
| " data = dict(user=user, cpf=cpf)\n", | |
| " new_usercpfs.append(data)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 28, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# função create_usercpf(new_usercpfs)\n", | |
| "new_usercpfs2 = []\n", | |
| "for user in new_usercpfs:\n", | |
| " new_usercpfs2.append(UserCPF(user=user['user'], cpf=user['cpf']))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 29, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "[<UserCPF: Alex Swartz Wertz>,\n", | |
| " <UserCPF: Valerie Berry Mattlin>,\n", | |
| " <UserCPF: Brooke Coley Roy>,\n", | |
| " <UserCPF: Joe Levin Collins>,\n", | |
| " <UserCPF: Lynn Alford Timmons>]" | |
| ] | |
| }, | |
| "execution_count": 29, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "UserCPF.objects.bulk_create(new_usercpfs2)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 30, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "toc = timeit.default_timer()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 31, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "4.112852096557617" | |
| ] | |
| }, | |
| "execution_count": 31, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "toc - tic" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 32, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# Employee\n", | |
| "firm = Company.objects.get(name='Empresa')\n", | |
| "exist_employees = Employee.objects.filter(user__email__in=emails).values_list('user__email', flat=True)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 33, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "<Company: Empresa>" | |
| ] | |
| }, | |
| "execution_count": 33, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "firm" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 34, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "new_employees = []\n", | |
| "for row in dffinal.head(max_value).itertuples():\n", | |
| " email = row.EMAIL\n", | |
| " if email not in exist_employees:\n", | |
| " user = User.objects.get(email=email)\n", | |
| " data = dict(user=user, firm=firm, department='14')\n", | |
| " new_employees.append(data)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 35, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "{'department': '14', 'firm': <Company: Empresa>, 'user': <User: alex.swartz.wertz@email.com>}\n", | |
| "{'department': '14', 'firm': <Company: Empresa>, 'user': <User: valerie.berry.mattlin@email.com>}\n", | |
| "{'department': '14', 'firm': <Company: Empresa>, 'user': <User: brooke.coley.roy@email.com>}\n", | |
| "{'department': '14', 'firm': <Company: Empresa>, 'user': <User: joe.levin.collins@email.com>}\n", | |
| "{'department': '14', 'firm': <Company: Empresa>, 'user': <User: lynn.alford.timmons@email.com>}\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "# função create_employee(new_employees)\n", | |
| "new_employees2 = []\n", | |
| "for user in new_employees:\n", | |
| " print(user)\n", | |
| " new_employees2.append(Employee(user=user['user'], firm=user['firm'], department=user['department']))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 36, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "[<Employee: Alex Swartz Wertz>,\n", | |
| " <Employee: Valerie Berry Mattlin>,\n", | |
| " <Employee: Brooke Coley Roy>,\n", | |
| " <Employee: Joe Levin Collins>,\n", | |
| " <Employee: Lynn Alford Timmons>]" | |
| ] | |
| }, | |
| "execution_count": 36, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "Employee.objects.bulk_create(new_employees2)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 37, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# Permissões\n", | |
| "def create_permissions():\n", | |
| " # Criando permissão view_manage_career_pages\n", | |
| " content_type = ContentType.objects.get_for_model(Company)\n", | |
| " permission = Permission.objects.get(\n", | |
| " content_type=content_type,\n", | |
| " codename='view_manage_career_pages'\n", | |
| " )\n", | |
| " # Habilitando permissão para simpleuser\n", | |
| " g = Group.objects.get(name='simpleuser')\n", | |
| " g.permissions.add(permission)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 38, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "def create_permissions_quiz():\n", | |
| " # Criando permissão add_quiz\n", | |
| " content_type = ContentType.objects.get_for_model(Quiz)\n", | |
| " permission = Permission.objects.get(\n", | |
| " content_type=content_type,\n", | |
| " codename='add_quiz'\n", | |
| " )\n", | |
| " # Habilitando permissão para simpleuser\n", | |
| " g = Group.objects.get(name='simpleuser')\n", | |
| " g.permissions.add(permission)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 39, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "create_permissions()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 40, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "create_permissions_quiz()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 41, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "toc = timeit.default_timer()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 42, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "5.437417984008789" | |
| ] | |
| }, | |
| "execution_count": 42, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "toc - tic" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Django Shell-Plus", | |
| "language": "python", | |
| "name": "django_extensions" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 2 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython2", | |
| "version": "2.7.12" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 2 | |
| } |
Para transformar Query do Django em DataFrame:
df = pd.DataFrame(list(User.objects.all().values()))
Pra mostrar somente algumas colunas:
df[['id', 'email']]
Transformando todos os campos do DataFrame num jSON
df.T.apply(dict).tolist()
Renomeando colunas
df.rename(columns={'old_name': 'new_name'})
Selecionando as colunas de 10 em 10
df.iloc[:, :10]
com transposição
df.T.iloc[:10]
Novo df com algumas colunas.
new = old.filter(['A','B','D'], axis=1)
Como não inserir chaves com valor nulo no dicionário:
df.T.apply(lambda x: dict(x.dropna())).tolist()
Retornando os valores cujo tamanho da string seja maior que...
df[df['foo'].str.len() > 50]['foo']
Mostrando o tamanho de cada célula
df[df['foo'].str.len() > 50]['foo'].str.len()
Mostrando o tamanho de cada célula da coluna
df['foo_len'] = df['foo'].apply(len)
df[['foo_len', 'foo']]
Ou
df['foo'].str.len()
Suponha que você tenha Cliente e Obra.
Pegando o ID do Cliente que está em Obra e trocando pelo nome do Cliente que está no outro DataFrame.
# JSON do Cliente com IDCliente e Cliente
dict_cliente = df_cliente[['IDCliente', 'Cliente']].T.apply(dict).tolist()
[
{'IDCliente': 288, 'Cliente': 'Cliente Um'},
{'IDCliente': 1, 'Cliente': 'Cliente Dois'},
{'IDCliente': 959, 'Cliente': 'Cliente Três'},
]
Montando o dicionário que será usado como busca de cada Cliente a partir do seu ID.
_dict_cliente = {}
for item in dict_cliente:
_dict_cliente[item['IDCliente']] = item['Cliente']
_dict_cliente
{
288: 'Cliente Um',
1: 'Cliente Dois',
959: 'Cliente Três',
}
A partir desse dicionário fazemos a busca no outro DataFrame.
for row in df.itertuples():
nome_cliente = _dict_cliente.get(row.IDCliente)
print(row.IDCliente, nome_cliente)
288 Cliente Um
1 Cliente Dois
959 Cliente Três
Retornando o valor máximo agrupado por ano.
df.groupby(['Ano'])['NumeroOrcamento'].max()
df.reset_index()
Verificando data vazia:
for row in df.itertuples():
if row.DataOrcamento is pd.NaT:
print('Vazio')
else:
print(row.DataOrcamento)
Definindo vários fillna diferentes por coluna:
values = {'last_name': '', 'occupation': '', 'age': 0}
df = df.fillna(value=values)
df.head()
Se tiver problema com liblzma, faça um downgrade do Pandas para pandas==0.24.2.
https://stackoverflow.com/a/57115325
Retorna o tamanho do maior objeto de cada coluna.
dict_sizes = {}
for col in df.columns:
try:
print(f'{col} max length: {df[col].map(len).max()}\n')
dict_sizes[col] = df[col].map(len).max()
except Exception as e:
raise e
dict_sizes
dtype example
df['estoque'] =df['estoque'].fillna(0).astype(int)
Pandas Dataframe df to Django
https://www.laurivan.com/save-pandas-dataframe-as-django-model/
Produto.objects.bulk_create(
Produto(**item) for item in df.to_dict('records')
)
Definindo os tipos das colunas com dtype
dict_types_annot = {
'produto': str,
'ncm': str,
'preco': float,
'estoque': 'Int64',
}
# Define os tipos das colunas
dff = df.astype(dict_types_annot, errors='ignore')
# Troca 'nan' por None e float por None.
dff = dff.replace({'nan': None, float('nan'): None})
dff.to_dict('records')
Produto.objects.bulk_create(
Produto(**item) for item in dff.to_dict('records')
)
Intersecção de dataframes
import pandas as pd
import numpy as np
import datetime
from random import randint
df1 = pd.DataFrame({
'letters': ['A', 'B', 'C', 'D', 'E', 'J', 'K', 'M'],
'B': np.random.randint(0, 10, 8),
})
df1
df2 = pd.DataFrame({
'letters': ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'U', 'Z'],
'B': np.random.randint(0, 10, 26),
})
df2
# Retorna o que tem de comum nos dois dataframes.
pd.merge(df1, df2, how='inner', on='fruits')
# Retorna o que tem de comum, considerando o df1.
pd.merge(df1, df2, how='left', on='fruits')
# Retorna o que tem de comum, considerando o df2.
pd.merge(df1, df2, how='right', on='fruits')
Código que substitui da célula [6] em diante do
separe_email.ipynb:Um detalhe é a falta de um índice único dos registros. Então, supondo o índice automático decorrente da importação do arquivo Excel como ID válido, esse ID é o usado no "join" (
merge).