rg3915/FromDict.ipynb

## escola.xlsx

      
    Raw
  

              escola.xlsx
            
          
            View raw
        
    
## FromDict.ipynb

      
Display the source blob

    
Display the rendered blob

    
    Raw
  

              FromDict.ipynb
            
          
      Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## gen_names_xlsx.ipynb

      
Display the source blob

    
Display the rendered blob

    
    Raw
  

              gen_names_xlsx.ipynb
            
          
      Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## gen_random_values.ipynb

      
Display the source blob

    
Display the rendered blob

    
    Raw
  

              gen_random_values.ipynb
            
          
      Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## import_xlsx_to_django_database.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import timeit\n",
    "import xlrd\n",
    "from django.contrib.auth.models import User"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "tic = timeit.default_timer()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "filename = '/tmp/example.xlsx'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_excel(filename)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>NAME</th>\n",
       "      <th>EMAIL</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2011</th>\n",
       "      <td>Angela Brown</td>\n",
       "      <td>angela-brown@email.com</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2824</th>\n",
       "      <td>Angela Brown</td>\n",
       "      <td>angela-brown@email.com</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3599</th>\n",
       "      <td>Angela Jones</td>\n",
       "      <td>angela-jones@email.com</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1227</th>\n",
       "      <td>Angela Jones</td>\n",
       "      <td>angela-jones@email.com</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2605</th>\n",
       "      <td>Anthony Evans</td>\n",
       "      <td>anthony-evans@email.com</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               NAME                    EMAIL\n",
       "2011   Angela Brown   angela-brown@email.com\n",
       "2824   Angela Brown   angela-brown@email.com\n",
       "3599   Angela Jones   angela-jones@email.com\n",
       "1227   Angela Jones   angela-jones@email.com\n",
       "2605  Anthony Evans  anthony-evans@email.com"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "email = df['EMAIL']\n",
    "# Retornando os itens duplicados\n",
    "df[email.isin(email[email.duplicated()])].sort_values(by=['EMAIL']).head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>index</th>\n",
       "      <th>NAME</th>\n",
       "      <th>EMAIL</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>4957</th>\n",
       "      <td>4995</td>\n",
       "      <td>Helen Mcallister</td>\n",
       "      <td>helen-mcallister@email.com</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4958</th>\n",
       "      <td>4996</td>\n",
       "      <td>Scott Hall</td>\n",
       "      <td>scott-hall@email.com</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4959</th>\n",
       "      <td>4997</td>\n",
       "      <td>Dawn Dowling</td>\n",
       "      <td>dawn-dowling@email.com</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4960</th>\n",
       "      <td>4998</td>\n",
       "      <td>John Campbell</td>\n",
       "      <td>john-campbell@email.com</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4961</th>\n",
       "      <td>4999</td>\n",
       "      <td>Barbara Alldredge</td>\n",
       "      <td>barbara-alldredge@email.com</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      index               NAME                        EMAIL\n",
       "4957   4995   Helen Mcallister   helen-mcallister@email.com\n",
       "4958   4996         Scott Hall         scott-hall@email.com\n",
       "4959   4997       Dawn Dowling       dawn-dowling@email.com\n",
       "4960   4998      John Campbell      john-campbell@email.com\n",
       "4961   4999  Barbara Alldredge  barbara-alldredge@email.com"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Removendo os itens duplicados\n",
    "dffinal = df.drop_duplicates('EMAIL').reset_index()\n",
    "dffinal.tail()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "def create_user(new_users):\n",
    "    # Separa full_name em first_name e last_name\n",
    "    users = []\n",
    "    for new_user in new_users:\n",
    "        full_name = new_user['full_name']\n",
    "        email = new_user['email']\n",
    "        first_name = full_name.split()[0]\n",
    "        last_name = full_name.split()[1:]\n",
    "        last_name = ' '.join(last_name)\n",
    "        username = email\n",
    "        user = User(\n",
    "            first_name=first_name,\n",
    "            last_name=last_name,\n",
    "            email=email,\n",
    "            username=username\n",
    "        )\n",
    "        users.append(user)\n",
    "    return users"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "def import_users(new_users):\n",
    "    User.objects.bulk_create(new_users)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "new_users = []\n",
    "emails = []"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "emails = dffinal['EMAIL'].values.tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "exist_users = User.objects.filter(email__in=emails).values_list('email', flat=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Manuel Marshall manuel-marshall@email.com\n",
      "Joy Flores joy-flores@email.com\n",
      "Nicholas Waits nicholas-waits@email.com\n",
      "Laura Cross laura-cross@email.com\n",
      "Felipe Garland felipe-garland@email.com\n"
     ]
    }
   ],
   "source": [
    "for row in dffinal.head().itertuples():\n",
    "    print(row.NAME, row.EMAIL)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "for row in dffinal.itertuples():\n",
    "    full_name = row.NAME\n",
    "    email = row.EMAIL\n",
    "    if email not in exist_users:\n",
    "        data = dict(full_name=full_name, email=email)\n",
    "        new_users.append(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "new_users_ = create_user(new_users)\n",
    "import_users(new_users_)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "17 existentes\n",
      "4945 novos\n"
     ]
    }
   ],
   "source": [
    "print(len(exist_users), 'existentes')\n",
    "print(len(new_users), 'novos')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "toc = timeit.default_timer()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2.1476561470044544"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "toc - tic"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Django Shell-Plus",
   "language": "python",
   "name": "django_extensions"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}

## import_xlsx_to_django_database2.ipynb

      
Display the source blob

    
Display the rendered blob

    
    Raw
  

              import_xlsx_to_django_database2.ipynb
            
          
      Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## OrcamentosNumOrcNovo.ipynb

      
Display the source blob

    
Display the rendered blob

    
    Raw
  

              OrcamentosNumOrcNovo.ipynb
            
          
      Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## pandas.ipynb

      
Display the source blob

    
Display the rendered blob

    
    Raw
  

              pandas.ipynb
            
          
      Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## remove_duplicated.ipynb

      
Display the source blob

    
Display the rendered blob

    
    Raw
  

              remove_duplicated.ipynb
            
          
      Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## separe_email.ipynb

      
Display the source blob

    
Display the rendered blob

    
    Raw
  

              separe_email.ipynb
            
          
      Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {},
	"outputs": [],
	"source": [
	"import pandas as pd\n",
	"import timeit\n",
	"import xlrd\n",
	"from django.contrib.auth.models import User"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {},
	"outputs": [],
	"source": [
	"tic = timeit.default_timer()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {},
	"outputs": [],
	"source": [
	"filename = '/tmp/example.xlsx'"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {},
	"outputs": [],
	"source": [
	"df = pd.read_excel(filename)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 20,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>NAME</th>\n",
	" <th>EMAIL</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>2011</th>\n",
	" <td>Angela Brown</td>\n",
	" <td>angela-brown@email.com</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2824</th>\n",
	" <td>Angela Brown</td>\n",
	" <td>angela-brown@email.com</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3599</th>\n",
	" <td>Angela Jones</td>\n",
	" <td>angela-jones@email.com</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1227</th>\n",
	" <td>Angela Jones</td>\n",
	" <td>angela-jones@email.com</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2605</th>\n",
	" <td>Anthony Evans</td>\n",
	" <td>anthony-evans@email.com</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" NAME EMAIL\n",
	"2011 Angela Brown angela-brown@email.com\n",
	"2824 Angela Brown angela-brown@email.com\n",
	"3599 Angela Jones angela-jones@email.com\n",
	"1227 Angela Jones angela-jones@email.com\n",
	"2605 Anthony Evans anthony-evans@email.com"
	]
	},
	"execution_count": 20,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"email = df['EMAIL']\n",
	"# Retornando os itens duplicados\n",
	"df[email.isin(email[email.duplicated()])].sort_values(by=['EMAIL']).head()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>index</th>\n",
	" <th>NAME</th>\n",
	" <th>EMAIL</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>4957</th>\n",
	" <td>4995</td>\n",
	" <td>Helen Mcallister</td>\n",
	" <td>helen-mcallister@email.com</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4958</th>\n",
	" <td>4996</td>\n",
	" <td>Scott Hall</td>\n",
	" <td>scott-hall@email.com</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4959</th>\n",
	" <td>4997</td>\n",
	" <td>Dawn Dowling</td>\n",
	" <td>dawn-dowling@email.com</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4960</th>\n",
	" <td>4998</td>\n",
	" <td>John Campbell</td>\n",
	" <td>john-campbell@email.com</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4961</th>\n",
	" <td>4999</td>\n",
	" <td>Barbara Alldredge</td>\n",
	" <td>barbara-alldredge@email.com</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" index NAME EMAIL\n",
	"4957 4995 Helen Mcallister helen-mcallister@email.com\n",
	"4958 4996 Scott Hall scott-hall@email.com\n",
	"4959 4997 Dawn Dowling dawn-dowling@email.com\n",
	"4960 4998 John Campbell john-campbell@email.com\n",
	"4961 4999 Barbara Alldredge barbara-alldredge@email.com"
	]
	},
	"execution_count": 6,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# Removendo os itens duplicados\n",
	"dffinal = df.drop_duplicates('EMAIL').reset_index()\n",
	"dffinal.tail()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"metadata": {},
	"outputs": [],
	"source": [
	"def create_user(new_users):\n",
	" # Separa full_name em first_name e last_name\n",
	" users = []\n",
	" for new_user in new_users:\n",
	" full_name = new_user['full_name']\n",
	" email = new_user['email']\n",
	" first_name = full_name.split()[0]\n",
	" last_name = full_name.split()[1:]\n",
	" last_name = ' '.join(last_name)\n",
	" username = email\n",
	" user = User(\n",
	" first_name=first_name,\n",
	" last_name=last_name,\n",
	" email=email,\n",
	" username=username\n",
	" )\n",
	" users.append(user)\n",
	" return users"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 8,
	"metadata": {},
	"outputs": [],
	"source": [
	"def import_users(new_users):\n",
	" User.objects.bulk_create(new_users)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 9,
	"metadata": {},
	"outputs": [],
	"source": [
	"new_users = []\n",
	"emails = []"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 23,
	"metadata": {},
	"outputs": [],
	"source": [
	"emails = dffinal['EMAIL'].values.tolist()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 29,
	"metadata": {},
	"outputs": [],
	"source": [
	"exist_users = User.objects.filter(email__in=emails).values_list('email', flat=True)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 13,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Manuel Marshall manuel-marshall@email.com\n",
	"Joy Flores joy-flores@email.com\n",
	"Nicholas Waits nicholas-waits@email.com\n",
	"Laura Cross laura-cross@email.com\n",
	"Felipe Garland felipe-garland@email.com\n"
	]
	}
	],
	"source": [
	"for row in dffinal.head().itertuples():\n",
	" print(row.NAME, row.EMAIL)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 14,
	"metadata": {},
	"outputs": [],
	"source": [
	"for row in dffinal.itertuples():\n",
	" full_name = row.NAME\n",
	" email = row.EMAIL\n",
	" if email not in exist_users:\n",
	" data = dict(full_name=full_name, email=email)\n",
	" new_users.append(data)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 15,
	"metadata": {},
	"outputs": [],
	"source": [
	"new_users_ = create_user(new_users)\n",
	"import_users(new_users_)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 16,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"17 existentes\n",
	"4945 novos\n"
	]
	}
	],
	"source": [
	"print(len(exist_users), 'existentes')\n",
	"print(len(new_users), 'novos')"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 17,
	"metadata": {},
	"outputs": [],
	"source": [
	"toc = timeit.default_timer()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 18,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"2.1476561470044544"
	]
	},
	"execution_count": 18,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"toc - tic"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Django Shell-Plus",
	"language": "python",
	"name": "django_extensions"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.5.2"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}
No results found