-
-
Save rg3915/5fb3a2e7338115bc92e82b7a9a2b372b to your computer and use it in GitHub Desktop.
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import pandas as pd" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "df = pd.read_excel('Orcamentos.xlsx')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "df = df[['IDOrcamento', 'NumOrc', 'NumOrc2', 'Opcao', 'Ano']]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>IDOrcamento</th>\n", | |
| " <th>NumOrc</th>\n", | |
| " <th>NumOrc2</th>\n", | |
| " <th>Opcao</th>\n", | |
| " <th>Ano</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>2938</td>\n", | |
| " <td>1390</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>A</td>\n", | |
| " <td>2016-01-04 10:01:14</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>2940</td>\n", | |
| " <td>1391</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>A</td>\n", | |
| " <td>2016-01-04 14:27:44</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>2941</td>\n", | |
| " <td>1392</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>A</td>\n", | |
| " <td>2016-01-04 14:47:23</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>2942</td>\n", | |
| " <td>1393</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>A</td>\n", | |
| " <td>2016-01-04 15:03:33</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>2943</td>\n", | |
| " <td>1394</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>A</td>\n", | |
| " <td>2016-01-04 15:18:16</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " IDOrcamento NumOrc NumOrc2 Opcao Ano\n", | |
| "0 2938 1390 NaN A 2016-01-04 10:01:14\n", | |
| "1 2940 1391 NaN A 2016-01-04 14:27:44\n", | |
| "2 2941 1392 NaN A 2016-01-04 14:47:23\n", | |
| "3 2942 1393 NaN A 2016-01-04 15:03:33\n", | |
| "4 2943 1394 NaN A 2016-01-04 15:18:16" | |
| ] | |
| }, | |
| "execution_count": 4, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "datetime.datetime(2016, 1, 4, 10, 1, 14)" | |
| ] | |
| }, | |
| "execution_count": 5, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df['Ano'][0].to_pydatetime()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "df['NOrc'] = df['NumOrc'].astype(str) + '.' + df['Opcao'] + '/' + df['Ano'].dt.year.astype(str)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 7, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>IDOrcamento</th>\n", | |
| " <th>NumOrc</th>\n", | |
| " <th>NumOrc2</th>\n", | |
| " <th>Opcao</th>\n", | |
| " <th>Ano</th>\n", | |
| " <th>NOrc</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>70</th>\n", | |
| " <td>3012</td>\n", | |
| " <td>1450</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>A</td>\n", | |
| " <td>2016-01-22 08:20:33</td>\n", | |
| " <td>1450.A/2016</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>71</th>\n", | |
| " <td>3013</td>\n", | |
| " <td>1451</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>A</td>\n", | |
| " <td>2016-01-22 11:19:26</td>\n", | |
| " <td>1451.A/2016</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>72</th>\n", | |
| " <td>3014</td>\n", | |
| " <td>1305</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>B</td>\n", | |
| " <td>2015-11-26 15:00:40</td>\n", | |
| " <td>1305.B/2015</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>73</th>\n", | |
| " <td>3015</td>\n", | |
| " <td>1452</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>A</td>\n", | |
| " <td>2016-01-26 10:11:53</td>\n", | |
| " <td>1452.A/2016</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>74</th>\n", | |
| " <td>3016</td>\n", | |
| " <td>1394</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>B</td>\n", | |
| " <td>2016-04-01 15:18:16</td>\n", | |
| " <td>1394.B/2016</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>75</th>\n", | |
| " <td>3017</td>\n", | |
| " <td>1453</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>A</td>\n", | |
| " <td>2016-01-26 12:26:44</td>\n", | |
| " <td>1453.A/2016</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>76</th>\n", | |
| " <td>3018</td>\n", | |
| " <td>1454</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>A</td>\n", | |
| " <td>2016-01-26 15:54:54</td>\n", | |
| " <td>1454.A/2016</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>77</th>\n", | |
| " <td>3019</td>\n", | |
| " <td>1454</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>B</td>\n", | |
| " <td>2016-01-26 15:54:54</td>\n", | |
| " <td>1454.B/2016</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>78</th>\n", | |
| " <td>3020</td>\n", | |
| " <td>1454</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>C</td>\n", | |
| " <td>2016-01-26 15:54:54</td>\n", | |
| " <td>1454.C/2016</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>79</th>\n", | |
| " <td>3021</td>\n", | |
| " <td>1454</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>D</td>\n", | |
| " <td>2016-01-26 15:54:54</td>\n", | |
| " <td>1454.D/2016</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>80</th>\n", | |
| " <td>3022</td>\n", | |
| " <td>1454</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>E</td>\n", | |
| " <td>2016-01-26 15:54:54</td>\n", | |
| " <td>1454.E/2016</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>81</th>\n", | |
| " <td>3023</td>\n", | |
| " <td>1454</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>F</td>\n", | |
| " <td>2016-01-26 15:54:54</td>\n", | |
| " <td>1454.F/2016</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>82</th>\n", | |
| " <td>3024</td>\n", | |
| " <td>1305</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>C</td>\n", | |
| " <td>2015-11-26 15:00:40</td>\n", | |
| " <td>1305.C/2015</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " IDOrcamento NumOrc NumOrc2 Opcao Ano NOrc\n", | |
| "70 3012 1450 NaN A 2016-01-22 08:20:33 1450.A/2016\n", | |
| "71 3013 1451 NaN A 2016-01-22 11:19:26 1451.A/2016\n", | |
| "72 3014 1305 NaN B 2015-11-26 15:00:40 1305.B/2015\n", | |
| "73 3015 1452 NaN A 2016-01-26 10:11:53 1452.A/2016\n", | |
| "74 3016 1394 NaN B 2016-04-01 15:18:16 1394.B/2016\n", | |
| "75 3017 1453 NaN A 2016-01-26 12:26:44 1453.A/2016\n", | |
| "76 3018 1454 NaN A 2016-01-26 15:54:54 1454.A/2016\n", | |
| "77 3019 1454 NaN B 2016-01-26 15:54:54 1454.B/2016\n", | |
| "78 3020 1454 NaN C 2016-01-26 15:54:54 1454.C/2016\n", | |
| "79 3021 1454 NaN D 2016-01-26 15:54:54 1454.D/2016\n", | |
| "80 3022 1454 NaN E 2016-01-26 15:54:54 1454.E/2016\n", | |
| "81 3023 1454 NaN F 2016-01-26 15:54:54 1454.F/2016\n", | |
| "82 3024 1305 NaN C 2015-11-26 15:00:40 1305.C/2015" | |
| ] | |
| }, | |
| "execution_count": 7, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df.loc[70:82]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 8, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>IDOrcamento</th>\n", | |
| " <th>NumOrc</th>\n", | |
| " <th>NumOrc2</th>\n", | |
| " <th>Opcao</th>\n", | |
| " <th>Ano</th>\n", | |
| " <th>NOrc</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>3099</th>\n", | |
| " <td>6132</td>\n", | |
| " <td>1429</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>A</td>\n", | |
| " <td>2018-12-21 11:37:06</td>\n", | |
| " <td>1429.A/2018</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " IDOrcamento NumOrc NumOrc2 Opcao Ano NOrc\n", | |
| "3099 6132 1429 NaN A 2018-12-21 11:37:06 1429.A/2018" | |
| ] | |
| }, | |
| "execution_count": 8, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df[df['NOrc'].str.contains('1429.A/2018')]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 9, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "6132" | |
| ] | |
| }, | |
| "execution_count": 9, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df[df['NOrc'].str.contains('1429.A/2018')]['IDOrcamento'].iloc[0]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 10, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "dff = pd.DataFrame(columns=['IDOrcamento', 'NumOrc2', 'NOrc'])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 11, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "2016\n", | |
| "2016\n", | |
| "2016\n", | |
| "2016\n", | |
| "2016\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "for row in df.head().itertuples():\n", | |
| " print(str(row.Ano.year))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 12, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "for row in df.itertuples():\n", | |
| " try:\n", | |
| " dff = dff.append({'IDOrcamento': row.IDOrcamento, 'NumOrc2': df[df['NOrc'].str.contains(str(row.NumOrc)+'.A/'+str(row.Ano.year))]['IDOrcamento'].iloc[0], 'NOrc': row.NOrc}, ignore_index=True)\n", | |
| " # print(row.IDOrcamento, df[df['NOrc'].str.contains(str(row.NumOrc)+'.A')]['IDOrcamento'].iloc[0]) # , row.NOrc\n", | |
| " except IndexError:\n", | |
| " pass" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 13, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>IDOrcamento</th>\n", | |
| " <th>NumOrc2</th>\n", | |
| " <th>NOrc</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>3059</th>\n", | |
| " <td>6128</td>\n", | |
| " <td>6128</td>\n", | |
| " <td>1426.A/2018</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3060</th>\n", | |
| " <td>6129</td>\n", | |
| " <td>6129</td>\n", | |
| " <td>1427.A/2018</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3061</th>\n", | |
| " <td>6130</td>\n", | |
| " <td>6130</td>\n", | |
| " <td>1428.A/2018</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3062</th>\n", | |
| " <td>6131</td>\n", | |
| " <td>6090</td>\n", | |
| " <td>1403.B/2018</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3063</th>\n", | |
| " <td>6132</td>\n", | |
| " <td>6132</td>\n", | |
| " <td>1429.A/2018</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " IDOrcamento NumOrc2 NOrc\n", | |
| "3059 6128 6128 1426.A/2018\n", | |
| "3060 6129 6129 1427.A/2018\n", | |
| "3061 6130 6130 1428.A/2018\n", | |
| "3062 6131 6090 1403.B/2018\n", | |
| "3063 6132 6132 1429.A/2018" | |
| ] | |
| }, | |
| "execution_count": 13, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "dff.tail()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 14, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>IDOrcamento</th>\n", | |
| " <th>NumOrc2</th>\n", | |
| " <th>NOrc</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>70</th>\n", | |
| " <td>3016</td>\n", | |
| " <td>2943</td>\n", | |
| " <td>1394.B/2016</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>71</th>\n", | |
| " <td>3017</td>\n", | |
| " <td>3017</td>\n", | |
| " <td>1453.A/2016</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>72</th>\n", | |
| " <td>3018</td>\n", | |
| " <td>3018</td>\n", | |
| " <td>1454.A/2016</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>73</th>\n", | |
| " <td>3019</td>\n", | |
| " <td>3018</td>\n", | |
| " <td>1454.B/2016</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>74</th>\n", | |
| " <td>3020</td>\n", | |
| " <td>3018</td>\n", | |
| " <td>1454.C/2016</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>75</th>\n", | |
| " <td>3021</td>\n", | |
| " <td>3018</td>\n", | |
| " <td>1454.D/2016</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>76</th>\n", | |
| " <td>3022</td>\n", | |
| " <td>3018</td>\n", | |
| " <td>1454.E/2016</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>77</th>\n", | |
| " <td>3023</td>\n", | |
| " <td>3018</td>\n", | |
| " <td>1454.F/2016</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>78</th>\n", | |
| " <td>3025</td>\n", | |
| " <td>3025</td>\n", | |
| " <td>1455.A/2016</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>79</th>\n", | |
| " <td>3026</td>\n", | |
| " <td>2943</td>\n", | |
| " <td>1394.C/2016</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>80</th>\n", | |
| " <td>3027</td>\n", | |
| " <td>3027</td>\n", | |
| " <td>1456.A/2016</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>81</th>\n", | |
| " <td>3028</td>\n", | |
| " <td>3028</td>\n", | |
| " <td>1457.A/2016</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>82</th>\n", | |
| " <td>3029</td>\n", | |
| " <td>3029</td>\n", | |
| " <td>1458.A/2016</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " IDOrcamento NumOrc2 NOrc\n", | |
| "70 3016 2943 1394.B/2016\n", | |
| "71 3017 3017 1453.A/2016\n", | |
| "72 3018 3018 1454.A/2016\n", | |
| "73 3019 3018 1454.B/2016\n", | |
| "74 3020 3018 1454.C/2016\n", | |
| "75 3021 3018 1454.D/2016\n", | |
| "76 3022 3018 1454.E/2016\n", | |
| "77 3023 3018 1454.F/2016\n", | |
| "78 3025 3025 1455.A/2016\n", | |
| "79 3026 2943 1394.C/2016\n", | |
| "80 3027 3027 1456.A/2016\n", | |
| "81 3028 3028 1457.A/2016\n", | |
| "82 3029 3029 1458.A/2016" | |
| ] | |
| }, | |
| "execution_count": 14, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "dff.loc[70:82]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 15, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "dff.to_excel('OrcamentosNumOrcNovo.xlsx')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Django Shell-Plus", | |
| "language": "python", | |
| "name": "django_extensions" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.6.0" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 2 | |
| } |
Para transformar Query do Django em DataFrame:
df = pd.DataFrame(list(User.objects.all().values()))
Pra mostrar somente algumas colunas:
df[['id', 'email']]
Transformando todos os campos do DataFrame num jSON
df.T.apply(dict).tolist()
Renomeando colunas
df.rename(columns={'old_name': 'new_name'})
Selecionando as colunas de 10 em 10
df.iloc[:, :10]
com transposição
df.T.iloc[:10]
Novo df com algumas colunas.
new = old.filter(['A','B','D'], axis=1)
Como não inserir chaves com valor nulo no dicionário:
df.T.apply(lambda x: dict(x.dropna())).tolist()
Retornando os valores cujo tamanho da string seja maior que...
df[df['foo'].str.len() > 50]['foo']
Mostrando o tamanho de cada célula
df[df['foo'].str.len() > 50]['foo'].str.len()
Mostrando o tamanho de cada célula da coluna
df['foo_len'] = df['foo'].apply(len)
df[['foo_len', 'foo']]
Ou
df['foo'].str.len()
Suponha que você tenha Cliente e Obra.
Pegando o ID do Cliente que está em Obra e trocando pelo nome do Cliente que está no outro DataFrame.
# JSON do Cliente com IDCliente e Cliente
dict_cliente = df_cliente[['IDCliente', 'Cliente']].T.apply(dict).tolist()
[
{'IDCliente': 288, 'Cliente': 'Cliente Um'},
{'IDCliente': 1, 'Cliente': 'Cliente Dois'},
{'IDCliente': 959, 'Cliente': 'Cliente Três'},
]
Montando o dicionário que será usado como busca de cada Cliente a partir do seu ID.
_dict_cliente = {}
for item in dict_cliente:
_dict_cliente[item['IDCliente']] = item['Cliente']
_dict_cliente
{
288: 'Cliente Um',
1: 'Cliente Dois',
959: 'Cliente Três',
}
A partir desse dicionário fazemos a busca no outro DataFrame.
for row in df.itertuples():
nome_cliente = _dict_cliente.get(row.IDCliente)
print(row.IDCliente, nome_cliente)
288 Cliente Um
1 Cliente Dois
959 Cliente Três
Retornando o valor máximo agrupado por ano.
df.groupby(['Ano'])['NumeroOrcamento'].max()
df.reset_index()
Verificando data vazia:
for row in df.itertuples():
if row.DataOrcamento is pd.NaT:
print('Vazio')
else:
print(row.DataOrcamento)
Definindo vários fillna diferentes por coluna:
values = {'last_name': '', 'occupation': '', 'age': 0}
df = df.fillna(value=values)
df.head()
Se tiver problema com liblzma, faça um downgrade do Pandas para pandas==0.24.2.
https://stackoverflow.com/a/57115325
Retorna o tamanho do maior objeto de cada coluna.
dict_sizes = {}
for col in df.columns:
try:
print(f'{col} max length: {df[col].map(len).max()}\n')
dict_sizes[col] = df[col].map(len).max()
except Exception as e:
raise e
dict_sizes
dtype example
df['estoque'] =df['estoque'].fillna(0).astype(int)
Pandas Dataframe df to Django
https://www.laurivan.com/save-pandas-dataframe-as-django-model/
Produto.objects.bulk_create(
Produto(**item) for item in df.to_dict('records')
)
Definindo os tipos das colunas com dtype
dict_types_annot = {
'produto': str,
'ncm': str,
'preco': float,
'estoque': 'Int64',
}
# Define os tipos das colunas
dff = df.astype(dict_types_annot, errors='ignore')
# Troca 'nan' por None e float por None.
dff = dff.replace({'nan': None, float('nan'): None})
dff.to_dict('records')
Produto.objects.bulk_create(
Produto(**item) for item in dff.to_dict('records')
)
Intersecção de dataframes
import pandas as pd
import numpy as np
import datetime
from random import randint
df1 = pd.DataFrame({
'letters': ['A', 'B', 'C', 'D', 'E', 'J', 'K', 'M'],
'B': np.random.randint(0, 10, 8),
})
df1
df2 = pd.DataFrame({
'letters': ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'U', 'Z'],
'B': np.random.randint(0, 10, 26),
})
df2
# Retorna o que tem de comum nos dois dataframes.
pd.merge(df1, df2, how='inner', on='fruits')
# Retorna o que tem de comum, considerando o df1.
pd.merge(df1, df2, how='left', on='fruits')
# Retorna o que tem de comum, considerando o df2.
pd.merge(df1, df2, how='right', on='fruits')
Código que substitui da célula [6] em diante do
separe_email.ipynb:Um detalhe é a falta de um índice único dos registros. Então, supondo o índice automático decorrente da importação do arquivo Excel como ID válido, esse ID é o usado no "join" (
merge).