Created
July 16, 2020 14:03
-
-
Save PatrickRWright/4ed5d4e5b5aed03b7a1aa5b593dd9b64 to your computer and use it in GitHub Desktop.
Created on Skills Network Labs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# Benchmarking base R csv readers against `readr` package functions" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "library(readr)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "6.58102881908417" | |
| ], | |
| "text/latex": [ | |
| "6.58102881908417" | |
| ], | |
| "text/markdown": [ | |
| "6.58102881908417" | |
| ], | |
| "text/plain": [ | |
| "[1] 6.581029" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<style>\n", | |
| ".list-inline {list-style: none; margin:0; padding: 0}\n", | |
| ".list-inline>li {display: inline-block}\n", | |
| ".list-inline>li:not(:last-child)::after {content: \"\\00b7\"; padding: 0 .5ex}\n", | |
| "</style>\n", | |
| "<ol class=list-inline><li>8.65839695930481</li><li>6.79847025871277</li><li>6.66255569458008</li><li>7.44221377372742</li><li>6.19600749015808</li><li>6.49950194358826</li><li>6.40926671028137</li><li>5.71379518508911</li><li>6.74125933647156</li><li>6.39990758895874</li></ol>\n" | |
| ], | |
| "text/latex": [ | |
| "\\begin{enumerate*}\n", | |
| "\\item 8.65839695930481\n", | |
| "\\item 6.79847025871277\n", | |
| "\\item 6.66255569458008\n", | |
| "\\item 7.44221377372742\n", | |
| "\\item 6.19600749015808\n", | |
| "\\item 6.49950194358826\n", | |
| "\\item 6.40926671028137\n", | |
| "\\item 5.71379518508911\n", | |
| "\\item 6.74125933647156\n", | |
| "\\item 6.39990758895874\n", | |
| "\\end{enumerate*}\n" | |
| ], | |
| "text/markdown": [ | |
| "1. 8.65839695930481\n", | |
| "2. 6.79847025871277\n", | |
| "3. 6.66255569458008\n", | |
| "4. 7.44221377372742\n", | |
| "5. 6.19600749015808\n", | |
| "6. 6.49950194358826\n", | |
| "7. 6.40926671028137\n", | |
| "8. 5.71379518508911\n", | |
| "9. 6.74125933647156\n", | |
| "10. 6.39990758895874\n", | |
| "\n", | |
| "\n" | |
| ], | |
| "text/plain": [ | |
| " [1] 8.658397 6.798470 6.662556 7.442214 6.196007 6.499502 6.409267 5.713795\n", | |
| " [9] 6.741259 6.399908" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "link <- \"https://raw.githubusercontent.com/PatrickRWright/share/master/sample.csv\"\n", | |
| "\n", | |
| "# base R read.csv: time n_runs reads of the remote file (download time included)\n", | |
| "n_runs <- 10\n", | |
| "times <- numeric(n_runs)  # preallocated; one elapsed time in seconds per run\n", | |
| "for (i in seq_len(n_runs)) {\n", | |
| "  t1 <- Sys.time()\n", | |
| "  d_base_csv <- read.csv(link)\n", | |
| "  t2 <- Sys.time()\n", | |
| "  # pin the unit: `t2 - t1` auto-selects secs/mins per run and c() would drop it\n", | |
| "  times[i] <- as.numeric(difftime(t2, t1, units = \"secs\"))\n", | |
| "}\n", | |
| "median(times)\n", | |
| "times" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "6.40133798122406" | |
| ], | |
| "text/latex": [ | |
| "6.40133798122406" | |
| ], | |
| "text/markdown": [ | |
| "6.40133798122406" | |
| ], | |
| "text/plain": [ | |
| "[1] 6.401338" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<style>\n", | |
| ".list-inline {list-style: none; margin:0; padding: 0}\n", | |
| ".list-inline>li {display: inline-block}\n", | |
| ".list-inline>li:not(:last-child)::after {content: \"\\00b7\"; padding: 0 .5ex}\n", | |
| "</style>\n", | |
| "<ol class=list-inline><li>6.60946822166443</li><li>7.10293197631836</li><li>6.49969267845154</li><li>7.35772824287415</li><li>6.42883324623108</li><li>6.37384271621704</li><li>5.78275942802429</li><li>6.21433424949646</li><li>6.22111487388611</li><li>6.04599523544312</li></ol>\n" | |
| ], | |
| "text/latex": [ | |
| "\\begin{enumerate*}\n", | |
| "\\item 6.60946822166443\n", | |
| "\\item 7.10293197631836\n", | |
| "\\item 6.49969267845154\n", | |
| "\\item 7.35772824287415\n", | |
| "\\item 6.42883324623108\n", | |
| "\\item 6.37384271621704\n", | |
| "\\item 5.78275942802429\n", | |
| "\\item 6.21433424949646\n", | |
| "\\item 6.22111487388611\n", | |
| "\\item 6.04599523544312\n", | |
| "\\end{enumerate*}\n" | |
| ], | |
| "text/markdown": [ | |
| "1. 6.60946822166443\n", | |
| "2. 7.10293197631836\n", | |
| "3. 6.49969267845154\n", | |
| "4. 7.35772824287415\n", | |
| "5. 6.42883324623108\n", | |
| "6. 6.37384271621704\n", | |
| "7. 5.78275942802429\n", | |
| "8. 6.21433424949646\n", | |
| "9. 6.22111487388611\n", | |
| "10. 6.04599523544312\n", | |
| "\n", | |
| "\n" | |
| ], | |
| "text/plain": [ | |
| " [1] 6.609468 7.102932 6.499693 7.357728 6.428833 6.373843 5.782759 6.214334\n", | |
| " [9] 6.221115 6.045995" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "# base R read.table: time n_runs reads of the remote file (download time included)\n", | |
| "n_runs <- 10\n", | |
| "times <- numeric(n_runs)  # preallocated; one elapsed time in seconds per run\n", | |
| "for (i in seq_len(n_runs)) {\n", | |
| "  t1 <- Sys.time()\n", | |
| "  d_base_table <- read.table(link, sep = \",\", header = TRUE)\n", | |
| "  t2 <- Sys.time()\n", | |
| "  # pin the unit: `t2 - t1` auto-selects secs/mins per run and c() would drop it\n", | |
| "  times[i] <- as.numeric(difftime(t2, t1, units = \"secs\"))\n", | |
| "}\n", | |
| "median(times)\n", | |
| "times" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "4.67636919021606" | |
| ], | |
| "text/latex": [ | |
| "4.67636919021606" | |
| ], | |
| "text/markdown": [ | |
| "4.67636919021606" | |
| ], | |
| "text/plain": [ | |
| "[1] 4.676369" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<style>\n", | |
| ".list-inline {list-style: none; margin:0; padding: 0}\n", | |
| ".list-inline>li {display: inline-block}\n", | |
| ".list-inline>li:not(:last-child)::after {content: \"\\00b7\"; padding: 0 .5ex}\n", | |
| "</style>\n", | |
| "<ol class=list-inline><li>7.23066449165344</li><li>4.62348866462708</li><li>4.85642409324646</li><li>4.76284122467041</li><li>4.61672377586365</li><li>4.72148060798645</li><li>4.56368541717529</li><li>4.68296408653259</li><li>4.54851007461548</li><li>4.66977429389954</li></ol>\n" | |
| ], | |
| "text/latex": [ | |
| "\\begin{enumerate*}\n", | |
| "\\item 7.23066449165344\n", | |
| "\\item 4.62348866462708\n", | |
| "\\item 4.85642409324646\n", | |
| "\\item 4.76284122467041\n", | |
| "\\item 4.61672377586365\n", | |
| "\\item 4.72148060798645\n", | |
| "\\item 4.56368541717529\n", | |
| "\\item 4.68296408653259\n", | |
| "\\item 4.54851007461548\n", | |
| "\\item 4.66977429389954\n", | |
| "\\end{enumerate*}\n" | |
| ], | |
| "text/markdown": [ | |
| "1. 7.23066449165344\n", | |
| "2. 4.62348866462708\n", | |
| "3. 4.85642409324646\n", | |
| "4. 4.76284122467041\n", | |
| "5. 4.61672377586365\n", | |
| "6. 4.72148060798645\n", | |
| "7. 4.56368541717529\n", | |
| "8. 4.68296408653259\n", | |
| "9. 4.54851007461548\n", | |
| "10. 4.66977429389954\n", | |
| "\n", | |
| "\n" | |
| ], | |
| "text/plain": [ | |
| " [1] 7.230664 4.623489 4.856424 4.762841 4.616724 4.721481 4.563685 4.682964\n", | |
| " [9] 4.548510 4.669774" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "# readr package read_csv: time n_runs reads of the remote file (download time included)\n", | |
| "n_runs <- 10\n", | |
| "times <- numeric(n_runs)  # preallocated; one elapsed time in seconds per run\n", | |
| "for (i in seq_len(n_runs)) {\n", | |
| "  t1 <- Sys.time()\n", | |
| "  # suppressMessages hides readr's column specification message\n", | |
| "  d_readr_csv <- suppressMessages(read_csv(link))\n", | |
| "  t2 <- Sys.time()\n", | |
| "  # pin the unit: `t2 - t1` auto-selects secs/mins per run and c() would drop it\n", | |
| "  times[i] <- as.numeric(difftime(t2, t1, units = \"secs\"))\n", | |
| "}\n", | |
| "median(times)\n", | |
| "times" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "4.60905647277832" | |
| ], | |
| "text/latex": [ | |
| "4.60905647277832" | |
| ], | |
| "text/markdown": [ | |
| "4.60905647277832" | |
| ], | |
| "text/plain": [ | |
| "[1] 4.609056" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<style>\n", | |
| ".list-inline {list-style: none; margin:0; padding: 0}\n", | |
| ".list-inline>li {display: inline-block}\n", | |
| ".list-inline>li:not(:last-child)::after {content: \"\\00b7\"; padding: 0 .5ex}\n", | |
| "</style>\n", | |
| "<ol class=list-inline><li>4.49884939193726</li><li>4.90030837059021</li><li>4.82013702392578</li><li>4.90754318237305</li><li>4.53757643699646</li><li>4.59374642372131</li><li>4.62436652183533</li><li>4.73889517784119</li><li>4.51502418518066</li><li>4.5224289894104</li></ol>\n" | |
| ], | |
| "text/latex": [ | |
| "\\begin{enumerate*}\n", | |
| "\\item 4.49884939193726\n", | |
| "\\item 4.90030837059021\n", | |
| "\\item 4.82013702392578\n", | |
| "\\item 4.90754318237305\n", | |
| "\\item 4.53757643699646\n", | |
| "\\item 4.59374642372131\n", | |
| "\\item 4.62436652183533\n", | |
| "\\item 4.73889517784119\n", | |
| "\\item 4.51502418518066\n", | |
| "\\item 4.5224289894104\n", | |
| "\\end{enumerate*}\n" | |
| ], | |
| "text/markdown": [ | |
| "1. 4.49884939193726\n", | |
| "2. 4.90030837059021\n", | |
| "3. 4.82013702392578\n", | |
| "4. 4.90754318237305\n", | |
| "5. 4.53757643699646\n", | |
| "6. 4.59374642372131\n", | |
| "7. 4.62436652183533\n", | |
| "8. 4.73889517784119\n", | |
| "9. 4.51502418518066\n", | |
| "10. 4.5224289894104\n", | |
| "\n", | |
| "\n" | |
| ], | |
| "text/plain": [ | |
| " [1] 4.498849 4.900308 4.820137 4.907543 4.537576 4.593746 4.624367 4.738895\n", | |
| " [9] 4.515024 4.522429" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "# readr package read_delim: time n_runs reads of the remote file (download time included)\n", | |
| "n_runs <- 10\n", | |
| "times <- numeric(n_runs)  # preallocated; one elapsed time in seconds per run\n", | |
| "for (i in seq_len(n_runs)) {\n", | |
| "  t1 <- Sys.time()\n", | |
| "  # suppressMessages hides readr's column specification message\n", | |
| "  d_readr_delim <- suppressMessages(read_delim(link, delim = \",\"))\n", | |
| "  t2 <- Sys.time()\n", | |
| "  # pin the unit: `t2 - t1` auto-selects secs/mins per run and c() would drop it\n", | |
| "  times[i] <- as.numeric(difftime(t2, t1, units = \"secs\"))\n", | |
| "}\n", | |
| "median(times)\n", | |
| "times" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 10, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<style>\n", | |
| ".list-inline {list-style: none; margin:0; padding: 0}\n", | |
| ".list-inline>li {display: inline-block}\n", | |
| ".list-inline>li:not(:last-child)::after {content: \"\\00b7\"; padding: 0 .5ex}\n", | |
| "</style>\n", | |
| "<ol class=list-inline><li>1000000</li><li>4</li></ol>\n" | |
| ], | |
| "text/latex": [ | |
| "\\begin{enumerate*}\n", | |
| "\\item 1000000\n", | |
| "\\item 4\n", | |
| "\\end{enumerate*}\n" | |
| ], | |
| "text/markdown": [ | |
| "1. 1000000\n", | |
| "2. 4\n", | |
| "\n", | |
| "\n" | |
| ], | |
| "text/plain": [ | |
| "[1] 1000000 4" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<style>\n", | |
| ".list-inline {list-style: none; margin:0; padding: 0}\n", | |
| ".list-inline>li {display: inline-block}\n", | |
| ".list-inline>li:not(:last-child)::after {content: \"\\00b7\"; padding: 0 .5ex}\n", | |
| "</style>\n", | |
| "<ol class=list-inline><li>1000000</li><li>4</li></ol>\n" | |
| ], | |
| "text/latex": [ | |
| "\\begin{enumerate*}\n", | |
| "\\item 1000000\n", | |
| "\\item 4\n", | |
| "\\end{enumerate*}\n" | |
| ], | |
| "text/markdown": [ | |
| "1. 1000000\n", | |
| "2. 4\n", | |
| "\n", | |
| "\n" | |
| ], | |
| "text/plain": [ | |
| "[1] 1000000 4" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<style>\n", | |
| ".list-inline {list-style: none; margin:0; padding: 0}\n", | |
| ".list-inline>li {display: inline-block}\n", | |
| ".list-inline>li:not(:last-child)::after {content: \"\\00b7\"; padding: 0 .5ex}\n", | |
| "</style>\n", | |
| "<ol class=list-inline><li>1000000</li><li>4</li></ol>\n" | |
| ], | |
| "text/latex": [ | |
| "\\begin{enumerate*}\n", | |
| "\\item 1000000\n", | |
| "\\item 4\n", | |
| "\\end{enumerate*}\n" | |
| ], | |
| "text/markdown": [ | |
| "1. 1000000\n", | |
| "2. 4\n", | |
| "\n", | |
| "\n" | |
| ], | |
| "text/plain": [ | |
| "[1] 1000000 4" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<style>\n", | |
| ".list-inline {list-style: none; margin:0; padding: 0}\n", | |
| ".list-inline>li {display: inline-block}\n", | |
| ".list-inline>li:not(:last-child)::after {content: \"\\00b7\"; padding: 0 .5ex}\n", | |
| "</style>\n", | |
| "<ol class=list-inline><li>1000000</li><li>4</li></ol>\n" | |
| ], | |
| "text/latex": [ | |
| "\\begin{enumerate*}\n", | |
| "\\item 1000000\n", | |
| "\\item 4\n", | |
| "\\end{enumerate*}\n" | |
| ], | |
| "text/markdown": [ | |
| "1. 1000000\n", | |
| "2. 4\n", | |
| "\n", | |
| "\n" | |
| ], | |
| "text/plain": [ | |
| "[1] 1000000 4" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "# make sure loaded dimensions are the same\n", | |
| "dim(d_base_csv)\n", | |
| "dim(d_base_table)\n", | |
| "dim(d_readr_csv)\n", | |
| "dim(d_readr_delim)\n", | |
| "# enforce the check instead of relying on eyeballing the four outputs;\n", | |
| "# stopifnot() returns invisibly, so the displayed results are unchanged\n", | |
| "stopifnot(\n", | |
| "  all(dim(d_base_csv) == dim(d_base_table)),\n", | |
| "  all(dim(d_base_csv) == dim(d_readr_csv)),\n", | |
| "  all(dim(d_base_csv) == dim(d_readr_delim))\n", | |
| ")" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 11, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "R version 3.5.1 (2018-07-02)\n", | |
| "Platform: x86_64-conda_cos6-linux-gnu (64-bit)\n", | |
| "Running under: Debian GNU/Linux 10 (buster)\n", | |
| "\n", | |
| "Matrix products: default\n", | |
| "BLAS/LAPACK: /home/jupyterlab/conda/envs/r/lib/R/lib/libRlapack.so\n", | |
| "\n", | |
| "locale:\n", | |
| " [1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8 \n", | |
| " [4] LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8 \n", | |
| " [7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C \n", | |
| "[10] LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C \n", | |
| "\n", | |
| "attached base packages:\n", | |
| "[1] stats graphics grDevices utils datasets methods base \n", | |
| "\n", | |
| "other attached packages:\n", | |
| "[1] readr_1.3.1\n", | |
| "\n", | |
| "loaded via a namespace (and not attached):\n", | |
| " [1] Rcpp_1.0.4.6 digest_0.6.25 crayon_1.3.4 IRdisplay_0.7.0\n", | |
| " [5] repr_1.1.0 R6_2.4.1 lifecycle_0.2.0 jsonlite_1.6.1 \n", | |
| " [9] magrittr_1.5 evaluate_0.14 pillar_1.4.3 rlang_0.4.5 \n", | |
| "[13] curl_4.3 uuid_0.1-4 ellipsis_0.3.0 vctrs_0.2.4 \n", | |
| "[17] IRkernel_0.8.12 tools_3.5.1 hms_0.5.3 compiler_3.5.1 \n", | |
| "[21] pkgconfig_2.0.3 base64enc_0.1-3 htmltools_0.4.0 pbdZMQ_0.3-3 \n", | |
| "[25] tibble_3.0.1 " | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "# record the R version, platform and package versions behind the timings above\n", | |
| "sessionInfo()" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "R", | |
| "language": "R", | |
| "name": "conda-env-r-r" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": "r", | |
| "file_extension": ".r", | |
| "mimetype": "text/x-r-source", | |
| "name": "R", | |
| "pygments_lexer": "r", | |
| "version": "3.5.1" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 4 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment