Last active
January 8, 2024 04:45
-
-
Save diodon/048925b055a24b7820bcde2db2be3c2a to your computer and use it in GitHub Desktop.
AIMS temp logger parquet data suite of tests
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ## TEST AIMS temp parquet partition | |
| ## Each test will run n times | |
| ## return: vector of execution times, one per run | |
| ## libraries | |
| library(arrow) | |
| library(dplyr) | |
| library(lubridate) | |
## making the connection
## `uri` must hold the S3 URI of the dataset; define it on the line below.
#uri <- <<<ADD HERE THE S3 URI>>>
## Fail early with a clear message instead of an "object not found" error.
if (!exists("uri")) {
  stop("`uri` is not defined: set it to the S3 URI of the dataset",
       call. = FALSE)
}
data_bucket <- s3_bucket(uri)
## Accessing dataset
df <- open_dataset(data_bucket)
## Print explicitly so the schema is also shown when the file is source()d.
print(df$schema)
| #### | |
| ## Tests | |
| ## count the number of records per site | |
## Benchmark: count the number of records per site.
##
## n:       number of times the query is executed (default 1).
## verbose: if TRUE, print the test description and the number of runs.
## return:  numeric vector of length n with the elapsed time of each run,
##          always expressed in seconds.
## The dataset is read from the global object `df`.
T1 <- function(n = 1, verbose = TRUE) {
  if (verbose) {
    print("count the number of records per site")
    print(paste0("Number of runs: ", n))
  }
  ## Preallocate the result instead of growing it on every iteration.
  t <- numeric(n)
  ## seq_len() gives an empty loop for n = 0, whereas 1:n would iterate
  ## over c(1, 0).
  for (i in seq_len(n)) {
    tStart <- now()
    df_nPoints <- df |>
      group_by(site) |>
      summarise(n = n()) |>
      collect()
    ## Pin the unit: a bare `now() - tStart` picks its unit automatically
    ## and switches to minutes (or hours) for runs longer than 60 s, which
    ## would mix units inside the returned vector.
    t[i] <- as.numeric(difftime(now(), tStart, units = "secs"))
  }
  return(t)
}
| ## create a time series of daily means for one site | |
## Benchmark: create a time series of daily means for one site
## ("Clerke Reef").
##
## n:       number of times the query is executed (default 1).
## verbose: if TRUE, print the test description and the number of runs.
## return:  numeric vector of length n with the elapsed time of each run,
##          always expressed in seconds.
## The dataset is read from the global object `df`.
T2 <- function(n = 1, verbose = TRUE) {
  if (verbose) {
    print("create a time series of daily means for one site")
    print(paste0("Number of runs: ", n))
  }
  ## Preallocate the result instead of growing it on every iteration.
  t <- numeric(n)
  ## seq_len() gives an empty loop for n = 0, whereas 1:n would iterate
  ## over c(1, 0).
  for (i in seq_len(n)) {
    tStart <- now()
    df_ts <- df |>
      filter(site == "Clerke Reef") |>
      ## floor_date() keeps every reading in its own calendar day;
      ## round_date() would move readings taken after noon to the next day.
      group_by(day = floor_date(time, "day")) |>
      ## `=` names the output column; `<-` here would not create a column
      ## called tempMean.
      summarise(tempMean = mean(qc_val, na.rm = TRUE)) |>
      collect()
    ## Pin the unit so runs longer than 60 s are not reported in minutes.
    t[i] <- as.numeric(difftime(now(), tStart, units = "secs"))
  }
  return(t)
}
| ## create a daily average temp for one day for all sites | |
## Benchmark: create a daily average temp for one day (2019-01-01) for all
## sites.
##
## n:       number of times the query is executed (default 1).
## verbose: if TRUE, print the test description and the number of runs.
## return:  numeric vector of length n with the elapsed time of each run,
##          always expressed in seconds.
## The dataset is read from the global object `df`.
T3 <- function(n = 1, verbose = TRUE) {
  if (verbose) {
    print("create a daily average temp for one day for all sites")
    print(paste0("Number of runs: ", n))
  }
  ## Half-open interval covering the whole target day. Comparing `time`
  ## for equality with a single date value does not select every reading
  ## taken during that day.
  ## NOTE(review): the day boundaries are taken in UTC - confirm this
  ## matches the time zone of the `time` column.
  day_start <- ymd("2019-01-01", tz = "UTC")
  day_end <- day_start + days(1)
  ## Preallocate the result instead of growing it on every iteration.
  t <- numeric(n)
  ## seq_len() gives an empty loop for n = 0, whereas 1:n would iterate
  ## over c(1, 0).
  for (i in seq_len(n)) {
    tStart <- now()
    df_tempSite <- df |>
      filter(time >= day_start, time < day_end) |>
      group_by(site) |>
      summarise(tempMean = mean(qc_val, na.rm = TRUE)) |>
      collect()
    ## Pin the unit so runs longer than 60 s are not reported in minutes.
    t[i] <- as.numeric(difftime(now(), tStart, units = "secs"))
  }
  return(t)
}
| ## calculate average temp for summer for all sites for one year | |
## Benchmark: calculate average temp for summer for all sites for one year.
## The filter keeps year 2019 and day-of-year <= 90, i.e. 1 January to
## 31 March 2019.
##
## n:       number of times the query is executed (default 1).
## verbose: if TRUE, print the test description and the number of runs.
## return:  numeric vector of length n with the elapsed time of each run,
##          always expressed in seconds.
## The dataset is read from the global object `df`.
T4 <- function(n = 1, verbose = TRUE) {
  if (verbose) {
    print("calculate average temp for summer for all sites for one year")
    print(paste0("Number of runs: ", n))
  }
  ## Preallocate the result instead of growing it on every iteration.
  t <- numeric(n)
  ## seq_len() gives an empty loop for n = 0, whereas 1:n would iterate
  ## over c(1, 0).
  for (i in seq_len(n)) {
    tStart <- now()
    df_tempSite <- df |>
      filter(year(time) == 2019, yday(time) <= 90) |>
      group_by(site) |>
      summarise(tempMean = mean(qc_val, na.rm = TRUE)) |>
      collect()
    ## Store the time of every run (previously each run overwrote the
    ## result, so only the last timing was returned), with the unit pinned
    ## to seconds so long runs are not reported in minutes.
    t[i] <- as.numeric(difftime(now(), tStart, units = "secs"))
  }
  return(t)
}
| ## calculate average temp for each summer day for all sites for one year | |
## Benchmark: calculate average temp for each summer day for all sites for
## one year. The filter keeps year 2019 and day-of-year <= 90, i.e.
## 1 January to 31 March 2019; the mean is computed per site and per day.
##
## n:       number of times the query is executed (default 1).
## verbose: if TRUE, print the test description and the number of runs.
## return:  numeric vector of length n with the elapsed time of each run,
##          always expressed in seconds.
## The dataset is read from the global object `df`.
T5 <- function(n = 1, verbose = TRUE) {
  if (verbose) {
    print("calculate average temp for each summer day for all sites for one year")
    print(paste0("Number of runs: ", n))
  }
  ## Preallocate the result instead of growing it on every iteration.
  t <- numeric(n)
  ## seq_len() gives an empty loop for n = 0, whereas 1:n would iterate
  ## over c(1, 0).
  for (i in seq_len(n)) {
    tStart <- now()
    df_tempSite <- df |>
      filter(year(time) == 2019, yday(time) <= 90) |>
      ## Group by site and day of year; an empty group_by() would collapse
      ## everything into one overall mean.
      group_by(site, day = yday(time)) |>
      summarise(tempMean = mean(qc_val, na.rm = TRUE)) |>
      collect()
    ## Pin the unit so runs longer than 60 s are not reported in minutes.
    t[i] <- as.numeric(difftime(now(), tStart, units = "secs"))
  }
  return(t)
}
| ## Run tests | |
## Print explicitly: top-level values are not auto-printed when the file
## is run through source(), so the timings would otherwise be lost.
print(T1())
print(T2())
print(T3())
print(T4())
print(T5())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment