Skip to content

Instantly share code, notes, and snippets.

@diodon
Last active January 8, 2024 04:45
Show Gist options
  • Select an option

  • Save diodon/048925b055a24b7820bcde2db2be3c2a to your computer and use it in GitHub Desktop.

Select an option

Save diodon/048925b055a24b7820bcde2db2be3c2a to your computer and use it in GitHub Desktop.
AIMS temp logger parquet data suite of tests
## TEST AIMS temp parquet partition
## Each test will run n times
## return: vector of executing time for each run
## libraries
library(arrow)
library(dplyr)
library(lubridate)
## making the connection
## NOTE: `uri` must be defined before this point. The assignment below is a
## commented-out placeholder; replace it with the S3 URI of the dataset,
## otherwise s3_bucket(uri) fails because `uri` does not exist.
#uri <- <<<ADD HERE THE S3 URI>>>
data_bucket <- s3_bucket(uri)
## Accessing dataset
## `df` is the dataset handle used by every test function (T1..T5) below.
df <- open_dataset(data_bucket)
## Show the dataset schema (column names and types)
df$schema
####
## Tests
## T1: count the number of records per site
##
## Runs the query `n` times against the global dataset `df` and times each
## run.
##
## Args:
##   n:       number of times to run the query (default 1).
##   verbose: if TRUE, print the test description and the number of runs.
## Returns:
##   numeric vector of length `n` with the elapsed time of each run, always
##   expressed in seconds.
T1 <- function(n=1, verbose=TRUE){
  if (verbose){
    print("count the number of records per site")
    print(paste0("Number of runs: ", n))
  }
  ## preallocate; seq_len() makes n = 0 run zero times (1:0 would run twice)
  t <- numeric(n)
  for (i in seq_len(n)){
    tStart <- now()
    ## the query result is only needed to force execution; it is discarded
    df_nPoints <- df |>
      group_by(site) |>
      summarise(n = n()) |>
      collect()
    ## fix the units explicitly: `now() - tStart` picks its units
    ## automatically (secs, mins, ...), so long runs would be reported in
    ## minutes and mixed with runs reported in seconds
    t[i] <- as.numeric(difftime(now(), tStart, units = "secs"))
  }
  return(t)
}
## T2: create a time series of daily means for one site
##
## Filters the global dataset `df` to the site "Clerke Reef", groups the
## records by calendar day and averages `qc_val` per day. The query is run
## `n` times and each run is timed.
##
## Args:
##   n:       number of times to run the query (default 1).
##   verbose: if TRUE, print the test description and the number of runs.
## Returns:
##   numeric vector of length `n` with the elapsed time of each run, always
##   expressed in seconds.
T2 <- function(n=1, verbose=TRUE){
  if (verbose){
    print("create a time series of daily means for one site")
    print(paste0("Number of runs: ", n))
  }
  ## preallocate; seq_len() makes n = 0 run zero times (1:0 would run twice)
  t <- numeric(n)
  for (i in seq_len(n)){
    tStart <- now()
    ## floor_date() keeps every reading in its own calendar day
    ## (round_date() would move afternoon readings to the following day).
    ## `tempMean = ...` names the output column; `<-` inside summarise()
    ## does not.
    df_ts <- df |>
      filter(site == "Clerke Reef") |>
      group_by(day = floor_date(time, "day")) |>
      summarise(tempMean = mean(qc_val, na.rm = TRUE)) |>
      collect()
    ## fix the units explicitly: `now() - tStart` picks its units
    ## automatically, so long runs would be reported in minutes
    t[i] <- as.numeric(difftime(now(), tStart, units = "secs"))
  }
  return(t)
}
## T3: create a daily average temp for one day for all sites
##
## Selects every record of the global dataset `df` that falls on 2019-01-01,
## then averages `qc_val` per site. The query is run `n` times and each run
## is timed.
##
## Args:
##   n:       number of times to run the query (default 1).
##   verbose: if TRUE, print the test description and the number of runs.
## Returns:
##   numeric vector of length `n` with the elapsed time of each run, always
##   expressed in seconds.
T3 <- function(n=1, verbose=TRUE){
  if (verbose){
    print("create a daily average temp for one day for all sites")
    print(paste0("Number of runs: ", n))
  }
  ## target day, split into year and day-of-year outside the query so the
  ## filter only compares columns against plain numbers
  targetDay <- ymd(20190101)
  targetYear <- year(targetDay)
  targetYday <- yday(targetDay)
  ## preallocate; seq_len() makes n = 0 run zero times (1:0 would run twice)
  t <- numeric(n)
  for (i in seq_len(n)){
    tStart <- now()
    ## match the whole day: `time == ymd(20190101)` compares a timestamp
    ## with a date and can match at most the readings taken exactly at
    ## midnight
    df_tempSite <- df |>
      filter(year(time) == targetYear, yday(time) == targetYday) |>
      group_by(site) |>
      summarise(tempMean = mean(qc_val, na.rm = TRUE)) |>
      collect()
    ## fix the units explicitly: `now() - tStart` picks its units
    ## automatically, so long runs would be reported in minutes
    t[i] <- as.numeric(difftime(now(), tStart, units = "secs"))
  }
  return(t)
}
## T4: calculate average temp for summer for all sites for one year
##
## "Summer" is taken here as the first 90 days of 2019
## (`yday(time) <= 90`). The records of the global dataset `df` in that
## window are averaged (`qc_val`) per site. The query is run `n` times and
## each run is timed.
##
## Args:
##   n:       number of times to run the query (default 1).
##   verbose: if TRUE, print the test description and the number of runs.
## Returns:
##   numeric vector of length `n` with the elapsed time of each run, always
##   expressed in seconds.
T4 <- function(n=1, verbose=TRUE){
  if (verbose){
    print("calculate average temp for summer for all sites for one year")
    print(paste0("Number of runs: ", n))
  }
  ## preallocate; seq_len() makes n = 0 run zero times (1:0 would run twice)
  t <- numeric(n)
  for (i in seq_len(n)){
    tStart <- now()
    df_tempSite <- df |>
      filter(year(time) == 2019, yday(time) <= 90) |>
      group_by(site) |>
      summarise(tempMean = mean(qc_val, na.rm = TRUE)) |>
      collect()
    ## store the time of THIS run (the previous `t <- tEnd` overwrote the
    ## vector, so only the last run was returned); units fixed to seconds
    ## because `now() - tStart` picks its units automatically
    t[i] <- as.numeric(difftime(now(), tStart, units = "secs"))
  }
  return(t)
}
## T5: calculate average temp for each summer day for all sites for one year
##
## "Summer" is taken here as the first 90 days of 2019
## (`yday(time) <= 90`). The records of the global dataset `df` in that
## window are averaged (`qc_val`) per site and per day of the year. The
## query is run `n` times and each run is timed.
##
## Args:
##   n:       number of times to run the query (default 1).
##   verbose: if TRUE, print the test description and the number of runs.
## Returns:
##   numeric vector of length `n` with the elapsed time of each run, always
##   expressed in seconds.
T5 <- function(n=1, verbose=TRUE){
  if (verbose){
    print("calculate average temp for each summer day for all sites for one year")
    print(paste0("Number of runs: ", n))
  }
  ## preallocate; seq_len() makes n = 0 run zero times (1:0 would run twice)
  t <- numeric(n)
  for (i in seq_len(n)){
    tStart <- now()
    ## group by site and day of year: the previous empty group_by()
    ## collapsed the whole window into one overall mean instead of one
    ## value per day.
    ## NOTE(review): "for all sites" is read as "per site", as in T4 --
    ## drop `site` from group_by() if one pooled mean per day is wanted.
    df_tempSite <- df |>
      filter(year(time) == 2019, yday(time) <= 90) |>
      group_by(site, day = yday(time)) |>
      summarise(tempMean = mean(qc_val, na.rm = TRUE)) |>
      collect()
    ## fix the units explicitly: `now() - tStart` picks its units
    ## automatically, so long runs would be reported in minutes
    t[i] <- as.numeric(difftime(now(), tStart, units = "secs"))
  }
  return(t)
}
## Run tests
## Each call uses the defaults (n = 1, verbose = TRUE): it prints the test
## description and returns the vector of run times. Pass `n` to repeat a
## test, e.g. T1(n = 5).
T1()
T2()
T3()
T4()
T5()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment