Skip to content

Instantly share code, notes, and snippets.

@DavisVaughan
Created February 20, 2026 13:50
Show Gist options
  • Select an option

  • Save DavisVaughan/e3eb47d4af404c7fe13a72dea1f2619b to your computer and use it in GitHub Desktop.

Select an option

Save DavisVaughan/e3eb47d4af404c7fe13a72dea1f2619b to your computer and use it in GitHub Desktop.
replace-values
# `replace_values()` is a Swiss Army knife!
library(dplyr)
# Example data: real `NA`s mixed with two sentinel strings ("Unknown" and
# "NotRecorded") that also stand in for missing information.
state <- c(
  "NC", "NY", "CA", NA,
  "NY", "Unknown", NA, "NotRecorded"
)
state

# Replace missing values with a constant ----
# Four spellings of the same operation: every `NA` becomes "Unknown".

# Established alternatives
if_else(is.na(state), "Unknown", state)
coalesce(state, "Unknown")
tidyr::replace_na(state, "Unknown")

# `replace_values()` writes it as a formula: the value to match on the left,
# its replacement on the right.
# NOTE(review): `replace_values()` looks like a recent dplyr addition --
# confirm the minimum dplyr version before relying on it.
replace_values(state, NA ~ "Unknown")
# Replace missing values with the corresponding value from another column ----
# `region` has one entry per element of `state`; each `NA` in `state` is
# filled with the `region` entry at the same position.
# fmt: skip
region <- c(
  "South", "North", "West", "East",
  "North", "Unknown", "West", "Unknown"
)

if_else(is.na(state), region, state)
coalesce(state, region)

# NOTE(review): left disabled by the author -- presumably because
# `tidyr::replace_na()` only accepts a single replacement value; confirm.
# tidyr::replace_na(state, region)

# Same formula shape as the constant case, with a vector on the right
replace_values(state, NA ~ region)
# Or go the other way! ----
# Replace problematic values with a missing value

# Three spellings that turn the sentinel "Unknown" into `NA`
if_else(state == "Unknown", NA, state)
na_if(state, "Unknown")
replace_values(state, "Unknown" ~ NA)

# Note how similar it is to the reverse operation
replace_values(state, NA ~ "Unknown")

# `replace_values()` extends naturally to more complex cases:
# one sentinel ...
replace_values(state, "Unknown" ~ NA)
# ... or several at once, listed together on the left-hand side
replace_values(state, c("Unknown", "NotRecorded") ~ NA)

# The alternatives need `%in%`, or one `na_if()` call per sentinel
if_else(state %in% c("Unknown", "NotRecorded"), NA, state)
state |>
  na_if("Unknown") |>
  na_if("NotRecorded")
# Standardize multiple issues at once ----
#   NA          -> <missing>
#   Unknown     -> <unknown>
#   NotRecorded -> <unknown>

# Oh god: nested `if_else()` needs one extra level per rule
if_else(
  is.na(state),
  "<missing>",
  if_else(
    state %in% c("Unknown", "NotRecorded"),
    "<unknown>",
    state
  )
)

# `case_when()` flattens the nesting, but the untouched values have to be
# passed through `.default`
case_when(
  is.na(state) ~ "<missing>",
  state %in% c("Unknown", "NotRecorded") ~ "<unknown>",
  .default = state
)
# ^ it's weird that `state` is at the end as a `.default`,
# we're just trying to tweak a few values! It feels like it should be the
# first argument!

# `replace_values()` takes the vector first and lists only the changes
state |>
  replace_values(
    NA ~ "<missing>",
    c("Unknown", "NotRecorded") ~ "<unknown>"
  )
@DavisVaughan
Copy link
Author

# `replace_values()` is a Swiss Army knife!

library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union

# Example data: real `NA`s mixed with two sentinel strings ("Unknown" and
# "NotRecorded"). Lines starting with `#>` are the recorded output of the
# expression directly above them.
state <- c("NC", "NY", "CA", NA, "NY", "Unknown", NA, "NotRecorded")
state
#> [1] "NC"          "NY"          "CA"          NA            "NY"         
#> [6] "Unknown"     NA            "NotRecorded"
# Replace missing values with a constant

# Three established spellings; the recorded output is identical for each.
if_else(is.na(state), "Unknown", state)
#> [1] "NC"          "NY"          "CA"          "Unknown"     "NY"         
#> [6] "Unknown"     "Unknown"     "NotRecorded"
coalesce(state, "Unknown")
#> [1] "NC"          "NY"          "CA"          "Unknown"     "NY"         
#> [6] "Unknown"     "Unknown"     "NotRecorded"
tidyr::replace_na(state, "Unknown")
#> [1] "NC"          "NY"          "CA"          "Unknown"     "NY"         
#> [6] "Unknown"     "Unknown"     "NotRecorded"

# The same replacement written as a formula: the value to match on the left,
# its replacement on the right.
replace_values(state, NA ~ "Unknown")
#> [1] "NC"          "NY"          "CA"          "Unknown"     "NY"         
#> [6] "Unknown"     "Unknown"     "NotRecorded"

# Replace missing values with the corresponding value from another column
# `region` has one entry per element of `state`; positions 4 and 7 (the `NA`s
# in `state`) are filled with "East" and "West" in the output below.
# fmt: skip
region <- c("South", "North", "West", "East", "North", "Unknown", "West", "Unknown")

if_else(is.na(state), region, state)
#> [1] "NC"          "NY"          "CA"          "East"        "NY"         
#> [6] "Unknown"     "West"        "NotRecorded"
coalesce(state, region)
#> [1] "NC"          "NY"          "CA"          "East"        "NY"         
#> [6] "Unknown"     "West"        "NotRecorded"
# NOTE(review): left disabled by the author -- presumably because
# `tidyr::replace_na()` only accepts a single replacement value; confirm.
# tidyr::replace_na(state, region)

# Same formula shape as the constant case, with a vector on the right
replace_values(state, NA ~ region)
#> [1] "NC"          "NY"          "CA"          "East"        "NY"         
#> [6] "Unknown"     "West"        "NotRecorded"
# Or go the other way!
# Replace problematic values with a missing value

# Three spellings that turn the sentinel "Unknown" into `NA`; elements that
# were already `NA` stay `NA` in every recorded output.
if_else(state == "Unknown", NA, state)
#> [1] "NC"          "NY"          "CA"          NA            "NY"         
#> [6] NA            NA            "NotRecorded"
na_if(state, "Unknown")
#> [1] "NC"          "NY"          "CA"          NA            "NY"         
#> [6] NA            NA            "NotRecorded"

replace_values(state, "Unknown" ~ NA)
#> [1] "NC"          "NY"          "CA"          NA            "NY"         
#> [6] NA            NA            "NotRecorded"

# Note how similar it is to the reverse operation
replace_values(state, NA ~ "Unknown")
#> [1] "NC"          "NY"          "CA"          "Unknown"     "NY"         
#> [6] "Unknown"     "Unknown"     "NotRecorded"

# `replace_values()` extends naturally to more complex cases:
# one sentinel, then several sentinels listed together on the left-hand side
replace_values(state, "Unknown" ~ NA)
#> [1] "NC"          "NY"          "CA"          NA            "NY"         
#> [6] NA            NA            "NotRecorded"
replace_values(state, c("Unknown", "NotRecorded") ~ NA)
#> [1] "NC" "NY" "CA" NA   "NY" NA   NA   NA

# The alternatives need `%in%`, or one `na_if()` call per sentinel
if_else(state %in% c("Unknown", "NotRecorded"), NA, state)
#> [1] "NC" "NY" "CA" NA   "NY" NA   NA   NA
state |> na_if("Unknown") |> na_if("NotRecorded")
#> [1] "NC" "NY" "CA" NA   "NY" NA   NA   NA
# Standardize multiple issues at once

# NA -> <missing>
# Unknown -> <unknown>
# NotRecorded -> <unknown>

# Oh god
# (nested `if_else()`: the inner call handles the sentinels and passes every
# other value through unchanged)
if_else(
  is.na(state),
  "<missing>",
  if_else(
    state %in% c("Unknown", "NotRecorded"),
    "<unknown>",
    state
  )
)
#> [1] "NC"        "NY"        "CA"        "<missing>" "NY"        "<unknown>"
#> [7] "<missing>" "<unknown>"

# `case_when()` flattens the nesting; untouched values come from `.default`
case_when(
  is.na(state) ~ "<missing>",
  state %in% c("Unknown", "NotRecorded") ~ "<unknown>",
  .default = state
)
#> [1] "NC"        "NY"        "CA"        "<missing>" "NY"        "<unknown>"
#> [7] "<missing>" "<unknown>"

# ^ it's weird that `state` is at the end as a `.default`,
# we're just trying to tweak a few values! It feels like it should be the
# first argument!

# `replace_values()` takes the vector first and lists only the changes; the
# recorded output matches the two versions above.
state |>
  replace_values(
    NA ~ "<missing>",
    c("Unknown", "NotRecorded") ~ "<unknown>"
  )
#> [1] "NC"        "NY"        "CA"        "<missing>" "NY"        "<unknown>"
#> [7] "<missing>" "<unknown>"

Created on 2026-02-20 with reprex v2.1.1

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment