Skip to content

Instantly share code, notes, and snippets.

@jacobrose
Last active February 4, 2025 17:37
Show Gist options
  • Select an option

  • Save jacobrose/e6df7c52d15345d316ac5334c2dab46f to your computer and use it in GitHub Desktop.

Select an option

Save jacobrose/e6df7c52d15345d316ac5334c2dab46f to your computer and use it in GitHub Desktop.
Inserting duplicate rows into a backup table in preparation for deletion
CREATE TABLE `etsy-data-warehouse-dev.finpay.adyen_settlement_detail_backup_2025_02_04`
CLONE `etsy-data-warehouse-dev.finpay.adyen_settlement_detail`;
ALTER TABLE `etsy-data-warehouse-dev.finpay.adyen_settlement_detail_backup_2025_02_04`
SET OPTIONS (
expiration_timestamp = TIMESTAMP '2025-03-05 18:00:00'
);
CREATE TABLE `etsy-data-warehouse-dev.finpay.adyen_settlement_detail_deduplication_2025_02_04`
LIKE `etsy-data-warehouse-dev.finpay.adyen_settlement_detail`;
INSERT INTO `etsy-data-warehouse-dev.finpay.adyen_settlement_detail_deduplication_2025_02_04` (
WITH `duplicates` AS (
SELECT
`a`.`fingerprint` as `fingerprint1`,
`b`.`fingerprint` as `fingerprint2`,
`a`.`filename` as `filename1`,
`b`.`filename` as `filename2`,
`a`.`merchant_account`,
`a`.`psp_reference`,
`a`.`merchant_reference`,
`a`.`payment_method`,
`a`.`creation_date`,
`a`.`type`,
`a`.`modification_reference`,
`a`.`gross_currency`,
`a`.`gross_debit`,
`a`.`gross_credit`,
`a`.`exchange_rate`,
`a`.`net_currency`,
`a`.`net_debit`,
`a`.`net_credit`,
`a`.`commission`,
`a`.`markup`,
`a`.`scheme_fees`,
`a`.`interchange`,
`a`.`payment_method_variant`,
`a`.`modification_merchant_reference`,
`a`.`batch_number`,
`a`.`issuer_country`
FROM `etsy-data-warehouse-dev.finpay.adyen_settlement_detail` AS `a`
left outer join `etsy-data-warehouse-dev.finpay.adyen_settlement_detail` AS `b`
on (
`a`.`fingerprint` <> `b`.`fingerprint`
AND `a`.`filename` <> `b`.`filename`
AND ((`a`.`merchant_account` = `b`.`merchant_account`) OR (`a`.`merchant_account` is null AND `b`.`merchant_account` is null))
AND ((`a`.`psp_reference` = `b`.`psp_reference`) OR (`a`.`psp_reference` is null AND `b`.`psp_reference` is null))
AND ((`a`.`merchant_reference` = `b`.`merchant_reference`) OR (`a`.`merchant_reference` is null AND `b`.`merchant_reference` is null))
AND ((`a`.`payment_method` = `b`.`payment_method`) OR (`a`.`payment_method` is null AND `b`.`payment_method` is null))
AND ((`a`.`creation_date` = `b`.`creation_date`) OR (`a`.`creation_date` is null AND `b`.`creation_date` is null))
AND ((`a`.`type` = `b`.`type`) OR (`a`.`type` is null AND `b`.`type` is null))
AND ((`a`.`modification_reference` = `b`.`modification_reference`) OR (`a`.`modification_reference` is null AND `b`.`modification_reference` is null))
AND ((`a`.`gross_currency` = `b`.`gross_currency`) OR (`a`.`gross_currency` is null AND `b`.`gross_currency` is null))
AND ((`a`.`gross_debit` = `b`.`gross_debit`) OR (`a`.`gross_debit` is null AND `b`.`gross_debit` is null))
AND ((`a`.`gross_credit` = `b`.`gross_credit`) OR (`a`.`gross_credit` is null AND `b`.`gross_credit` is null))
AND ((`a`.`exchange_rate` = `b`.`exchange_rate`) OR (`a`.`exchange_rate` is null AND `b`.`exchange_rate` is null))
AND ((`a`.`net_currency` = `b`.`net_currency`) OR (`a`.`net_currency` is null AND `b`.`net_currency` is null))
AND ((`a`.`net_debit` = `b`.`net_debit`) OR (`a`.`net_debit` is null AND `b`.`net_debit` is null))
AND ((`a`.`net_credit` = `b`.`net_credit`) OR (`a`.`net_credit` is null AND `b`.`net_credit` is null))
AND ((`a`.`commission` = `b`.`commission`) OR (`a`.`commission` is null AND `b`.`commission` is null))
AND ((`a`.`markup` = `b`.`markup`) OR (`a`.`markup` is null AND `b`.`markup` is null))
AND ((`a`.`scheme_fees` = `b`.`scheme_fees`) OR (`a`.`scheme_fees` is null AND `b`.`scheme_fees` is null))
AND ((`a`.`interchange` = `b`.`interchange`) OR (`a`.`interchange` is null AND `b`.`interchange` is null))
AND ((`a`.`payment_method_variant` = `b`.`payment_method_variant`) OR (`a`.`payment_method_variant` is null AND `b`.`payment_method_variant` is null))
AND ((`a`.`modification_merchant_reference` = `b`.`modification_merchant_reference`) OR (`a`.`modification_merchant_reference` is null AND `b`.`modification_merchant_reference` is null))
AND ((`a`.`batch_number` = `b`.`batch_number`) OR (`a`.`batch_number` is null AND `b`.`batch_number` is null))
AND ((`a`.`issuer_country` = `b`.`issuer_country`) OR (`a`.`issuer_country` is null AND `b`.`issuer_country` is null))
)
WHERE
date(`a`.`creation_date`) > '2024-08-01'
AND date(`b`.`creation_date`) > '2024-08-01'
)
SELECT
`duplicates`.`fingerprint1`,
`duplicates`.`filename1`,
`duplicates`.`merchant_account`,
`duplicates`.`psp_reference`,
`duplicates`.`merchant_reference`,
`duplicates`.`payment_method`,
`duplicates`.`creation_date`,
`duplicates`.`type`,
`duplicates`.`modification_reference`,
`duplicates`.`gross_currency`,
`duplicates`.`gross_debit`,
`duplicates`.`gross_credit`,
`duplicates`.`exchange_rate`,
`duplicates`.`net_currency`,
`duplicates`.`net_debit`,
`duplicates`.`net_credit`,
`duplicates`.`commission`,
`duplicates`.`markup`,
`duplicates`.`scheme_fees`,
`duplicates`.`interchange`,
`duplicates`.`payment_method_variant`,
`duplicates`.`modification_merchant_reference`,
`duplicates`.`batch_number`,
`duplicates`.`issuer_country`
FROM `duplicates`
-- arbitrarily pick one of the two (the "lesser")
WHERE `duplicates`.`filename1` < `duplicates`.`filename2`
);
INSERT INTO `etsy-data-warehouse-dev.finpay.adyen_settlement_detail_deduplication_2025_02_04` (
WITH `duplicates` AS (
SELECT
`a`.`fingerprint` as `fingerprint1`,
`b`.`fingerprint` as `fingerprint2`,
`a`.`filename` as `filename1`,
`b`.`filename` as `filename2`,
`a`.`merchant_account`,
`a`.`psp_reference`,
`a`.`merchant_reference`,
`a`.`payment_method`,
`a`.`creation_date`,
`a`.`type`,
`a`.`modification_reference`,
`a`.`gross_currency`,
`a`.`gross_debit`,
`a`.`gross_credit`,
`a`.`exchange_rate`,
`a`.`net_currency`,
`a`.`net_debit`,
`a`.`net_credit`,
`a`.`commission`,
`a`.`markup`,
`a`.`scheme_fees`,
`a`.`interchange`,
`a`.`payment_method_variant`,
`a`.`modification_merchant_reference`,
`a`.`batch_number`,
`a`.`issuer_country`
FROM `etsy-data-warehouse-dev.finpay.adyen_settlement_detail` AS `a`
left outer join `etsy-data-warehouse-dev.finpay.adyen_settlement_detail` AS `b`
on (
`a`.`fingerprint` <> `b`.`fingerprint`
AND ((`a`.`filename` = `b`.`filename`) OR (`a`.`filename` is null AND `b`.`filename` is null))
AND ((`a`.`merchant_account` = `b`.`merchant_account`) OR (`a`.`merchant_account` is null AND `b`.`merchant_account` is null))
AND ((`a`.`psp_reference` = `b`.`psp_reference`) OR (`a`.`psp_reference` is null AND `b`.`psp_reference` is null))
AND ((`a`.`merchant_reference` = `b`.`merchant_reference`) OR (`a`.`merchant_reference` is null AND `b`.`merchant_reference` is null))
AND ((`a`.`payment_method` = `b`.`payment_method`) OR (`a`.`payment_method` is null AND `b`.`payment_method` is null))
AND ((`a`.`creation_date` = `b`.`creation_date`) OR (`a`.`creation_date` is null AND `b`.`creation_date` is null))
AND ((`a`.`type` = `b`.`type`) OR (`a`.`type` is null AND `b`.`type` is null))
AND ((`a`.`modification_reference` = `b`.`modification_reference`) OR (`a`.`modification_reference` is null AND `b`.`modification_reference` is null))
AND ((`a`.`gross_currency` = `b`.`gross_currency`) OR (`a`.`gross_currency` is null AND `b`.`gross_currency` is null))
AND ((`a`.`gross_debit` = `b`.`gross_debit`) OR (`a`.`gross_debit` is null AND `b`.`gross_debit` is null))
AND ((`a`.`gross_credit` = `b`.`gross_credit`) OR (`a`.`gross_credit` is null AND `b`.`gross_credit` is null))
AND ((`a`.`exchange_rate` = `b`.`exchange_rate`) OR (`a`.`exchange_rate` is null AND `b`.`exchange_rate` is null))
AND ((`a`.`net_currency` = `b`.`net_currency`) OR (`a`.`net_currency` is null AND `b`.`net_currency` is null))
AND ((`a`.`net_debit` = `b`.`net_debit`) OR (`a`.`net_debit` is null AND `b`.`net_debit` is null))
AND ((`a`.`net_credit` = `b`.`net_credit`) OR (`a`.`net_credit` is null AND `b`.`net_credit` is null))
AND ((`a`.`commission` = `b`.`commission`) OR (`a`.`commission` is null AND `b`.`commission` is null))
AND ((`a`.`markup` = `b`.`markup`) OR (`a`.`markup` is null AND `b`.`markup` is null))
AND ((`a`.`scheme_fees` = `b`.`scheme_fees`) OR (`a`.`scheme_fees` is null AND `b`.`scheme_fees` is null))
AND ((`a`.`interchange` = `b`.`interchange`) OR (`a`.`interchange` is null AND `b`.`interchange` is null))
AND ((`a`.`payment_method_variant` = `b`.`payment_method_variant`) OR (`a`.`payment_method_variant` is null AND `b`.`payment_method_variant` is null))
AND ((`a`.`modification_merchant_reference` = `b`.`modification_merchant_reference`) OR (`a`.`modification_merchant_reference` is null AND `b`.`modification_merchant_reference` is null))
AND ((`a`.`batch_number` = `b`.`batch_number`) OR (`a`.`batch_number` is null AND `b`.`batch_number` is null))
AND ((`a`.`issuer_country` is null) OR (`b`.`issuer_country` is null) OR (`a`.`issuer_country` <> `b`.`issuer_country`))
)
WHERE
date(`a`.`creation_date`) BETWEEN '2023-01-01' AND '2023-02-01'
AND date(`b`.`creation_date`) BETWEEN '2023-01-01' AND '2023-02-01'
)
SELECT
`duplicates`.`fingerprint1`,
`duplicates`.`filename1`,
`duplicates`.`merchant_account`,
`duplicates`.`psp_reference`,
`duplicates`.`merchant_reference`,
`duplicates`.`payment_method`,
`duplicates`.`creation_date`,
`duplicates`.`type`,
`duplicates`.`modification_reference`,
`duplicates`.`gross_currency`,
`duplicates`.`gross_debit`,
`duplicates`.`gross_credit`,
`duplicates`.`exchange_rate`,
`duplicates`.`net_currency`,
`duplicates`.`net_debit`,
`duplicates`.`net_credit`,
`duplicates`.`commission`,
`duplicates`.`markup`,
`duplicates`.`scheme_fees`,
`duplicates`.`interchange`,
`duplicates`.`payment_method_variant`,
`duplicates`.`modification_merchant_reference`,
`duplicates`.`batch_number`,
`duplicates`.`issuer_country`
FROM `duplicates`
-- we want to delete the row from each pair where the issuer_country is missing
WHERE `duplicates`.`issuer_country` is null
);
DELETE FROM `etsy-data-warehouse-dev.finpay.adyen_settlement_detail` WHERE `fingerprint` IN (
SELECT `fingerprint`
FROM `etsy-data-warehouse-dev.finpay.adyen_settlement_detail_deduplication_2025_02_04`
);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment