Last active
February 4, 2025 17:37
-
-
Save jacobrose/e6df7c52d15345d316ac5334c2dab46f to your computer and use it in GitHub Desktop.
Inserting duplicate rows into a backup table in preparation for deletion
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| CREATE TABLE `etsy-data-warehouse-dev.finpay.adyen_settlement_detail_backup_2025_02_04` | |
| CLONE `etsy-data-warehouse-dev.finpay.adyen_settlement_detail`; | |
| ALTER TABLE `etsy-data-warehouse-dev.finpay.adyen_settlement_detail_backup_2025_02_04` | |
| SET OPTIONS ( | |
| expiration_timestamp = TIMESTAMP '2025-03-05 18:00:00' | |
| ); | |
| CREATE TABLE `etsy-data-warehouse-dev.finpay.adyen_settlement_detail_deduplication_2025_02_04` | |
| LIKE `etsy-data-warehouse-dev.finpay.adyen_settlement_detail`; | |
| INSERT INTO `etsy-data-warehouse-dev.finpay.adyen_settlement_detail_deduplication_2025_02_04` ( | |
| WITH `duplicates` AS ( | |
| SELECT | |
| `a`.`fingerprint` as `fingerprint1`, | |
| `b`.`fingerprint` as `fingerprint2`, | |
| `a`.`filename` as `filename1`, | |
| `b`.`filename` as `filename2`, | |
| `a`.`merchant_account`, | |
| `a`.`psp_reference`, | |
| `a`.`merchant_reference`, | |
| `a`.`payment_method`, | |
| `a`.`creation_date`, | |
| `a`.`type`, | |
| `a`.`modification_reference`, | |
| `a`.`gross_currency`, | |
| `a`.`gross_debit`, | |
| `a`.`gross_credit`, | |
| `a`.`exchange_rate`, | |
| `a`.`net_currency`, | |
| `a`.`net_debit`, | |
| `a`.`net_credit`, | |
| `a`.`commission`, | |
| `a`.`markup`, | |
| `a`.`scheme_fees`, | |
| `a`.`interchange`, | |
| `a`.`payment_method_variant`, | |
| `a`.`modification_merchant_reference`, | |
| `a`.`batch_number`, | |
| `a`.`issuer_country` | |
| FROM `etsy-data-warehouse-dev.finpay.adyen_settlement_detail` AS `a` | |
| left outer join `etsy-data-warehouse-dev.finpay.adyen_settlement_detail` AS `b` | |
| on ( | |
| `a`.`fingerprint` <> `b`.`fingerprint` | |
| AND `a`.`filename` <> `b`.`filename` | |
| AND ((`a`.`merchant_account` = `b`.`merchant_account`) OR (`a`.`merchant_account` is null AND `b`.`merchant_account` is null)) | |
| AND ((`a`.`psp_reference` = `b`.`psp_reference`) OR (`a`.`psp_reference` is null AND `b`.`psp_reference` is null)) | |
| AND ((`a`.`merchant_reference` = `b`.`merchant_reference`) OR (`a`.`merchant_reference` is null AND `b`.`merchant_reference` is null)) | |
| AND ((`a`.`payment_method` = `b`.`payment_method`) OR (`a`.`payment_method` is null AND `b`.`payment_method` is null)) | |
| AND ((`a`.`creation_date` = `b`.`creation_date`) OR (`a`.`creation_date` is null AND `b`.`creation_date` is null)) | |
| AND ((`a`.`type` = `b`.`type`) OR (`a`.`type` is null AND `b`.`type` is null)) | |
| AND ((`a`.`modification_reference` = `b`.`modification_reference`) OR (`a`.`modification_reference` is null AND `b`.`modification_reference` is null)) | |
| AND ((`a`.`gross_currency` = `b`.`gross_currency`) OR (`a`.`gross_currency` is null AND `b`.`gross_currency` is null)) | |
| AND ((`a`.`gross_debit` = `b`.`gross_debit`) OR (`a`.`gross_debit` is null AND `b`.`gross_debit` is null)) | |
| AND ((`a`.`gross_credit` = `b`.`gross_credit`) OR (`a`.`gross_credit` is null AND `b`.`gross_credit` is null)) | |
| AND ((`a`.`exchange_rate` = `b`.`exchange_rate`) OR (`a`.`exchange_rate` is null AND `b`.`exchange_rate` is null)) | |
| AND ((`a`.`net_currency` = `b`.`net_currency`) OR (`a`.`net_currency` is null AND `b`.`net_currency` is null)) | |
| AND ((`a`.`net_debit` = `b`.`net_debit`) OR (`a`.`net_debit` is null AND `b`.`net_debit` is null)) | |
| AND ((`a`.`net_credit` = `b`.`net_credit`) OR (`a`.`net_credit` is null AND `b`.`net_credit` is null)) | |
| AND ((`a`.`commission` = `b`.`commission`) OR (`a`.`commission` is null AND `b`.`commission` is null)) | |
| AND ((`a`.`markup` = `b`.`markup`) OR (`a`.`markup` is null AND `b`.`markup` is null)) | |
| AND ((`a`.`scheme_fees` = `b`.`scheme_fees`) OR (`a`.`scheme_fees` is null AND `b`.`scheme_fees` is null)) | |
| AND ((`a`.`interchange` = `b`.`interchange`) OR (`a`.`interchange` is null AND `b`.`interchange` is null)) | |
| AND ((`a`.`payment_method_variant` = `b`.`payment_method_variant`) OR (`a`.`payment_method_variant` is null AND `b`.`payment_method_variant` is null)) | |
| AND ((`a`.`modification_merchant_reference` = `b`.`modification_merchant_reference`) OR (`a`.`modification_merchant_reference` is null AND `b`.`modification_merchant_reference` is null)) | |
| AND ((`a`.`batch_number` = `b`.`batch_number`) OR (`a`.`batch_number` is null AND `b`.`batch_number` is null)) | |
| AND ((`a`.`issuer_country` = `b`.`issuer_country`) OR (`a`.`issuer_country` is null AND `b`.`issuer_country` is null)) | |
| ) | |
| WHERE | |
| date(`a`.`creation_date`) > '2024-08-01' | |
| AND date(`b`.`creation_date`) > '2024-08-01' | |
| ) | |
| SELECT | |
| `duplicates`.`fingerprint1`, | |
| `duplicates`.`filename1`, | |
| `duplicates`.`merchant_account`, | |
| `duplicates`.`psp_reference`, | |
| `duplicates`.`merchant_reference`, | |
| `duplicates`.`payment_method`, | |
| `duplicates`.`creation_date`, | |
| `duplicates`.`type`, | |
| `duplicates`.`modification_reference`, | |
| `duplicates`.`gross_currency`, | |
| `duplicates`.`gross_debit`, | |
| `duplicates`.`gross_credit`, | |
| `duplicates`.`exchange_rate`, | |
| `duplicates`.`net_currency`, | |
| `duplicates`.`net_debit`, | |
| `duplicates`.`net_credit`, | |
| `duplicates`.`commission`, | |
| `duplicates`.`markup`, | |
| `duplicates`.`scheme_fees`, | |
| `duplicates`.`interchange`, | |
| `duplicates`.`payment_method_variant`, | |
| `duplicates`.`modification_merchant_reference`, | |
| `duplicates`.`batch_number`, | |
| `duplicates`.`issuer_country` | |
| FROM `duplicates` | |
| -- arbitrarily pick one of the two (the "lesser") | |
| WHERE `duplicates`.`filename1` < `duplicates`.`filename2` | |
| ); | |
| INSERT INTO `etsy-data-warehouse-dev.finpay.adyen_settlement_detail_deduplication_2025_02_04` ( | |
| WITH `duplicates` AS ( | |
| SELECT | |
| `a`.`fingerprint` as `fingerprint1`, | |
| `b`.`fingerprint` as `fingerprint2`, | |
| `a`.`filename` as `filename1`, | |
| `b`.`filename` as `filename2`, | |
| `a`.`merchant_account`, | |
| `a`.`psp_reference`, | |
| `a`.`merchant_reference`, | |
| `a`.`payment_method`, | |
| `a`.`creation_date`, | |
| `a`.`type`, | |
| `a`.`modification_reference`, | |
| `a`.`gross_currency`, | |
| `a`.`gross_debit`, | |
| `a`.`gross_credit`, | |
| `a`.`exchange_rate`, | |
| `a`.`net_currency`, | |
| `a`.`net_debit`, | |
| `a`.`net_credit`, | |
| `a`.`commission`, | |
| `a`.`markup`, | |
| `a`.`scheme_fees`, | |
| `a`.`interchange`, | |
| `a`.`payment_method_variant`, | |
| `a`.`modification_merchant_reference`, | |
| `a`.`batch_number`, | |
| `a`.`issuer_country` | |
| FROM `etsy-data-warehouse-dev.finpay.adyen_settlement_detail` AS `a` | |
| left outer join `etsy-data-warehouse-dev.finpay.adyen_settlement_detail` AS `b` | |
| on ( | |
| `a`.`fingerprint` <> `b`.`fingerprint` | |
| AND ((`a`.`filename` = `b`.`filename`) OR (`a`.`filename` is null AND `b`.`filename` is null)) | |
| AND ((`a`.`merchant_account` = `b`.`merchant_account`) OR (`a`.`merchant_account` is null AND `b`.`merchant_account` is null)) | |
| AND ((`a`.`psp_reference` = `b`.`psp_reference`) OR (`a`.`psp_reference` is null AND `b`.`psp_reference` is null)) | |
| AND ((`a`.`merchant_reference` = `b`.`merchant_reference`) OR (`a`.`merchant_reference` is null AND `b`.`merchant_reference` is null)) | |
| AND ((`a`.`payment_method` = `b`.`payment_method`) OR (`a`.`payment_method` is null AND `b`.`payment_method` is null)) | |
| AND ((`a`.`creation_date` = `b`.`creation_date`) OR (`a`.`creation_date` is null AND `b`.`creation_date` is null)) | |
| AND ((`a`.`type` = `b`.`type`) OR (`a`.`type` is null AND `b`.`type` is null)) | |
| AND ((`a`.`modification_reference` = `b`.`modification_reference`) OR (`a`.`modification_reference` is null AND `b`.`modification_reference` is null)) | |
| AND ((`a`.`gross_currency` = `b`.`gross_currency`) OR (`a`.`gross_currency` is null AND `b`.`gross_currency` is null)) | |
| AND ((`a`.`gross_debit` = `b`.`gross_debit`) OR (`a`.`gross_debit` is null AND `b`.`gross_debit` is null)) | |
| AND ((`a`.`gross_credit` = `b`.`gross_credit`) OR (`a`.`gross_credit` is null AND `b`.`gross_credit` is null)) | |
| AND ((`a`.`exchange_rate` = `b`.`exchange_rate`) OR (`a`.`exchange_rate` is null AND `b`.`exchange_rate` is null)) | |
| AND ((`a`.`net_currency` = `b`.`net_currency`) OR (`a`.`net_currency` is null AND `b`.`net_currency` is null)) | |
| AND ((`a`.`net_debit` = `b`.`net_debit`) OR (`a`.`net_debit` is null AND `b`.`net_debit` is null)) | |
| AND ((`a`.`net_credit` = `b`.`net_credit`) OR (`a`.`net_credit` is null AND `b`.`net_credit` is null)) | |
| AND ((`a`.`commission` = `b`.`commission`) OR (`a`.`commission` is null AND `b`.`commission` is null)) | |
| AND ((`a`.`markup` = `b`.`markup`) OR (`a`.`markup` is null AND `b`.`markup` is null)) | |
| AND ((`a`.`scheme_fees` = `b`.`scheme_fees`) OR (`a`.`scheme_fees` is null AND `b`.`scheme_fees` is null)) | |
| AND ((`a`.`interchange` = `b`.`interchange`) OR (`a`.`interchange` is null AND `b`.`interchange` is null)) | |
| AND ((`a`.`payment_method_variant` = `b`.`payment_method_variant`) OR (`a`.`payment_method_variant` is null AND `b`.`payment_method_variant` is null)) | |
| AND ((`a`.`modification_merchant_reference` = `b`.`modification_merchant_reference`) OR (`a`.`modification_merchant_reference` is null AND `b`.`modification_merchant_reference` is null)) | |
| AND ((`a`.`batch_number` = `b`.`batch_number`) OR (`a`.`batch_number` is null AND `b`.`batch_number` is null)) | |
| AND ((`a`.`issuer_country` is null) OR (`b`.`issuer_country` is null) OR (`a`.`issuer_country` <> `b`.`issuer_country`)) | |
| ) | |
| WHERE | |
| date(`a`.`creation_date`) BETWEEN '2023-01-01' AND '2023-02-01' | |
| AND date(`b`.`creation_date`) BETWEEN '2023-01-01' AND '2023-02-01' | |
| ) | |
| SELECT | |
| `duplicates`.`fingerprint1`, | |
| `duplicates`.`filename1`, | |
| `duplicates`.`merchant_account`, | |
| `duplicates`.`psp_reference`, | |
| `duplicates`.`merchant_reference`, | |
| `duplicates`.`payment_method`, | |
| `duplicates`.`creation_date`, | |
| `duplicates`.`type`, | |
| `duplicates`.`modification_reference`, | |
| `duplicates`.`gross_currency`, | |
| `duplicates`.`gross_debit`, | |
| `duplicates`.`gross_credit`, | |
| `duplicates`.`exchange_rate`, | |
| `duplicates`.`net_currency`, | |
| `duplicates`.`net_debit`, | |
| `duplicates`.`net_credit`, | |
| `duplicates`.`commission`, | |
| `duplicates`.`markup`, | |
| `duplicates`.`scheme_fees`, | |
| `duplicates`.`interchange`, | |
| `duplicates`.`payment_method_variant`, | |
| `duplicates`.`modification_merchant_reference`, | |
| `duplicates`.`batch_number`, | |
| `duplicates`.`issuer_country` | |
| FROM `duplicates` | |
| -- we want to delete the row from each pair where the issuer_country is missing | |
| WHERE `duplicates`.`issuer_country` is null | |
| ); | |
| DELETE FROM `etsy-data-warehouse-dev.finpay.adyen_settlement_detail` WHERE `fingerprint` IN ( | |
| SELECT `fingerprint` | |
| FROM `etsy-data-warehouse-dev.finpay.adyen_settlement_detail_deduplication_2025_02_04` | |
| ); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment