Skip to content

Instantly share code, notes, and snippets.

@jonathan-ostrander
Last active December 7, 2016 21:57
Show Gist options
  • Select an option

  • Save jonathan-ostrander/cfd5f3bd8a5fce6d50c987376a66d668 to your computer and use it in GitHub Desktop.

Select an option

Save jonathan-ostrander/cfd5f3bd8a5fce6d50c987376a66d668 to your computer and use it in GitHub Desktop.
Stats V1 vs V2
CREATE TABLE attributed_clicks_tmp (
auction_result_id bigint encode lzo,
bam_link_id integer encode delta32k,
click_date timestamp without time zone encode lzo,
click_id bigint encode raw,
merch_id integer encode delta,
merchant_pricing_model smallint encode lzo,
merchant_spent smallint encode lzo,
order_date timestamp without time zone encode lzo,
order_id character varying(50) encode lzo,
order_value integer encode lzo,
pub_id integer encode delta,
uid_bam bigint encode lzo
)
DISTSTYLE key
DISTKEY(auction_result_id)
INTERLEAVED SORTKEY(click_date, order_date)
-- 5.2 seconds
INSERT INTO attributed_clicks_tmp (
auction_result_id,
bam_link_id,
click_date,
click_id,
merch_id,
merchant_pricing_model,
merchant_spent,
order_date,
order_id,
order_value,
pub_id,
uid_bam
) (
SELECT
auction_result_id,
bam_link_id,
click_date,
click_id,
merch_id,
merchant_pricing_model,
merchant_spent,
order_date,
order_id,
order_value,
pub_id,
uid_bam
FROM
attributed_clicks
)
CREATE TABLE auction_result_tmp (
algorithm character varying(50) encode lzo,
auction_id bigint encode bytedict,
bam_box_format_id integer encode lzo,
bam_box_id integer encode lzo,
bam_box_page_num smallint encode lzo,
bam_box_page_view_id character varying(36) encode lzo,
bam_box_product_count smallint encode lzo,
bam_box_product_position smallint encode lzo,
bam_link_id integer encode bytedict,
created timestamp without time zone encode lzo,
edit_id integer encode bytedict,
event_context_id bigint encode lzo,
id bigint encode lzo,
is_brand_page_fallback boolean encode runlength,
is_click_auction boolean encode runlength,
is_exclusive boolean encode runlength,
is_static boolean encode runlength,
is_using_campaign_trackers boolean encode runlength,
merchant_campaign_goal_cpm integer encode lzo,
merchant_campaign_goal_id integer encode lzo,
merchant_campaign_goal_impression_target integer encode lzo,
merchant_campaign_id integer encode lzo,
publisher_campaign_goal_cpm integer encode lzo,
publisher_campaign_goal_datetime_flight_end timestamp without time zone encode lzo,
publisher_campaign_goal_datetime_flight_start timestamp without time zone encode lzo,
publisher_campaign_goal_id integer encode lzo,
publisher_campaign_goal_impression_target integer encode lzo,
publisher_campaign_id integer encode lzo,
publisher_id integer encode lzo,
referer character varying(2048) encode lzo,
reserve_cpc integer encode lzo,
retargeted_on_checkout boolean encode runlength,
u_ip character varying(16) encode lzo,
ua character varying(255) encode lzo,
ua_mob boolean encode raw,
uid_bam bigint encode lzo,
uid_bn character varying(50) encode lzo,
uid_ga character varying(50) encode lzo,
uid_m character varying(50) encode lzo,
uid_om character varying(50) encode lzo,
uid_qc character varying(50) encode lzo,
url character varying(2048) encode lzo,
widget_id bigint encode lzo,
widget_version smallint encode lzo,
winner_bamx_product_category_id smallint encode lzo,
winner_bid_amount smallint encode lzo,
winner_bid_cost smallint encode lzo,
winner_budget_amount_available integer encode lzo,
winner_max_cpc integer encode lzo,
winner_merch_id integer encode lzo,
winner_merchant_product_id integer encode bytedict,
winner_merchant_product_local_currency character varying(3) encode lzo,
winner_merchant_product_local_price integer encode lzo,
winner_merchant_product_price integer encode lzo,
winner_pricing_model smallint encode lzo,
winner_roi_goal double precision encode raw
)
DISTSTYLE key
DISTKEY(id)
SORTKEY(created);
-- 260 seconds
INSERT INTO auction_result_tmp (
algorithm,
auction_id,
bam_box_format_id,
bam_box_id,
bam_box_page_num,
bam_box_page_view_id,
bam_box_product_count,
bam_box_product_position,
bam_link_id,
created,
edit_id,
event_context_id,
id,
is_brand_page_fallback,
is_click_auction,
is_exclusive,
is_static,
is_using_campaign_trackers,
merchant_campaign_goal_cpm,
merchant_campaign_goal_id,
merchant_campaign_goal_impression_target,
merchant_campaign_id,
publisher_campaign_goal_cpm,
publisher_campaign_goal_datetime_flight_end,
publisher_campaign_goal_datetime_flight_start,
publisher_campaign_goal_id,
publisher_campaign_goal_impression_target,
publisher_campaign_id,
publisher_id,
referer,
reserve_cpc,
retargeted_on_checkout,
u_ip,
ua,
ua_mob,
uid_bam,
uid_bn,
uid_ga,
uid_m,
uid_om,
uid_qc,
url,
widget_id,
widget_version,
winner_bamx_product_category_id,
winner_bid_amount,
winner_bid_cost,
winner_budget_amount_available,
winner_max_cpc,
winner_merch_id,
winner_merchant_product_id,
winner_merchant_product_local_currency,
winner_merchant_product_local_price,
winner_merchant_product_price,
winner_pricing_model,
winner_roi_goal
)
(
SELECT
algorithm,
auction_id,
bam_box_format_id,
bam_box_id,
bam_box_page_num,
bam_box_page_view_id,
bam_box_product_count,
bam_box_product_position,
bam_link_id,
created,
edit_id,
event_context_id,
id,
is_brand_page_fallback,
is_click_auction,
is_exclusive,
is_static,
is_using_campaign_trackers,
merchant_campaign_goal_cpm,
merchant_campaign_goal_id,
merchant_campaign_goal_impression_target,
merchant_campaign_id,
publisher_campaign_goal_cpm,
publisher_campaign_goal_datetime_flight_end,
publisher_campaign_goal_datetime_flight_start,
publisher_campaign_goal_id,
publisher_campaign_goal_impression_target,
publisher_campaign_id,
publisher_id,
referer,
reserve_cpc,
retargeted_on_checkout,
u_ip,
ua,
ua_mob,
uid_bam,
uid_bn,
uid_ga,
uid_m,
uid_om,
uid_qc,
url,
widget_id,
widget_version,
winner_bamx_product_category_id,
winner_bid_amount,
winner_bid_cost,
winner_budget_amount_available,
winner_max_cpc,
winner_merch_id,
winner_merchant_product_id,
winner_merchant_product_local_currency,
winner_merchant_product_local_price,
winner_merchant_product_price,
winner_pricing_model,
winner_roi_goal
FROM
auction_result
)
-- The average queue time ends up being negative due huge query times on some queries
SELECT
service_class AS svc_class,
COUNT(*) AS num_queries,
AVG(DATEDIFF(microseconds, queue_start_time, queue_end_time)) AS avg_queue_time,
AVG(DATEDIFF(microseconds, exec_start_time, exec_end_time)) AS avg_exec_time
FROM
stl_wlm_query
WHERE
service_class > 4
GROUP BY
service_class
ORDER BY
service_class;

Clicks

V1 Query

SELECT
    pub_id,
    COUNT(DISTINCT click_id)::NUMERIC(10, 0) AS clicks
FROM
    attributed_clicks
WHERE
    click_date BETWEEN '2016-11-01' AND '2016-11-08'
GROUP BY pub_id;

-- QUERY PLAN
-- XN HashAggregate  (cost=6438.41..6439.91 rows=200 width=12)
--   ->  XN Subquery Scan volt_dt_0  (cost=3679.09..5518.64 rows=183955 width=12)
--         ->  XN HashAggregate  (cost=3679.09..3679.09 rows=183955 width=12)
--               ->  XN Seq Scan on attributed_clicks  (cost=0.00..2759.31 rows=183955 width=12)
--                     Filter: ((click_date >= '2016-11-01 00:00:00'::timestamp without time zone) AND
--                              (click_date <= '2016-11-08 00:00:00'::timestamp without time zone))

V2 Query

SELECT
    owner_pub_id AS pub_id,
    COUNT(DISTINCT interaction_id) AS clicks
FROM convertible_impression
WHERE
    interaction_type = 'bam_link_click' AND
    interaction_datetime BETWEEN '2016-11-01' AND '2016-11-08' AND
    conversion_datetime IS NULL
GROUP BY pub_id;

-- QUERY PLAN
-- XN HashAggregate  (cost=17439003.04..17439003.09 rows=20 width=12)
--   ->  XN Subquery Scan volt_dt_0  (cost=17439002.74..17439002.94 rows=20 width=12)
--         ->  XN HashAggregate  (cost=17439002.74..17439002.74 rows=20 width=12)
--               ->  XN Seq Scan on convertible_impression  (cost=0.00..17438566.88 rows=87172 width=12)
--                     Filter: (((interaction_type)::text = 'bam_link_click'::text) AND
--                              (interaction_datetime <= '2016-11-08 00:00:00'::timestamp without time zone) AND
--                              (interaction_datetime >= '2016-11-01 00:00:00'::timestamp without time zone) AND
--                              (conversion_datetime IS NULL))

Before Vacuum

Run # Date Start Date End V1 Time (ms) V2 Time (ms)
1 2016-11-01 2016-11-08 51413 30565
2 666 6882
3 5808 3701

After Vacuum

Run # Date Start Date End V1 Time (ms) V2 Time (ms)
1 2016-11-01 2016-11-08 702 6609
2 450 15689
3 413 7821
Run # Date Start Date End V1 Time (ms) V2 Time (ms)
1 2016-10-15 NONE 13973 7180
2 875 22158
3 1632 9261

Optimized Column Compression Comparison (Full table count grouped by pub_id)

Using the same queries as above without the filtering on datetime

Run # attributed_clicks Current (ms) attributed_clicks Optimized (ms) convertible_impression Current (ms) convertible_impression Optimized (ms)
1 1810 1841 4341 3382
2 248 524 1465 1969
3 233 854 1429 2215
4 522 746 1579 2288
5 315 585 1660 1898
6 339 725 1683 2125
7 248 668 1391 1979
8 352 821 1624 2231
9 232 476 1661 2014
10 277 635 1442 2284
CREATE TABLE convertible_impression_tmp (
auction_id bigint encode lzo,
bam_box_arrow_direction character varying(10) encode lzo,
bam_box_format_id integer encode lzo,
bam_box_id integer encode lzo,
bam_box_page_num smallint encode lzo,
bam_box_product_position smallint encode lzo,
bam_link_edit_id integer encode lzo,
bam_link_id integer encode lzo,
bam_link_pub_id integer encode lzo,
bamx_user_device_id smallint encode lzo,
checkout_calculated_local_value integer encode lzo,
checkout_id character varying(50) encode lzo,
checkout_local_currency character varying(3) encode lzo,
checkout_local_value integer encode lzo,
checkout_product_count integer encode lzo,
conversion_datetime timestamp without time zone encode lzo,
conversion_id bigint encode lzo,
conversion_type character varying(50) encode lzo,
conversion_value integer encode lzo,
event_context_id bigint encode lzo,
impression_datetime timestamp without time zone encode lzo,
impression_id bigint encode lzo,
impression_type character varying(50) encode lzo,
interaction_datetime timestamp without time zone encode lzo,
interaction_id bigint encode lzo,
interaction_type character varying(50) encode lzo,
is_click_auction boolean encode runlength,
is_using_campaign_trackers boolean encode runlength,
merchant_campaign_goal_cpm integer encode lzo,
merchant_campaign_goal_id integer encode lzo,
merchant_campaign_goal_impression_target integer encode lzo,
merchant_campaign_id integer encode lzo,
merchant_product_bamx_product_category_id smallint encode lzo,
merchant_product_id integer encode lzo,
merchant_product_local_currency character varying(3) encode lzo,
merchant_product_local_price integer encode lzo,
merchant_product_merch_id integer encode lzo,
merchant_product_price integer encode lzo,
owner_merch_id integer encode delta,
owner_pub_id integer encode lzo,
publisher_campaign_goal_cpm integer encode lzo,
publisher_campaign_goal_datetime_flight_end timestamp without time zone encode lzo,
publisher_campaign_goal_datetime_flight_start timestamp without time zone encode lzo,
publisher_campaign_goal_id integer encode lzo,
publisher_campaign_goal_impression_target integer encode lzo,
publisher_campaign_id integer encode lzo,
redirect_target_url character varying(2048) encode lzo,
seconds_since_impression bigint encode lzo,
seconds_since_interaction bigint encode lzo,
target_email_sha256 character varying(64) encode lzo,
widget_id bigint encode lzo,
widget_version smallint encode lzo,
winner_bid_cost smallint encode lzo,
winner_pricing_model smallint encode lzo
)
DISTSTYLE key
DISTKEY(event_context_id)
interleaved sortkey(impression_datetime, interaction_datetime, conversion_datetime);
-- 340 seconds
INSERT INTO convertible_impression_tmp (
auction_id,
bam_box_arrow_direction,
bam_box_format_id,
bam_box_id,
bam_box_page_num,
bam_box_product_position,
bam_link_edit_id,
bam_link_id,
bam_link_pub_id,
bamx_user_device_id,
checkout_calculated_local_value,
checkout_id,
checkout_local_currency,
checkout_local_value,
checkout_product_count,
conversion_datetime,
conversion_id,
conversion_type,
conversion_value,
event_context_id,
impression_datetime,
impression_id,
impression_type,
interaction_datetime,
interaction_id,
interaction_type,
is_click_auction,
is_using_campaign_trackers,
merchant_campaign_goal_cpm,
merchant_campaign_goal_id,
merchant_campaign_goal_impression_target,
merchant_campaign_id,
merchant_product_bamx_product_category_id,
merchant_product_id,
merchant_product_local_currency,
merchant_product_local_price,
merchant_product_merch_id,
merchant_product_price,
owner_merch_id,
owner_pub_id,
publisher_campaign_goal_cpm,
publisher_campaign_goal_datetime_flight_end,
publisher_campaign_goal_datetime_flight_start,
publisher_campaign_goal_id,
publisher_campaign_goal_impression_target,
publisher_campaign_id,
redirect_target_url,
seconds_since_impression,
seconds_since_interaction,
target_email_sha256,
widget_id,
widget_version,
winner_bid_cost,
winner_pricing_model
) (
SELECT
auction_id,
bam_box_arrow_direction,
bam_box_format_id,
bam_box_id,
bam_box_page_num,
bam_box_product_position,
bam_link_edit_id,
bam_link_id,
bam_link_pub_id,
bamx_user_device_id,
checkout_calculated_local_value,
checkout_id,
checkout_local_currency,
checkout_local_value,
checkout_product_count,
conversion_datetime,
conversion_id,
conversion_type,
conversion_value,
event_context_id,
impression_datetime,
impression_id,
impression_type,
interaction_datetime,
interaction_id,
interaction_type,
is_click_auction,
is_using_campaign_trackers,
merchant_campaign_goal_cpm,
merchant_campaign_goal_id,
merchant_campaign_goal_impression_target,
merchant_campaign_id,
merchant_product_bamx_product_category_id,
merchant_product_id,
merchant_product_local_currency,
merchant_product_local_price,
merchant_product_merch_id,
merchant_product_price,
owner_merch_id,
owner_pub_id,
publisher_campaign_goal_cpm,
publisher_campaign_goal_datetime_flight_end,
publisher_campaign_goal_datetime_flight_start,
publisher_campaign_goal_id,
publisher_campaign_goal_impression_target,
publisher_campaign_id,
redirect_target_url,
seconds_since_impression,
seconds_since_interaction,
target_email_sha256,
widget_id,
widget_version,
winner_bid_cost,
winner_pricing_model
FROM convertible_impression
)

Impressions

V1 Query

SELECT
    publisher_id AS pub_id,
    COUNT(*) AS impressions
FROM
    auction_result
WHERE
    created BETWEEN '2016-11-01' AND '2016-11-08'
GROUP BY pub_id;

-- QUERY PLAN
-- XN HashAggregate  (cost=134208.33..134208.34 rows=1 width=4)
--   -> XN Seq Scan on auction_result  (cost=0.00..100656.25 rows=6710417 width=4)
--        Filter: ((created >= '2016-11-01 00:00:00'::timestamp without time zone) AND (created <= '2016-11-08 00:00:00'::timestamp without time zone))

V2 Query

SELECT
    owner_pub_id AS pub_id,
    COUNT(*) AS impressions
FROM
    convertible_impression
WHERE
    impression_type = 'bam_link_impression' AND
    impression_datetime BETWEEN '2016-11-01' AND '2016-11-08' AND
    interaction_datetime IS NULL AND
    conversion_datetime IS NULL
GROUP BY pub_id;

-- QUERY PLAN
-- XN HashAggregate  (cost=17473948.20..17473948.20 rows=1 width=4)
--   ->  XN Seq Scan on convertible_impression  (cost=0.00..17438566.88 rows=7076263 width=4)"
--         Filter: ((impression_datetime <= '2016-11-08 00:00:00'::timestamp without time zone) AND
--                  (impression_datetime >= '2016-11-01 00:00:00'::timestamp without time zone) AND
--                  ((impression_type)::text = 'bam_link_impression'::text) AND
--                  (conversion_datetime IS NULL) AND (interaction_datetime IS NULL))

Before Vacuum

Run # Date Start Date End V1 Time (ms) V2 Time (ms)
1 2016-11-01 2016-11-08 10634 17354
2 4021 9995
3 801 6853

After Vacuum

Run # Date Start Date End V1 Time (ms) V2 Time (ms)
1 2016-11-01 2016-11-08 3600 16416
2 12087 14302
3 4012 9152
Run # Date Start Date End V1 Time (ms) V2 Time (ms)
1 2016-10-15 NONE 3954 9456
2 728 13371
3 981 5871

Optimized Column Compression Comparison (Full table count grouped by pub_id)

Using the same queries as above without the filtering on datetime

Run # auction_result Current (ms) auction_result Optimized (ms) convertible_impression Current (ms) convertible_impression Optimized (ms)
1 2473 954 8019 5205
2 2129 921 7833 3984
3 1022 857 8997 3754
4 1255 986 8283 3723
5 943 1138 8703 3947
6 998 899 6728 3844
7 1007 955 7332 3799
8 1465 841 8522 3735
9 1243 829 7942 3741
10 949 1013 7938 3979

Merchant Revenue

V1 Query

SELECT
    merch_id,
    (SUM(COALESCE(order_value, 0)) / 100.0)::NUMERIC(10, 2) AS merchants_revenue
FROM
    attributed_clicks
WHERE
    order_date BETWEEN '2016-11-01' AND '2016-11-08'
GROUP BY merch_id;

-- QUERY PLAN
-- XN HashAggregate  (cost=117571.84..117571.85 rows=1 width=8)
--   ->  XN Seq Scan on attributed_clicks  (cost=0.00..117564.55 rows=1457 width=8)
--         Filter: (order_date > '2016-11-01 00:00:00'::timestamp without time zone) AND
--                 (order_date < '2016-11-08 00:00:00'::timestamp without time zone)

V2 Query

SELECT
    merch_id,
    (SUM(COALESCE(conversion_value, 0)) / 100.0)::NUMERIC(10, 2) AS merchants_revenue
FROM
(
    SELECT
        merchant_product_merch_id AS merch_id,
        conversion_id,
        COALESCE(MAX(conversion_value), 0) AS conversion_value
    FROM
        convertible_impression
    WHERE
        conversion_datetime BETWEEN '2016-11-01' AND '2016-11-08' AND
        interaction_type = 'bam_link_click' AND
        seconds_since_interaction <= 1209600 -- 14 day post click
    GROUP BY merch_id, conversion_id
)
GROUP BY merch_id

-- QUERY PLAN
-- XN HashAggregate  (cost=17449856.17..17449856.43 rows=26 width=8)
--   ->  XN Subquery Scan derived_table1  (cost=17449855.72..17449856.04 rows=26 width=8)
--         ->  XN HashAggregate  (cost=17449855.72..17449855.78 rows=26 width=16)
--               ->  XN Seq Scan on convertible_impression  (cost=0.00..17449296.48 rows=74565 width=16)
--                     Filter: (((interaction_type)::text = 'bam_link_click'::text) AND
--                              (seconds_since_interaction <= 1209600) AND
--                              (conversion_datetime > '2016-11-01 00:00:00'::timestamp without time zone)) AND
--                              (conversion_datetime < '2016-11-08 00:00:00'::timestamp without time zone))

Before Vacuum

Run # Date Start Date End V1 Time (ms) V2 Time (ms)
1 2016-11-01 2016-11-08 36406 17354
2 4021 9995
3 5932 8122

After Vacuum

Run # Date Start Date End V1 Time (ms) V2 Time (ms)
1 2016-11-01 2016-11-08 209 4041
2 480 6846
3 172 3912
Run # Date Start Date End V1 Time (ms) V2 Time (ms)
1 2016-10-15 NONE 28604 6781
2 187 7238
3 341 3476

Optimized Column Compression Comparison (Full table count grouped by merch_id)

Using the same queries as above without the filtering on datetime

Run # attributed_clicks Current (ms) attributed_clicks Optimized (ms) convertible_impression Current (ms) convertible_impression Optimized (ms)
1 1328 1263 4229 3309
2 170 170 1513 1861
3 173 181 1526 1969
4 177 179 1396 2068
5 200 176 1496 2196
6 177 174 1456 1947
7 178 169 1534 1992
8 198 430 1479 1897
9 171 207 1484 1945
10 173 178 1370 2019

Notes

All of the queries comparing compression where run on a snapshot of our Redshift. This snapshot only had a single query running on it at any time so queue time was always 0.

Compression

The optimal encoding types for each column were determined by running ANALYZE COMPRESSION table_name for each table.

Table Size (MB) # of Rows
auction_result 137665 380772558
auction_result_opt 41108 380770391
attributed_clicks 1280 9416375
attributed_clicks_opt 1280 9416375
convertible_impression 106698 1007428543
convertible_impression_opt 45243 1006726296

These are the 3 tables that the application currently queries from for stats. attributed_clicks size could not be improved by changing the encoding on the columns, but the other two tables were reduced in size by at least half. However, there isn't a great improvement in performance looking at the query times for the uncompressed vs compressed. It seems like using the optimal encodings for each column only helps with disk space usage (maybe this will help with the problems with our leader node?).

Currently, our cluster only has one service class that it uses to queue up queries. The default number of concurrent queries for a service class is 5. All other queries that come in will be queued. In queue_times.md, I listed the last couple of hundred query queue and execution times. There are a few queries that take >10 seconds, but for the most part the execution time is <3 seconds. Large query times are unavoidable with our current system. Customer events have to be imported into each table and while we could probably find a sweet spot for optimizing those imports, it probably wouldn't be worth the engineering cost at the moment.

The real problem with the data in queue_times.md is the queue times themselves. While there are plenty of 0s in the queue_time column, there is a streak of queue times that gradually increases and then decreases. Here's a graph of the queue times over the course of an hour on December 7, 2016:

While 25 seconds might be okay for an asynchronous job running on a celery worker, it is pretty ridiculous for a synchronous request to take that amount of time (or really anything more than a second). The tests on the restored snapshot show that customer queries never take more than a few seconds. According to our New Relic SPA monitoring, we never see more than 10 requests per minute on any of the endpoints that query Redshift. If those queries had their own service class with 5 concurrent queries, then their queue times should drop to 0.

Query time will most likely still be higher than it is on a standalone cluster running 1 query at a time because each service class is allocated a percentage of memory on each Redshift node. Each query on the standalone cluster always had 100% of the memory available.

SELECT
    service_class AS svc_class,
    queue_start_time,
    DATEDOFF(MILLISECONDS, queue_start_time, queue_end_time) AS queue_time,
    DATEDIFF(MILLISECONDS, exec_start_time, exec_end_time ) AS exec_time
FROM
    stl_wlm_query
WHERE
    service_class > 4
ORDER BY
    queue_start_time DESC;
svc_class queue_start_time (UTC) queue_time (ms) exec_time (ms)
6 2016-12-07 20:24:40.332781 0 656
6 2016-12-07 20:24:39.148961 0 6
6 2016-12-07 20:24:39.006717 0 346
6 2016-12-07 20:24:38.380482 104 1585
6 2016-12-07 20:24:38.309809 24 420
6 2016-12-07 20:24:38.289337 0 508
6 2016-12-07 20:24:37.623203 527 9
6 2016-12-07 20:24:37.363556 778 9
6 2016-12-07 20:24:36.713124 1422 6
6 2016-12-07 20:24:36.162238 1203 1119
6 2016-12-07 20:24:34.799063 2561 5
6 2016-12-07 20:24:34.231588 3121 8
6 2016-12-07 20:24:33.430055 3495 1210
6 2016-12-07 20:24:33.39447 3080 878
6 2016-12-07 20:24:32.384788 2438 2103
6 2016-12-07 20:24:31.938513 2852 31
6 2016-12-07 20:24:31.926978 2855 9
6 2016-12-07 20:24:31.449137 2778 2247
6 2016-12-07 20:24:31.43687 1799 992
6 2016-12-07 20:24:30.825615 2407 1549
6 2016-12-07 20:24:29.452803 2810 973
6 2016-12-07 20:24:25.562004 6372 1297
6 2016-12-07 20:24:25.104474 6814 16
6 2016-12-07 20:24:24.980428 6423 515
6 2016-12-07 20:24:22.992135 7791 1479
6 2016-12-07 20:24:22.200645 7747 1455
6 2016-12-07 20:24:20.1486 8391 1408
6 2016-12-07 20:24:18.806054 8302 1431
6 2016-12-07 20:24:17.723534 7371 2014
6 2016-12-07 20:24:16.870912 7644 3712
6 2016-12-07 20:24:16.812206 6922 779
6 2016-12-07 20:24:16.405749 6425 904
6 2016-12-07 20:24:16.057321 6089 2948
6 2016-12-07 20:24:14.668644 5797 1681
6 2016-12-07 20:24:14.55645 5568 2706
6 2016-12-07 20:24:14.505192 4732 1228
6 2016-12-07 20:24:14.318099 4474 1332
6 2016-12-07 20:24:14.123825 3589 1080
6 2016-12-07 20:24:13.979804 2712 1021
6 2016-12-07 20:24:13.815032 2577 14391
6 2016-12-07 20:24:13.600667 1852 940
6 2016-12-07 20:24:13.567844 1820 1304
6 2016-12-07 20:24:13.54951 1514 389
6 2016-12-07 20:24:13.104498 1547 736
6 2016-12-07 20:24:12.964259 1571 24257
6 2016-12-07 20:24:12.851051 1637 23845
6 2016-12-07 20:24:12.056138 1860 1145
6 2016-12-07 20:24:12.005064 0 2646
6 2016-12-07 20:24:11.91769 0 2571
6 2016-12-07 20:24:11.58621 0 2949
6 2016-12-07 20:24:09.549595 0 4367
6 2016-12-07 20:24:08.152901 0 5
6 2016-12-07 20:24:07.743142 0 1400

Merchant Spend

V1 Query

SELECT
    merch_id,
    (SUM(merchant_spent)/ 100.0)::NUMERIC(10, 2) AS spend
FROM
    attributed_clicks
WHERE
    click_date BETWEEN '2016-11-01' AND '2016-11-08'
GROUP BY merch_id;

-- QUERY PLAN
-- XN HashAggregate  (cost=3624.48..3624.50 rows=2 width=6)
--   ->  XN Seq Scan on attributed_clicks  (cost=0.00..2718.36 rows=181224 width=6)
--         Filter: ((click_date >= '2016-11-01 00:00:00'::timestamp without time zone) AND
--                  (click_date <= '2016-11-08 00:00:00'::timestamp without time zone))

V2 Query

SELECT
    merch_id,
    (SUM(COALESCE(spend, 0)) / 100.0)::NUMERIC(10, 2) AS spend
FROM
(
    SELECT
        merchant_product_merch_id AS merch_id,
        interaction_id,
        COALESCE(MAX(winner_bid_cost), 0) AS spend
    FROM
        convertible_impression
    WHERE
        interaction_datetime BETWEEN '2016-11-01' AND '2016-11-08' AND
        conversion_datetime IS NULL AND
        interaction_type = 'bam_link_click'
    GROUP BY merch_id, interaction_id
)
GROUP BY merch_id

-- QUERY PLAN
-- XN HashAggregate  (cost=17449951.33..17449951.66 rows=33 width=8)
--   ->  XN Subquery Scan derived_table1  (cost=17449950.67..17449951.16 rows=33 width=8)
--         ->  XN HashAggregate  (cost=17449950.67..17449950.83 rows=33 width=14)
--               ->  XN Seq Scan on convertible_impression  (cost=0.00..17449296.48 rows=87225 width=14)
--                     Filter: (((interaction_type)::text = 'bam_link_click'::text) AND
--                              (interaction_datetime <= '2016-11-08 00:00:00'::timestamp without time zone) AND
--                              (interaction_datetime >= '2016-11-01 00:00:00'::timestamp without time zone) AND
--                              (conversion_datetime IS NULL))

Before Vacuum

Run # Date Start Date End V1 Time (ms) V2 Time (ms)
1 2016-11-01 2016-11-08 12095 73495
2 795 25502
3 7522 20152

After Vacuum

Run # Date Start Date End V1 Time (ms) V2 Time (ms)
1 2016-11-01 2016-11-08 5236 2980
2 5425 4015
3 289 3888
Run # Date Start Date End V1 Time (ms) V2 Time (ms)
1 2016-10-15 NONE 27971 13242
2 11891 4528
3 781 7385

Optimized Column Compression Comparison (Full table count grouped by merch_id)

Using the same queries as above without the filtering on datetime

Run # attributed_clicks Current (ms) attributed_clicks Optimized (ms) convertible_impression Current (ms) convertible_impression Optimized (ms)
1 3075 1276 5210 3620
2 170 172 1538 2225
3 184 190 1639 2500
4 173 170 1403 2376
5 177 169 1617 2337
6 186 186 1660 2250
7 178 181 1590 2353
8 185 172 1508 2405
9 181 187 1442 2327
10 184 185 1408 2493
SELECT
trim(pgdb.datname) as Database,
trim(pgn.nspname) as Schema,
trim(a.name) as Table,
b.mbytes,
a.rows
FROM (
SELECT db_id, id, name, SUM(rows) as rows
FROM stv_tbl_perm a
GROUP BY db_id, id, name
) AS a
JOIN pg_class AS pgc ON pgc.oid = a.id
JOIN pg_namespace AS pgn ON pgn.oid = pgc.relnamespace
JOIN pg_database AS pgdb ON pgdb.oid = a.db_id
JOIN (
SELECT tbl, COUNT(*) AS mbytes
FROM stv_blocklist
GROUP BY tbl
) b ON a.id = b.tbl
ORDER BY mbytes desc, a.db_id, a.name;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment