|
-- PAGE DETAILS |
|
-- https://app.pendo.io/s/5685572921851904/pages/eUVHijHvCYzPMjlwGzEL75JEgKs |
|
|
|
WITH |
|
reporting_query_parameters AS ( |
|
/** |
|
* These are convenience measures to tune the subsequent queries. |
|
* |
|
* These would normally live in a reporting interface or data |
|
* management tool as opposed to be hardcoded in a script. |
|
*/ |
|
SELECT |
|
'US/Eastern' AS subscription_tz |
|
, True AS should_apply_blacklist |
|
, '2025-10-01' :: DATE AS report_start |
|
, '2025-10-01' :: DATE AS report_end |
|
, 'Page/eUVHijHvCYzPMjlwGzEL75JEgKs' AS page_id |
|
), |
|
|
|
_utc_boundaries AS ( |
|
/** |
|
* Pendo data is stored in the UTC timezone, but the Pendo UI |
|
* can be configured to any supported timezone. We'll need to |
|
* perform the same conversion (in the reverse order) as the |
|
* UI does. |
|
* |
|
* NOTE: |
|
* Data Sync partitions the data on PERIOD, which is the |
|
* number of days (UTC) since EPOCH (1970/01/01 at 0:00). |
|
* Additionally, the BROWSERTIMESTAMP column is the time in |
|
* milliseconds (UTC). |
|
*/ |
|
SELECT |
|
FLOOR( |
|
EXTRACT(EPOCH FROM CONVERT_TIMEZONE(p.subscription_tz, 'UTC', p.report_start :: TIMESTAMP_NTZ)) |
|
/ 86400 |
|
) AS report_start |
|
, FLOOR( |
|
EXTRACT(EPOCH FROM CONVERT_TIMEZONE(p.subscription_tz, 'UTC', (p.report_end + 1) :: TIMESTAMP_NTZ)) |
|
/ 86400 |
|
) AS report_end |
|
FROM reporting_query_parameters AS p |
|
), |
|
|
|
_blacklist_filter AS ( |
|
/** |
|
* Pendo data exported via Data Sync matches the raw event data |
|
* from the Pendo UI. This means that the Exclude/Include list nor |
|
* any Segments are applied by default. |
|
* |
|
* In reality this would be a "configuration table" that is reloaded |
|
* regularly, instead of acting as a series of VALUES statements. |
|
* |
|
* Further reading: |
|
* https://engageapi.pendo.io/#cb7577d8-17af-4428-ac8f-dda7d7a3502a |
|
*/ |
|
SELECT |
|
* |
|
FROM |
|
VALUES |
|
('server', 'dev.pendo.io') |
|
, ('server', 'staging.pendo.io') |
|
-- , ('ip_address', '192.168.1.1') |
|
-- , ('visitor_id', 'test-visitor-123') |
|
-- , ('account_id', 'internal-account-456') |
|
AS t (filter_type, filter_value) |
|
), |
|
|
|
visitors_with_metadata AS ( |
|
/** |
|
* Visitors can be pre-joined with their metadata for more info. |
|
* |
|
* The METADATA table is effectively a key-value pair, so we'll |
|
* likely want to PIVOT it in order to not blow up the events |
|
* resultset when JOINing. |
|
*/ |
|
SELECT |
|
v.id |
|
, (MAX(v.firstvisitms) / 1000) :: TIMESTAMP AS first_seen_utc |
|
, (MAX(v.lastvisitms) / 1000) :: TIMESTAMP AS last_seen_utc |
|
/** |
|
* These fields will be different for all customers, as this is the |
|
* metadata that you pass directly to Pendo. These fields values |
|
* can be found by navigating to https://app.pendo.io/admin/mappings |
|
* |
|
* There is also a TYPE column which denotes the simple JSON type |
|
* of the underlying data sent to Pendo. |
|
*/ |
|
-- , MAX(CASE WHEN m.name = 'is_impersonated' THEN m.value END) :: BOOLEAN AS is_impersonated |
|
|
|
FROM SUB_5685572921851904_VISITOR.VISITORS AS v |
|
|
|
LEFT JOIN |
|
SUB_5685572921851904_VISITOR.VISITORMETADATA AS m |
|
ON m.visitorid = v.id AND NOT m.isdeleted |
|
|
|
GROUP BY |
|
v.id |
|
), |
|
|
|
filtered_raw_events AS ( |
|
/** |
|
* This "first pass" of the raw data will perform the implicit |
|
* filtering of the data in the Pendo UI. |
|
* |
|
* 1/ Select from RAW EVENTS for a single app. |
|
* 2/ Filter to the relevant partitions (PERIODs) |
|
* 3/ Filter to the "production friendly" data (apply blacklist). |
|
* |
|
* It's not strictly necessary to write your SQL in this way, |
|
* however it is written here for clarity of the types of |
|
* explicitly or implicit knowledge you need to have to go from |
|
* RAW EVENTS to something that matches the Pendo UI. |
|
*/ |
|
SELECT |
|
e.* |
|
|
|
FROM |
|
-- SUB: Digital Adoption (5685572921851904) |
|
-- APP: Salesforce (5720135393738752) |
|
SUB_5685572921851904_APP_5720135393738752.ALLEVENTS AS e |
|
|
|
CROSS JOIN reporting_query_parameters AS p |
|
CROSS JOIN _utc_boundaries AS _u |
|
CROSS JOIN _blacklist_filter AS _i |
|
|
|
WHERE |
|
-- Engage the PARTITION KEY for more efficient filtering |
|
e.periodid BETWEEN (_u.report_start - 1) AND (_u.report_end + 1) |
|
|
|
-- Apply the Exclude/Ignore list |
|
AND ( |
|
NOT p.should_apply_blacklist |
|
OR ( |
|
NOT (_i.filter_type = 'server' AND e.server = _i.filter_value) |
|
AND NOT (_i.filter_type = 'ip_address' AND e.remoteip = _i.filter_value) |
|
AND NOT (_i.filter_type = 'visitor_id' AND e.visitorid = _i.filter_value) |
|
AND NOT (_i.filter_type = 'account_id' AND e.accountid = _i.filter_value) |
|
) |
|
) |
|
), |
|
|
|
filtered_matched_events AS ( |
|
/** |
|
* This is the "second pass" of the raw data which shows how we |
|
* can join to other common entities in the Pendo data. |
|
* |
|
* We're applying our reporting filters here to limit our data |
|
* to only what the user has asked for, however we are not yet |
|
* performing any meaningful aggregation. |
|
*/ |
|
SELECT |
|
e.* |
|
, pe.matchableid AS page_id |
|
|
|
FROM |
|
filtered_raw_events AS e |
|
|
|
LEFT JOIN |
|
SUB_5685572921851904_APP_5720135393738752.MATCHEDPAGEEVENTS AS pe |
|
ON pe.periodid = e.periodid AND pe.eventid = e.eventid |
|
AND pe.deletedat IS NULL |
|
|
|
CROSS JOIN reporting_query_parameters AS p |
|
CROSS JOIN _utc_boundaries AS _u |
|
|
|
WHERE |
|
1 = 1 |
|
|
|
/** |
|
* NOTE: |
|
* For a more efficient query in this exact pattern, we'd want |
|
* to re-engage the PARTITION KEY on the other large history |
|
* tables (PAGE EVENTS). |
|
* |
|
* In reality, |
|
*/ |
|
-- AND pe.periodid BETWEEN (_u.report_start - 1) AND (_u.report_end + 1) |
|
|
|
-- Reporting Filters |
|
AND pe.matchableid = p.page_id |
|
AND e.browsertimestamp >= (_u.report_start * 86400000) |
|
AND e.browsertimestamp < (_u.report_end * 86400000) |
|
) |
|
|
|
|
|
/** |
|
* Here we are matching the simple stats on a Page Report. |
|
* (https://app.pendo.io/s/<subid>/pages/<pageid>) |
|
* |
|
* It's important to remember that this data is STILL IN UTC, |
|
* and typically we would reserve conversion to local to the |
|
* application/reporting layer. |
|
*/ |
|
SELECT |
|
COUNT(DISTINCT v.id) AS unique_visitors |
|
, COUNT(DISTINCT e.accountid) AS unique_accounts |
|
, COUNT(DISTINCT CASE WHEN e.eventtype = 'load' THEN e.eventid ELSE NULL END) AS page_views |
|
|
|
FROM |
|
filtered_matched_events AS e |
|
LEFT JOIN |
|
visitors_with_metadata AS v ON v.id = e.visitorid |