diff --git a/models/descriptions/columns/balance_changed_on_date.md b/models/descriptions/columns/balance_changed_on_date.md new file mode 100644 index 00000000..3d6923b1 --- /dev/null +++ b/models/descriptions/columns/balance_changed_on_date.md @@ -0,0 +1,10 @@ +{% docs balance_changed_on_date %} + +Boolean flag indicating whether the balance actually changed on this specific date. TRUE means there was a balance-changing transaction on this date, FALSE means the balance was forward-filled from a previous date to maintain a complete daily time series. + +**Data type:** BOOLEAN +**Business context:** Distinguishes between days with actual balance activity versus days where balances are carried forward. Critical for understanding account activity patterns and data completeness. +**Analytics use cases:** Account activity tracking, identifying active trading days, filtering for actual balance changes, and understanding transaction frequency patterns. +**Example:** TRUE (balance changed on this date), FALSE (balance carried forward from previous day) + +{% enddocs %} diff --git a/models/descriptions/columns/balance_date.md b/models/descriptions/columns/balance_date.md new file mode 100644 index 00000000..062b7b22 --- /dev/null +++ b/models/descriptions/columns/balance_date.md @@ -0,0 +1,10 @@ +{% docs balance_date %} + +The date for which the balance snapshot is recorded. This field represents the calendar date (in UTC) for the daily balance record, providing the time dimension for balance analysis. + +**Data type:** DATE +**Business context:** Essential for time-series analysis of token balances, portfolio tracking, and historical balance lookups. +**Analytics use cases:** Daily balance trending, portfolio performance analysis, holder distribution over time, and balance change pattern analysis. +**Example:** 2024-01-15, 2024-01-16 + +{% enddocs %} diff --git a/models/descriptions/columns/last_balance_change.md b/models/descriptions/columns/last_balance_change.md new file mode 100644 index 00000000..45396f4e --- /dev/null +++ b/models/descriptions/columns/last_balance_change.md @@ -0,0 +1,10 @@ +{% docs last_balance_change %} + +The date when this account-mint combination last had an actual balance change. This field tracks the most recent date when the balance was modified, which may be earlier than the balance_date due to forward-filling of balances on days with no activity. + +**Data type:** DATE +**Business context:** Used to identify active vs. inactive accounts, understand balance change frequency, and distinguish between actual balance changes and forward-filled values. +**Analytics use cases:** Account activity analysis, dormant account identification, balance change frequency tracking, and data freshness assessment. +**Example:** 2024-01-10 (when balance_date is 2024-01-15, indicating no changes for 5 days) + +{% enddocs %} diff --git a/models/descriptions/tables/fact_daily_balances.md b/models/descriptions/tables/fact_daily_balances.md new file mode 100644 index 00000000..a6f6a2d6 --- /dev/null +++ b/models/descriptions/tables/fact_daily_balances.md @@ -0,0 +1,29 @@ +{% docs fact_token_daily_balances %} + +## Description +This table provides daily snapshots of token balances for each account and mint combination on the Solana blockchain. It creates a complete time series by forward-filling the most recent balance when there's no activity on a given day, ensuring every account-mint combination has a balance record for each day since their first balance change. When multiple balance updates occur within a day, only the last balance is retained, providing a consistent end-of-day view. + +## Key Use Cases +- Daily balance tracking and portfolio analysis over time +- Time-series analysis of token holdings and distribution patterns +- Historical balance lookups for any account-mint combination on any date +- Whale tracking and large holder analysis with daily granularity +- DeFi protocol analytics requiring daily balance snapshots +- Token distribution studies and holder concentration analysis + +## Important Relationships +- Sources data from `core.fact_token_balances` for balance change events +- Links to `core.fact_token_account_owners` through `post_owner` for ownership attribution +- Connects to `price.ez_asset_metadata` via `mint` for token metadata and pricing +- Joins with `core.ez_transfers` for transfer context and flow analysis + +## Commonly-used Fields +- `balance_date`: The date for the balance snapshot (primary time dimension) +- `account`: Token account address holding the balance +- `mint`: Token mint address identifying the specific token +- `amount`: The token balance amount (decimal adjusted) at end of day +- `owner`: The owner of the token account (for attribution) +- `last_balance_change`: The last date when this account's balance actually changed +- `balance_changed_on_date`: Boolean indicating if the balance changed on this specific date + +{% enddocs %} diff --git a/models/gold/core/core__fact_token_daily_balances.sql b/models/gold/core/core__fact_token_daily_balances.sql new file mode 100644 index 00000000..5efcb2df --- /dev/null +++ b/models/gold/core/core__fact_token_daily_balances.sql @@ -0,0 +1,249 @@ +{{ config( + materialized = 'incremental', + unique_key = ['token_daily_balances_id'], + incremental_predicates = ["dynamic_range_predicate", "balance_date"], + cluster_by = ['balance_date'], + merge_exclude_columns = ["inserted_timestamp"], + post_hook = enable_search_optimization('{{this.schema}}','{{this.identifier}}','ON EQUALITY(account, mint)'), + tags = ['daily'] +) }} + +WITH date_spine AS ( + SELECT + date_day AS balance_date + FROM + {{ source( + 'crosschain', + 'dim_dates' + ) }} + WHERE + date_day < SYSDATE() :: DATE + +{% if is_incremental() %} +AND date_day > ( + SELECT + MAX(balance_date) + FROM + {{ this }} +) +-- Limit to next 60 days for backfill batching +AND date_day <= ( + SELECT + LEAST( + MAX(balance_date) + 60, + CURRENT_DATE() + ) + FROM + {{ this }} +) +{% else %} + AND date_day >= '2021-01-30' + AND date_day <= '2021-04-01'-- First 2 months only +{% endif %} +), + +{% if is_incremental() %} +latest_balances_from_table AS ( + SELECT + account, + mint, + amount, + owner, + last_balance_change, + balance_date + FROM {{ this }} + WHERE balance_date = ( + SELECT MAX(balance_date) + FROM {{ this }} + ) +), +{% endif %} + +todays_balance_changes AS ( + -- Get balance changes for dates in the date spine + SELECT + block_timestamp::DATE AS balance_date, + account_address AS account, + mint, + balance AS amount, + owner, + block_timestamp, + ROW_NUMBER() OVER ( + PARTITION BY block_timestamp::DATE, account_address, mint + ORDER BY block_timestamp DESC, block_id DESC, tx_index DESC + ) AS daily_rank + FROM {{ ref('core__fact_token_balances') }} tb + WHERE EXISTS ( + SELECT 1 FROM date_spine ds + WHERE ds.balance_date = tb.block_timestamp::DATE + ) +), + +todays_final_balances AS ( + -- Get the last balance change per account-mint for today + SELECT + balance_date, + account, + mint, + amount, + owner, + block_timestamp AS last_balance_change_timestamp, + TRUE AS balance_changed_on_date + FROM todays_balance_changes + WHERE daily_rank = 1 +), + +account_mint_combinations AS ( + -- Get all unique account-mint combinations that have ever had a balance + SELECT DISTINCT + account, + mint + FROM todays_final_balances +), + +source_data AS ( + {% if is_incremental() %} + -- Check if processing multiple days (batch mode) + {% if execute %} + {% set max_date_query %} + SELECT MAX(balance_date) as max_date FROM {{ this }} + {% endset %} + {% set max_date = run_query(max_date_query).columns[0].values()[0] %} + {% set days_to_process = (modules.datetime.date.today() - max_date).days %} + {% set batch_size = days_to_process if days_to_process <= 60 else 60 %} + {% else %} + {% set batch_size = 1 %} + {% endif %} + + {% if batch_size > 1 %} + -- Multi-day batch: Use window functions for proper forward-filling + SELECT + d.balance_date, + COALESCE(c.account, y.account) AS account, + COALESCE(c.mint, y.mint) AS mint, + -- For amount, use the most recent change within batch, or carry forward from yesterday + COALESCE( + LAST_VALUE(t.amount IGNORE NULLS) OVER ( + PARTITION BY COALESCE(c.account, y.account), COALESCE(c.mint, y.mint) + ORDER BY d.balance_date + ROWS UNBOUNDED PRECEDING + ), + y.amount + ) AS amount, + -- For owner, use the most recent change within batch, or carry forward from yesterday + COALESCE( + LAST_VALUE(t.owner IGNORE NULLS) OVER ( + PARTITION BY COALESCE(c.account, y.account), COALESCE(c.mint, y.mint) + ORDER BY d.balance_date + ROWS UNBOUNDED PRECEDING + ), + y.owner + ) AS owner, + -- For last_balance_change, we need to track the most recent change date within the batch + CASE + WHEN MAX(CASE WHEN t.balance_date IS NOT NULL THEN d.balance_date END) OVER ( + PARTITION BY COALESCE(c.account, y.account), COALESCE(c.mint, y.mint) + ORDER BY d.balance_date + ROWS UNBOUNDED PRECEDING + ) IS NOT NULL THEN + MAX(CASE WHEN t.balance_date IS NOT NULL THEN d.balance_date END) OVER ( + PARTITION BY COALESCE(c.account, y.account), COALESCE(c.mint, y.mint) + ORDER BY d.balance_date + ROWS UNBOUNDED PRECEDING + )::TIMESTAMP + ELSE y.last_balance_change::TIMESTAMP + END AS last_balance_change_timestamp, + CASE WHEN t.balance_date IS NOT NULL THEN TRUE ELSE FALSE END AS balance_changed_on_date + FROM date_spine d + CROSS JOIN ( + -- All accounts that should exist (previous + new) + SELECT account, mint FROM latest_balances_from_table + UNION + SELECT account, mint FROM account_mint_combinations + ) c + LEFT JOIN todays_final_balances t + ON d.balance_date = t.balance_date + AND c.account = t.account + AND c.mint = t.mint + LEFT JOIN latest_balances_from_table y + ON c.account = y.account + AND c.mint = y.mint + + {% else %} + -- Single day: Use original efficient logic + SELECT + balance_date, + account, + mint, + amount, + owner, + last_balance_change_timestamp, + balance_changed_on_date + FROM todays_final_balances + + UNION ALL + + -- Carry forward yesterday's balances for accounts that didn't change today + SELECT + d.balance_date, + y.account, + y.mint, + y.amount, + y.owner, + y.last_balance_change::TIMESTAMP AS last_balance_change_timestamp, + FALSE AS balance_changed_on_date + FROM date_spine d + CROSS JOIN latest_balances_from_table y + LEFT JOIN todays_final_balances t + ON y.account = t.account + AND y.mint = t.mint + AND d.balance_date = t.balance_date + WHERE t.account IS NULL -- Only accounts with no changes today + {% endif %} + + {% else %} + -- Full refresh: Create complete time series with forward-filling + SELECT + d.balance_date, + c.account, + c.mint, + LAST_VALUE(t.amount IGNORE NULLS) OVER ( + PARTITION BY c.account, c.mint + ORDER BY d.balance_date + ROWS UNBOUNDED PRECEDING + ) AS amount, + LAST_VALUE(t.owner IGNORE NULLS) OVER ( + PARTITION BY c.account, c.mint + ORDER BY d.balance_date + ROWS UNBOUNDED PRECEDING + ) AS owner, + LAST_VALUE(t.last_balance_change_timestamp IGNORE NULLS) OVER ( + PARTITION BY c.account, c.mint + ORDER BY d.balance_date + ROWS UNBOUNDED PRECEDING + ) AS last_balance_change_timestamp, + CASE WHEN t.balance_date IS NOT NULL THEN TRUE ELSE FALSE END AS balance_changed_on_date + FROM date_spine d + CROSS JOIN account_mint_combinations c + LEFT JOIN todays_final_balances t + ON d.balance_date = t.balance_date + AND c.account = t.account + AND c.mint = t.mint + {% endif %} +) + +SELECT + balance_date, + account, + mint, + amount, + owner, + last_balance_change_timestamp::DATE AS last_balance_change, + balance_changed_on_date, + {{ dbt_utils.generate_surrogate_key(['balance_date', 'account', 'mint']) }} AS token_daily_balances_id, + SYSDATE() AS inserted_timestamp, + SYSDATE() AS modified_timestamp, + '{{ invocation_id }}' AS _invocation_id +FROM source_data +WHERE amount IS NOT NULL -- Only include accounts that have had at least one balance + AND amount > 0 -- Only include accounts with positive balances diff --git a/models/gold/core/core__fact_token_daily_balances.yml b/models/gold/core/core__fact_token_daily_balances.yml new file mode 100644 index 00000000..84d5fbf2 --- /dev/null +++ b/models/gold/core/core__fact_token_daily_balances.yml @@ -0,0 +1,60 @@ +version: 2 +models: + - name: core__fact_token_daily_balances + description: "{{ doc('fact_token_daily_balances') }}" + recent_date_filter: &recent_date_filter + config: + where: modified_timestamp >= current_date - 7 + columns: + - name: BALANCE_DATE + description: "{{ doc('balance_date') }}" + tests: + - dbt_expectations.expect_column_to_exist + - not_null: *recent_date_filter + - name: ACCOUNT + description: "{{ doc('balances_account') }}" + tests: + - dbt_expectations.expect_column_to_exist + - not_null: *recent_date_filter + - name: MINT + description: "{{ doc('mint') }}" + tests: + - dbt_expectations.expect_column_to_exist + - not_null: *recent_date_filter + - name: AMOUNT + description: "{{ doc('balances_post_amount') }}" + tests: + - dbt_expectations.expect_column_to_exist + - not_null: *recent_date_filter + - name: OWNER + description: "{{ doc('token_balances_block_owner') }}" + tests: + - dbt_expectations.expect_column_to_exist + - name: LAST_BALANCE_CHANGE + description: "{{ doc('last_balance_change') }}" + tests: + - dbt_expectations.expect_column_to_exist + - not_null: *recent_date_filter + - name: BALANCE_CHANGED_ON_DATE + description: "{{ doc('balance_changed_on_date') }}" + tests: + - dbt_expectations.expect_column_to_exist + - not_null: *recent_date_filter + - accepted_values: + values: [true, false] + - name: TOKEN_DAILY_BALANCES_ID + description: '{{ doc("pk") }}' + tests: + - dbt_expectations.expect_column_to_exist + - not_null: *recent_date_filter + - unique: *recent_date_filter + - name: INSERTED_TIMESTAMP + description: '{{ doc("inserted_timestamp") }}' + tests: + - dbt_expectations.expect_column_to_exist + - not_null: *recent_date_filter + - name: MODIFIED_TIMESTAMP + description: '{{ doc("modified_timestamp") }}' + tests: + - dbt_expectations.expect_column_to_exist + - not_null diff --git a/models/sources.yml b/models/sources.yml index 2421ef21..f47fea5e 100644 --- a/models/sources.yml +++ b/models/sources.yml @@ -7,6 +7,7 @@ sources: tables: - name: dim_labels - name: dim_date_hours + - name: dim_dates - name: crosschain_public database: "{{ 'crosschain' if target.database == 'SOLANA' else 'crosschain_dev' }}" schema: bronze_public