diff --git a/.github/workflows/dbt_run_daily.yml b/.github/workflows/dbt_run_daily.yml index 6ff5078c..3e1a419b 100644 --- a/.github/workflows/dbt_run_daily.yml +++ b/.github/workflows/dbt_run_daily.yml @@ -3,8 +3,8 @@ run-name: dbt_run_scheduled_daily on: schedule: - # Runs 00:35 daily (see https://crontab.guru) - - cron: '35 0 * * *' + # Runs 01:35 daily (see https://crontab.guru) + - cron: '35 1 * * *' env: DBT_PROFILES_DIR: "${{ vars.DBT_PROFILES_DIR }}" @@ -46,6 +46,7 @@ jobs: dbt run -s models/silver/validator/silver__snapshot_validators_app_data.sql models/silver/validator/silver__snapshot_vote_accounts.sql models/silver/validator/silver__snapshot_vote_accounts_extended_stats.sql dbt run -s "solana_models,tag:nft_api" dbt run -s "solana_models,tag:daily" + dbt run -s "solana_models,tag:daily_balances" notify-failure: needs: [run_dbt_jobs] diff --git a/models/descriptions/columns/last_balance_change.md b/models/descriptions/columns/last_balance_change.md index 45396f4e..ff9939b8 100644 --- a/models/descriptions/columns/last_balance_change.md +++ b/models/descriptions/columns/last_balance_change.md @@ -1,10 +1,10 @@ {% docs last_balance_change %} -The date when this account-mint combination last had an actual balance change. This field tracks the most recent date when the balance was modified, which may be earlier than the balance_date due to forward-filling of balances on days with no activity. +The date when this account last had an actual balance change that resulted in a positive balance. This field tracks the most recent date when the balance was modified to a positive amount, which may be earlier than the balance_date due to forward-filling of balances on days with no activity. Note that accounts with zero balances are excluded from daily balance tables. **Data type:** DATE -**Business context:** Used to identify active vs. inactive accounts, understand balance change frequency, and distinguish between actual balance changes and forward-filled values. 
-**Analytics use cases:** Account activity analysis, dormant account identification, balance change frequency tracking, and data freshness assessment. +**Business context:** Used to identify active vs. inactive accounts, understand balance change frequency, and distinguish between actual balance changes and forward-filled values. Only accounts with positive balances are tracked. +**Analytics use cases:** Account activity analysis, dormant account identification, balance change frequency tracking, data freshness assessment, and identifying when accounts last held positive balances. **Example:** 2024-01-10 (when balance_date is 2024-01-15, indicating no changes for 5 days) {% enddocs %} diff --git a/models/descriptions/tables/fact_daily_balances.md b/models/descriptions/tables/fact_daily_balances.md index a6f6a2d6..e277b7a9 100644 --- a/models/descriptions/tables/fact_daily_balances.md +++ b/models/descriptions/tables/fact_daily_balances.md @@ -1,7 +1,9 @@ {% docs fact_token_daily_balances %} ## Description -This table provides daily snapshots of token balances for each account and mint combination on the Solana blockchain. It creates a complete time series by forward-filling the most recent balance when there's no activity on a given day, ensuring every account-mint combination has a balance record for each day since their first balance change. When multiple balance updates occur within a day, only the last balance is retained, providing a consistent end-of-day view. +This table provides daily snapshots of token balances for each account and mint combination on the Solana blockchain. It creates a daily time series by forward-filling the most recent balance when there's no activity on a given day, so every account-mint combination has a balance record for each day on which it ends with a positive balance, starting from its first balance change. When multiple balance updates occur within a day, only the last balance is retained, providing a consistent end-of-day view. 
+ +**Important:** If an account is left with a 0 balance at the end of the day, it is not included in the table. This means account-mint combinations will "disappear" from the daily snapshots when their balance reaches zero and "reappear" when they receive tokens again. ## Key Use Cases - Daily balance tracking and portfolio analysis over time @@ -23,7 +25,7 @@ This table provides daily snapshots of token balances for each account and mint - `mint`: Token mint address identifying the specific token - `amount`: The token balance amount (decimal adjusted) at end of day - `owner`: The owner of the token account (for attribution) -- `last_balance_change`: The last date when this account's balance actually changed +- `last_balance_change`: The last date when this account's balance actually changed (only tracks dates when account had a positive balance) - `balance_changed_on_date`: Boolean indicating if the balance changed on this specific date {% enddocs %} diff --git a/models/descriptions/tables/fact_sol_daily_balances.md b/models/descriptions/tables/fact_sol_daily_balances.md new file mode 100644 index 00000000..ffdd1398 --- /dev/null +++ b/models/descriptions/tables/fact_sol_daily_balances.md @@ -0,0 +1,84 @@ +{% docs fact_sol_daily_balances %} + +## Description +This table provides daily snapshots of native SOL balances for each account on the Solana blockchain. It creates a daily time series by forward-filling the most recent balance when there's no activity on a given day, so every account has a balance record for each date on which it ends with a positive balance, starting from its first balance change. When multiple balance updates occur within a day, only the last balance is retained. The table tracks native SOL only (mint address: So11111111111111111111111111111111111111111). + +**Important:** If an account is left with a 0 balance at the end of the day, it is not included in the table. 
This means accounts will "disappear" from the daily snapshots when their balance reaches zero and "reappear" when they receive SOL again. + +## Key Use Cases +- **Daily balance analysis**: Track SOL holdings over time for accounts, wallets, and protocols +- **Portfolio tracking**: Monitor SOL balance changes and trends for specific addresses +- **Whale watching**: Identify large SOL holders and track their balance movements +- **Protocol analysis**: Analyze SOL reserves and treasury balances for DeFi protocols +- **Time series analytics**: Perform historical balance analysis and trend identification +- **Snapshot reporting**: Generate point-in-time balance reports for any historical date + +## Important Relationships +- Sources data from `core.fact_sol_balances` which contains all SOL balance changes +- Uses `crosschain.core.dim_dates` for generating complete daily time series +- Complements `core.fact_token_daily_balances` which handles SPL token balances +- Related to `core.ez_transfers` for understanding SOL movement patterns + +## Commonly-used Fields +- `balance_date`: Essential for time-based analysis and filtering to specific dates +- `account`: Core field for account-specific balance tracking and wallet analysis +- `amount`: The SOL balance amount (in decimal SOL, not lamports) for value calculations +- `last_balance_change`: Critical for understanding when balances were last updated (only tracks dates when account had a positive balance) +- `balance_changed_on_date`: Key for filtering to only dates with actual balance activity + +## Sample Queries + +### Get current SOL balance for a specific account +```sql +SELECT + account, + amount as sol_balance, + last_balance_change +FROM solana.core.fact_sol_daily_balances +WHERE account = 'EPjFWdd5AufqSSqeM2qN1xzybapC8G4wEGGkZwyTDt1v' + AND balance_date = CURRENT_DATE() - 1 +``` + +### Track SOL balance changes over time for an account +```sql +SELECT + balance_date, + amount as sol_balance, + 
+    balance_changed_on_date
+FROM solana.core.fact_sol_daily_balances
+WHERE account = 'EPjFWdd5AufqSSqeM2qN1xzybapC8G4wEGGkZwyTDt1v'
+    AND balance_date >= CURRENT_DATE() - 30
+ORDER BY balance_date DESC
+```
+
+### Find accounts with largest SOL balances on a specific date
+```sql
+SELECT
+    account,
+    amount as sol_balance
+FROM solana.core.fact_sol_daily_balances
+WHERE balance_date = '2024-01-01'
+    AND amount > 0
+ORDER BY amount DESC
+LIMIT 100
+```
+
+### Analyze SOL balance distribution
+```sql
+SELECT
+    CASE
+        WHEN amount >= 10000 THEN '10K+ SOL'
+        WHEN amount >= 1000 THEN '1K-10K SOL'
+        WHEN amount >= 100 THEN '100-1K SOL'
+        WHEN amount >= 10 THEN '10-100 SOL'
+        ELSE '<10 SOL'
+    END as balance_tier,
+    COUNT(*) as account_count,
+    SUM(amount) as total_sol
+FROM solana.core.fact_sol_daily_balances
+WHERE balance_date = CURRENT_DATE() - 1
+GROUP BY 1
+ORDER BY total_sol DESC
+```
+
+{% enddocs %}
diff --git a/models/gold/core/core__fact_sol_balances.sql b/models/gold/core/core__fact_sol_balances.sql
index f20c1c37..a43f8ea6 100644
--- a/models/gold/core/core__fact_sol_balances.sql
+++ b/models/gold/core/core__fact_sol_balances.sql
@@ -5,7 +5,7 @@
     cluster_by = ['block_timestamp::DATE','modified_timestamp::DATE'],
     merge_exclude_columns = ["inserted_timestamp"],
     post_hook = enable_search_optimization('{{this.schema}}', '{{this.identifier}}', 'ON EQUALITY(tx_id, account_address)'),
-    tags = ['scheduled_non_core']
+    tags = ['scheduled_non_core','daily_balances']
 ) }}
 
 SELECT
diff --git a/models/gold/core/core__fact_sol_daily_balances.sql b/models/gold/core/core__fact_sol_daily_balances.sql
new file mode 100644
index 00000000..de628266
--- /dev/null
+++ b/models/gold/core/core__fact_sol_daily_balances.sql
@@ -0,0 +1,221 @@
+{{ config(
+    materialized = 'incremental',
+    unique_key = ['sol_daily_balances_id'],
+    incremental_predicates = ["dynamic_range_predicate", "balance_date"],
+    cluster_by = ['balance_date'],
+    merge_exclude_columns = ["inserted_timestamp"],
+    post_hook = enable_search_optimization('{{this.schema}}','{{this.identifier}}','ON EQUALITY(account)'),
+    tags = ['daily_balances']
+) }}
+
+-- Daily end-of-day native SOL balance per account. Days without activity are
+-- forward-filled from the latest known balance; rows whose balance is NULL or
+-- not positive are removed by the final filter. Incremental runs build at most
+-- 60 new dates per invocation (see date_spine).
+
+WITH date_spine AS (
+    -- Dates to build in this run; always ends before the current SYSDATE() date
+    SELECT
+        date_day AS balance_date
+    FROM
+        {{ source(
+            'crosschain',
+            'dim_dates'
+        ) }}
+    WHERE
+        date_day < SYSDATE() :: DATE
+
+{% if is_incremental() %}
+AND date_day > (
+    SELECT
+        MAX(balance_date)
+    FROM
+        {{ this }}
+)
+-- Limit to next 60 days for backfill batching
+AND date_day <= (
+    SELECT
+        LEAST(
+            MAX(balance_date) + 60,
+            SYSDATE() :: DATE
+        )
+    FROM
+        {{ this }}
+)
+{% else %}
+    AND date_day >= '2020-03-16'
+    AND date_day <= '2021-01-01'-- range for initial FR
+{% endif %}
+),
+
+{% if is_incremental() %}
+latest_balances_from_table AS (
+    -- Latest snapshot already stored in the target table; seeds the forward-fill
+    SELECT
+        account,
+        amount,
+        last_balance_change,
+        balance_date
+    FROM {{ this }}
+    WHERE balance_date = (
+        SELECT MAX(balance_date)
+        FROM {{ this }}
+    )
+),
+{% endif %}
+
+todays_balance_changes AS (
+    -- Get balance changes for dates in the date spine
+    SELECT
+        block_timestamp::DATE AS balance_date,
+        account_address AS account,
+        balance AS amount,
+        block_timestamp,
+        ROW_NUMBER() OVER (
+            PARTITION BY block_timestamp::DATE, account_address
+            ORDER BY block_timestamp DESC, block_id DESC, tx_index DESC
+        ) AS daily_rank
+    FROM {{ ref('core__fact_sol_balances') }} tb
+    WHERE EXISTS (
+        SELECT 1 FROM date_spine ds
+        WHERE ds.balance_date = tb.block_timestamp::DATE
+    )
+),
+
+todays_final_balances AS (
+    -- Keep only the last balance change per account for each date in the spine
+    SELECT
+        balance_date,
+        account,
+        amount,
+        block_timestamp AS last_balance_change_timestamp,
+        TRUE AS balance_changed_on_date
+    FROM todays_balance_changes
+    WHERE daily_rank = 1
+),
+
+account_combinations AS (
+    -- Accounts with at least one balance change on a date in the spine
+    SELECT DISTINCT
+        account
+    FROM todays_final_balances
+),
+
+source_data AS (
+    {% if is_incremental() %}
+    -- Choose single-day vs multi-day logic from the number of dates in date_spine
+    {% if execute %}
+        {# Count the spine dates in SQL so the bounds and clock match date_spine
+           (after MAX(balance_date), before SYSDATE()::DATE); an empty table yields 0. #}
+        {% set days_query %}
+            SELECT
+                COALESCE(DATEDIFF('day', MAX(balance_date), SYSDATE() :: DATE) - 1, 0) AS days_to_process
+            FROM {{ this }}
+        {% endset %}
+        {% set days_to_process = run_query(days_query).columns[0].values()[0] | int %}
+        {% set batch_size = days_to_process if days_to_process <= 60 else 60 %}
+    {% else %}
+        {% set batch_size = 1 %}
+    {% endif %}
+
+    {% if batch_size > 1 %}
+    -- Multi-day batch: Use window functions for proper forward-filling
+    SELECT
+        d.balance_date,
+        COALESCE(c.account, y.account) AS account,
+        -- For amount, use the most recent change within batch, or carry forward from yesterday
+        COALESCE(
+            LAST_VALUE(t.amount IGNORE NULLS) OVER (
+                PARTITION BY COALESCE(c.account, y.account)
+                ORDER BY d.balance_date
+                ROWS UNBOUNDED PRECEDING
+            ),
+            y.amount
+        ) AS amount,
+        -- Most recent change date seen so far in the batch; otherwise the stored date
+        COALESCE(
+            MAX(CASE WHEN t.balance_date IS NOT NULL THEN d.balance_date END) OVER (
+                PARTITION BY COALESCE(c.account, y.account)
+                ORDER BY d.balance_date
+                ROWS UNBOUNDED PRECEDING
+            )::TIMESTAMP,
+            y.last_balance_change::TIMESTAMP
+        ) AS last_balance_change_timestamp,
+        CASE WHEN t.balance_date IS NOT NULL THEN TRUE ELSE FALSE END AS balance_changed_on_date
+    FROM date_spine d
+    CROSS JOIN (
+        -- All accounts that should exist (previous + new); UNION removes accounts present in both
+        SELECT account FROM latest_balances_from_table
+        UNION
+        SELECT account FROM account_combinations
+    ) c
+    LEFT JOIN todays_final_balances t
+        ON d.balance_date = t.balance_date
+        AND c.account = t.account
+    LEFT JOIN latest_balances_from_table y
+        ON c.account = y.account
+
+    {% else %}
+    -- Single day: Use original efficient logic
+    SELECT
+        balance_date,
+        account,
+        amount,
+        last_balance_change_timestamp,
+        balance_changed_on_date
+    FROM todays_final_balances
+
+    UNION ALL
+
+    -- Carry forward yesterday's balances for accounts that didn't change today
+    SELECT
+        d.balance_date,
+        y.account,
+        y.amount,
+        y.last_balance_change::TIMESTAMP AS last_balance_change_timestamp,
+        FALSE AS balance_changed_on_date
+    FROM date_spine d
+    CROSS JOIN latest_balances_from_table y
+    LEFT JOIN todays_final_balances t
+        ON y.account = t.account
+        AND d.balance_date = t.balance_date
+    WHERE t.account IS NULL -- Only accounts with no changes today
+    {% endif %}
+
+    {% else %}
+    -- Full refresh: Create complete time series with forward-filling
+    SELECT
+        d.balance_date,
+        c.account,
+        LAST_VALUE(t.amount IGNORE NULLS) OVER (
+            PARTITION BY c.account
+            ORDER BY d.balance_date
+            ROWS UNBOUNDED PRECEDING
+        ) AS amount,
+        LAST_VALUE(t.last_balance_change_timestamp IGNORE NULLS) OVER (
+            PARTITION BY c.account
+            ORDER BY d.balance_date
+            ROWS UNBOUNDED PRECEDING
+        ) AS last_balance_change_timestamp,
+        CASE WHEN t.balance_date IS NOT NULL THEN TRUE ELSE FALSE END AS balance_changed_on_date
+    FROM date_spine d
+    CROSS JOIN account_combinations c
+    LEFT JOIN todays_final_balances t
+        ON d.balance_date = t.balance_date
+        AND c.account = t.account
+    {% endif %}
+)
+
+SELECT
+    balance_date,
+    account,
+    amount,
+    last_balance_change_timestamp::DATE AS last_balance_change,
+    balance_changed_on_date,
+    {{ dbt_utils.generate_surrogate_key(['balance_date', 'account']) }} AS sol_daily_balances_id,
+    SYSDATE() AS inserted_timestamp,
+    SYSDATE() AS modified_timestamp,
+    '{{ invocation_id }}' AS _invocation_id
+FROM source_data
+WHERE amount IS NOT NULL -- Only include accounts that have had at least one balance
+    AND amount > 0 -- Only include accounts with positive balances
diff --git a/models/gold/core/core__fact_sol_daily_balances.yml b/models/gold/core/core__fact_sol_daily_balances.yml
new file mode 100644
index 00000000..5123b74d
--- /dev/null
+++ b/models/gold/core/core__fact_sol_daily_balances.yml
@@ -0,0 +1,49 @@
+version: 2
+models:
+  - name:
core__fact_sol_daily_balances + description: "{{ doc('fact_sol_daily_balances') }}" + recent_date_filter: &recent_date_filter + config: + where: modified_timestamp >= current_date - 7 + columns: + - name: BALANCE_DATE + description: "{{ doc('balance_date') }}" + tests: + - dbt_expectations.expect_column_to_exist + - not_null: *recent_date_filter + - name: ACCOUNT + description: "{{ doc('balances_account') }}" + tests: + - dbt_expectations.expect_column_to_exist + - not_null: *recent_date_filter + - name: AMOUNT + description: "{{ doc('balances_post_amount') }}" + tests: + - dbt_expectations.expect_column_to_exist + - not_null: *recent_date_filter + - name: LAST_BALANCE_CHANGE + description: "{{ doc('last_balance_change') }}" + tests: + - dbt_expectations.expect_column_to_exist + - not_null: *recent_date_filter + - name: BALANCE_CHANGED_ON_DATE + description: "{{ doc('balance_changed_on_date') }}" + tests: + - dbt_expectations.expect_column_to_exist + - not_null: *recent_date_filter + - name: SOL_DAILY_BALANCES_ID + description: '{{ doc("pk") }}' + tests: + - dbt_expectations.expect_column_to_exist + - not_null: *recent_date_filter + - unique: *recent_date_filter + - name: INSERTED_TIMESTAMP + description: '{{ doc("inserted_timestamp") }}' + tests: + - dbt_expectations.expect_column_to_exist + - not_null: *recent_date_filter + - name: MODIFIED_TIMESTAMP + description: '{{ doc("modified_timestamp") }}' + tests: + - dbt_expectations.expect_column_to_exist + - not_null diff --git a/models/gold/core/core__fact_token_balances.sql b/models/gold/core/core__fact_token_balances.sql index c5de84df..78c2feb3 100644 --- a/models/gold/core/core__fact_token_balances.sql +++ b/models/gold/core/core__fact_token_balances.sql @@ -5,7 +5,7 @@ cluster_by = ['block_timestamp::DATE','modified_timestamp::DATE'], merge_exclude_columns = ["inserted_timestamp"], post_hook = enable_search_optimization('{{this.schema}}','{{this.identifier}}','ON EQUALITY(tx_id, account_address, 
fact_token_balances_id)'), - tags = ['scheduled_non_core'] + tags = ['scheduled_non_core','daily_balances'] ) }} SELECT diff --git a/models/gold/core/core__fact_token_daily_balances.sql b/models/gold/core/core__fact_token_daily_balances.sql index 5efcb2df..b1b92f47 100644 --- a/models/gold/core/core__fact_token_daily_balances.sql +++ b/models/gold/core/core__fact_token_daily_balances.sql @@ -5,7 +5,7 @@ cluster_by = ['balance_date'], merge_exclude_columns = ["inserted_timestamp"], post_hook = enable_search_optimization('{{this.schema}}','{{this.identifier}}','ON EQUALITY(account, mint)'), - tags = ['daily'] + tags = ['daily_balances'] ) }} WITH date_spine AS ( diff --git a/models/silver/accounts/silver__token_account_owners.sql b/models/silver/accounts/silver__token_account_owners.sql index bcdf5d13..6d17b8f6 100644 --- a/models/silver/accounts/silver__token_account_owners.sql +++ b/models/silver/accounts/silver__token_account_owners.sql @@ -5,7 +5,7 @@ unique_key = ["account_address"], cluster_by = ["round(start_block_id,-5)"], post_hook = enable_search_optimization('{{this.schema}}','{{this.identifier}}','ON EQUALITY(account_address, owner)'), - tags = ['scheduled_non_core'] + tags = ['scheduled_non_core','daily_balances'] ) }} diff --git a/models/silver/accounts/silver__token_account_ownership_events.sql b/models/silver/accounts/silver__token_account_ownership_events.sql index dbace893..787829ff 100644 --- a/models/silver/accounts/silver__token_account_ownership_events.sql +++ b/models/silver/accounts/silver__token_account_ownership_events.sql @@ -4,7 +4,7 @@ incremental_predicates = ["dynamic_range_predicate", "block_timestamp::date"], cluster_by = ['block_timestamp::DATE','_inserted_timestamp::DATE'], full_refresh = false, - tags = ['scheduled_non_core'], + tags = ['scheduled_non_core','daily_balances'], ) }} with base_events as ( diff --git a/models/silver/balances/silver__sol_balances.sql b/models/silver/balances/silver__sol_balances.sql index 
0da53c6f..e8232805 100644 --- a/models/silver/balances/silver__sol_balances.sql +++ b/models/silver/balances/silver__sol_balances.sql @@ -5,7 +5,7 @@ cluster_by = ['block_timestamp::DATE','modified_timestamp::DATE'], full_refresh = false, merge_exclude_columns = ["inserted_timestamp"], - tags = ['scheduled_non_core'] + tags = ['scheduled_non_core','daily_balances'] ) }} WITH balances AS ( diff --git a/models/silver/balances/silver__token_balances.sql b/models/silver/balances/silver__token_balances.sql index 237d062e..4b8b4b15 100644 --- a/models/silver/balances/silver__token_balances.sql +++ b/models/silver/balances/silver__token_balances.sql @@ -5,7 +5,7 @@ cluster_by = ['block_timestamp::DATE','modified_timestamp::DATE'], full_refresh = false, merge_exclude_columns = ["inserted_timestamp"], - tags = ['scheduled_non_core'] + tags = ['scheduled_non_core','daily_balances'] ) }} WITH pre AS (