data2al icon data2al Snowflake, Databricks, and SQL Engineering

Concept note

Monitor Model Drift and Retraining Signals in Snowflake

A model monitoring lab that compares training and production feature distributions, flags drift, tracks prediction quality, and cleans up.

2026-04-29
Alan
AI/Data Science Lab
Intermediate
Snowflake Model-Monitoring Drift Data-Science Evaluation ML

Model lifecycle work does not stop after registration. Production models need monitoring for input drift, prediction changes, latency, and quality. This lab creates simple training and production distributions, calculates drift signals, and decides whether retraining should be reviewed.

Step 1: Create monitoring tables

USE ROLE ACCOUNTADMIN;
CREATE WAREHOUSE IF NOT EXISTS AI_DS_LAB_WH WAREHOUSE_SIZE = XSMALL AUTO_SUSPEND = 60 AUTO_RESUME = TRUE;
CREATE DATABASE IF NOT EXISTS AI_DS_LAB_DB;
CREATE SCHEMA IF NOT EXISTS AI_DS_LAB_DB.MODEL_MONITORING;

USE WAREHOUSE AI_DS_LAB_WH;
USE DATABASE AI_DS_LAB_DB;
USE SCHEMA MODEL_MONITORING;

CREATE OR REPLACE TABLE TRAINING_FEATURE_PROFILE (
  FEATURE_NAME STRING,
  BIN_NAME STRING,
  ROW_COUNT NUMBER
);

INSERT INTO TRAINING_FEATURE_PROFILE
SELECT * FROM VALUES
  ('LOGINS_30D', 'low', 25),
  ('LOGINS_30D', 'medium', 50),
  ('LOGINS_30D', 'high', 25),
  ('TICKETS_30D', 'none', 60),
  ('TICKETS_30D', 'one', 25),
  ('TICKETS_30D', 'many', 15)
AS v (FEATURE_NAME, BIN_NAME, ROW_COUNT);

CREATE OR REPLACE TABLE PRODUCTION_FEATURE_PROFILE (
  FEATURE_NAME STRING,
  BIN_NAME STRING,
  ROW_COUNT NUMBER
);

INSERT INTO PRODUCTION_FEATURE_PROFILE
SELECT * FROM VALUES
  ('LOGINS_30D', 'low', 45),
  ('LOGINS_30D', 'medium', 40),
  ('LOGINS_30D', 'high', 15),
  ('TICKETS_30D', 'none', 40),
  ('TICKETS_30D', 'one', 30),
  ('TICKETS_30D', 'many', 30)
AS v (FEATURE_NAME, BIN_NAME, ROW_COUNT);

Step 2: Calculate population stability index

CREATE OR REPLACE TABLE FEATURE_DRIFT_PSI AS
WITH train_pct AS (
  SELECT
    FEATURE_NAME,
    BIN_NAME,
    ROW_COUNT / SUM(ROW_COUNT) OVER (PARTITION BY FEATURE_NAME) AS TRAIN_PCT
  FROM TRAINING_FEATURE_PROFILE
),
prod_pct AS (
  SELECT
    FEATURE_NAME,
    BIN_NAME,
    ROW_COUNT / SUM(ROW_COUNT) OVER (PARTITION BY FEATURE_NAME) AS PROD_PCT
  FROM PRODUCTION_FEATURE_PROFILE
),
joined AS (
  SELECT
    t.FEATURE_NAME,
    t.BIN_NAME,
    GREATEST(t.TRAIN_PCT, 0.0001) AS TRAIN_PCT,
    GREATEST(p.PROD_PCT, 0.0001) AS PROD_PCT
  FROM train_pct t
  JOIN prod_pct p
    ON t.FEATURE_NAME = p.FEATURE_NAME
   AND t.BIN_NAME = p.BIN_NAME
)
SELECT
  FEATURE_NAME,
  SUM((PROD_PCT - TRAIN_PCT) * LN(PROD_PCT / TRAIN_PCT)) AS PSI
FROM joined
GROUP BY FEATURE_NAME;

SELECT
  FEATURE_NAME,
  PSI,
  CASE
    WHEN PSI >= 0.25 THEN 'HIGH_DRIFT'
    WHEN PSI >= 0.10 THEN 'MODERATE_DRIFT'
    ELSE 'STABLE'
  END AS DRIFT_STATUS
FROM FEATURE_DRIFT_PSI
ORDER BY PSI DESC;

Step 3: Track prediction quality

CREATE OR REPLACE TABLE MODEL_PREDICTION_LOG (
  SCORED_AT DATE,
  CUSTOMER_ID NUMBER,
  PREDICTED_CHURN NUMBER,
  ACTUAL_CHURN NUMBER,
  LATENCY_MS NUMBER
);

INSERT INTO MODEL_PREDICTION_LOG
SELECT * FROM VALUES
  ('2026-04-01'::DATE, 101, 1, 1, 180),
  ('2026-04-01'::DATE, 102, 0, 0, 165),
  ('2026-04-02'::DATE, 103, 0, 1, 210),
  ('2026-04-02'::DATE, 104, 0, 0, 190),
  ('2026-04-03'::DATE, 105, 1, 0, 240)
AS v (SCORED_AT, CUSTOMER_ID, PREDICTED_CHURN, ACTUAL_CHURN, LATENCY_MS);

CREATE OR REPLACE TABLE MODEL_QUALITY_DAILY AS
SELECT
  SCORED_AT,
  COUNT(*) AS SCORED_ROWS,
  AVG(IFF(PREDICTED_CHURN = ACTUAL_CHURN, 1, 0)) AS ACCURACY,
  AVG(LATENCY_MS) AS AVG_LATENCY_MS
FROM MODEL_PREDICTION_LOG
GROUP BY SCORED_AT;

SELECT * FROM MODEL_QUALITY_DAILY ORDER BY SCORED_AT;

Step 4: Create retraining signals

CREATE OR REPLACE TABLE RETRAINING_REVIEW_QUEUE AS
SELECT
  CURRENT_TIMESTAMP() AS CHECKED_AT,
  MAX(IFF(PSI >= 0.25, 1, 0)) AS HAS_HIGH_DRIFT,
  (SELECT MIN(ACCURACY) FROM MODEL_QUALITY_DAILY) AS WORST_DAILY_ACCURACY,
  (SELECT MAX(AVG_LATENCY_MS) FROM MODEL_QUALITY_DAILY) AS WORST_AVG_LATENCY_MS,
  CASE
    WHEN MAX(IFF(PSI >= 0.25, 1, 0)) = 1 THEN 'REVIEW_RETRAINING'
    WHEN (SELECT MIN(ACCURACY) FROM MODEL_QUALITY_DAILY) < 0.70 THEN 'REVIEW_RETRAINING'
    ELSE 'CONTINUE_MONITORING'
  END AS RECOMMENDATION
FROM FEATURE_DRIFT_PSI;

SELECT * FROM RETRAINING_REVIEW_QUEUE;

Step 5: Cleanup

DROP TABLE IF EXISTS RETRAINING_REVIEW_QUEUE;
DROP TABLE IF EXISTS MODEL_QUALITY_DAILY;
DROP TABLE IF EXISTS MODEL_PREDICTION_LOG;
DROP TABLE IF EXISTS FEATURE_DRIFT_PSI;
DROP TABLE IF EXISTS PRODUCTION_FEATURE_PROFILE;
DROP TABLE IF EXISTS TRAINING_FEATURE_PROFILE;

Similar Posts