Table of contents
Open Table of contents
- PostgreSQL: Full SQL:2003 Compliance
- FIRST_VALUE/LAST_VALUE
- COUNT
- SUM
- AVG
- LAG & LEAD
- Window Function + GROUP BY
- Rolling/Moving Window Concepts
- Rolling/Moving Window
PostgreSQL: Full SQL:2003 Compliance
-- ✅ All standard window functions supported
SELECT
    customer_id,
    order_date,
    amount,
    -- Ranking functions
    ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY order_date) AS row_num,
    RANK() OVER (PARTITION BY customer_id ORDER BY amount DESC) AS amount_rank,
    DENSE_RANK() OVER (PARTITION BY customer_id ORDER BY amount DESC) AS amount_dense_rank,
    PERCENT_RANK() OVER (PARTITION BY customer_id ORDER BY amount) AS amount_pct_rank,
    CUME_DIST() OVER (PARTITION BY customer_id ORDER BY amount) AS amount_cume_dist,
    NTILE(4) OVER (PARTITION BY customer_id ORDER BY amount) AS amount_quartile,
    -- Offset functions
    LAG(amount, 1) OVER (PARTITION BY customer_id ORDER BY order_date) AS prev_amount,
    LEAD(amount, 2) OVER (PARTITION BY customer_id ORDER BY order_date) AS amount_two_ahead,
    FIRST_VALUE(amount) OVER (PARTITION BY customer_id ORDER BY order_date) AS first_amount,
    -- ⚠️ With the default frame (RANGE UNBOUNDED PRECEDING AND CURRENT ROW),
    -- LAST_VALUE returns the CURRENT row's value, not the partition's last.
    -- Add ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING for the true last value.
    LAST_VALUE(amount) OVER (PARTITION BY customer_id ORDER BY order_date) AS last_amount,
    NTH_VALUE(amount, 3) OVER (PARTITION BY customer_id ORDER BY order_date) AS third_amount,
    -- Aggregate window functions
    SUM(amount) OVER (PARTITION BY customer_id ORDER BY order_date) AS running_total,
    AVG(amount) OVER (PARTITION BY customer_id ORDER BY order_date ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) AS moving_avg_3,
    COUNT(*) OVER (PARTITION BY customer_id ORDER BY order_date RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS running_count
FROM orders;
FIRST_VALUE/LAST_VALUE
512. Game Play Analysis II
See LeetCode SQL Analysis - 512. Game Play Analysis II
COUNT
1303. Find the Team Size
SELECT
employee_id,
COUNT(employee_id) OVER (PARTITION BY team_id) AS team_size
FROM Employee
SUM
1204. Last Person to Fit in the Bus
-- 1204: cumulative weight in boarding order.
-- Default frame with ORDER BY is RANGE UNBOUNDED PRECEDING..CURRENT ROW,
-- which is the running total we want (turn is unique, so ties don't matter).
WITH cte AS (
SELECT
person_name,
turn,
SUM(weight) OVER (ORDER BY turn ASC) AS total_weight
FROM Queue
)
-- Among everyone who still fits under the 1000 limit, take the latest turn.
SELECT person_name
FROM cte
WHERE total_weight <= 1000
ORDER BY turn DESC
LIMIT 1
2004. The Number of Seniors and Juniors to Join the Company
-- 2004: hire cheapest candidates first within each experience level.
-- Running payroll per experience level; employee_id breaks salary ties
-- so the cumulative sum is deterministic.
WITH ranked_candidates AS (
    SELECT
        *,
        SUM(salary) OVER (PARTITION BY experience ORDER BY salary ASC, employee_id ASC) AS total_salary
    FROM Candidates
), hired_seniors AS (
    -- Seniors whose running payroll still fits the 70000 budget.
    SELECT COALESCE(total_salary, 0) AS total_salary
    FROM ranked_candidates
    WHERE
        experience = 'Senior'
        AND total_salary <= 70000
)
SELECT
    'Senior' AS experience,
    COUNT(*) AS accepted_candidates
FROM hired_seniors
UNION ALL
SELECT
    'Junior' AS experience,
    COUNT(*) AS accepted_candidates
-- BUG FIX: the original read "FROM cte", but no CTE named "cte" exists here.
FROM ranked_candidates
WHERE
    experience = 'Junior'
    -- Juniors spend whatever budget the hired seniors left over.
    AND total_salary <= 70000 - (SELECT COALESCE(MAX(total_salary), 0) FROM hired_seniors)
AVG
615. Average Salary: Departments VS Company
-- Fixture setup for problem 615.
-- Fixes: keyword casing ("If" -> "IF"), one multi-row INSERT per table instead of
-- six single-row statements, numeric literals unquoted, ISO-8601 date literals.
DROP TABLE IF EXISTS salary;
DROP TABLE IF EXISTS employee;
CREATE TABLE IF NOT EXISTS salary (id INT, employee_id INT, amount INT, pay_date DATE);
CREATE TABLE IF NOT EXISTS employee (employee_id INT, department_id INT);
TRUNCATE TABLE salary;
INSERT INTO salary (id, employee_id, amount, pay_date) VALUES
    (1, 1, 9000, '2017-03-31'),
    (2, 2, 6000, '2017-03-31'),
    (3, 3, 10000, '2017-03-31'),
    (4, 1, 7000, '2017-02-28'),
    (5, 2, 6000, '2017-02-28'),
    (6, 3, 8000, '2017-02-28');
TRUNCATE TABLE employee;
INSERT INTO employee (employee_id, department_id) VALUES
    (1, 1),
    (2, 2),
    (3, 2);
/*Compare the department average amount to the company average amount
- salary PK: id
- SOLUTION: Calculate the department average and the company average amount,
Then compare them.
*/
-- NOTE: MySQL dialect — DATE_FORMAT truncates to month, and MySQL permits
-- grouping by the SELECT alias (pay_month); stricter engines require the expression.
WITH
company_avg AS
(
-- Company-wide average salary per month.
SELECT
DATE_FORMAT(s.pay_date, '%Y-%m') AS pay_month,
AVG(s.amount) AS company_avg
FROM salary s
LEFT JOIN employee e
ON s.employee_id = e.employee_id
GROUP BY pay_month
),
department_avg AS
(
-- Per-department average salary per month.
SELECT
DATE_FORMAT(s.pay_date, '%Y-%m') AS pay_month,
e.department_id,
AVG(s.amount) AS department_avg
FROM salary s
LEFT JOIN employee e
ON s.employee_id = e.employee_id
GROUP BY e.department_id, pay_month
)
-- Join the two monthly averages and label each department/month.
SELECT
d.department_id,
d.pay_month,
CASE
WHEN department_avg = company_avg THEN 'same'
WHEN department_avg > company_avg THEN 'higher'
WHEN department_avg < company_avg THEN 'lower'
END AS comparison
FROM department_avg d
LEFT JOIN company_avg c
ON d.pay_month = c.pay_month;
LAG & LEAD
180. Consecutive Numbers
See LeetCode SQL Consecutive Records
1709. Biggest Window Between Visits
-- 1709: largest gap (in days) between consecutive visits per user.
WITH cte AS (
SELECT
*,
-- Next visit date for the same user; the problem caps the data at 2021-1-1,
-- so that sentinel closes the window after each user's last visit.
COALESCE(
LEAD(visit_date, 1) OVER (PARTITION BY user_id ORDER BY visit_date ASC),
'2021-1-1'::DATE
) AS date_lead
FROM UserVisits
)
SELECT
user_id,
-- PostgreSQL: DATE - DATE yields an integer number of days.
MAX(date_lead - visit_date) AS biggest_window
FROM cte
GROUP BY user_id
ORDER BY user_id
Window Function + GROUP BY
1082. Sales Analysis I
1355. Activity Participants
-- 1355: activities that are neither the most nor the least popular.
-- Window functions evaluate AFTER GROUP BY, so COUNT(*) below is the
-- per-activity participant count; DENSE_RANK handles ties in popularity.
WITH cte AS (
SELECT
activity,
DENSE_RANK() OVER (ORDER BY COUNT(*) ASC) participants_asc,
DENSE_RANK() OVER (ORDER BY COUNT(*) DESC) participants_desc
FROM Friends
GROUP BY activity
)
-- Rank 1 in either direction means min or max popularity — exclude both.
SELECT activity
FROM cte
WHERE
participants_asc != 1
AND participants_desc != 1
1164. Product Price at a Given Date
JOIN
-- 1164 (JOIN version): price of every product as of 2019-08-16.
-- Latest price change per product on or before the target date (rn = 1).
WITH latest_prices AS (
SELECT
product_id,
new_price,
ROW_NUMBER() OVER(PARTITION BY product_id ORDER BY change_date DESC) AS rn
FROM Products
WHERE change_date <= '2019-08-16'::DATE
),
-- Every product, including ones whose first change is after the date.
all_products AS (
SELECT DISTINCT product_id
FROM Products
)
SELECT
ap.product_id,
-- Products with no qualifying change keep the default price of 10.
COALESCE(lp.new_price, 10) as price
FROM all_products ap
LEFT JOIN latest_prices lp
ON ap.product_id = lp.product_id
AND lp.rn = 1
UNION
-- 1164 (UNION version).
-- BUG FIX: the original had a stray comma after the CTE's closing paren,
-- which is a syntax error before the main SELECT.
WITH latest_prices AS (
    SELECT
        product_id,
        new_price,
        ROW_NUMBER() OVER (PARTITION BY product_id ORDER BY change_date DESC) AS rn
    FROM Products
    WHERE change_date <= '2019-08-16'::DATE
)
-- Products that changed price on or before the date keep their latest price.
SELECT
    product_id,
    new_price AS price
FROM latest_prices
WHERE rn = 1
-- The two branches are disjoint, so UNION ALL skips a needless dedup sort;
-- DISTINCT below collapses products that appear in Products multiple times.
UNION ALL
SELECT DISTINCT
    product_id,
    10 AS price
FROM Products
WHERE product_id NOT IN (SELECT product_id FROM latest_prices)
Rolling/Moving Window Concepts
Similar logic to Python Sliding Window.
A rolling window (also called moving window or sliding window) calculates aggregates over a fixed-size range of rows that “slides” through your data.
Visual Example:
Data: [10, 20, 30, 40, 50, 60, 70]
3-day rolling sum:
Day 1: [10] → can't calculate (need 3 days)
Day 2: [10, 20] → can't calculate (need 3 days)
Day 3: [10, 20, 30] → 60
Day 4: [20, 30, 40] → 90
Day 5: [30, 40, 50] → 120
Day 6: [40, 50, 60] → 150
Day 7: [50, 60, 70] → 180
The window “slides” forward one row at a time.
Core Syntax & Common Mistakes
Core Syntax
aggregate_function(...) OVER (
[PARTITION BY column]
ORDER BY column
ROWS BETWEEN start AND end
)
Common Mistakes
Common mistake: applying the window function directly to a raw column while also using GROUP BY. The window function evaluates after grouping, so it cannot reference an ungrouped column — and conceptually you want the rolling total of the *group totals*, not of individual rows. Wrap the column in an aggregate first (e.g. `SUM(SUM(amount)) OVER (...)`).
aggregate_function(...) OVER (
[PARTITION BY column]
ORDER BY column
ROWS BETWEEN start AND end
)
GROUP BY column
Correct Version:
aggregate_function(aggregate_function(...)) OVER (
[PARTITION BY column]
ORDER BY column
ROWS BETWEEN start AND end
)
GROUP BY column
1. ROWS BETWEEN Clause
Common Patterns
-- Current row + 2 preceding rows (3-day window)
ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
-- Current row + 1 preceding + 1 following (centered 3-day window)
ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING
-- Current row + all preceding rows (cumulative)
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
-- All rows (entire partition)
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
2. Common Rolling Calculations
A. Rolling Sum
SELECT
date,
amount,
SUM(amount) OVER (
ORDER BY date
ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
) AS rolling_7day_sum
FROM sales;
Example:
date | amount | rolling_7day_sum
-----------|--------|------------------
2024-01-01 | 100 | 100
2024-01-02 | 150 | 250
2024-01-03 | 200 | 450
2024-01-04 | 120 | 570
2024-01-05 | 180 | 750
2024-01-06 | 140 | 890
2024-01-07 | 160 | 1050 (sum of 7 days)
2024-01-08 | 130 | 1080 (drops day 1, adds day 8)
B. Rolling Average (Moving Average)
SELECT
date,
price,
AVG(price) OVER (
ORDER BY date
ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
) AS moving_avg_3day
FROM stock_prices;
Example (3-day moving average):
date | price | moving_avg_3day
-----------|-------|----------------
2024-01-01 | 100 | 100.00
2024-01-02 | 110 | 105.00 (avg of 100, 110)
2024-01-03 | 90 | 100.00 (avg of 100, 110, 90)
2024-01-04 | 120 | 106.67 (avg of 110, 90, 120)
2024-01-05 | 130 | 113.33 (avg of 90, 120, 130)
C. Rolling Min/Max
SELECT
date,
temperature,
MAX(temperature) OVER (
ORDER BY date
ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
) AS max_temp_7days,
MIN(temperature) OVER (
ORDER BY date
ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
) AS min_temp_7days
FROM weather;
D. Rolling Count
SELECT
date,
user_id,
COUNT(*) OVER (
ORDER BY date
ROWS BETWEEN 29 PRECEDING AND CURRENT ROW
) AS active_users_30days
FROM logins;
E. Cumulative Sum (Running Total)
SELECT
date,
revenue,
SUM(revenue) OVER (
ORDER BY date
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) AS cumulative_revenue
FROM sales;
Example:
date | revenue | cumulative_revenue
-----------|---------|-------------------
2024-01-01 | 1000 | 1000
2024-01-02 | 1500 | 2500
2024-01-03 | 1200 | 3700
2024-01-04 | 1800 | 5500
3. PARTITION BY with Rolling Windows
Use case: Calculate rolling metrics separately for each group.
SELECT
product_id,
date,
sales,
AVG(sales) OVER (
PARTITION BY product_id
ORDER BY date
ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
) AS avg_sales_7days
FROM product_sales;
Example:
product_id | date | sales | avg_sales_7days
-----------|------------|-------|----------------
A | 2024-01-01 | 100 | 100.00
A | 2024-01-02 | 120 | 110.00
A | 2024-01-03 | 110 | 110.00
B | 2024-01-01 | 200 | 200.00 ← separate window for B
B | 2024-01-02 | 220 | 210.00
B | 2024-01-03 | 180 | 200.00
Each product has its own independent rolling window!
4. RANGE vs ROWS
ROWS: Physical row count
-- Exactly 3 rows
ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
RANGE: Logical value range
-- All rows within same value (handles ties)
RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
-- All rows within 7 days (date arithmetic)
RANGE BETWEEN INTERVAL '6 days' PRECEDING AND CURRENT ROW
Example difference:
Data with duplicate dates:
date | amount
-----------|-------
2024-01-01 | 100
2024-01-01 | 150 ← same date
2024-01-02 | 200
-- ROWS: treats as 3 separate rows
-- RANGE: groups rows with same date together
5. Real-World Use Cases
Stock Trading - Moving Averages
-- 50-day and 200-day moving averages
SELECT
date,
close_price,
AVG(close_price) OVER (
ORDER BY date
ROWS BETWEEN 49 PRECEDING AND CURRENT ROW
) AS ma_50,
AVG(close_price) OVER (
ORDER BY date
ROWS BETWEEN 199 PRECEDING AND CURRENT ROW
) AS ma_200
FROM stock_prices;
E-commerce - Revenue Trends
-- 7-day rolling revenue
SELECT
order_date,
SUM(amount) OVER (
ORDER BY order_date
ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
) AS revenue_7day
FROM orders;
SaaS - Active Users
-- Monthly Active Users (30-day rolling)
-- ⚠️ FIX: COUNT(DISTINCT ...) is NOT allowed as a window function in
-- PostgreSQL/MySQL — the original query would not run. Deduplicate to one row
-- per (date, user) first, then count rows in the window.
-- NOTE: a ROWS frame counts the preceding user-day rows, not calendar days;
-- for a true 30-day window use a RANGE frame over the date where supported.
SELECT
    date,
    COUNT(*) OVER (
        ORDER BY date
        ROWS BETWEEN 29 PRECEDING AND CURRENT ROW
    ) AS mau
FROM (SELECT DISTINCT date, user_id FROM user_activity) daily_users;
Healthcare - Patient Monitoring
-- 24-hour rolling average heart rate
SELECT
timestamp,
heart_rate,
AVG(heart_rate) OVER (
ORDER BY timestamp
ROWS BETWEEN 23 PRECEDING AND CURRENT ROW
) AS avg_heart_rate_24h
FROM patient_vitals;
6. Common Patterns Summary
| Pattern | Syntax | Use Case |
|---|---|---|
| N-day rolling sum | SUM(...) ROWS BETWEEN N-1 PRECEDING AND CURRENT ROW | Revenue trends |
| N-day moving average | AVG(...) ROWS BETWEEN N-1 PRECEDING AND CURRENT ROW | Stock prices, KPIs |
| Cumulative sum | SUM(...) ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW | Running totals |
| Centered moving avg | AVG(...) ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING | Smoothing data |
| Year-to-date | SUM(...) PARTITION BY YEAR(date) ... | Annual metrics |
7. Performance Tips
✅ DO:
- Index the `ORDER BY` column(s)
- Use `ROWS` instead of `RANGE` when possible (faster)
- Materialize results if used repeatedly
❌ AVOID:
- Rolling windows on unindexed columns
- Extremely large windows (e.g., 1000+ rows) on huge datasets
- Multiple different window sizes in same query (pre-aggregate instead)
Quick Reference Cheat Sheet
-- 7-day rolling sum
SUM(x) OVER (ORDER BY date ROWS BETWEEN 6 PRECEDING AND CURRENT ROW)
-- 3-day moving average
AVG(x) OVER (ORDER BY date ROWS BETWEEN 2 PRECEDING AND CURRENT ROW)
-- Running total
SUM(x) OVER (ORDER BY date ROWS UNBOUNDED PRECEDING)
-- Per-group 30-day rolling
AVG(x) OVER (PARTITION BY group ORDER BY date ROWS BETWEEN 29 PRECEDING AND CURRENT ROW)
-- Centered 5-point average
AVG(x) OVER (ORDER BY date ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING)
Rolling/Moving Window
1321. Restaurant Growth
Aggregation -> Window Function
-- 1321 (two-step version): aggregate per day first, then apply the rolling window.
WITH cte AS (
    -- Collapse multiple visits on the same day into one daily total.
    SELECT
        visited_on,
        SUM(amount) AS daily_amount
    FROM Customer
    GROUP BY visited_on
)
SELECT
    visited_on,
    -- 7-day rolling total: current day plus the 6 preceding days.
    SUM(daily_amount) OVER (
        ORDER BY visited_on
        ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
    ) AS amount,
    ROUND(AVG(daily_amount) OVER (
        ORDER BY visited_on
        ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
    ), 2) AS average_amount
FROM cte
-- FIX: OFFSET without ORDER BY is nondeterministic — order explicitly so the
-- first 6 days (incomplete windows) are the ones skipped.
ORDER BY visited_on
OFFSET 6
Aggregation + Window Function
- This is harder to understand but a valid solution
- Aggregation is executed first, then the window function; e.g.,
  `SUM(amount)` runs first, and then `SUM(SUM(amount))` is computed over the grouped rows
-- 1321 (nested-aggregate version): the inner SUM(amount) is the GROUP BY
-- aggregate; the outer SUM/AVG are window functions over those group totals.
WITH cte AS (
    SELECT
        visited_on,
        SUM(amount) AS daily_total,
        SUM(SUM(amount)) OVER (
            ORDER BY visited_on
            ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
        ) AS rolling_sum_7d,
        AVG(SUM(amount)) OVER (
            ORDER BY visited_on
            ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
        ) AS rolling_avg_7d
    FROM Customer
    GROUP BY visited_on
)
SELECT
    visited_on,
    rolling_sum_7d AS amount,
    ROUND(rolling_avg_7d, 2) AS average_amount
FROM cte
-- FIX: OFFSET without ORDER BY is nondeterministic — order explicitly so the
-- first 6 days (incomplete windows) are the ones skipped.
ORDER BY visited_on
OFFSET 6
Question: Assume you have a DataFrame df_engagement with columns user_id, login_date, and activity_count. Write a Python script to calculate the 7-day rolling average of activity counts for each user.
import pandas as pd

# Sample data for df_engagement: one row per (user, login day).
data = {
    'user_id': [1, 1, 1, 2, 2, 2, 2, 3, 3, 3],
    'login_date': ['2024-06-01', '2024-06-02', '2024-06-03', '2024-06-01', '2024-06-02', '2024-06-04', '2024-06-05', '2024-06-01', '2024-06-03', '2024-06-04'],
    'activity_count': [5, 6, 7, 1, 3, 2, 0, 3, 4, 5]
}

# Create DataFrame
df_engagement = pd.DataFrame(data)

# rolling(window='7D') requires real datetimes, so convert first.
df_engagement['login_date'] = pd.to_datetime(df_engagement['login_date'])

# Sort by user and date once (the original sorted twice — redundant).
df_engagement.sort_values(by=['user_id', 'login_date'], inplace=True)

# Display the DataFrame to understand what it looks like
print(df_engagement)

# A time-based rolling window needs login_date as the index.
df_engagement.set_index('login_date', inplace=True)

# 7-day rolling average per user; groupby keeps each user's window independent
# (the SQL equivalent of PARTITION BY user_id).
df_rolling_avg = (
    df_engagement.groupby('user_id')['activity_count']
    .rolling(window='7D')
    .mean()
    .reset_index(name='7-day rolling average')
)
print(df_rolling_avg)
-- Create a table named 'engagement'
-- (SQL fixture mirroring the pandas df_engagement above)
CREATE TABLE engagement (
user_id INT,
login_date DATE,
activity_count INT
);
-- Insert sample data into the 'engagement' table
INSERT INTO engagement (user_id, login_date, activity_count) VALUES
(1, '2024-06-01', 5),
(1, '2024-06-02', 6),
(1, '2024-06-03', 7),
(2, '2024-06-01', 1),
(2, '2024-06-02', 3),
(2, '2024-06-04', 2),
(2, '2024-06-05', 0),
(3, '2024-06-01', 3),
(3, '2024-06-03', 4),
(3, '2024-06-04', 5);
-- Ensure the data is indexed properly for performance
-- (composite index matches the PARTITION BY / ORDER BY of the rolling query below)
CREATE INDEX idx_user_login ON engagement(user_id, login_date);
-- Display the data to verify insertion
SELECT * FROM engagement;
-- MySQL syntax: a RANGE frame over a date interval covers the last 7 CALENDAR
-- days, correctly handling gaps in login dates (unlike ROWS 6 PRECEDING).
-- PostgreSQL spelling: RANGE BETWEEN INTERVAL '6 days' PRECEDING AND CURRENT ROW.
SELECT
user_id,
login_date,
activity_count,
AVG(activity_count) OVER (
PARTITION BY user_id
ORDER BY login_date ASC RANGE BETWEEN INTERVAL 6 DAY PRECEDING AND CURRENT ROW
) AS rolling_avg
FROM engagement;