SQLTeam.com | Weblogs | Forums

Finding the largest, continuous set of data within a dataset

I am attempting to find all seasonal sales for a product. However, sometimes outside of the season there are peaks in sales. I've managed to get a dataset to look like this. If the sales are above average they are seasonal & their value is displayed. If below average, they are just $0.

The catch is that I cannot just call MIN date & MAX date on this dataset. The season began on 09/01 but the minimum date with sales above average is 07/20.

Is there a way to filter by the single largest cluster of sales?

CREATE TABLE #seasonalsales ( [Date] date, [Sales] decimal(18,2) ) ;

INSERT INTO #seasonalsales ( [Date], [Sales] ) VALUES 
	('2022-01-01', '0')
	,('2022-01-02', '0')
	,('2022-01-03', '0')
	,('2022-01-04', '0')
	,('2022-01-05', '0')
	,('2022-01-06', '0')
	,('2022-01-07', '0')
	,('2022-01-08', '0')
	,('2022-01-09', '0')
	,('2022-01-10', '0')
	,('2022-01-11', '0')
	,('2022-01-12', '0')
	,('2022-01-13', '0')
	,('2022-01-14', '0')
	,('2022-01-15', '0')
	,('2022-01-16', '0')
	,('2022-01-17', '0')
	,('2022-01-18', '0')
	,('2022-01-19', '0')
	,('2022-01-20', '0')
	,('2022-01-21', '0')
	,('2022-01-22', '0')
	,('2022-01-23', '0')
	,('2022-01-24', '0')
	,('2022-01-25', '0')
	,('2022-01-26', '0')
	,('2022-01-27', '0')
	,('2022-01-28', '0')
	,('2022-01-29', '0')
	,('2022-01-30', '0')
	,('2022-01-31', '0')
	,('2022-02-01', '0')
	,('2022-02-02', '0')
	,('2022-02-03', '0')
	,('2022-02-04', '0')
	,('2022-02-05', '0')
	,('2022-02-06', '0')
	,('2022-02-07', '0')
	,('2022-02-08', '0')
	,('2022-02-09', '0')
	,('2022-02-10', '0')
	,('2022-02-11', '0')
	,('2022-02-12', '0')
	,('2022-02-13', '0')
	,('2022-02-14', '0')
	,('2022-02-15', '0')
	,('2022-02-16', '0')
	,('2022-02-17', '0')
	,('2022-02-18', '0')
	,('2022-02-19', '0')
	,('2022-02-20', '0')
	,('2022-02-21', '0')
	,('2022-02-22', '0')
	,('2022-02-23', '0')
	,('2022-02-25', '0')
	,('2022-02-26', '0')
	,('2022-02-27', '0')
	,('2022-02-28', '0')
	,('2022-03-01', '0')
	,('2022-03-02', '0')
	,('2022-03-03', '0')
	,('2022-03-04', '0')
	,('2022-03-06', '0')
	,('2022-03-08', '0')
	,('2022-03-09', '0')
	,('2022-03-10', '0')
	,('2022-03-12', '0')
	,('2022-03-13', '0')
	,('2022-03-14', '0')
	,('2022-03-15', '0')
	,('2022-03-16', '0')
	,('2022-03-18', '0')
	,('2022-03-19', '0')
	,('2022-03-20', '0')
	,('2022-03-21', '0')
	,('2022-03-22', '0')
	,('2022-03-23', '0')
	,('2022-03-24', '0')
	,('2022-03-25', '0')
	,('2022-03-26', '0')
	,('2022-03-27', '0')
	,('2022-03-30', '0')
	,('2022-03-31', '0')
	,('2022-04-01', '0')
	,('2022-04-02', '0')
	,('2022-04-03', '0')
	,('2022-04-04', '0')
	,('2022-04-05', '0')
	,('2022-04-07', '0')
	,('2022-04-08', '0')
	,('2022-04-09', '0')
	,('2022-04-10', '0')
	,('2022-04-11', '0')
	,('2022-04-17', '0')
	,('2022-04-20', '0')
	,('2022-04-22', '0')
	,('2022-04-23', '0')
	,('2022-04-24', '0')
	,('2022-04-25', '0')
	,('2022-04-27', '0')
	,('2022-04-29', '0')
	,('2022-04-30', '0')
	,('2022-05-01', '0')
	,('2022-05-02', '0')
	,('2022-05-05', '0')
	,('2022-05-07', '0')
	,('2022-05-08', '0')
	,('2022-05-09', '0')
	,('2022-05-11', '0')
	,('2022-05-14', '0')
	,('2022-05-15', '0')
	,('2022-05-16', '0')
	,('2022-05-17', '0')
	,('2022-05-20', '0')
	,('2022-05-22', '0')
	,('2022-05-23', '0')
	,('2022-05-26', '0')
	,('2022-05-27', '0')
	,('2022-05-29', '0')
	,('2022-05-30', '0')
	,('2022-05-31', '0')
	,('2022-06-02', '0')
	,('2022-06-05', '0')
	,('2022-06-10', '0')
	,('2022-06-11', '0')
	,('2022-06-13', '0')
	,('2022-06-14', '0')
	,('2022-06-15', '0')
	,('2022-06-16', '0')
	,('2022-06-17', '0')
	,('2022-06-18', '0')
	,('2022-06-21', '0')
	,('2022-06-23', '0')
	,('2022-06-25', '0')
	,('2022-06-27', '0')
	,('2022-06-28', '0')
	,('2022-06-29', '0')
	,('2022-07-01', '0')
	,('2022-07-03', '0')
	,('2022-07-04', '0')
	,('2022-07-05', '0')
	,('2022-07-06', '0')
	,('2022-07-07', '0')
	,('2022-07-08', '0')
	,('2022-07-09', '0')
	,('2022-07-10', '0')
	,('2022-07-12', '0')
	,('2022-07-13', '0')
	,('2022-07-15', '0')
	,('2022-07-16', '0')
	,('2022-07-17', '0')
	,('2022-07-20', '20')
	,('2022-07-21', '25')
	,('2022-07-22', '0')
	,('2022-07-23', '0')
	,('2022-07-24', '0')
	,('2022-07-25', '0')
	,('2022-07-27', '0')
	,('2022-07-28', '0')
	,('2022-07-29', '0')
	,('2022-07-30', '0')
	,('2022-07-31', '0')
	,('2022-08-01', '0')
	,('2022-08-02', '0')
	,('2022-08-03', '0')
	,('2022-08-04', '0')
	,('2022-08-05', '0')
	,('2022-08-06', '0')
	,('2022-08-07', '0')
	,('2022-08-08', '0')
	,('2022-08-09', '0')
	,('2022-08-10', '0')
	,('2022-08-11', '0')
	,('2022-08-12', '0')
	,('2022-08-13', '0')
	,('2022-08-14', '0')
	,('2022-08-15', '0')
	,('2022-08-16', '0')
	,('2022-08-17', '0')
	,('2022-08-18', '46.45')
	,('2022-08-19', '0')
	,('2022-08-20', '0')
	,('2022-08-21', '0')
	,('2022-08-22', '0')
	,('2022-08-23', '0')
	,('2022-08-24', '0')
	,('2022-08-25', '0')
	,('2022-08-26', '0')
	,('2022-08-27', '0')
	,('2022-08-28', '0')
	,('2022-08-29', '0')
	,('2022-08-30', '0')
	,('2022-08-31', '0')
	,('2022-09-01', '50.058')
	,('2022-09-02', '58.183')
	,('2022-09-03', '55.499')
	,('2022-09-04', '92.504')
	,('2022-09-05', '109.957')
	,('2022-09-06', '103.916')
	,('2022-09-07', '86.919')
	,('2022-09-08', '97.134')
	,('2022-09-09', '146.538')
	,('2022-09-10', '162.744')
	,('2022-09-11', '215.066')
	,('2022-09-12', '187.433')
	,('2022-09-13', '176.911')
	,('2022-09-14', '201.933')
	,('2022-09-15', '210.253')
	,('2022-09-16', '223.564')
	,('2022-09-17', '207.942')
	,('2022-09-18', '351.673')
	,('2022-09-19', '291.705')
	,('2022-09-20', '360.208')
	,('2022-09-21', '298.442')
	,('2022-09-22', '368.954')
	,('2022-09-23', '361.624')
	,('2022-09-24', '389.761')
	,('2022-09-25', '477.223')
	,('2022-09-26', '526.274')
	,('2022-09-27', '440.082')
	,('2022-09-28', '489.786')
	,('2022-09-29', '384.759')
	,('2022-09-30', '374.01')
	,('2022-10-01', '391.506')
	,('2022-10-02', '70.885')


SELECT 
*
FROM #seasonalsales

You can use row_number function to assign a group id to clusters like this:

SELECT 
    *, 
    GroupId = 
        ROW_NUMBER() OVER (ORDER BY date)
        - 
        ROW_NUMBER() OVER(PARTITION BY CASE WHEN sales = 0 THEN 0 ELSE 1 END ORDER BY date )
FROM #seasonalsales

Then, you can use the groupId to do pretty much anything you want to do. For example, to see the count of dates in each cluster, do the following:

;WITH cte AS
(
    SELECT 
        *, 
        GroupId = 
            ROW_NUMBER() OVER (ORDER BY date)
            - 
            ROW_NUMBER() OVER(PARTITION BY CASE WHEN sales = 0 THEN 0 ELSE 1 END ORDER BY date )
    FROM #seasonalsales
)
SELECT 
    groupStart = MIN(date), 
    groupEnd   = MAX(date), 
    ClusterDays= COUNT(*), 
    GroupId
FROM
    cte 
WHERE 
    cte.Sales <> 0
GROUP BY
    cte.GroupId;