Calculating Figures from dates

Afternoon all,

I am currently in the middle of a project and need to calculate Agency Workers Regulation (AWR).
I have a list of date ranges, from that I need to add up how many weeks a client has been with a customer, unless there has been a break of more than 6 weeks.

i.e.
CustID ClientID StartDate EndDate
1000 5000 10/05/2017 12/05/2017
1000 5000 16/05/2017 16/05/2017
1000 5000 25/05/2017 25/05/2017

1100 5100 01/05/2017 05/05/2017
1100 5100 22/05/2017 26/05/2017
1100 5100 10/07/2017 12/07/2017

1200 5200 01/05/2017 14/07/2017

Output should be:
CustID ClientID AWRWeeks
1000 5000 3
1100 5100 1
1200 5200 11

Hopefully that makes sense...

The SQL I have so far is:

SELECT
CustID,
ClientID,
DATEDIFF(ww, StartDate, EndDate) As AWRWeeks
ORDERBY StartDate

The 2 problems with the query at the moment are that less that a week output as 0 when I need it to output 1 & that it doesn't calculate a break... Any pointers as to how I should proceed??

Many thanks

Dave

This one uses recursive cte:

    ,cte1
  as (select custid
            ,clientid
            ,startdate
            ,enddate
            ,(datediff(day,startdate,enddate)/7)+1 as weeks
            ,row_number() over(partition by custid,clientid order by startdate desc) as rn
        from yourtable
     )
    ,cte2
  as (select a.custid
            ,a.clientid
            ,a.weeks
            ,a.rn
            ,datediff(week,b.enddate,a.startdate) as weeks2next
        from cte1 as a
             left outer join cte1 as b
                          on b.custid=a.custid
                         and b.clientid=a.clientid
                         and b.rn=a.rn+1
     )
    ,cte3
  as (select custid
            ,clientid
            ,weeks
            ,weeks2next
            ,rn
        from cte2
       where rn=1
      union all
      select a.custid
            ,a.clientid
            ,a.weeks
            ,a.weeks2next
            ,b.rn
        from cte3 as a
             inner join cte2 as b
                     on b.custid=a.custid
                    and b.clientid=a.clientid
                    and b.rn=a.rn+1
       where a.weeks2next<=6
     )
select custid
      ,clientid
      ,sum(weeks) as weeks
  from cte3
 group by custid
         ,clientid
;

Perhaps a query using lead/lag can be more efficient?!?

What version of SQL Server are you using?

Morning, thank for the replies.

Jason: I'm currently using SQL Server 2008 R2.

Bitsmed: Thank you, that looks to work but very slow to execute, I'm quite new to CTE so still trying to understand exactly what it does...

Many thanks

Dave

I don't have access to an instance of 2008R2 any more but I think the following should be compatible...

IF OBJECT_ID('tempdb..#TestData', 'U') IS NOT NULL 
DROP TABLE #TestData;

CREATE TABLE #TestData (
	CustID INT NOT NULL,
	ClientID INT NOT NULL,
	StartDate DATE NOT NULL,
	EndDate DATE NOT NULL,
	PRIMARY KEY CLUSTERED (CustID, ClientID, StartDate)
	);
INSERT #TestData (CustID, ClientID, StartDate, EndDate) VALUES 
	(1000, 5000, '05/10/2017', '05/12/2017'),
	(1000, 5000, '05/16/2017', '05/16/2017'),
	(1000, 5000, '05/25/2017', '05/25/2017'),
	(1100, 5100, '05/01/2017', '05/05/2017'),
	(1100, 5100, '05/22/2017', '05/26/2017'),
	(1100, 5100, '07/10/2017', '07/12/2017'),
	(1200, 5200, '05/01/2017', '07/14/2017');

--==========================================================

WITH 
	cte_AddRN AS (
		SELECT 
			td.CustID, td.ClientID, td.StartDate, td.EndDate,
			RN = ROW_NUMBER() OVER (PARTITION BY td.CustID, td.ClientID ORDER BY td.StartDate)
		FROM
			#TestData td
		),
	cte_Find6wkGaps AS (
		SELECT 
			ar1.CustID, ar1.ClientID, ar1.StartDate, ar1.EndDate, ar1.RN, 
			Weeks = ISNULL(NULLIF(CEILING(DATEDIFF(dd, ar1.StartDate, ar1.EndDate) / 7.0), 0), 1),
			StartRowGroup = IIF(DATEDIFF(dd, ISNULL(ar2.EndDate, '1900-01-01'), ar1.StartDate) / 7.0 > 6, 1, 0)
		FROM
			cte_AddRN ar1
			LEFT JOIN cte_AddRN ar2
				ON ar1.CustID = ar2.CustID
				AND ar1.ClientID = ar2.ClientID
				AND ar1.RN = ar2.RN + 1
		),
	cte_FillDateGroup AS (
		SELECT 
			fg.CustID, fg.ClientID, fg.Weeks,
			DateGroup = MAX(fg.RN * fg.StartRowGroup) OVER (PARTITION BY fg.CustID, fg.ClientID ORDER BY fg.StartDate)
		FROM
			cte_Find6wkGaps fg
		)
		SELECT TOP 1 WITH TIES 
			fdg.CustID, 
			fdg.ClientID, 
			SUM(fdg.Weeks)
		FROM
			cte_FillDateGroup fdg
		GROUP BY 
			fdg.CustID,
			fdg.ClientID,
			fdg.DateGroup
		ORDER BY 
			ROW_NUMBER() OVER (PARTITION BY fdg.CustID, fdg.ClientID ORDER BY fdg.DateGroup DESC);

Hi Jason,

Thanks for the reply, i'm getting the error " 'IIF' is not a recognised built-in function name " ?

Thanks

Dave

DOH!!! Yea, that was introduced in 2012... That's an easy fix though... See below...

IF OBJECT_ID('tempdb..#TestData', 'U') IS NOT NULL 
DROP TABLE #TestData;

CREATE TABLE #TestData (
	CustID INT NOT NULL,
	ClientID INT NOT NULL,
	StartDate DATE NOT NULL,
	EndDate DATE NOT NULL,
	PRIMARY KEY CLUSTERED (CustID, ClientID, StartDate)
	);
INSERT #TestData (CustID, ClientID, StartDate, EndDate) VALUES 
	(1000, 5000, '05/10/2017', '05/12/2017'),
	(1000, 5000, '05/16/2017', '05/16/2017'),
	(1000, 5000, '05/25/2017', '05/25/2017'),
	(1100, 5100, '05/01/2017', '05/05/2017'),
	(1100, 5100, '05/22/2017', '05/26/2017'),
	(1100, 5100, '07/10/2017', '07/12/2017'),
	(1200, 5200, '05/01/2017', '07/14/2017');

--==========================================================

WITH 
	cte_AddRN AS (
		SELECT 
			td.CustID, td.ClientID, td.StartDate, td.EndDate,
			RN = ROW_NUMBER() OVER (PARTITION BY td.CustID, td.ClientID ORDER BY td.StartDate)
		FROM
			#TestData td
		),
	cte_Find6wkGaps AS (
		SELECT 
			ar1.CustID, ar1.ClientID, ar1.StartDate, ar1.EndDate, ar1.RN, 
			Weeks = ISNULL(NULLIF(CEILING(DATEDIFF(dd, ar1.StartDate, ar1.EndDate) / 7.0), 0), 1),
			StartRowGroup = CASE WHEN DATEDIFF(dd, ISNULL(ar2.EndDate, '1900-01-01'), ar1.StartDate) / 7.0 > 6 THEN 1 ELSE 0 END
		FROM
			cte_AddRN ar1
			LEFT JOIN cte_AddRN ar2
				ON ar1.CustID = ar2.CustID
				AND ar1.ClientID = ar2.ClientID
				AND ar1.RN = ar2.RN + 1
		),
	cte_FillDateGroup AS (
		SELECT 
			fg.CustID, fg.ClientID, fg.Weeks,
			DateGroup = MAX(fg.RN * fg.StartRowGroup) OVER (PARTITION BY fg.CustID, fg.ClientID ORDER BY fg.StartDate)
		FROM
			cte_Find6wkGaps fg
		)
		SELECT TOP 1 WITH TIES 
			fdg.CustID, 
			fdg.ClientID, 
			SUM(fdg.Weeks)
		FROM
			cte_FillDateGroup fdg
		GROUP BY 
			fdg.CustID,
			fdg.ClientID,
			fdg.DateGroup
		ORDER BY 
			ROW_NUMBER() OVER (PARTITION BY fdg.CustID, fdg.ClientID ORDER BY fdg.DateGroup DESC);

Hi Jason,

thanks for the ammendment. Line 45 incorrect syntax near 'order'. I don't think cumulative sums are supported in 2008 r2. I've just read it may be possible to change the compatability setting?

EDIT: Cancel that, the highest compatability settings I can use is 100 which its already using.

Thanks

Dave

Like I said, I don't have access to a 2008R2 instance but I do recall windowed aggregates being supported. Window frames weren't introduced until 2012, but I'mm 99% sure that what I have written, should execute...

Where you able to execute the code in my 2nd post?

Hi Jason,

As it stands no it doesn't execute. The problem lies at this line

DateGroup = MAX(fg.RN * fg.StartRowGroup) OVER (PARTITION BY fg.CustID, fg.ClientID ORDER BY fg.StartDate)

Msg 102, Level 15, State 1, Line 45 Incorrect syntax near 'order'

However if I change it to the following it does execute.

DateGroup = MAX(fg.RN * fg.StartRowGroup) OVER (PARTITION BY fg.CustID, fg.ClientID)

Many thanks

Dave

Hi Jason,

I'm struggeling a bit here, I just about understand what & how your SQL works. I'm not sure the 'Find6WKGap' is working correctly, either its not finding the gaps between the 2 sets of dates or its not 'resetting' the weeks counter...

Thanks

Dave

Unfortunately, the following two lines of code aren't functionally equivalent.

DateGroup = MAX(fg.RN * fg.StartRowGroup) OVER (PARTITION BY fg.CustID, fg.ClientID ORDER BY fg.StartDate)
DateGroup = MAX(fg.RN * fg.StartRowGroup) OVER (PARTITION BY fg.CustID, fg.ClientID)

I'll see if I can find an alternate syntax...

Here are two options... 1st one is keeping w/ the CTEs and the 2nd replaces them w/ temp tables.
They both produce the desired result but one should run better than the other against your production data.

Test Data...

IF OBJECT_ID('tempdb..#TestData', 'U') IS NOT NULL 
DROP TABLE #TestData;

CREATE TABLE #TestData (
	CustID INT NOT NULL,
	ClientID INT NOT NULL,
	StartDate DATE NOT NULL,
	EndDate DATE NOT NULL,
	PRIMARY KEY CLUSTERED (CustID, ClientID, StartDate)
	);
INSERT #TestData (CustID, ClientID, StartDate, EndDate) VALUES 
	(1000, 5000, '05/10/2017', '05/12/2017'),
	(1000, 5000, '05/16/2017', '05/16/2017'),
	(1000, 5000, '05/25/2017', '05/25/2017'),
	(1100, 5100, '05/01/2017', '05/05/2017'),
	(1100, 5100, '05/22/2017', '05/26/2017'),
	(1100, 5100, '07/10/2017', '07/12/2017'),
	(1200, 5200, '05/01/2017', '07/14/2017');

--==========================================================

#1 CTE...

WITH 
	cte_AddRN AS (
		SELECT 
			td.CustID, td.ClientID, td.StartDate, td.EndDate,
			RN = ROW_NUMBER() OVER (PARTITION BY td.CustID, td.ClientID ORDER BY td.StartDate)
		FROM
			#TestData td
		),
	cte_Find6wkGaps AS (
		SELECT 
			ar1.CustID, ar1.ClientID, ar1.StartDate, ar1.EndDate, ar1.RN, 
			Weeks = ISNULL(NULLIF(CEILING(DATEDIFF(dd, ar1.StartDate, ar1.EndDate) / 7.0), 0), 1),
			StartRowGroup = CASE WHEN DATEDIFF(dd, ISNULL(ar2.EndDate, '1900-01-01'), ar1.StartDate) / 7.0 > 6 THEN 1 ELSE 0 END
		FROM
			cte_AddRN ar1
			LEFT JOIN cte_AddRN ar2
				ON ar1.CustID = ar2.CustID
				AND ar1.ClientID = ar2.ClientID
				AND ar1.RN = ar2.RN + 1
		),
	cte_FillDateGroup AS (
		SELECT  
			fg1.CustID, fg1.ClientID, fg1.StartDate, fg1.EndDate, fg1.RN, fg1.Weeks, fg1.StartRowGroup, x.DateGroup
		FROM
			cte_Find6wkGaps fg1
			OUTER APPLY (
						SELECT 
							DateGroup = MAX(fg2.StartRowGroup * fg2.RN)
						FROM 
							cte_Find6wkGaps fg2
						WHERE 
							fg1.CustID = fg2.CustID
							AND fg1.ClientID = fg2.ClientID
							AND fg1.EndDate >= fg2.EndDate
						) x
		)
		SELECT TOP 1 WITH TIES 
			fdg.CustID, 
			fdg.ClientID, 
			SUM(fdg.Weeks)
		FROM
			cte_FillDateGroup fdg
		GROUP BY 
			fdg.CustID,
			fdg.ClientID,
			fdg.DateGroup
		ORDER BY 
			ROW_NUMBER() OVER (PARTITION BY fdg.CustID, fdg.ClientID ORDER BY fdg.DateGroup DESC);

#2 Temp Tables...

IF OBJECT_ID('tempdb..#AddRN', 'U') IS NOT NULL 
DROP TABLE #AddRN;

IF OBJECT_ID('tempdb..#Find6wkGaps', 'U') IS NOT NULL 
DROP TABLE #Find6wkGaps;

IF OBJECT_ID('tempdb..#FillDateGroup', 'U') IS NOT NULL 
DROP TABLE #FillDateGroup;

SELECT 
	td.CustID, td.ClientID, td.StartDate, td.EndDate,
	RN = ISNULL(ROW_NUMBER() OVER (PARTITION BY td.CustID, td.ClientID ORDER BY td.StartDate), 0)
	INTO #AddRN
FROM
	#TestData td;

SELECT 
	ar1.CustID, ar1.ClientID, ar1.StartDate, ar1.EndDate, ar1.RN, 
	Weeks = ISNULL(NULLIF(CEILING(DATEDIFF(dd, ar1.StartDate, ar1.EndDate) / 7.0), 0), 1),
	StartRowGroup = CASE WHEN DATEDIFF(dd, ISNULL(ar2.EndDate, '1900-01-01'), ar1.StartDate) / 7.0 > 6 THEN 1 ELSE 0 END
	INTO #Find6wkGaps
FROM
	#AddRN ar1
	LEFT JOIN #AddRN ar2
		ON ar1.CustID = ar2.CustID
		AND ar1.ClientID = ar2.ClientID
		AND ar1.RN = ar2.RN + 1;

SELECT  
	CustID = ISNULL(fg1.CustID, 0), ClientID = ISNULL(fg1.ClientID, 0), StartDate = ISNULL(fg1.StartDate, '1900-01-01'), fg1.EndDate, fg1.RN, fg1.Weeks, fg1.StartRowGroup, DateGroup = ISNULL(x.DateGroup, 0)
	INTO #FillDateGroup
FROM
	#Find6wkGaps fg1
	OUTER APPLY (
				SELECT 
					DateGroup = MAX(fg2.StartRowGroup * fg2.RN)
				FROM 
					#Find6wkGaps fg2
				WHERE 
					fg1.CustID = fg2.CustID
					AND fg1.ClientID = fg2.ClientID
					AND fg1.EndDate >= fg2.EndDate
				) x
ALTER TABLE #FillDateGroup ADD PRIMARY KEY CLUSTERED (CustID ASC, ClientID ASC, DateGroup DESC, StartDate ASC);

SELECT TOP 1 WITH TIES 
	fdg.CustID, 
	fdg.ClientID, 
	SUM(fdg.Weeks)
FROM
	#FillDateGroup fdg
GROUP BY 
	fdg.CustID,
	fdg.ClientID,
	fdg.DateGroup
ORDER BY 
	ROW_NUMBER() OVER (PARTITION BY fdg.CustID, fdg.ClientID ORDER BY fdg.DateGroup DESC);