SQLTeam.com | Weblogs | Forums

Calculating Figures from dates


#1

Afternoon all,

I am currently in the middle of a project and need to calculate Agency Workers Regulation (AWR).
I have a list of date ranges, from that I need to add up how many weeks a client has been with a customer, unless there has been a break of more than 6 weeks.

i.e.
CustID ClientID StartDate EndDate
1000 5000 10/05/2017 12/05/2017
1000 5000 16/05/2017 16/05/2017
1000 5000 25/05/2017 25/05/2017

1100 5100 01/05/2017 05/05/2017
1100 5100 22/05/2017 26/05/2017
1100 5100 10/07/2017 12/07/2017

1200 5200 01/05/2017 14/07/2017

Output should be:
CustID ClientID AWRWeeks
1000 5000 3
1100 5100 1
1200 5200 11

Hopefully that makes sense...

The SQL I have so far is:

SELECT
CustID,
ClientID,
DATEDIFF(ww, StartDate, EndDate) As AWRWeeks
ORDERBY StartDate

The 2 problems with the query at the moment are that less that a week output as 0 when I need it to output 1 & that it doesn't calculate a break... Any pointers as to how I should proceed??

Many thanks

Dave


#2

This one uses recursive cte:

    ,cte1
  as (select custid
            ,clientid
            ,startdate
            ,enddate
            ,(datediff(day,startdate,enddate)/7)+1 as weeks
            ,row_number() over(partition by custid,clientid order by startdate desc) as rn
        from yourtable
     )
    ,cte2
  as (select a.custid
            ,a.clientid
            ,a.weeks
            ,a.rn
            ,datediff(week,b.enddate,a.startdate) as weeks2next
        from cte1 as a
             left outer join cte1 as b
                          on b.custid=a.custid
                         and b.clientid=a.clientid
                         and b.rn=a.rn+1
     )
    ,cte3
  as (select custid
            ,clientid
            ,weeks
            ,weeks2next
            ,rn
        from cte2
       where rn=1
      union all
      select a.custid
            ,a.clientid
            ,a.weeks
            ,a.weeks2next
            ,b.rn
        from cte3 as a
             inner join cte2 as b
                     on b.custid=a.custid
                    and b.clientid=a.clientid
                    and b.rn=a.rn+1
       where a.weeks2next<=6
     )
select custid
      ,clientid
      ,sum(weeks) as weeks
  from cte3
 group by custid
         ,clientid
;

Perhaps a query using lead/lag can be more efficient?!?


#3

What version of SQL Server are you using?


#4

Morning, thank for the replies.

Jason: I'm currently using SQL Server 2008 R2.

Bitsmed: Thank you, that looks to work but very slow to execute, I'm quite new to CTE so still trying to understand exactly what it does...

Many thanks

Dave


#5

I don't have access to an instance of 2008R2 any more but I think the following should be compatible...

IF OBJECT_ID('tempdb..#TestData', 'U') IS NOT NULL 
DROP TABLE #TestData;

CREATE TABLE #TestData (
	CustID INT NOT NULL,
	ClientID INT NOT NULL,
	StartDate DATE NOT NULL,
	EndDate DATE NOT NULL,
	PRIMARY KEY CLUSTERED (CustID, ClientID, StartDate)
	);
INSERT #TestData (CustID, ClientID, StartDate, EndDate) VALUES 
	(1000, 5000, '05/10/2017', '05/12/2017'),
	(1000, 5000, '05/16/2017', '05/16/2017'),
	(1000, 5000, '05/25/2017', '05/25/2017'),
	(1100, 5100, '05/01/2017', '05/05/2017'),
	(1100, 5100, '05/22/2017', '05/26/2017'),
	(1100, 5100, '07/10/2017', '07/12/2017'),
	(1200, 5200, '05/01/2017', '07/14/2017');

--==========================================================

WITH 
	cte_AddRN AS (
		SELECT 
			td.CustID, td.ClientID, td.StartDate, td.EndDate,
			RN = ROW_NUMBER() OVER (PARTITION BY td.CustID, td.ClientID ORDER BY td.StartDate)
		FROM
			#TestData td
		),
	cte_Find6wkGaps AS (
		SELECT 
			ar1.CustID, ar1.ClientID, ar1.StartDate, ar1.EndDate, ar1.RN, 
			Weeks = ISNULL(NULLIF(CEILING(DATEDIFF(dd, ar1.StartDate, ar1.EndDate) / 7.0), 0), 1),
			StartRowGroup = IIF(DATEDIFF(dd, ISNULL(ar2.EndDate, '1900-01-01'), ar1.StartDate) / 7.0 > 6, 1, 0)
		FROM
			cte_AddRN ar1
			LEFT JOIN cte_AddRN ar2
				ON ar1.CustID = ar2.CustID
				AND ar1.ClientID = ar2.ClientID
				AND ar1.RN = ar2.RN + 1
		),
	cte_FillDateGroup AS (
		SELECT 
			fg.CustID, fg.ClientID, fg.Weeks,
			DateGroup = MAX(fg.RN * fg.StartRowGroup) OVER (PARTITION BY fg.CustID, fg.ClientID ORDER BY fg.StartDate)
		FROM
			cte_Find6wkGaps fg
		)
		SELECT TOP 1 WITH TIES 
			fdg.CustID, 
			fdg.ClientID, 
			SUM(fdg.Weeks)
		FROM
			cte_FillDateGroup fdg
		GROUP BY 
			fdg.CustID,
			fdg.ClientID,
			fdg.DateGroup
		ORDER BY 
			ROW_NUMBER() OVER (PARTITION BY fdg.CustID, fdg.ClientID ORDER BY fdg.DateGroup DESC);

#6

Hi Jason,

Thanks for the reply, i'm getting the error " 'IIF' is not a recognised built-in function name " ?

Thanks

Dave


#7

DOH!!! Yea, that was introduced in 2012... That's an easy fix though... See below...

IF OBJECT_ID('tempdb..#TestData', 'U') IS NOT NULL 
DROP TABLE #TestData;

CREATE TABLE #TestData (
	CustID INT NOT NULL,
	ClientID INT NOT NULL,
	StartDate DATE NOT NULL,
	EndDate DATE NOT NULL,
	PRIMARY KEY CLUSTERED (CustID, ClientID, StartDate)
	);
INSERT #TestData (CustID, ClientID, StartDate, EndDate) VALUES 
	(1000, 5000, '05/10/2017', '05/12/2017'),
	(1000, 5000, '05/16/2017', '05/16/2017'),
	(1000, 5000, '05/25/2017', '05/25/2017'),
	(1100, 5100, '05/01/2017', '05/05/2017'),
	(1100, 5100, '05/22/2017', '05/26/2017'),
	(1100, 5100, '07/10/2017', '07/12/2017'),
	(1200, 5200, '05/01/2017', '07/14/2017');

--==========================================================

WITH 
	cte_AddRN AS (
		SELECT 
			td.CustID, td.ClientID, td.StartDate, td.EndDate,
			RN = ROW_NUMBER() OVER (PARTITION BY td.CustID, td.ClientID ORDER BY td.StartDate)
		FROM
			#TestData td
		),
	cte_Find6wkGaps AS (
		SELECT 
			ar1.CustID, ar1.ClientID, ar1.StartDate, ar1.EndDate, ar1.RN, 
			Weeks = ISNULL(NULLIF(CEILING(DATEDIFF(dd, ar1.StartDate, ar1.EndDate) / 7.0), 0), 1),
			StartRowGroup = CASE WHEN DATEDIFF(dd, ISNULL(ar2.EndDate, '1900-01-01'), ar1.StartDate) / 7.0 > 6 THEN 1 ELSE 0 END
		FROM
			cte_AddRN ar1
			LEFT JOIN cte_AddRN ar2
				ON ar1.CustID = ar2.CustID
				AND ar1.ClientID = ar2.ClientID
				AND ar1.RN = ar2.RN + 1
		),
	cte_FillDateGroup AS (
		SELECT 
			fg.CustID, fg.ClientID, fg.Weeks,
			DateGroup = MAX(fg.RN * fg.StartRowGroup) OVER (PARTITION BY fg.CustID, fg.ClientID ORDER BY fg.StartDate)
		FROM
			cte_Find6wkGaps fg
		)
		SELECT TOP 1 WITH TIES 
			fdg.CustID, 
			fdg.ClientID, 
			SUM(fdg.Weeks)
		FROM
			cte_FillDateGroup fdg
		GROUP BY 
			fdg.CustID,
			fdg.ClientID,
			fdg.DateGroup
		ORDER BY 
			ROW_NUMBER() OVER (PARTITION BY fdg.CustID, fdg.ClientID ORDER BY fdg.DateGroup DESC);

#8

Hi Jason,

thanks for the ammendment. Line 45 incorrect syntax near 'order'. I don't think cumulative sums are supported in 2008 r2. I've just read it may be possible to change the compatability setting?

EDIT: Cancel that, the highest compatability settings I can use is 100 which its already using.

Thanks

Dave


#9

Like I said, I don't have access to a 2008R2 instance but I do recall windowed aggregates being supported. Window frames weren't introduced until 2012, but I'mm 99% sure that what I have written, should execute...

Where you able to execute the code in my 2nd post?


#10

Hi Jason,

As it stands no it doesn't execute. The problem lies at this line

DateGroup = MAX(fg.RN * fg.StartRowGroup) OVER (PARTITION BY fg.CustID, fg.ClientID ORDER BY fg.StartDate)

Msg 102, Level 15, State 1, Line 45 Incorrect syntax near 'order'

However if I change it to the following it does execute.

DateGroup = MAX(fg.RN * fg.StartRowGroup) OVER (PARTITION BY fg.CustID, fg.ClientID)

Many thanks

Dave


#11

Hi Jason,

I'm struggeling a bit here, I just about understand what & how your SQL works. I'm not sure the 'Find6WKGap' is working correctly, either its not finding the gaps between the 2 sets of dates or its not 'resetting' the weeks counter...

Thanks

Dave


#12

Unfortunately, the following two lines of code aren't functionally equivalent.

DateGroup = MAX(fg.RN * fg.StartRowGroup) OVER (PARTITION BY fg.CustID, fg.ClientID ORDER BY fg.StartDate)
DateGroup = MAX(fg.RN * fg.StartRowGroup) OVER (PARTITION BY fg.CustID, fg.ClientID)

I'll see if I can find an alternate syntax...


#13

Here are two options... 1st one is keeping w/ the CTEs and the 2nd replaces them w/ temp tables.
They both produce the desired result but one should run better than the other against your production data.

Test Data...

IF OBJECT_ID('tempdb..#TestData', 'U') IS NOT NULL 
DROP TABLE #TestData;

CREATE TABLE #TestData (
	CustID INT NOT NULL,
	ClientID INT NOT NULL,
	StartDate DATE NOT NULL,
	EndDate DATE NOT NULL,
	PRIMARY KEY CLUSTERED (CustID, ClientID, StartDate)
	);
INSERT #TestData (CustID, ClientID, StartDate, EndDate) VALUES 
	(1000, 5000, '05/10/2017', '05/12/2017'),
	(1000, 5000, '05/16/2017', '05/16/2017'),
	(1000, 5000, '05/25/2017', '05/25/2017'),
	(1100, 5100, '05/01/2017', '05/05/2017'),
	(1100, 5100, '05/22/2017', '05/26/2017'),
	(1100, 5100, '07/10/2017', '07/12/2017'),
	(1200, 5200, '05/01/2017', '07/14/2017');

--==========================================================

#1 CTE...

WITH 
	cte_AddRN AS (
		SELECT 
			td.CustID, td.ClientID, td.StartDate, td.EndDate,
			RN = ROW_NUMBER() OVER (PARTITION BY td.CustID, td.ClientID ORDER BY td.StartDate)
		FROM
			#TestData td
		),
	cte_Find6wkGaps AS (
		SELECT 
			ar1.CustID, ar1.ClientID, ar1.StartDate, ar1.EndDate, ar1.RN, 
			Weeks = ISNULL(NULLIF(CEILING(DATEDIFF(dd, ar1.StartDate, ar1.EndDate) / 7.0), 0), 1),
			StartRowGroup = CASE WHEN DATEDIFF(dd, ISNULL(ar2.EndDate, '1900-01-01'), ar1.StartDate) / 7.0 > 6 THEN 1 ELSE 0 END
		FROM
			cte_AddRN ar1
			LEFT JOIN cte_AddRN ar2
				ON ar1.CustID = ar2.CustID
				AND ar1.ClientID = ar2.ClientID
				AND ar1.RN = ar2.RN + 1
		),
	cte_FillDateGroup AS (
		SELECT  
			fg1.CustID, fg1.ClientID, fg1.StartDate, fg1.EndDate, fg1.RN, fg1.Weeks, fg1.StartRowGroup, x.DateGroup
		FROM
			cte_Find6wkGaps fg1
			OUTER APPLY (
						SELECT 
							DateGroup = MAX(fg2.StartRowGroup * fg2.RN)
						FROM 
							cte_Find6wkGaps fg2
						WHERE 
							fg1.CustID = fg2.CustID
							AND fg1.ClientID = fg2.ClientID
							AND fg1.EndDate >= fg2.EndDate
						) x
		)
		SELECT TOP 1 WITH TIES 
			fdg.CustID, 
			fdg.ClientID, 
			SUM(fdg.Weeks)
		FROM
			cte_FillDateGroup fdg
		GROUP BY 
			fdg.CustID,
			fdg.ClientID,
			fdg.DateGroup
		ORDER BY 
			ROW_NUMBER() OVER (PARTITION BY fdg.CustID, fdg.ClientID ORDER BY fdg.DateGroup DESC);

#2 Temp Tables...

IF OBJECT_ID('tempdb..#AddRN', 'U') IS NOT NULL 
DROP TABLE #AddRN;

IF OBJECT_ID('tempdb..#Find6wkGaps', 'U') IS NOT NULL 
DROP TABLE #Find6wkGaps;

IF OBJECT_ID('tempdb..#FillDateGroup', 'U') IS NOT NULL 
DROP TABLE #FillDateGroup;

SELECT 
	td.CustID, td.ClientID, td.StartDate, td.EndDate,
	RN = ISNULL(ROW_NUMBER() OVER (PARTITION BY td.CustID, td.ClientID ORDER BY td.StartDate), 0)
	INTO #AddRN
FROM
	#TestData td;

SELECT 
	ar1.CustID, ar1.ClientID, ar1.StartDate, ar1.EndDate, ar1.RN, 
	Weeks = ISNULL(NULLIF(CEILING(DATEDIFF(dd, ar1.StartDate, ar1.EndDate) / 7.0), 0), 1),
	StartRowGroup = CASE WHEN DATEDIFF(dd, ISNULL(ar2.EndDate, '1900-01-01'), ar1.StartDate) / 7.0 > 6 THEN 1 ELSE 0 END
	INTO #Find6wkGaps
FROM
	#AddRN ar1
	LEFT JOIN #AddRN ar2
		ON ar1.CustID = ar2.CustID
		AND ar1.ClientID = ar2.ClientID
		AND ar1.RN = ar2.RN + 1;

SELECT  
	CustID = ISNULL(fg1.CustID, 0), ClientID = ISNULL(fg1.ClientID, 0), StartDate = ISNULL(fg1.StartDate, '1900-01-01'), fg1.EndDate, fg1.RN, fg1.Weeks, fg1.StartRowGroup, DateGroup = ISNULL(x.DateGroup, 0)
	INTO #FillDateGroup
FROM
	#Find6wkGaps fg1
	OUTER APPLY (
				SELECT 
					DateGroup = MAX(fg2.StartRowGroup * fg2.RN)
				FROM 
					#Find6wkGaps fg2
				WHERE 
					fg1.CustID = fg2.CustID
					AND fg1.ClientID = fg2.ClientID
					AND fg1.EndDate >= fg2.EndDate
				) x
ALTER TABLE #FillDateGroup ADD PRIMARY KEY CLUSTERED (CustID ASC, ClientID ASC, DateGroup DESC, StartDate ASC);

SELECT TOP 1 WITH TIES 
	fdg.CustID, 
	fdg.ClientID, 
	SUM(fdg.Weeks)
FROM
	#FillDateGroup fdg
GROUP BY 
	fdg.CustID,
	fdg.ClientID,
	fdg.DateGroup
ORDER BY 
	ROW_NUMBER() OVER (PARTITION BY fdg.CustID, fdg.ClientID ORDER BY fdg.DateGroup DESC);