To assign number incrementally and re-inititialize when column value changes

Mateen · August 31, 2020, 7:01am

SQL Server 2008:

--alter proc question as

declare @t table (scanCode varchar(6), dates datetime, flag varchar(1))

insert @t ( scanCode, dates, flag)
select '182086','01 Jul 2020','P' union all
select'182086','02 Jul 2020','P' union all
select'182086','03 Jul 2020','A' union all
select'182086','04 Jul 2020','A' union all
select'182086','06 Jul 2020','P' union all
select'182086','07 Jul 2020','P' union all
select'182086','08 Jul 2020','P' union all
select'182086','09 Jul 2020','P' union all
select'182086','10 Jul 2020','A' union all
select'182086','11 Jul 2020','A' union all
select'182086','13 Jul 2020','A' union all
select'182086','14 Jul 2020','A'

select scanCode
, dates
, flag
, prn = row_number() over (partition by scanCode,flag order by scanCode, dates)
from @t t
order by t.dates

--currently the result is

/*
scanCode dates flag prn
182086 2020/07/01 P 1
182086 2020/07/02 P 2
182086 2020/07/03 A 1
182086 2020/07/04 A 2
182086 2020/07/06 P 3
182086 2020/07/07 P 4
182086 2020/07/08 P 5
182086 2020/07/09 P 6
182086 2020/07/10 A 3
182086 2020/07/11 A 4
182086 2020/07/13 A 5
182086 2020/07/14 A 6
*/

-- I want the result to be

/*
scanCode dates flag prn
182086 2020/07/01 P 1
182086 2020/07/02 P 2
182086 2020/07/03 A 1
182086 2020/07/04 A 2
182086 2020/07/06 P 1
182086 2020/07/07 P 2
182086 2020/07/08 P 3
182086 2020/07/09 P 4
182086 2020/07/10 A 1
182086 2020/07/11 A 2
182086 2020/07/13 A 3
182086 2020/07/14 A 4
*/

mike01 · August 31, 2020, 12:58pm

;with cte as(
select scanCode
, dates
, flag
, lag(scanCode) over (order by ScanCode, dates) PrevScanCode
, lag(flag) over (order by ScanCode, dates) Prevflag
from @t t),
cte1 as (
select scanCode
, dates
, flag
,Sum(Case when ScanCode = PrevScanCode and Flag = PrevFlag then 0 else 1 end ) over (order by scanCode, flag, dates rows unbounded preceding) as change
from cte)

select scanCode,
dates,
flag,
row_number() over (partition by change order by scancode, flag, dates) as rowID
from cte1
order by 1, 2

harishgg1 · August 31, 2020, 1:09pm

hi

Nice Mike

i just did some formatting .. please excuse me

;with cte as
    (
	select 
	      scanCode
        , dates
        , flag
        , lag(scanCode) over (order by ScanCode, dates) PrevScanCode
        , lag(flag)     over (order by ScanCode, dates) Prevflag
    from 
	    @t t
		),
   cte1 as 
        (
         select   scanCode
                , dates
                , flag
                , Sum(Case when ScanCode = PrevScanCode and Flag = PrevFlag then 0 else 1 end ) over (order by scanCode, flag, dates rows unbounded preceding) as change
         from 
		     cte
			 )
select 
     scanCode
   , dates
   , flag
   , row_number() over (partition by change order by scancode, flag, dates) as rowID
from 
    cte1
order by 
    1, 2

harishgg1 · August 31, 2020, 2:48pm

hi

this is another way of doing it .. using recursive cte .. please take it as demo purpose
.. Jeff please forgive me for the ultimate SIN ..

; with cte_rn  as
( 
select scancode,dates,flag , ROW_NUMBER() over( order by dates ) as rn, lag(flag) over(order by dates ) as lag_flag  from @t 
) , cte_grp as 
(
SELECT scancode,dates,flag,rn,lag_flag ,1 AS grp FROM   cte_rn  WHERE  rn = 1        
union all 
select a.scancode,a.dates,a.flag,a.rn,a.lag_flag ,case when a.lag_flag <> a.flag then b.grp + 1 else b.grp end as grp  FROM   cte_rn a  JOIN cte_grp b   ON a.rn = b.rn + 1
)
select scancode,dates,flag, ROW_NUMBER() over(partition by grp order by dates,grp)  from cte_grp

Mateen · August 31, 2020, 4:23pm

Thanks you all guys.
The solutions do work in SQL Server 2012 but unfortunately I want in sql server 2008
as I stated in the first line in my question.

ScottPletcher · August 31, 2020, 5:31pm

This should work, although I can't guarantee its efficiency. Hopefully it will perform as well as you need it to.

if object_id('tempdb.dbo.#first_rows') is not null
    drop table #first_rows

select identity(int, 1, 1) AS set#, t1.*
into #first_rows
from @t t1
outer apply (
    select top (1) *
    from @t t2
    where t2.dates < t1.dates
    order by dates desc
) as oa1
where oa1.dates is null or oa1.scanCode <> t1.scanCode or oa1.flag <> t1.flag
create unique clustered index first_rows__cl on #first_rows ( scanCode, dates ) with ( fillfactor = 100 );

--select * from #first_rows

select t.*, row_number() over(partition by ca1.set# order by t.dates) as prn
from @t t
cross apply (
    select top (1) fr.*
    from #first_rows fr 
    where fr.scanCode = t.scanCode and fr.dates <= t.dates
    order by fr.dates desc
) as ca1

Mateen · September 1, 2020, 4:47am

Yes! this worked exactly as I wanted.

Thank you very much

harishgg1 · September 1, 2020, 9:14am

hi mateen

i know Scott has given a solution for 2008 way .. many thanks to mike also for 2012 way ..

this is also ANOTHER 2008 way ..
Scott's is in 2 steps .. this is in 1 step ..

for any reason if this helps !! great :+

; with cte as 
( 
select *,ROW_NUMBER() over(order by dates ) as rn  from @t 
) , cte_lag as 
(
select row_number() over(order by a.dates ) as rn1, a.scanCode, a.dates,case when a.flag <> b.flag then 1 else 0 end as goingto_sum,a.flag as curr_flag from cte a left join cte b on a.rn = b.rn +1 
) , cte_sum_prev as 
( select   b.curr_flag,SUM(t.goingto_sum) as grp,b.scanCode,b.dates from cte_lag b inner join cte_lag t  on b.rn1 >= t.rn1 group by b.scanCode,b.dates,b.curr_flag
)
select scanCode, dates, curr_flag ,ROW_NUMBER() over(partition by grp order by dates) as rn_ok from cte_sum_prev

Mateen · September 1, 2020, 10:48am

This also works.

Great!

Mateen · September 1, 2020, 10:54am

BTW:

I actually needed my way of sequencing where flag='A' (absent)
other flags not so important.
If so then exploring net I found a simple query which is sequencing my way where flag='A'

select scancode, dates,flag,
row_number() over (partition by scancode, grp order by dates) as row_num
from (select t.*,
(row_number() over (order by dates) -
row_number() over (partition by scancode,flag order by dates)
) as grp
from @t t
) t
order by dates

Mateen · September 1, 2020, 11:07am

What I am upto?

To find employee who has been absent consecutive for nth days (say 2 or greater) with their starting and ending date.

How to find:
This may not be elegant but this what I think of.

To all above solutions add simple "serial_number" (rownumber function order by scancode,date)
and take into CTE.
then
a) by applying max on rn_ok easily find the row
scancode, date (ending_date) absentcount serial_number
182086 2020/07/02 2 2
182086 2020/07/14 4 12

b) use a subquery to find startdate whoose serial_number will always be equal to the following expression
start_date = serial_number - absent_count+1

harishgg1 · September 1, 2020, 11:24am

Anything we can discuss .. !! please let me know

there is a lot of back and forth . .. here in the forum ..
makes life much easier and simpler to chat live with each other and fix any issues !!

i can remote desktop to your machine also !!

Mateen · September 1, 2020, 12:03pm

<<i can remote desktop to your machine also !!>>
Great!

Whenever I am stuck I will certainly post a question for help here.

This is a great forum.

harishgg1 · September 1, 2020, 1:17pm

hi

i tried to do what you are trying !!

Scenario 1 .. Data .. Please click arrow to the left

drop table sample_data 
go 

create table sample_data 
(
employee_id int, 
start_date date , 
end_date date 
)
go 

insert into sample_data select 100 , '2010-09-10','2010-09-15'
insert into sample_data select 200 , '2010-09-10','2010-09-11'
insert into sample_data select 300 , '2010-09-10','2010-09-20'

select 'SQL Output',
   * 
from 
   sample_data 
where 
   datediff(dd,start_date , end_date  ) > = 2 
go

Scenario 2 .. Data .. Please click arrow to the left

drop table sample_data 
go 

create table sample_data 
(
employee_id int, 
date_entry date 
)
go 

insert into sample_data select 100 , '2010-09-10'
insert into sample_data select 100 , '2010-09-15'

insert into sample_data select 200 , '2010-09-10'
insert into sample_data select 200 , '2010-09-11'

; with cte_rn as 
( 
select ROW_NUMBER() over(partition by employee_id order by date_entry)  as rn, * from sample_data
) 
select 'SQL Output ' , 
   a.* 
from 
  cte_rn a 
    join 
  cte_rn b 
     on a.employee_id = b.employee_id and a.rn+1 = b.rn 
where 
  datediff(dd,a.date_entry ,b.date_entry  ) > = 2 
go

Mateen · September 2, 2020, 4:40am

Brilliant idea!!!

As soon as I create my query on actual data, I will post my solution with table schema and actual one month data (insert statement in text file) and hope you hard working helping people come with better and faster solution.

Thanks to you and all.

Mateen · September 2, 2020, 11:54am

declare @t table (scanCode varchar(6), dates datetime, flag varchar(1))

insert @t ( scanCode, dates, flag)
select '182086','01 Jul 2020','P' union all
select'182086','02 Jul 2020','P' union all
select'182086','03 Jul 2020','A' union all
select'182086','04 Jul 2020','A' union all
select'182086','06 Jul 2020','P' union all
select'182086','07 Jul 2020','P' union all
select'182086','08 Jul 2020','P' union all
select'182086','09 Jul 2020','P' union all
select'182086','10 Jul 2020','A' union all
select'182086','11 Jul 2020','A' union all
select'182086','13 Jul 2020','A' union all
select'182086','14 Jul 2020','A'

;WITH
N AS
(
SELECT
*,
rn1 = ROW_NUMBER() OVER (
PARTITION BY T.scanCode
ORDER BY T.dates),
rn2 = ROW_NUMBER() OVER (
PARTITION BY T.scanCode
ORDER BY T.flag, T.dates)
FROM @t AS T
)
,
N2 as (
SELECT
N.scanCode,
N.dates,
N.flag,
prn = ROW_NUMBER() OVER (
PARTITION BY N.scanCode, (N.rn1 - N.rn2)
ORDER BY N.dates)
, serialNumber = row_number() over(order by n.scanCode, n.dates)
FROM N
where flag='A'
),
final as
(
select scanCode, prn = max(prn)
from n2
where prn = 2
group by scanCode
)
select f.scanCode
, start_date = ( select dates
from N2 N3
where N3.serialNumber = N2.serialNumber - n2.prn+1)
, end_date = n2.dates
, absentcount = n2.prn
from final f
join n2 on (f.scanCode = n2.scanCode and f.prn=n2.prn)

scanCode start_date end_date absentcount
182086 2020-07-03 2020-07-04 2
182086 2020-07-10 2020-07-11 2 --wrong
182086 2020-07-10 2020-07-14 4 --correct

Mateen · September 2, 2020, 12:19pm

Hi harishgg1

incorrect result
scanCode start_date end_date absentcount
182086 2020-07-03 2020-07-04 2
182086 2020-07-10 2020-07-11 2

correct result
scanCode start_date end_date absentcount
182086 2020-07-03 2020-07-04 2
182086 2020-07-10 2020-07-14 4

I am stuck again.

Help please.

harishgg1 · September 2, 2020, 1:36pm

hi mateen

there is a problem with your data ..
how are you getting this result
correct result
scanCode start_date end_date absentcount
182086 2020-07-03 2020-07-04 2
182086 2020-07-10 2020-07-14 4

for this 182086 2020-07-03 2020-07-04 2 .. ok !!!!
for this 182086 2020-07-10 2020-07-14 4 ... there is gap in data 10 , 11 ,13 ,14 ..

so i changed the data you gave to 10,11,12,13 ...

This is my attempt ..

please click arrow to the left for my Changed Data

declare @t table (scanCode varchar(6), dates datetime, flag varchar(1))

insert @t ( scanCode, dates, flag)
select '182086','01 Jul 2020','P' union all
select'182086','02 Jul 2020','P' union all
select'182086','03 Jul 2020','A' union all
select'182086','04 Jul 2020','A' union all
select'182086','06 Jul 2020','P' union all
select'182086','07 Jul 2020','P' union all
select'182086','08 Jul 2020','P' union all
select'182086','09 Jul 2020','P' union all
select'182086','10 Jul 2020','A' union all
select'182086','11 Jul 2020','A' union all
select'182086','12 Jul 2020','A' union all
select'182086','13 Jul 2020','A'

select * from @t

; with cte as 
(
     select 
	          ROW_NUMBER() OVER(ORDER BY dates) AS ROW_NUMBER
		  ,   DATEDIFF(D, ROW_NUMBER() OVER(ORDER BY dates), dates) AS Diff
		  ,   * 
     from 
	     @t 
  	 where 
	    flag = 'A'	
)
select 'SQL Output '
  , scanCode 
  , min(dates) as start_date
  , max(dates) as end_date 
  , datediff(dd,min(dates),max(dates))+1 
from 
  cte 
group by 
  scancode , diff

Mateen · September 2, 2020, 4:07pm

Hi harishgg1.

The gap in data is genuine.
The missing date or dates may be employees rest day or it may be gazetted holiday and this certainly will not count as absent.

Mateen · September 2, 2020, 4:11pm

<<how are you getting this result
correct result
scanCode start_date end_date absentcount
182086 2020-07-03 2020-07-04 2
182086 2020-07-10 2020-07-14 4>>

I am not getting the correct result.
I mean the correct result would be.