SQLTeam.com | Weblogs | Forums

Add a running number column based on a text value

tsql

#1

Hi All,

I am trying to add a running number to differentiate between set of rows belonging to a distinct ID. tried Rank and row number but had no success. my source table is as following

create table #SourceTable (ID int, SomeText varchar(50))
insert into #SourceTable values (1111,'Header1')
insert into #SourceTable values (1111,'dtl1')
insert into #SourceTable values (1111,'dtl2')
insert into #SourceTable values (1111,'dtl3')
insert into #SourceTable values (1111,'Header2')
insert into #SourceTable values (1111,'dtl1')
insert into #SourceTable values (1111,'dtl2')
insert into #SourceTable values (1111,'Header3')
insert into #SourceTable values (1111,'dtl1')
insert into #SourceTable values (1111,'dtl2')
insert into #SourceTable values (2222,'Header1')
insert into #SourceTable values (2222,'dtl1')
insert into #SourceTable values (2222,'dtl2')
insert into #SourceTable values (2222,'Header2')
insert into #SourceTable values (2222,'dtl1')
insert into #SourceTable values (9990,'Header1')
insert into #SourceTable values (9990,'dtl1')
insert into #SourceTable values (9990,'dtl2')

select * from #SourceTable
drop table #SourceTable

and my Desired output is as following

create table #DesiredTable (ID int, SomeText varchar(50),Identifier int)
insert into #DesiredTable values (1111,'Header1',1)
insert into #DesiredTable values (1111,'dtl1',1)
insert into #DesiredTable values (1111,'dtl2',1)
insert into #DesiredTable values (1111,'dtl3',1)
insert into #DesiredTable values (1111,'Header2',2)
insert into #DesiredTable values (1111,'dtl1',2)
insert into #DesiredTable values (1111,'dtl2',2)
insert into #DesiredTable values (1111,'Header3',3)
insert into #DesiredTable values (1111,'dtl1',3)
insert into #DesiredTable values (1111,'dtl2',3)
insert into #DesiredTable values (2222,'Header1',1)
insert into #DesiredTable values (2222,'dtl1',1)
insert into #DesiredTable values (2222,'dtl2',1)
insert into #DesiredTable values (2222,'Header2',2)
insert into #DesiredTable values (2222,'dtl1',2)
insert into #DesiredTable values (9990,'Header1',1)
insert into #DesiredTable values (9990,'dtl1',1)
insert into #DesiredTable values (9990,'dtl2',1)

select * from #DesiredTable
drop table #DesiredTable

Note: The Text (Header1, Header2, Header3) will always be constant and they will appear in the same ascending order for a given ID. the details will be changing.

how can i achieve this? Appreciate any help.


Create and return a date string from calculated years, months, and days
#2

I modified your source table to include a primary key (identity in this case, but not sure what yours is)

create table #SourceTable (UniqueKey int identity(1,1),
						ID int, SomeText varchar(50))




;with cte as
(select distinct SomeText, dense_Rank() over (order by sometext) GroupNum
   from #SourceTable
  where SomeText like 'Header%'),
cte_ID as
(select s.UniqueKey, s.ID, s.SomeText, c.GroupNum,
		lead(s.UniqueKey,1,0) over (order by s.UniqueKey) NextUniqueKey   
   from #SourceTable s
	join cte c
		on s.SomeText = c.Sometext)

select s.ID, s.SomeText, c.GroupNum Identifier
 from #SourceTable s
	join cte_id c
		on s.UniqueKey >= c.UniqueKey
		and (s.UniqueKey < c.NextUniqueKey or c.NextUniqueKey = 0)

#3

Awesome! a perfect solution. Appreciate your help .


#4

@mike01 - Also, is it possible to do a similar thing for records found before "Header1" for each ID and mark all such records as 0? following is the new source and desired tables. please help.

create table #SourceTable (UniqueKey int identity(1,1),ID int, SomeText varchar(50))

insert into #SourceTable values (1111,'someRandomText')
insert into #SourceTable values (1111,'someOtherRandomText')
insert into #SourceTable values (1111,'NonHeaderText')
insert into #SourceTable values (1111,'Header1')
insert into #SourceTable values (1111,'dtl1')
insert into #SourceTable values (1111,'dtl2')
insert into #SourceTable values (1111,'dtl3')
insert into #SourceTable values (1111,'Header2')
insert into #SourceTable values (1111,'dtl1')
insert into #SourceTable values (1111,'dtl2')
insert into #SourceTable values (1111,'Header3')
insert into #SourceTable values (1111,'dtl1')
insert into #SourceTable values (1111,'dtl2')
insert into #SourceTable values (2222,'someRandomText')
insert into #SourceTable values (2222,'Header1')
insert into #SourceTable values (2222,'dtl1')
insert into #SourceTable values (2222,'dtl2')
insert into #SourceTable values (2222,'Header2')
insert into #SourceTable values (2222,'dtl1')
insert into #SourceTable values (1111,'someOtherRandomText')
insert into #SourceTable values (1111,'NonHeaderText')
insert into #SourceTable values (9990,'Header1')
insert into #SourceTable values (9990,'dtl1')
insert into #SourceTable values (9990,'dtl2')

select * from #SourceTable
drop table #SourceTable

and desired table is as following

create table #DesiredTable (ID int, SomeText varchar(50),Identifier int)

insert into #DesiredTable values (1111,'someRandomText',0)
insert into #DesiredTable values (1111,'someOtherRandomText',0)
insert into #DesiredTable values (1111,'NonHeaderText',0)
insert into #DesiredTable values (1111,'Header1',1)
insert into #DesiredTable values (1111,'dtl1',1)
insert into #DesiredTable values (1111,'dtl2',1)
insert into #DesiredTable values (1111,'dtl3',1)
insert into #DesiredTable values (1111,'Header2',2)
insert into #DesiredTable values (1111,'dtl1',2)
insert into #DesiredTable values (1111,'dtl2',2)
insert into #DesiredTable values (1111,'Header3',3)
insert into #DesiredTable values (1111,'dtl1',3)
insert into #DesiredTable values (1111,'dtl2',3)
insert into #DesiredTable values (2222,'someRandomText',0)
insert into #DesiredTable values (2222,'Header1',1)
insert into #DesiredTable values (2222,'dtl1',1)
insert into #DesiredTable values (2222,'dtl2',1)
insert into #DesiredTable values (2222,'Header2',2)
insert into #DesiredTable values (2222,'dtl1',2)
insert into #DesiredTable values (1111,'someOtherRandomText',0)
insert into #DesiredTable values (1111,'NonHeaderText',0)
insert into #DesiredTable values (9990,'Header1',1)
insert into #DesiredTable values (9990,'dtl1',1)
insert into #DesiredTable values (9990,'dtl2',1)

select * from #DesiredTable
drop table #DesiredTable

Highly appreciate your help with this on. Thanks @mike01


Create and return a date string from calculated years, months, and days
#5

Sure, plus the join to the CTE was wrong anyway

;with cte as
(select distinct SomeText, dense_Rank() over (order by sometext) GroupNum
   from #SourceTable
  where SomeText like 'Header%'),
cte_ID as
(select s.UniqueKey, s.ID, s.SomeText, c.GroupNum,
		lead(s.UniqueKey,1,0) over (order by s.UniqueKey) NextUniqueKey   
   from #SourceTable s
	join cte c
		on s.SomeText = c.Sometext)


select s.ID, s.SomeText, IsNull(c.GroupNum,0) Identifier
 from #SourceTable s
	left join cte_id c
		on s.UniqueKey >= c.UniqueKey
		and s.ID = c.ID
		and (s.UniqueKey < c.NextUniqueKey or c.NextUniqueKey = 0)

#6

@mike01 - Thanks Mike. That worked like a charm! Appreciate all your help.

But now i ended up looking for an alternative method as the query when executed against 70 k IDs takes more than an hour. tried replacing the CTEs with Temp tables and indexes on it , but that doesn't seem to help. please do suggest any alternative methods (specially for large sets of data) if you will.

Have a great day!


#7

hi

i know this topic is from long ago

I tried something different ..
recursive cte
"Seniors" have suggested recursive cte is not good for performance

If t helps GREAT
:slight_smile:
:slight_smile:

drop create data ..
use tempdb 
go 

drop table SourceTable 
create table SourceTable (ID int, SomeText varchar(50))
insert into SourceTable values (1111,'Header1')
insert into SourceTable values (1111,'dtl1')
insert into SourceTable values (1111,'dtl2')
insert into SourceTable values (1111,'dtl3')
insert into SourceTable values (1111,'Header2')
insert into SourceTable values (1111,'dtl1')
insert into SourceTable values (1111,'dtl2')
insert into SourceTable values (1111,'Header3')
insert into SourceTable values (1111,'dtl1')
insert into SourceTable values (1111,'dtl2')
insert into SourceTable values (2222,'Header1')
insert into SourceTable values (2222,'dtl1')
insert into SourceTable values (2222,'dtl2')
insert into SourceTable values (2222,'Header2')
insert into SourceTable values (2222,'dtl1')
insert into SourceTable values (9990,'Header1')
insert into SourceTable values (9990,'dtl1')
insert into SourceTable values (9990,'dtl2')
go 

select * from SourceTable
go
SQL .. Recursive CTE
; WITH cte 
     AS (SELECT Row_number() 
                  OVER( 
                    ORDER BY (SELECT NULL)) AS rn, 
                * 
         FROM   sourcetable), 
     rec_cte 
     AS (SELECT *, 
                RIGHT(sometext, 1) AS grp 
         FROM   cte 
         WHERE  rn = 1 
         UNION ALL 
         SELECT a.*, 
                CASE 
                  WHEN a.sometext LIKE 'Header%' THEN RIGHT(a.sometext, 1) 
                  ELSE b.grp 
                END 
         FROM   cte a 
                JOIN rec_cte b 
                  ON a.rn = b.rn + 1) 
SELECT * 
FROM   rec_cte 

go
Results


#8

Can you provide execution plan? Also, are you using wildcard before the Search Word (i.e. %Header% instead of Header%)? The second one would be better since we could actually get an index seek using like. The first one will always be a scan and much slower. If the first one is used, then I would think that is where the issue is and would recommend maybe putting in a Full Text Index and using the FTI syntax contains


#9

hi mike ..
heres the execution plan ..

.i have done some performance tuning in the past
i have idea about logical reads index seek index scan sort expensive

:slight_smile:
:slight_smile:

Execution Plan


#10

just what I thought. There are table scans on both tables, so you must be using wild cards at the beginning and end which will cause this. Maybe FTI will be better/faster solution


#11

i am sure what you said is right Mike ..
Thank You
:slight_smile:
:slight_smile:

Table Scan if you can avoid will help performance
in general


#12

In My case, I was only using 'Header%'.


#13

@harishgg1 - Will try this. Thanks for your input.