SQLTeam.com | Weblogs | Forums

Delete duplicates needs help on row_number() Over part


#1

Hi,

I got into a dead end and could really appreciate some help.

Here is my sample code and data, and it worked.

create table #SampleData
( id uniqueidentifier primary key,
[key] nvarchar not null,
[key2] nvarchar not null,
created_at datetime not null

)

INSERT INTO #SampleData ([id], [key], [Key2], [created_at])
select newID(), 'ABC', 'aa', getdate() union
select newID(), 'ABC', 'aa', getdate()-1 union
select newID(), 'CBC', 'cc', getdate()-2 union
select newID(), 'CBC', 'cc', getdate() union
select newID(), 'CBC', 'cc', getdate()+1 union
select newID(), 'ZBC', 'zz', getdate()
GO

DELETE FROM #SampleData WHERE Id IN
(
SELECT Id FROM (
SELECT
Id
,ROW_NUMBER() OVER (PARTITION BY [key], [Key2] ORDER BY Id) AS [ItemNumber]
FROM
#SampleData --where [key] = 'CBC'
) a WHERE ItemNumber > 1 -- Keep only the first unique item
)

But my real data did not work, and the reason is that the ItemNumber returned are 1s for all of them. What did I miss?

Thanks!


#2

Row numbers are grouped by whatever you put in the PARTITION BY clause. In your example, you are partitioning by key and key2. That means, each distinct combination of key and key2 is considered as a group.

In your sample data, there are two rows for the combination 'ABC', 'aa'. So one of them will get deleted. In your actual data, if nothing gets deleted, that means you don't have any duplicates on key and key2.


#3

Hi

Please find your solution

;
WITH abc_cte
AS (
SELECT dense_rank() OVER (PARTITION BY [key],key2 ORDER BY id) AS dr
,id
,[key]
,key2
,created_at
FROM #SampleData
)
DELETE FROM abc_cte WHERE dr <> 1


#4

May be a pedantic point? and only a side effect of your sample data rather than your real data, but I would want to ORDER BY Date rather than a GUID - i.e. delete the oldest and retain the most recent, or something like that.


#5

Hi

I posted above .. I am posting the same thing again with nice formatting

;
WITH abc_cte
AS (
	SELECT dense_rank() OVER (PARTITION BY [key],key2 ORDER BY id) AS dr
		,id
		,[key]
		,key2
		,created_at
	FROM #SampleData
	)
DELETE
FROM abc_cte
WHERE dr <> 1