SQLTeam.com | Weblogs | Forums

How to remove duplicate values from all tables of database

tsql

#1

Hi all,
I am looking for a dynamic script which can be helpful to remove duplicate rows (based on all columns of table) from all tables of my database.
My database contains more then 100 tables. No relations defined. No identity columns exists.

In other words I just want following script for all tables.

;WITH CTE
AS
(SELECT *,ROW_NUMBER() OVER (PARTITION BY col1, col2, col3.....coln ORDER BY col1) DupRow
FROM MyTable)

DELETE FROM CTE WHERE DupRow >1


#2

using while loop you can remove duplicates

Create Table #tables (tID Int, SchemaName Nvarchar(256), TableName Nvarchar(256))

Insert #tables
Select Row_Number() Over (Order By s.name, so.name), s.name, so.name
From sysobjects so
Join sys.schemas s
On so.uid = s.schema_id
Where so.xtype = 'U'

Declare @SQL Nvarchar(Max),
@schema Nvarchar(256),
@table Nvarchar(256),
@iter Int = 1

While Exists (Select 1
From #tables
Where tID = @iter
)
Begin

    Select  @schema = SchemaName,
            @table = TableName
    From    #tables
    Where   tID = @iter
   
    Set     @Sql = 'Delete t from [' + @schema + '].[' + @table + '] t' --change the statement as per your criteria

    Exec    sp_executeSQL @SQL;
          
    Set     @iter = @iter + 1

End


#3

Thanks @viggneshwar
Here how i have achieved it.

CREATE TABLE #tables
    (
      tID INT,
      SchemaName NVARCHAR(256),
      TableName NVARCHAR(256)
    )
GO
INSERT  #tables
        SELECT  Row_Number() OVER ( ORDER BY s.name, so.name ),
                s.name,
                so.name
        FROM    sysobjects so
                JOIN sys.schemas s ON so.uid = s.schema_id
        WHERE   so.xtype = 'U'
GO

DECLARE @SQL NVARCHAR(MAX),
    @schema NVARCHAR(256),
    @table NVARCHAR(256),
    @iter INT,
    @columns NVARCHAR(MAX)
SET @iter = 1

WHILE EXISTS ( SELECT   1
               FROM     #tables
               WHERE    tID = @iter )
    BEGIN

        SELECT  @schema = SchemaName,
                @table = TableName
        FROM    #tables
        WHERE   tID = @iter

        SET @columns = ''
        SELECT  @columns = @columns + ', [' + NAME + ']'
        FROM    SYS.COLUMNS
        WHERE   OBJECT_ID = OBJECT_ID(@table)
        SELECT  @columns = SUBSTRING(@COLUMNS, 2, LEN(@COLUMNS))

        SET @Sql = ';WITH CTE
							AS
							(SELECT *,ROW_NUMBER() OVER (PARTITION BY '
            + @columns + ' ORDER BY (SELECT NULL)) DupRow 
							FROM ' + @schema + '.' + @table + ')

							DELETE FROM CTE WHERE DupRow >1' 
        PRINT ( @SQL )

        SET @iter = @iter + 1
    END
GO
DROP TABLE #tables