SQLTeam.com | Weblogs | Forums

Remove duplicate entries from tables


#1

How to remove duplicate data from tables. Each table has a number of relation tables and a number of supporting view and dashboard.

The tables are used in live environment. Is there a solution to the problem


#2

Please explain how to define if rows are duplicate.

The following shows a table where the first two column "ought" to be unique, and thus:
col1=4, col2='D' has two duplicates
col1=6, col2='F' has one duplicate

create table dup_table(col1 int,col2 varchar(10),col3 date);

insert into dup_table values
   (1,'A','2018-01-01')
  ,(2,'B','2018-01-01')
  ,(3,'C','2018-01-01')
  ,(4,'D','2018-01-01')
  ,(4,'D','2018-01-02')
  ,(4,'D','2018-01-03')
  ,(5,'E','2018-01-01')
  ,(6,'F','2018-01-01')
  ,(6,'F','2018-01-02')
  ,(7,'G','2018-01-01')
  ,(8,'H','2018-01-01')
  ,(9,'I','2018-01-01')
;

select * from dup_table;

delete a
  from (select row_number() over(partition by col1,col2 order by col3 desc) as rn
          from dup_table
       ) as a
 where rn>1
;

select * from dup_table;

drop table dup_table;