SQLTeam.com | Weblogs | Forums

Find the duplicate record based on two tables


#1

Overview:

We have 2 tables (Primay & Duplicate). The primary table contains the unique id with other columns and the duplicate table contains with the unique id with multple duplicate row of data.

Problem:

We want to mark the duplicate record on the duplicate table based on the primary first name, last name and date of birth columns and only want to keep those records in duplicate table that are unique and id exists in the primary table.

Table;

Primary table

id parentid firstname last name DOB Gender Status codenumber

18286961 NULL Barry Delaney 1971-07-23 Male NOT ACTIVE 147721948
18286965 NULL Philippe Di marco 1970-03-28 Male ACTIVE 050025200
18286969 NULL James Dolan 1951-08-28 Male ACTIVE 137463178
18286973 NULL Frantz Dorcin 1961-03-21 Male NOT ACTIVE 157768772
18286977 NULL Herman Duroseau 1970-12-09 Male NOT ACTIVE 443677694
18286981 NULL Marie Edmond 1982-04-26 Female ACTIVE 052027935
18286985 NULL Lee ellen Ferguson 1969-07-01 Female NOT ACTIVE 008647504
18286989 NULL Juan Giraldo 1977-11-27 Male ACTIVE 768342209
18286993 NULL Matthieu Godard 1979-03-26 Male ACTIVE 075964792
18286997 NULL Maritza Gonzalez 1965-04-17 Female ACTIVE 051764449
NULL NULL NULL NULL NULL NULL NULL NULL

Duplicate table

id parentid firstname last name DOB Gender Status codenumber

37438893 NULL Scott Nadell 1958-07-28 M ACTIVE QCWE4305UQKV
37438897 NULL Nancy Halpern 1962-07-03 F ACTIVE CEDL1806ULTR
37438901 NULL Janice Fey 1955-05-17 F ACTIVE WBSG0953MPQI
37438905 NULL Frances Albanese 1956-10-21 F ACTIVE VXSK4605KOLN
37438909 NULL Michael Lomolino 1982-07-24 M ACTIVE DXVQ4762SOQJ
37438917 NULL Nancy Hernandez 1961-10-08 F ACTIVE QVHB1697LTJH
37438925 NULL Roberto Espinosa 1960-01-23 M ACTIVE JULW1243RKUL
37438929 NULL Abhishek Tiwari 1984-08-17 M ACTIVE EVWJ8653KOSG
37438933 NULL Ivonne Carpio Sotomayor 1983-09-02 F ACTIVE KZPI6452RPSQ
37438937 NULL Adrian Mioduszewski 1987-11-02 M ACTIVE ZBGE0284LJPK
18286997 NULL Maritza Gonzalez 1965-04-17 Female ACTIVE 051764449
18286991 NULL Maritza Gonzalez 1965-04-17 Female ACTIVE 051764449
NULL NULL NULL NULL NULL NULL NULL NULL

So in the above example we want to mark delete in duplicate table for this record a

18286991 NULL Maritza Gonzalez 1965-04-17 Female ACTIVE 051764449

Any ideas how we can do that?


#2

you need to provide DDL to create your tables and data. I am not sure I fully understand your requirements but the query below at least provide required result.

create table PrimaryTable
(
id int,
parentid int,
firstname varchar(100),
lastname varchar(100),
DOB DATETIME,
Gender varchar(10),
Status varchar(20),
codenumber varchar(20)
)

create table DuplicateTable
(
id int,
parentid int,
firstname varchar(100),
lastname varchar(100),
DOB DATETIME,
Gender varchar(10),
Status varchar(20),
codenumber varchar(20)
)

insert into PrimaryTable
values
(18286961, NULL, 'Barry', 'Delaney', '1971-07-23', 'Male', 'NOT ACTIVE', '147721948'),
(18286965, NULL, 'Philippe', 'Di marco', '1970-03-28', 'Male', 'ACTIVE', '050025200'),
(18286969, NULL, 'James', 'Dolan', '1951-08-28', 'Male', 'ACTIVE', '137463178'),
(18286973, NULL, 'Frantz', 'Dorcin', '1961-03-21', 'Male', 'NOT ACTIVE', '157768772'),
(18286977, NULL, 'Herman', 'Duroseau', '1970-12-09', 'Male', 'NOT ACTIVE', '443677694'),
(18286981, NULL, 'Marie', 'Edmond', '1982-04-26', 'Female', 'ACTIVE', '052027935'),
(18286985, NULL, 'Lee ellen', 'Ferguson', '1969-07-01','Female', 'NOT ACTIVE', '008647504'),
(18286989, NULL, 'Juan', 'Giraldo', '1977-11-27', 'Male', 'ACTIVE', '768342209'),
(18286993, NULL, 'Matthieu', 'Godard', '1979-03-26', 'Male', 'ACTIVE', '075964792'),
(18286997, NULL, 'Maritza', 'Gonzalez', '1965-04-17', 'Female','ACTIVE', '051764449')

insert into DuplicateTable
values
(37438893, NULL, 'Scott', 'Nadell', '1958-07-28', 'M', 'ACTIVE', 'QCWE4305UQKV'),
(37438897, NULL, 'Nancy', 'Halpern', '1962-07-03', 'F', 'ACTIVE', 'CEDL1806ULTR'),
(37438901, NULL, 'Janice', 'Fey', '1955-05-17', 'F', 'ACTIVE', 'WBSG0953MPQI'),
(37438905, NULL, 'Frances', 'Albanese', '1956-10-21', 'F', 'ACTIVE', 'VXSK4605KOLN'),
(37438909, NULL, 'Michael', 'Lomolino', '1982-07-24', 'M', 'ACTIVE', 'DXVQ4762SOQJ'),
(37438917, NULL, 'Nancy', 'Hernandez', '1961-10-08', 'F', 'ACTIVE', 'QVHB1697LTJH'),
(37438925, NULL, 'Roberto', 'Espinosa', '1960-01-23', 'M', 'ACTIVE', 'JULW1243RKUL'),
(37438929, NULL, 'Abhishek', 'Tiwari', '1984-08-17', 'M', 'ACTIVE', 'EVWJ8653KOSG'),
(37438933, NULL, 'Ivonne Carpio', 'Sotomayor', '1983-09-02', 'F', 'ACTIVE', 'KZPI6452RPSQ'),
(37438937, NULL, 'Adrian', 'Mioduszewski', '1987-11-02', 'M', 'ACTIVE', 'ZBGE0284LJPK'),
(18286997, NULL, 'Maritza', 'Gonzalez', '1965-04-17', 'Female', 'ACTIVE', '051764449'),
(18286991, NULL, 'Maritza', 'Gonzalez', '1965-04-17', 'Female', 'ACTIVE', '051764449')

select d.* from PrimaryTable p
join DuplicateTable d
on p.firstname = d.firstname and p.lastname = d.lastname and p.DOB = d.DOB
where p.id <> d.id


#3
select
 'c'=Case when d.firstname =p.firstname and d.id= p.id and d.DOB =p.DOB then 'Keep' else 'Delete' end,
  d.* from #DpTbl as d
left join #PrTbl as p
on d.firstname=p.firstname and d.id =p.id