As a one-time cleanup I don't think it is too bad ... but SQL will only do a 99% job, there are bound to be unexpected separator characters in the data that are not anticipated - but when they are found they can be added to the parsing-SQL ...
RegEx/etc. would more likely find more of them ... but may also match something NOT intended!!
If only Humans would enter the data cleanly in the first place ...
So basically we have a list of City and State names and Abbreviations, and we want to remove those from the end of any address string, where they match (and presumably store an ID for the State in the address record instead)
; with UserAddress(Address) as
(
select '124 Florida Avenue,Florida,FL' union all
select '320 California street California,CA' union all
select '333 Manhattan street, Manhattan NY' union all
select '123 Texas Avenue Texas TX' UNION ALL
select '124 Texas Avenue Texas, TX' UNION ALL
select '125 Texas Avenue Texas ,TX' UNION ALL
select '126 Texas Avenue Texas , TX' UNION ALL
select 'City Wrong State, Manhattan TX' union all
select 'Unknown City, Unknown, TX' union all
select 'Unknown State, Manhattan, XX'
)
SELECT [Address], [NewAddress] = CONVERT(varchar(100), NULL), [City] = CONVERT(varchar(100), NULL), [State] = CONVERT(varchar(2), NULL)
INTO #UserAddress
FROM UserAddress
--
; With City(CityName,State) as
(
Select 'Florida','FL' union all
Select 'California','CA' union all
Select 'Manhattan','NY' union all
Select 'Texas','TX'
)
SELECT CityName,State
INTO #City
FROM City
--
-- Remove any trailing spaces from [Address] - otherwise the rest of the code is full of RTrim statements)
UPDATE U
SET Address = RTrim(Address)
FROM #UserAddress AS U
WHERE Address LIKE '% '
--
-- Split off valid STATE abbrieviations
DECLARE @Address varchar(1000) -- Working storage
UPDATE U
SET
State = S.State,
@Address = RTrim(LEFT(Address, PATINDEX ('%[ ,;]' + S.State, Address))),
NewAddress = RTrim(CASE WHEN @Address LIKE '%[,;]' THEN LEFT(@Address, LEN(@Address)-1) ELSE @Address END)
FROM #UserAddress AS U
JOIN
(
SELECT DISTINCT State
FROM #City
) AS S
ON RTrim(Address) LIKE '%[ ,;]' + S.State
--
-- Split off valid CITY
DECLARE @City varchar(1000) -- Working storage
UPDATE U
SET
City = C.CityName,
@City = RTrim(LEFT(Address, PATINDEX ('%[ ,;]' + C.CityName, NewAddress))),
NewAddress = RTrim(CASE WHEN @City LIKE '%[,;]' THEN LEFT(@City, LEN(@City)-1) ELSE @City END)
FROM #UserAddress AS U
JOIN
(
SELECT DISTINCT CityName
FROM #City
) AS C
ON RTrim(NewAddress) LIKE '%[ ,;]' + C.CityName
--
-- Review results
SELECT *
FROM #UserAddress
--
DROP TABLE #UserAddress
GO
DROP TABLE #City
GO