T-sql 2012 compare 2 strings

In a sql server 2012 database, I want to find all the records where the lastname is not at least 'part' of the teacherD field.
The following sql works about 90% of the time:
select lastname,teacherD
from test.dbo.table145
WHERE CHARINDEX(lower(lastName),lower([teacherD])) = 0
Different parts:

  1. The problem is sometimes the teacherD field has special characters right next to the time. The above sql thinks there is a difference.
    Here are examples of where there are problems

lastname --- teacherD
smith -- smith, darell
smith -- smith-bright joan

  1. If possible the user would also like to check if part of lastname is contained within teacherD.
    Here are some examples of what I am referring to:

lastname -- teacherD
corbly-tip -- corbly, annette b
baker-hage -- cubs - bakerhage - 8c
king grie -- king, bev

Thus would you show me some sql on how to solve part #1 and part #2 if possible?

is this homework or real world scenario?
if real world why is the data in this situation? if real world I could recommend a change, are you open to change or do you have the power to make changes?

exercise in futility. this will fail guaranteed with any possibility of data delimiters. get your data in order otherwise you will need this type of cirque du soleil gymnastics to sort things out.

Below code NOT RECOMMENDED !

create table #sample(id int identity(1,1), LastName nvarchar(50), TeacherD varchar(50))

insert into #sample
select 'corbly-tip', 'corbly, annette b' union
select 'baker-hage', 'cubs - bakerhage - 8c' union
select 'king grie', 'king, bev' 

create table #lastnames(id int, lastname varchar(50))
create table #teacherDs(id int, teacherD varchar(50))


create table #clean_sample(id int, LastName nvarchar(50), TeacherD varchar(50), LastNameClean nvarchar(50), TeacherDClean varchar(50))
insert into #clean_sample
select id, LastName, TeacherD, replace(replace(lastname,',',' '),'-',' '),  replace(replace(TeacherD,',',' '),'-',' ')  from #sample

;with details
as
(
	SELECT A.id,  
		Split.a.value('.', 'VARCHAR(100)') AS Data  
	FROM  
	(
		SELECT id,  
			 cast(REPLACE('<r>' + LastNameClean + '</r>', SPACE(1), '</r><r>') as xml) as Data
		FROM  #clean_sample
	) AS A CROSS APPLY Data.nodes ('/r') AS Split(a)
)
insert into #lastnames
select a.id,       
	   b.Data as lastname
  from #clean_sample a
  join details b on a.id = b.id

;with details
as
(
	SELECT A.id,  
		Split.a.value('.', 'VARCHAR(100)') AS Data  
	FROM  
	(
		SELECT id,  
			 cast(REPLACE('<r>' + TeacherDClean + '</r>', SPACE(1), '</r><r>') as xml) as Data
		FROM  #clean_sample
	) AS A CROSS APPLY Data.nodes ('/r') AS Split(a)
)
insert into #teacherDs
select a.id,       
	   b.Data as teacherD
  from #clean_sample a
  join details b on a.id = b.id
  where b.Data <> ''
    	
--select * From #clean_sample

--create table #lastnames(id int, lastname varchar(50))
--insert into #lastnames

--;with cte
--as
--(
--select id, 
--	    cast(REPLACE('<r>' + LastNameClean + '</r>', SPACE(1), '</r><r>') as xml) as snap
--	From #clean_sample
--)
--select snap.value('(/r)[text]', 'varchar(50)') as lastname
--from cte


select distinct s.* 
  from #sample s
  join #lastnames ln on s.id = ln.id
  join #teacherDs td on s.id = td.id
  where s.lastname like '%' + ln.lastname + '%'
    or s.TeacherD like '%' + td.teacherD + '%'
--select * from #teacherDs

drop table #sample
drop table #clean_sample
drop table #lastnames
drop table #teacherDs

This is a real life situation. I cannot change the data.

Could you show me the first part of the question by possbibly using regular expressions or whatever logic you think would work?

hi

i tried to figure this out !!!!

i think you can remove the special characters ..
and do your query

:slight_smile:
:slight_smile:

select lastname,teacherD
from test.dbo.table145
WHERE CHARINDEX(lower(lastName),lower([teacherD])) = 0

-- here remove special characters in teacherID
-- OR only get letters in teacherID field

another way to do this !!!!

is create a function and use it

i seached google and found this
https://www.sqlservercentral.com/Forums/Topic1494406-391-1.aspx

please note Jeff Moden in the article
hi jeff :slight_smile
:slight_smile:

If you think its expensive to hire a professional to do the job, wait until you hire an amateur.
I love this quote Jeff

especially in India piece of shit country .. poverty .. nobody wants to PAY
how do I pay for my vacation in hawaii :slight_smile:
this is just my opinion .. please no insult to anybody
everybody has their own experience

function ..
CREATE FUNCTION [dbo].[fn_GetAlphabetsOnly](@input  *VARCHAR* (50)) 
returns  *VARCHAR* (1000) AS 
BEGIN 
WHILE  *Patindex* ('%[^a-z]%',@input) &amp;gt; 
0 
SET @input =  *Stuff* (@input, *Patindex* ('%[^a-z]%',@input),1,'') 
RETURN @input 
END

hope it helps
:slight_smile:
:slight_smile:

hi

Simple way of doing this
only replacing - character if thats only character you expect

Simple SQL
SELECT lastname, 
       teacherid 
FROM   data 
WHERE  Replace([teacherid], '-', '') LIKE '%' + Replace(lastname, '-', '') + '%'