# Challenge: Find the correct order hierarchy

I bumped into a problem that in my opinion is really interesting. I found a relatively easy itinerary solution, but it completely lacks performance when applied to a bigger stack of records... and in the end it turned out to be even wrong!

Imagine a list of names with an underlying, BUT NOT KNOWN order: (we make it visible to make it easier to follow )

Peter, Paul, John, George, Mary, Anne, Eric, Thomas, Susan, Liz

The only thing you have are samples with subsets of those names, listed in the correct hierarchical order.
sample1: John, Anne, Susan
sample2: George, Thomas, Susan, Liz
sample3: Peter, Paul, Eric, Liz
sample4: Paul, Susan
sample5: Mary, Eric, Susan
sample6: Paul, George, Mary, Anne, Thomas
sample7: Paul, John, Mary, Eric

The goal is to find the correct order; or at least a possible correct order for the amount of given samples. Just imagine that there will be 400 names and 500'000 samples.

I post what I thought was a solution: Taking the average ranking within the samples as a measure. This will for sure bring #1 in Top position but lower ranked names will be calculated wrong. You'll need a splitter function that returns the name and its position. like this one -> http://www.sqlservercentral.com/articles/Tally+Table/72993/

``````Create TABLE #Sample
(
ID		int,
Sample varchar(100)
)
INSERT into #Sample (ID,Sample) values (1,'John,Anne,Susan'),(2,'George,Thomas,Susan,Liz'), (3,'Peter,Paul,Eric,Liz'),(4,'Paul,Susan'),(5,'Mary,Eric,Susan'),(6,'Paul,George,Mary,Anne,Thomas'), (7,'Paul,John,Mary,Eric')

Create TABLE #Names
(
ID		int,
Name	varchar(10),
rank	int
)
Insert into #Names
(
ID,
Name,
rank
)
SELECT ID, Item, ItemNumber

FROM #Sample
CROSS APPLY
(
SELECT	Item, Itemnumber FROM dbo.Split(Sample, ',')
) AS Y

select * from #Names

select name, 1.0*sum(rank)/count(rank) as ord
from #Names
Group by name
order by ord asc

drop Table #Sample
drop Table #Names``````

This is a solution but hardly applicable for the amount of record I have: I look for the name on #top, save his name in the #Ranking table, eliminate him from the samples, and restart again

``````Create TABLE #Ranking
(
ID	    int IDENTITY(1,1) NOT NULL,
Name    varchar(10),
rank    real
)

declare @loop int
set @loop = 1
declare @max int
set @max = (SELECT COUNT(DISTINCT Name) from #Names)

WHILE @loop <= @max
BEGIN

INSERT into #Ranking
(
Name,
rank
)
select TOP 1 Name, COALESCE(1.0*SUM(rank)/COUNT(rank),1) as ord from #Names where Name not in (SELECT Name from #Ranking) GROUP BY Name ORDER BY ord asc

update #Sample set Sample = Replace(Replace(Replace('XXX'+Replace(Replace(Sample,(Select TOP 1 Name from #Ranking order by ID desc),'') + 'XXX',',XXX',''),'XXX,',''),',,',','),'XXX','')

delete #Names

Insert into #Names
(
ID,
Name,
rank
)
SELECT ID, Item, ItemNumber

FROM #Sample
CROSS APPLY
(
SELECT	Item, COALESCE(ItemNumber,1) as ItemNumber FROM dbo.Split(Sample, ',')
) AS Y
WHERE Item <> ''

set @loop = @loop + 1

END
select * from #Ranking

drop Table #Sample
drop Table #Names
drop Table #Ranking``````

Maybe the code below?! Not sure how long it will run on a large sample table, but of course you can test that :-).

``````IF OBJECT_ID('tempdb.dbo.#names') IS NOT NULL
DROP TABLE #names
CREATE TABLE #names (
name varchar(100) not null,
sample_id int not null,
sequence smallint not null
)
--DROP INDEX names__CL ON #names;
CREATE UNIQUE CLUSTERED INDEX names__CL
ON #names ( name, sequence, sample_id )
WITH ( FILLFACTOR = 100 );

IF OBJECT_ID('tempdb.dbo.#results') IS NOT NULL
DROP TABLE #results
CREATE TABLE #results (
final_sequence int IDENTITY(1, 1) NOT NULL,
name varchar(100) NOT NULL
)
CREATE UNIQUE CLUSTERED INDEX results__CL
ON #results ( final_sequence )
WITH ( FILLFACTOR = 100 );

INSERT INTO #names
SELECT LTRIM(RTRIM(ds.Item)) AS name, s.id AS sample_id, ds.ItemNumber AS sequence
FROM #sample s
CROSS APPLY dbo.DelimitedSplit8K(sample, ',') ds

SELECT DISTINCT name FROM #names ORDER BY name --just to show all names

DECLARE @max_sequence_before_this_insert int

WHILE EXISTS(SELECT 1 FROM #names)
BEGIN
SELECT @max_sequence_before_this_insert = ISNULL(MAX(final_sequence), 0)
FROM #results

INSERT INTO #results
SELECT name
FROM #names
GROUP BY name
HAVING MAX(sequence) = 1

UPDATE #names
SET sequence = sequence - 1
WHERE sample_id IN (SELECT DISTINCT sample_id FROM #names
WHERE name IN (SELECT name FROM #results WHERE final_sequence > @max_sequence_before_this_insert))

DELETE FROM #names
WHERE name IN (SELECT name FROM #results WHERE final_sequence > @max_sequence_before_this_insert)

END /*WHILE*/

SELECT * FROM #results --show all names in hierarchical order (I hope!)``````