SQLTeam.com | Weblogs | Forums

Recursive hierarchy


#1

--Source
DECLARE @TBL TABLE(ID1 INT, ID2 INT, Grp INT)
INSERT INTO @TBL (ID1,ID2)VALUES (1,2),(3,4),(9,10),(7,8),(8,9),(2,3),(10,20)
SELECT * FROM @TBL

--Final output should be this:
DECLARE @TBL1 TABLE(ID1 INT, ID2 INT, Grp INT)
INSERT INTO @TBL1 (ID1,ID2,Grp)VALUES (1,2,1),(3,4,1),(9,10,2),(7,8,2),(8,9,1),(2,3,1),(10,20,null)
SELECT * FROM @TBL1

The way it works is allocating groupings to values available on the left having a matching value available in another row on the right.

For Example: first row ID1=1 needs to be matched for values in ID2; then ID2=2 in first row needs to be matched in a similar way for a value in ID1.
For any matches found the same process needs to be repeated. So it is a circular relationship and needs to match every ID1 & ID2 till all are matched
and along the way need to marked with same group if the lineage is not broken else group number is incremented.

Group 1 is rows 1,2 and 6
Group 2 is rows 3,4 and 5 and so on

Thanks

Edit:
These are the final output values, typed incorrectly above:

--Final output should be this:
DECLARE @TBL1 TABLE(ID1 INT, ID2 INT, Grp INT)
INSERT INTO @TBL1 (ID1,ID2,Grp)VALUES (1,2,1),(3,4,1),(9,10,2),(7,8,2),(8,9,2),(2,3,1),(10,20,2)
SELECT * FROM @TBL1

Group 1 is rows 1,2 and 6
Group 2 is rows 3,4 ,5 and 7


#2

Um..... What?


#3

I know maybe I could not translate the requirement into words...If you check the desired final output you will understand what I mean. The column that needs to be updated is Grp (to identify groups).


#4

How does (8,9) become (8,9, 1)?
If I understand the hierarchy (and I'm NOT saying that I do), then (1,2) -> (2,3) -> (3,4) is Group 1
and (7,8) -> (8,9) -> (9,10) -> (10,20) is Group 2.
Do I have a handle on the problem or am I completely out in left field? (Either way works for me!)


#5

If I have the problem stated correctly, this should be the resultset desired.[code]DECLARE @TBL TABLE(ID1 INT, ID2 INT, Grp INT)
INSERT INTO @TBL (ID1,ID2)VALUES (1,2),(3,4),(9,10),(7,8),(8,9),(2,3),(10,20)
--SELECT * FROM @TBL

;with GroupHeaders -- All ID1's that don't match a ID2
as (
select
id1, id2, ROW_NUMBER() over (order by id1) GroupNumber
from
@tbl t1
where
not exists (
select *
from @tbl t2
where t2.id2 = t1.id1
)
)
,AddGroupNumbers
as (
select id1, id2, GroupNumber
from GroupHeaders

union all

select t.ID1, t.ID2, h.GroupNumber
from
@Tbl t
inner join
AddGroupNumbers h
on t.ID1 = h.ID2
)
select *
from AddGroupNumbers
order by
GroupNumber,
id1[/code]


#6

Hi, Yes you have understood the problem correctly and my bad, it is (8,9,2) and (10,20,2). Did not realize this while typing out the final output.

Your solution works and if I add another set say (40,50) it assigns a new group number to it, 3 in this case. It might be correct but I am not sure right now if it should be left NULL or should it be marked with the next group number. Will check about this.

Thanks for your help, much appreciated.


#7

Hi Stephen, Please check with the below data, the result is a bit incorrect. Data marked in Group 2 is actually part of Group 1.

Also, I have been working on this problem and came up with a solution. If this is applied to 200K records due to the loop and recursion it is slow but gives the correct output:

--Source
DECLARE @TBL TABLE(ID INT IDENTITY(1,1), ID1 INT, ID2 INT, Grp INT)
INSERT INTO @TBL (ID1,ID2)VALUES (100,110),(20,30),(9,10),(7,8),(8,9),(2,3),(10,20),(3,4),(1,2),(21,1),(4,22),(22,23),
(40,50),(50,60),(3,20),(60,70),(70,80),(5,21),(150,200),(300,400),(200,250)
--SELECT * FROM @TBL

DECLARE @LVL INT=1

--UPDATE VALUES WHICH DON'T HAVE A MATCH
UPDATE T
SET GRP = -1
FROM @TBL T
WHERE NOT EXISTS(SELECT 1 FROM @TBL WHERE ID1 = T.ID2)
AND NOT EXISTS(SELECT 1 FROM @TBL WHERE ID2 = T.ID1)

WHILE EXISTS(SELECT 1 FROM @TBL WHERE GRP IS NULL)
BEGIN

--STARTING FROM THE FIRST MATCHING VALUE TRAVERSE THE HIERARCHY TO FIND ALL VALUES FROM RIGHT TO LEFT
;WITH CTE
AS
(
SELECT TOP 1 T1.ID, TAB.ID1, TAB.ID2,@LVL AS LEVEL
FROM @TBL T1
CROSS APPLY(SELECT *
FROM @TBL WHERE T1.ID1=ID2
AND GRP IS NULL
)TAB
ORDER BY ID1

UNION ALL

SELECT T.ID, T.ID1,T.ID2, LEVEL
FROM @TBL T
INNER JOIN CTE C ON C.ID2=T.ID1
)
--TRAVERSE THE HIERARCHY TO FIND ALL VALUES FROM LEFT TO RIGHT
,CTE2
AS
(
SELECT ID,ID1,ID2,LEVEL
FROM CTE T1
WHERE LEVEL=@LVL

UNION ALL

SELECT T.ID,T.ID1,T.ID2, LEVEL
FROM @TBL T
INNER JOIN CTE2 C ON C.ID1=T.ID2
)

--UPDATE THE CHILD ITEMS WITH THE ROOT PARENT LEVEL
UPDATE T1
SET Grp=@LVL
FROM @TBL T1
INNER JOIN CTE2 TAB2 ON T1.ID=TAB2.ID --T1.ID1=TAB2.ID1 AND T1.ID2=TAB2.ID2
WHERE T1.Grp IS NULL

--JUST A CHECK FOR INFINITE LOOP AS EACH ROW SHOULD BE MARKED WITH A GROUP NO.
IF NOT EXISTS(SELECT 1 FROM @TBL WHERE GRP = @LVL)
     BREAK

SET @LVL = @LVL + 1

END

SELECT * FROM @TBL
ORDER BY GRP,ID1


#8

Posting the updated solution for my reference:

IF OBJECT_ID('TEMPDB..#TBL') IS NOT NULL
DROP TABLE #TBL

--Source
CREATE TABLE #TBL (ID INT IDENTITY(1,1) PRIMARY KEY, ID1 INT, ID2 INT, Grp INT)
INSERT INTO #TBL (ID1,ID2)VALUES (100,110),(20,30),(9,10),(7,8),(8,9),(2,3),(10,20),(3,4),(1,2),(21,1),(4,22),(22,23),
(40,50),(50,60),(3,20),(60,70),(70,80),(5,21),(150,200),(300,400),(200,250),(9,121),(1,1000)
--SELECT * FROM #TBL

DECLARE @LVL INT=1

--CREATE INDEXES-----------------------------------------------------
--ALTER TABLE #TBL ADD CONSTRAINT PK_TMP_CHILD PRIMARY KEY(ID)

CREATE NONCLUSTERED INDEX NCI_TMP_Child ON #TBL (ID1) 
INCLUDE (ID2, GRP)	

/*
CREATE NONCLUSTERED INDEX NCI_TMP_CHILD_COMPOSITE
ON #TBL ([ID2],[GRP]) INCLUDE(ID1)

CREATE NONCLUSTERED INDEX NCI_TMP_CHILD_GRP
ON #TBL ([GRP])
INCLUDE ([ID1],ID2)
*/
-----------------------------------------------------------------------

--UPDATE VALUES WHICH DON'T HAVE A MATCH
UPDATE T
SET GRP = -1
FROM #TBL T
WHERE NOT EXISTS(SELECT 1 FROM #TBL WHERE ID1 = T.ID2)
AND NOT EXISTS(SELECT 1 FROM #TBL WHERE ID2 = T.ID1)

DECLARE @Rowcount INT = 1
DECLARE @CurrentRowcount INT

WHILE EXISTS(SELECT 1 FROM #TBL WHERE GRP IS NULL)
BEGIN

WHILE @Rowcount > 0
BEGIN

UPDATE T1
	SET GRP = T2.GRP
	FROM #TBL T1
		 INNER JOIN #TBL T2 ON T1.ID1=T2.ID1
	WHERE T1.GRP IS NULL
			 AND T2.GRP IS NOT NULL

	SET @Rowcount = @@rowcount

	UPDATE T1
	SET GRP = T2.GRP
	FROM #TBL T1
		 INNER JOIN #TBL T2 ON T1.ID1=T2.ID2
	WHERE T1.GRP IS NULL
		  AND T2.GRP IS NOT NULL
	
	SET @CurrentRowcount = @@rowcount

	IF @Rowcount =0
		SET @Rowcount = @CurrentRowcount

	UPDATE T1
	SET GRP = T2.GRP
	FROM #TBL T1
		 INNER JOIN #TBL T2 ON T1.ID2=T2.ID1									 
	WHERE T1.GRP IS NULL
		  AND T2.GRP IS NOT NULL

	SET @CurrentRowcount = @@rowcount

	IF @Rowcount =0
		SET @Rowcount = @CurrentRowcount

	UPDATE T1
	SET GRP = T2.GRP
	FROM #TBL T1
		 INNER JOIN #TBL T2 ON T1.ID2=T2.ID2									 
	WHERE T1.GRP IS NULL
  		 AND T2.GRP IS NOT NULL

	SET @CurrentRowcount = @@rowcount

	IF @Rowcount =0
		SET @Rowcount = @CurrentRowcount
END

SET @Rowcount = 1

--STARTING FROM THE FIRST MATCHING VALUE TRAVERSE THE HIERARCHY TO FIND ALL VALUES FROM RIGHT TO LEFT
;WITH CTE
AS
(
SELECT TOP 1 T1.ID, TAB.ID1, TAB.ID2,@LVL AS LEVEL
FROM #TBL T1
CROSS APPLY(SELECT *
FROM #TBL WHERE T1.ID1=ID2
AND GRP IS NULL
)TAB
WHERE T1.GRP IS NULL
ORDER BY ID1

UNION ALL

SELECT T.ID, T.ID1,T.ID2, LEVEL
FROM #TBL T
INNER JOIN CTE C ON C.ID2=T.ID1
)
--TRAVERSE THE HIERARCHY TO FIND ALL VALUES FROM LEFT TO RIGHT
,CTE2
AS
(
SELECT ID,ID1,ID2,LEVEL
FROM CTE T1
WHERE LEVEL=@LVL

UNION ALL

SELECT T.ID,T.ID1,T.ID2, LEVEL
FROM #TBL T
INNER JOIN CTE2 C ON C.ID1=T.ID2
)

--UPDATE THE CHILD ITEMS WITH THE ROOT PARENT LEVEL
UPDATE T1
SET Grp=@LVL
FROM #TBL T1
INNER JOIN CTE2 TAB2 ON T1.ID=TAB2.ID --T1.ID1=TAB2.ID1 AND T1.ID2=TAB2.ID2
WHERE T1.Grp IS NULL

   --JUST A CHECK FOR INFINITE LOOP AS EACH ROW SHOULD BE MARKED WITH A GROUP NO.
IF NOT EXISTS(SELECT 1 FROM #TBL WHERE GRP = @LVL)
   BREAK

SET @LVL = @LVL + 1

END

--UPDATE GROUP NUMBER FOR VALUES WHICH DON'T HAVE A MATCH
UPDATE #TBL
SET @LVL = Grp = @LVL + 1
WHERE GRP = -1

SELECT ID1,ID2,GRP FROM #TBL
--WHERE GRP>0
ORDER BY GRP,ID1,ID2