I have a large table @tbl, I want to find out co-relation between each cid available in @tbl the formula is provided by client and we have done a sample testing for 2-3 cids and values are fine but when we tried to execute this on the whole dataset this is taking more time.
Is there any way to change query because I am using triangular join to find values of each cid to another cid on the basis of currency and mdate.
please suggest.
DECLARE @tbl TABLE
(CID int NOT NULL,
MDATE date NOT NULL,
CURRENCY char(3) NOT NULL,
LOG_VAL float,
PRIMARY KEY(CID,CURRENCY,MDATE )
);
insert into @tbl
values (19617,'2016-12-07','USD',0.0269613952992653),
(19617,'2016-12-08','USD',-0.0215989790114737),
(19617,'2016-12-09','USD',-0.00354231630415585),
(19617,'2016-12-12','USD',0.0181775886282026),
(20114,'2016-12-07','USD',0.0126117755008134),
(20114,'2016-12-08','USD',0.000804521846928715),
(20114,'2016-12-09','USD',0.0137173496839721),
(20114,'2016-12-12','USD',0.0227472087663449),
(19458,'2016-12-07','USD',0.0126117755008134),
(19458,'2016-12-08','USD',0.000804521846928715),
(19458,'2016-12-09','USD',0.0137173496839721),
(19458,'2016-12-12','USD',0.0227472087663449)
SELECT b.CID, C.CID
,(COUNT(*)*SUM(b.LOG_VAL*c.LOG_VAL) - SUM(b.LOG_VAL) * SUM(c.LOG_VAL))
/ (
SQRT(COUNT(*) * SUM(b.LOG_VAL*b.LOG_VAL) - SUM(b.LOG_VAL) * SUM(b.LOG_VAL))
* SQRT(COUNT(*) * SUM(c.LOG_VAL*c.LOG_VAL) - SUM(c.LOG_VAL) * SUM(c.LOG_VAL))
)AS correl
FROM @tbl B
JOIN @tbl AS C
ON b.CID <> c.CID
AND b.CURRENCY = c.CURRENCY
AND b.MDATE = c.MDATE
GROUP BY b.CID, C.CID