I need to find the start and end times for each location from my Locations table.
The script we are currently using excludes any UserID's that are only on their first location and station and it also excludes any NULL end times. I have added two bits of code to my script that remove any invalid end date times from the data set and another to add in any missing rows but there must be a better way to do this.
Here is my expected output:
Here is the script:
DROP TABLE Locations;
CREATE TABLE Locations(
[UserID] int,
[LocationID] CHAR(1),
[StationID] VARCHAR(10),
[StationStartDateTime] DATETIME,
[StationEndDateTime] DATETIME);
INSERT INTO Locations VALUES (1, 'A', 'GREEN', '14/08/2017 17:36', '15/08/2017 10:44');
INSERT INTO Locations VALUES (1, 'B', 'BLUE', '15/08/2017 10:44', '16/08/2017 14:16');
INSERT INTO Locations VALUES (1, 'C', 'PURPLE', '16/08/2017 14:16', '17/08/2017 08:33');
INSERT INTO Locations VALUES (1, 'C', 'YELLOW', '17/08/2017 08:33', '18/08/2017 15:48');
INSERT INTO Locations VALUES (1, 'C', 'PURPLE', '18/08/2017 15:48', '21/08/2017 16:22');
INSERT INTO Locations VALUES (1, 'B', 'BLUE', '21/08/2017 16:22', '29/08/2017 08:34');
INSERT INTO Locations VALUES (1, 'B', 'ORANGE', '29/08/2017 08:34', '29/08/2017 13:58');
INSERT INTO Locations VALUES (1, 'C', 'PURPLE', '29/08/2017 13:58', '29/08/2017 14:00');
INSERT INTO Locations VALUES (1, 'C', 'YELLOW', '29/08/2017 14:00', '29/08/2017 14:01');
INSERT INTO Locations VALUES (1, 'C', 'PURPLE', '29/08/2017 14:01', '11/09/2017 08:24');
INSERT INTO Locations VALUES (1, 'C', 'MAGENTA', '11/09/2017 08:24', '25/09/2017 08:37');
INSERT INTO Locations VALUES (1, 'C', 'PURPLE', '25/09/2017 08:37', '09/10/2017 08:31');
INSERT INTO Locations VALUES (1, 'C', 'YELLOW', '09/10/2017 08:31', '23/10/2017 08:42');
INSERT INTO Locations VALUES (1, 'C', 'PURPLE', '23/10/2017 08:42', '06/11/2017 08:26');
INSERT INTO Locations VALUES (1, 'C', 'YELLOW', '06/11/2017 08:26', '20/11/2017 08:08');
INSERT INTO Locations VALUES (1, 'C', 'PURPLE', '20/11/2017 08:08', '04/12/2017 08:04');
INSERT INTO Locations VALUES (1, 'C', 'YELLOW', '04/12/2017 08:04', '18/12/2017 08:05');
INSERT INTO Locations VALUES (1, 'C', 'PURPLE', '18/12/2017 08:05', '02/01/2018 07:44');
INSERT INTO Locations VALUES (1, 'C', 'YELLOW', '02/01/2018 07:44', '15/01/2018 08:18');
INSERT INTO Locations VALUES (1, 'C', 'PURPLE', '15/01/2018 08:18', NULL);
INSERT INTO Locations VALUES (2, 'F', 'RED', '11/01/2018 23:25', NULL);
INSERT INTO Locations VALUES (3, 'X', 'BLUE', '07/01/2018 05:52', '08/01/2018 20:21');
INSERT INTO Locations VALUES (3, 'D', 'PINK', '08/01/2018 20:21', '08/01/2018 20:41');
INSERT INTO Locations VALUES (3, 'I', 'GREEN', '08/01/2018 20:41', '08/01/2018 21:10');
INSERT INTO Locations VALUES (3, 'I', 'BLUE', '08/01/2018 21:10', '12/01/2018 21:00');
INSERT INTO Locations VALUES (3, 'J', 'PURPLE', '12/01/2018 21:00', '12/01/2018 21:47');
INSERT INTO Locations VALUES (3, 'J', 'ORANGE', '12/01/2018 21:47', '13/01/2018 08:05');
WITH tstrt
AS (
SELECT t1.UserID
,t1.LocationID
,t1.StationStartDateTime
FROM Locations t1
WHERE NOT EXISTS (
SELECT *
FROM Locations t2
WHERE t1.UserID = t2.UserID
AND t1.LocationID = t2.LocationID
AND t2.StationEndDateTime = t1.StationStartDateTime
)
)
,tend
AS (
SELECT t3.UserID
,t3.LocationID
,t3.StationEndDateTime
FROM Locations t3
WHERE NOT EXISTS (
SELECT *
FROM Locations t2
WHERE t3.UserID = t2.UserID
AND t3.LocationID = t2.LocationID
AND t3.StationEndDateTime = t2.StationStartDateTime
)
)
SELECT tstrt.UserID
,LEFT(tstrt.LocationID, 5)
,tstrt.StationStartDateTime
,tend.StationEndDateTime
,ROW_NUMBER() OVER (
PARTITION BY tstrt.UserID ORDER BY tstrt.UserID
,tstrt.StationStartDateTime
) AS SeqID
FROM tstrt
INNER JOIN tend ON tstrt.UserID = tend.UserID
AND tstrt.LocationID = tend.LocationID
AND tend.StationEndDateTime = (
SELECT min(t2.StationEndDateTime)
FROM tend t2
WHERE t2.UserID = tstrt.UserID
AND t2.LocationID = tstrt.LocationID
AND t2.StationEndDateTime > tstrt.StationStartDateTime
);