I have a task of taking data from a field and parsing it out into different fields.The original field is called theText and I need to parse the content in it into diffeent fields.
theText = 2019-05-19_123 Main Street_DOE, John_ABC123_DOE, Jane_part 1.pdf
As you can see above the content is delimited by '_' and contans a file ext at the end. I am trying to find a way of extracting each element as such
2019-05-19 = DATE
123 Main Street = SUBJECT
DOE, John = TO
ABC123 = CODE
DOE, Jane = FROM
Part 1 = Attribute
.pdf = EXT
declare @t table(txt varchar(100))
insert into @t(txt) select '2019-05-19_123 Main Street_DOE, John_ABC123_DOE, Jane_part 1.pdf'
select *
from @t
cross apply
(
select
[1] As [Date],
[2] As Subject,
[3] As [To],
[4] As [Code],
[5] As [From],
Left([6],CI.Period - 1) As Attribute,
SUBSTRING([6],Period, Len([6])) As Extension
from DelimitedSplit8K(txt,'_')
pivot
(
max(item) For ItemNumber In ([1],[2],[3],[4],[5],[6])
) PVT
outer apply (Select CHARINDEX('.',[6],1)) CI(PERIOD)
) X
When I tried downloading the code to create he function, and I run the command, I get a Msg 102 Level 15 STate 1, Procedure DelimitedSplit8K_Lead, Line 8
Incorrect syntax near ' ; '.
CREATE FUNCTION [dbo].[DelimitedSplit8K_LEAD]
--===== Define I/O parameters
(@pString VARCHAR(8000), @pDelimiter CHAR(1))
RETURNS TABLE WITH SCHEMABINDING AS
RETURN
--===== "Inline" CTE Driven "Tally Table” produces values from 0 up to 10,000...
-- enough to cover VARCHAR(8000)
; WITH E1(N) AS (
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
), --10E+1 or 10 rows
E2(N) AS (SELECT 1 FROM E1 a, E1 b), --10E+2 or 100 rows
E4(N) AS (SELECT 1 FROM E2 a, E2 b), --10E+4 or 10,000 rows max
cteTally(N) AS (--==== This provides the "zero base" and limits the number of rows right up front
-- for both a performance gain and prevention of accidental "overruns"
SELECT 0 UNION ALL
SELECT TOP (DATALENGTH(ISNULL(@pString,1))) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4
),
cteStart(N1) AS (--==== This returns N+1 (starting position of each "element" just once for each delimiter)
SELECT t.N+1
FROM cteTally t
WHERE (SUBSTRING(@pString,t.N,1) = @pDelimiter OR t.N = 0)
)
--===== Do the actual split. The ISNULL/NULLIF combo handles the length for the final element when no delimiter is found.
SELECT ItemNumber = ROW_NUMBER() OVER(ORDER BY s.N1),
Item = SUBSTRING(@pString,s.N1,ISNULL(NULLIF((LEAD(s.N1,1,1) OVER (ORDER BY s.N1) - 1),0)-s.N1,8000))
FROM cteStart s
;
GO
Select theText
from #TXT
cross apply
(
select
[1] As [Date],
[2] As Subject,
[3] As [To],
[4] As [Code],
[5] As [From],
Left([6],CI.Period - 1) As Attribute,
SUBSTRING([6],Period, Len([6])) As Extension
from dbo.DelimitedSplit8K_LEAD(theText,'_')
pivot
(
max(item) For ItemNumber In ([1],[2],[3],[4],[5],[6])
) PVT
outer apply (Select CHARINDEX('.',[6],1)) CI(PERIOD)
) X
I get only 1 column in the result, the column is the original theText column that is delimited and not parsed.
You've started the query with "Select theText from" so that's the only column coming back, replace it with "Select [Date], Subject, [To], [Code], [From], Attribute, Extension From"
Drop table #txt
Select *
into #TXT
from main_text
where text_type_id=8
Select [Date], Subject, [To], [Code], [From], Attribute, Extension
From #TXT
cross apply
(
select
[1] As [Date],
[2] As Subject,
[3] As [To],
[4] As [Code],
[5] As [From],
Left([6],CI.Period - 1) As Attribute,
SUBSTRING([6],Period, Len([6])) As Extension
from dbo.DelimitedSplit8K(theText,'_')
pivot
(
max(item) For ItemNumber In ([1],[2],[3],[4],[5],[6])
) PVT
outer apply (Select CHARINDEX('.',[6],1)) CI(PERIOD)
I get the following message:
Msg 537, Level 16, State 2, Line 2
Invalid length parameter passed to the LEFT or SUBSTRING function.