--note that you will need this function to run the script --source: https://stackoverflow.com/questions/2647/how-do-i-split-a-string-so-i-can-access-item-x /* CREATE FUNCTION [dbo].[fnSplitString] ( @string NVARCHAR(MAX), @delimiter CHAR(1) ) RETURNS @output TABLE(splitdata NVARCHAR(MAX) ) BEGIN DECLARE @start INT, @end INT SELECT @start = 1, @end = CHARINDEX(@delimiter, @string) WHILE @start < LEN(@string) + 1 BEGIN IF @end = 0 SET @end = LEN(@string) + 1 INSERT INTO @output (splitdata) VALUES(SUBSTRING(@string, @start, @end - @start)) SET @start = @end + 1 SET @end = CHARINDEX(@delimiter, @string, @start) END RETURN END */ --import text file with semi-cleaned script data drop table dbo.moana_raw; create table dbo.moana_raw ( script_data varchar(max) ); BULK INSERT dbo.moana_raw FROM '[enter your own file path]\script.txt' WITH ( FIELDTERMINATOR = '\t', ROWTERMINATOR = '\r', FIRSTROW = 1 ); -- add record id alter table dbo.moana_raw add parent_line_id int identity(1,1); -- parse out character name and line portion of script text DROP TABLE dbo.moana_parse1; select parent_line_id, script_data , CHARINDEX(':',script_data,1) as colon_loc , CASE WHEN CHARINDEX(':',script_data,1) = 0 THEN NULL ELSE SUBSTRING(script_data,1,CHARINDEX(':',script_data,1)-1) END AS character_name , CASE WHEN CHARINDEX(':',script_data,1) = 0 THEN script_data ELSE SUBSTRING(script_data,CHARINDEX(':',script_data,1)+1,LEN(script_data)) END AS character_line into dbo.moana_parse1 from dbo.moana_raw where CHARINDEX('(',LEFT(script_data,2),1)=0 order by parent_line_id -- this will copy down the data for character name -- source: https://stackoverflow.com/questions/1345065/sql-query-replace-null-value-in-a-row-with-a-value-from-the-previous-known-value declare @n varchar(150) update dbo.moana_parse1 set @n = coalesce(character_name,@n), character_name = coalesce(character_name,@n) -- next we parse lines based on punctuation -- first . drop table dbo.moana_parse2 select a.parent_line_id, a.script_data, a.character_name, splitdata as character_line into dbo.moana_parse2 from dbo.moana_parse1 a cross apply dbo.fnSplitString(a.character_line,'.') as t -- next ! drop table dbo.moana_parse3 select a.parent_line_id, a.script_data, a.character_name, splitdata as character_line into dbo.moana_parse3 from dbo.moana_parse2 a cross apply dbo.fnSplitString(a.character_line,'!') as t -- next / drop table dbo.moana_parse4 select a.parent_line_id, a.script_data, a.character_name, splitdata as character_line into dbo.moana_parse4 from dbo.moana_parse3 a cross apply dbo.fnSplitString(a.character_line,'/') as t -- last ? drop table dbo.moana_lines select a.parent_line_id, a.script_data, a.character_name, splitdata as character_line into dbo.moana_lines from dbo.moana_parse4 a cross apply dbo.fnSplitString(a.character_line,'?') as t -- add line id alter table dbo.moana_lines add line_id int identity(1,1); -- create final table drop table dbo.moana_lines_final select line_id, character_name, character_line into dbo.moana_lines_final from dbo.moana_lines where character_line <> ' ' -- now that we have lines we can parse words the same way drop table dbo.moana_words select a.line_id, REPLACE(LTRIM(RTRIM(splitdata)),' ','') as word into dbo.moana_words from dbo.moana_lines a cross apply dbo.fnSplitString(a.character_line,' ') as t where splitdata <> ' '; --done!