--note that you will need this function to run the script
--source: https://stackoverflow.com/questions/2647/how-do-i-split-a-string-so-i-can-access-item-x
/*
CREATE FUNCTION [dbo].[fnSplitString] 
( 
    @string NVARCHAR(MAX), 
    @delimiter CHAR(1) 
) 
RETURNS @output TABLE(splitdata NVARCHAR(MAX) 
) 
BEGIN 
    DECLARE @start INT, @end INT 
    SELECT @start = 1, @end = CHARINDEX(@delimiter, @string) 
    WHILE @start < LEN(@string) + 1 BEGIN 
        IF @end = 0  
            SET @end = LEN(@string) + 1
       
        INSERT INTO @output (splitdata)  
        VALUES(SUBSTRING(@string, @start, @end - @start)) 
        SET @start = @end + 1 
        SET @end = CHARINDEX(@delimiter, @string, @start)
        
    END 
    RETURN 
END
*/

--import text file with semi-cleaned script data
drop table dbo.moana_raw;

create table dbo.moana_raw
(
	script_data varchar(max)
);

BULK INSERT dbo.moana_raw
FROM '[enter your own file path]\script.txt'
WITH
(
  FIELDTERMINATOR = '\t',
  ROWTERMINATOR = '\r',
  FIRSTROW = 1
);

-- add record id
alter table dbo.moana_raw add parent_line_id int identity(1,1);

-- parse out character name and line portion of script text
DROP TABLE dbo.moana_parse1;

select	parent_line_id, script_data
	,	CHARINDEX(':',script_data,1) as colon_loc
	,	CASE WHEN CHARINDEX(':',script_data,1) = 0 THEN NULL ELSE SUBSTRING(script_data,1,CHARINDEX(':',script_data,1)-1) END AS character_name
	,	CASE WHEN CHARINDEX(':',script_data,1) = 0 THEN script_data ELSE SUBSTRING(script_data,CHARINDEX(':',script_data,1)+1,LEN(script_data)) END AS character_line
into dbo.moana_parse1
from dbo.moana_raw
where CHARINDEX('(',LEFT(script_data,2),1)=0
order by parent_line_id

-- this will copy down the data for character name
-- source: https://stackoverflow.com/questions/1345065/sql-query-replace-null-value-in-a-row-with-a-value-from-the-previous-known-value
declare @n varchar(150)
update dbo.moana_parse1
set 
	@n = coalesce(character_name,@n),
	character_name = coalesce(character_name,@n)

-- next we parse lines based on punctuation
-- first .
drop table dbo.moana_parse2
select a.parent_line_id, a.script_data, a.character_name, splitdata as character_line
into dbo.moana_parse2
from dbo.moana_parse1 a
cross apply dbo.fnSplitString(a.character_line,'.') as t

-- next !
drop table dbo.moana_parse3
select a.parent_line_id, a.script_data, a.character_name, splitdata as character_line
into dbo.moana_parse3
from dbo.moana_parse2 a
cross apply dbo.fnSplitString(a.character_line,'!') as t

-- next /
drop table dbo.moana_parse4
select a.parent_line_id, a.script_data, a.character_name, splitdata  as character_line
into dbo.moana_parse4
from dbo.moana_parse3 a
cross apply dbo.fnSplitString(a.character_line,'/') as t

-- last ?
drop table dbo.moana_lines
select a.parent_line_id, a.script_data, a.character_name, splitdata  as character_line
into dbo.moana_lines
from dbo.moana_parse4 a
cross apply dbo.fnSplitString(a.character_line,'?') as t

-- add line id
alter table dbo.moana_lines add line_id int identity(1,1);

-- create final table
drop table dbo.moana_lines_final
select line_id, character_name, character_line
into dbo.moana_lines_final
from dbo.moana_lines
where character_line <> ' '

-- now that we have lines we can parse words the same way
drop table dbo.moana_words
select a.line_id, REPLACE(LTRIM(RTRIM(splitdata)),' ','') as word
into dbo.moana_words
from dbo.moana_lines a
cross apply dbo.fnSplitString(a.character_line,' ') as t
where splitdata <> ' ';

--done!