#!/bin/ksh

######################################################################
# Program:	txt2vtt
# Purpose:	Create a VTT subtitle file from a text file
# Author:	Perette Barella
# $Id: txt2vtt 103 2025-09-28 01:32:17Z perette $
#---------------------------------------------------------------------




######################################################################
# Function:	usage
# Purpose:	Writes the usage text to stderr and exits with status 1.
#		Reads the global arg0 for the program name.
# Author:	Perette Barella
#---------------------------------------------------------------------
function usage {
	# stdout carries the generated VTT and is normally redirected to a
	# file, so the help text goes to stderr where the user will see it.
	cat 1>&2 << EOF
Usage: $arg0 <duration> <text_file>
Duration is expressed as mins:ss

text_file contains the script, with each line containing one "flash" of
subtitle text.  Subtitles are inserted into the video with duration
based on their length, and a small pause between each.  Timing
must be refined with a subtitle editor such as Subtitle Composer.

Lines that start with a word ending with a colon are treated as a
character name.  They are removed from the subtitle.  Subtitles
are attributed to that character until a new character name is
encountered.

If duration is "chapter" then the input is expected to be a YouTube-style
chapter list (m:ss) and is formatted into a VTT chapter file.
EOF

	exit 1
}
##### End of function usage #####








# Convert a m:ss or h:mm:ss timestamp to a whole number of seconds.
#   $1       the timestamp; minutes (or hours) may have any number of
#            digits, the remaining fields must be two digits, 00-59
#   stdout   the number of seconds
#   returns  0 on success, 1 (printing nothing) if $1 is malformed
function time_to_seconds {
	typeset -r stamp="$1"
	# Declared local so the caller's variables are never overwritten.
	typeset hours=0 mins secs rest

	if [[ "$stamp" == +([0-9]):[0-5][0-9]:[0-5][0-9] ]]
	then
		hours="${stamp%%:*}"
		rest="${stamp#*:}"
	elif [[ "$stamp" == +([0-9]):[0-5][0-9] ]]
	then
		rest="$stamp"
	else
		return 1
	fi
	mins="${rest%:*}"
	secs="${rest#*:}"

	# 10# pins each field to decimal so that zero-padded values such as
	# "08" can never be read as (invalid) octal.
	printf '%d\n' $(( 10#$hours * 3600 + 10#$mins * 60 + 10#$secs ))
	return 0
}

# Format a time as a WebVTT timestamp, HH:MM:SS.mmm.
#   $1       time in seconds; may carry a fractional part
#   stdout   the formatted timestamp
#   returns  0 on success, 1 if printf rejects $1 as a number
function format_time {
	typeset stamp whole millis

	# Let printf round to the nearest millisecond.  Truncating
	# (time - seconds) * 1000 instead loses a millisecond whenever binary
	# floating point stores the value a hair low (1.001 -> .000), and
	# printf also carries .9995 and above into the next second.
	stamp=$(printf '%.3f' "$1") || return 1
	whole="${stamp%.*}"
	millis="${stamp#*.}"

	# millis is printed as a string: it is already exactly three digits,
	# and %d would read a value like "008" as octal.
	printf '%02d:%02d:%02d.%s\n' \
		$(( whole / 3600 )) \
		$(( (whole % 3600) / 60 )) \
		$(( whole % 60 )) \
		"$millis"
	return 0
}


# Write one WebVTT cue to stdout: the timing line, the cue text, and the
# blank line that ends the cue.
#   $1       start time in seconds
#   $2       end time in seconds
#   $3       cue text, written exactly as given
#   returns  non-zero if either time cannot be formatted
function format_vtt_entry {
	# Declared separately from the assignments so that a failure in
	# format_time is not hidden by typeset's own exit status.
	typeset start end
	start=$(format_time "$1") || return 1
	end=$(format_time "$2") || return 1

	printf '%s --> %s\n' "$start" "$end"
	# %s writes the text untouched; a plain "print" would interpret
	# backslash sequences in the subtitle (\n, \t, and \c, which cuts
	# the output short).
	printf '%s\n\n' "$3"
}

# Turn a chapter list read from stdin into a WebVTT chapter file on stdout.
#
#   stdin    one chapter per line: a timestamp accepted by time_to_seconds,
#            then the chapter title
#   stdout   the "WEBVTT" header followed by one cue per chapter; each
#            chapter ends one second before the next one starts
#   stderr   one message, with its line number, per line whose timestamp
#            is rejected; such lines are skipped
#   returns  0 if every line was valid and at least one chapter was found,
#            1 otherwise
function generate_chapter_vtt {
	typeset stamp description chapter parsed
	typeset -i line=0 result=0 start=-1	# start < 0: no chapter seen yet

	printf 'WEBVTT\n\n'

	# The "|| [[ -n ... ]]" keeps a final line that has no trailing newline.
	while read -r stamp description || [[ -n "$stamp" ]]
	do
		(( line += 1 ))
		# Parsed into its own variable so that a rejected line cannot
		# disturb the start time of the chapter already in progress.
		if ! parsed=$(time_to_seconds "$stamp")
		then
			printf 'Line %d: Invalid time: %s\n' "$line" "$stamp" 1>&2
			result=1
			continue
		fi

		# A chapter can only be written once the next one's start is known.
		if (( start >= 0 ))
		then
			format_vtt_entry "$start" "$(( parsed - 1 ))" "$chapter"
		fi
		start=$parsed
		chapter="$description"
	done

	if (( start < 0 ))
	then
		printf 'No chapter information found.\n' 1>&2
		return 1
	fi

	# NOTE(review): the length of the video is not available here, so the
	# last chapter has no real end time.  The end written below is one
	# second BEFORE its start, which is what this function has always
	# emitted; it must be corrected by hand (or this needs a duration).
	format_vtt_entry "$start" "$(( start - 1 ))" "$chapter"
	return $result
}


##### Start of main #####

# Program name shown in the usage message.
arg0="${0##*/}"

(( $# != 2 )) && usage
if [[ ! -f "$2" ]]
then
	# Diagnostics go to stderr; stdout is reserved for the VTT output.
	print -r -- "$2: File not found." 1>&2
	exit 1
fi

# Chapter mode: the file is a chapter list rather than a script.
if [[ "$1" == "chapter" || "$1" == "chapters" ]]
then
	generate_chapter_vtt < "$2"
	exit $?
fi

if ! duration=$(time_to_seconds "$1")
then
	print -r -- "$1: Not a valid duration." 1>&2
	usage
fi

# The duration is shared out evenly: one share per word, plus one share per
# line for the pause between subtitles.  The "+ 0.0" forces floating-point
# division.
words=$(wc -w < "$2")
lines=$(wc -l < "$2")
time_per_word=$((duration / (words + lines + 0.0) ))

print "WEBVTT"
print

sequence=0		# cue identifier, counted from 0
time=0			# running clock, in seconds
who="narrator"		# current speaker, until a "name:" prefix changes it

# Each line is split into words with an unquoted expansion below.  Turn off
# filename generation first, otherwise subtitle text containing *, ? or [
# would be replaced by matching file names from the current directory.
set -f

# The "|| [[ -n ... ]]" keeps a final line that has no trailing newline.
while read -r line || [[ -n "$line" ]]
do
	# Every line, blank or not, costs one share: the inter-subtitle pause.
	let "time += time_per_word"
	[[ -z "$line" ]] && continue
	set -- $line
	if [[ "$1" == +([a-zA-Z]): ]]
	then
		# Leading "name:" selects the speaker and is dropped from the text.
		# It was counted by wc -w, so it still uses up its share of time.
		who="$(print -r -- "${1%:}" | tr A-Z a-z)"
		shift
		let "time += time_per_word"
	fi
	# A line holding nothing but a character name has no text to show.
	(( $# == 0 )) && continue

	print "$sequence"
	let "sequence = sequence + 1"
	start="$time"
	let "time += $# * time_per_word"
	format_vtt_entry "$start" "$time" "<v $who>$*"
done < "$2"


##### End of main #####

