#!/bin/ksh
######################################################################
# Purpose:	web2html - make a .html from a .web, then validate it
#               - upon m4 error, delete the .html file
#               - otherwise, run HTML through HTML Tidy to clean it up.
#               - If Tidy gives warnings, delete .html but leave
#                 a .html.mid for examination
#               - If Tidy is good, run results through Validator.nu.
# Copyright:	Copyright 1997-2026 Perette Barella.
#		All rights reserved.
######################################################################
# Revision-control keyword string; spliced into the first line of the
# getopts USAGE text further down.
VERSION='$Id: web2html 303 2026-03-18 16:14:13Z perette $'

# Korn shell/zsh require
# Author: Perette Barella
# Copyright 2018 Devious Fish.  All rights reserved.
# $Id: require 61 2021-08-09 18:49:07Z perette $

# require - verify that external commands can be found on PATH.
# Arguments: one or more words of the form "command" or "command:package".
#            The text before the last colon is looked up; the text after
#            the first colon is the name shown in the error message.
# Outputs:   one "not found" line on stderr per missing command.
# Returns:   0 when everything was found; otherwise exits the shell with
#            status 1 after all requirements have been checked.
function require {
	typeset spec
	typeset -i missing=0
	for spec in "$@"
	do
		# Found on PATH: nothing to report for this one.
		whence -p "${spec%:*}" >/dev/null 2>&1 && continue
		print -- "$arg0${arg0+: }${spec#*:} not found; please install." 1>&2
		missing=1
	done
	if ((missing != 0))
	then
		exit $missing
	fi
	return 0
}

# Functions for colored messages/logging.
# Author: Perette Barella 
# Copyright 2026 Devious Fish.  All rights reserved.
# $Id: colored_logging 123 2026-02-10 21:53:03Z perette $  

# important - print the arguments as one line, wrapped in the bold and
# reset sequences (both empty when colored_logging has disabled color).
function important {
	typeset text="$*"
	print -r -- "${TPUT_BOLD}${text}${TPUT_RESET}"
}

# warning - print the arguments as one line in the warning color, bold.
function warning {
	typeset text="$*"
	print -r -- "${TPUT_WARN}${TPUT_BOLD}${text}${TPUT_RESET}"
}
        
# success - print the arguments as one line in the "good" color, bold.
# A leading -h argument is consumed and emits an extra bold sequence
# (without a newline) ahead of the message.
function success {
	if [ "$1" = "-h" ]
	then
		shift
		print -n -- "$TPUT_BOLD"
	fi
	print -r -- "${TPUT_GOOD}${TPUT_BOLD}$*${TPUT_RESET}"
}
        
# failure - print the arguments as one line in the "bad" color, bold.
# A leading -h argument is consumed and emits an extra bold sequence
# (without a newline) ahead of the message.
function failure {
	if [ "$1" = "-h" ]
	then
		shift
		print -n -- "$TPUT_BOLD"
	fi
	print -r -- "${TPUT_BAD}${TPUT_BOLD}$*${TPUT_RESET}"
}


# colored_logging - choose the escape sequences used by the message helpers.
# Arguments: $1 - "true" to allow color; any other value disables it.
# Globals:   sets TPUT_BAD, TPUT_GOOD, TPUT_WARN, TPUT_BOLD and TPUT_RESET
#            from tput when color is allowed AND stdout is a terminal;
#            unsets all five otherwise.
function colored_logging {
	if [[ "$1" != "true" || ! -t 1 ]]
	then
		unset TPUT_BAD TPUT_GOOD TPUT_WARN TPUT_BOLD TPUT_RESET
		return
	fi
	TPUT_BAD=$(tput setaf 1)
	TPUT_GOOD=$(tput setaf 2)
	TPUT_WARN=$(tput setaf 3)
	TPUT_BOLD=$(tput bold)
	TPUT_RESET=$(tput sgr0)
}

# Validate that ksh supports the modern/extended getopts format.
# Author: Perette Barella 
# Copyright 2018 Devious Fish.  All rights reserved.
# $Id: modern_ksh_check 19 2018-07-28 23:40:39Z perette $  


# modern_ksh_check - refuse to run under a shell whose getopts does not
# understand the extended option-specification syntax.
# The probe parses "--abc" against a spec that maps the long name "abc"
# to the value 12; a shell that handles it reports "012".
# Returns 0 when the probe succeeds; otherwise prints a message on stderr
# and exits the shell with status 1.
function modern_ksh_check {
	typeset probe
	# Run in a command substitution so $flag does not leak into the caller.
	probe=$(getopts '[-][12:abc]' flag --abc; print -- 0$flag)
	[[ "$probe" == "012" ]] && return 0
	print -- "$arg0${arg0+: }Outdated Korn shell." 1>&2
	exit 1
}
# dir_name - dirname implemented as a shell function.
# Author: Perette Barella
# Copyright 2020 Devious Fish.  All rights reserved.
# Why: In testing, performed >> 10x faster than external program.
# $Id$

# dir_name - print the directory portion of a path.
# Arguments: $1 - a path
# Outputs:   everything before the last "/"; "." when the path contains
#            no "/"; "/" when the only "/" is the leading one (previously
#            this case printed an empty string).
function dir_name {
	typeset dir
	case "$1" in
	*/*)
		dir=${1%/*}
		# "/name" and "/" strip down to nothing: the directory is the root.
		[[ -z "$dir" ]] && dir="/"
		;;
	*)
		dir="."
		;;
	esac
	print -r -- "$dir"
}

# base_name - basename implemented as a shell function.
# Author: Perette Barella
# Copyright 2020 Devious Fish.  All rights reserved.
# Why: In testing, performed >> 10x faster than external program.
# $Id$

# base_name - print the final component of a path.
# Arguments: $1 - a path
#            $2 - optional suffix to remove from the result; it is matched
#                 as a shell pattern, as before.
# The suffix is applied with a plain parameter expansion.  The earlier
# eval re-parsed $2 as shell code, so a "}" in it corrupted the result
# and a "$(...)" in it was executed.
function base_name {
	typeset base=${1##*/}
	if (( $# == 2 ))
	then
		# $2 is left unquoted on purpose so it still acts as a pattern.
		base=${base%$2}
	fi
	print -r -- "$base"
}

# Check that the WEBBASE environment variable is set and sane,
# and that current directory resides within it.
# Globals:  reads WEBBASE, PWD and arg0; sets nothing.
# Returns:  0 when the current directory is WEBBASE itself or lies below
#           it; otherwise prints a message on stderr and exits with 1.
# The comparison is made on whole path components, so a sibling such as
# /srv/web-old is not mistaken for being inside /srv/web, and a trailing
# slash on WEBBASE is tolerated.
# $Id: check_webbase 229 2026-02-08 22:38:36Z perette $
function check_webbase {
	if [[ -z "$WEBBASE" ]]
	then
		print -- "$arg0: WEBBASE undefined." 1>&2
		exit 1
	fi

	if [[ ! -d "$WEBBASE" ]]
	then
		print -- "$arg0: WEBBASE does not point to a valid directory." 1>&2
		exit 1
	fi

	typeset base="${WEBBASE%/}"
	typeset here="$PWD"
	if [[ "$here" == "$base" || "$here" == "$base"/* ]]
	then
		return 0
	fi

	# $PWD may be a logical path that went through a symbolic link;
	# try again with the path reported by /bin/pwd.
	here=$(/bin/pwd)
	if [[ "$here" == "$base" || "$here" == "$base"/* ]]
	then
		return 0
	fi

	print -- "$arg0: $PWD not in $WEBBASE hierarchy" 1>&2
	exit 1
}


# Calculate the paths we need to pass to M4.
# Globals:  reads WEBBASE and PWD.
#           sets DIR_OFFSET  - path from WEBBASE down to the current
#                              directory ("." at the top),
#                DIR_TOP     - relative path from the current directory
#                              back up to WEBBASE ("." at the top),
#                DIR_INCLUDE - $DIR_TOP/Include.
# When $PWD does not lie under WEBBASE (the directory was entered through
# a symbolic link), the path reported by /bin/pwd is used instead, so the
# offset is not sliced out of an unrelated path.
# $Id: calculate_web_paths 239 2026-02-12 03:52:29Z perette $
function calculate_web_paths {
	typeset base="${WEBBASE%/}"
	typeset here="$PWD"
	if [[ "$here" != "$base" && "$here" != "$base"/* ]]
	then
		here=$(/bin/pwd)
	fi

	DIR_OFFSET="${here:${#base}}"
	DIR_OFFSET="${DIR_OFFSET#/}"

	if [[ -z "$DIR_OFFSET" ]]
	then
		DIR_OFFSET="."
		DIR_TOP="."
	else
		# One ".." per path component in DIR_OFFSET, counted with
		# parameter expansion rather than one dirname process per level.
		DIR_TOP=".."
		typeset remaining="$DIR_OFFSET"
		while [[ "$remaining" == */* ]]
		do
			DIR_TOP="../$DIR_TOP"
			remaining="${remaining#*/}"
		done
	fi
	DIR_INCLUDE="$DIR_TOP/Include"
}


# Script name without its directory; used as the prefix of diagnostics.
arg0=$(base_name "$0")

# Both checks exit the script on failure, so they run before anything else.
modern_ksh_check
check_webbase

# External tools.  M4, TIDY, JAVA and VNU may be preset in the environment;
# the values here are only the defaults.
M4="${M4:-m4}"
# Options passed to m4 by compile_web_to_html and extract_data.
M4_OPTS="-P"
TIDY="${TIDY:-tidy}"
JAVA="${JAVA:-java}"
# Location of the Validator.nu jar handed to "$JAVA -jar".
VNU="${VNU:-/usr/local/lib/node_modules/vnu-jar/build/dist/vnu.jar}"

# Exit now if any of the tools is missing from PATH.
require $M4 $TIDY $JAVA mktemp

# Option specification and manual text, in the extended getopts format
# that modern_ksh_check verifies.  It is assembled from ANSI-C quoted
# segments with $VERSION and $arg0 spliced in between them, so the text
# itself must not contain a single quote.
USAGE=$'
[-1?'$VERSION$']
[+NAME?'$arg0$' - compile a .web file into a .html file and validate]
[+DESCRIPTION?\b'$arg0$'\b compiles a M4/HTML web page into a web-ready HTML
document.  The document is subsequently cleaned up and validated with HTML Tidy
and Validator.Nu.]
[c:color?Highlight error messages.  By default, color is enabled for terminals only.]:[enable]
[e:errors-only?Ignore Tidy warnings and only fail on errors.]
[i:indent?Indent HTML neatly rather than minimizing.]
[o:output?Set target filename]:[target]
[q:quote?Quote an extracted value for m4]
[v:validation?Turn validation with Validator.nu \bon\b, \boff\b or do it but \bignore\b the result.]:[switch]
[x:extract?Extract data from a document instead of compiling it.]:[field]
[Z?Enable debugging and trace a macro behavior.  Repeat flag for multiple macros.  When enabled, intermediate files remain after compilation.]:[macro]
[+PREDEFINED MACROS?During processing by M4, the following macros are
predefined:]
{
  [+FILE_SOURCE?The filename of the source file.]
  [+FILE_DEST?The target filename.]
  [+DIR_TOP?A relative path from the current directory to \b$WEBBASE\b.]
  [+DIR_INCLUDE?The path to M4/HTML include files.]
  [+DIR_HERE?The directory path from \b$WEBBASE\b to the current directory.]
  [+_YEAR?The current year.]
}
[+ENVIRONMENT]
{
  [+WEBBASE?Indicates the "top" of the web development area, which usually corresponds to the root or home of the website.]
  [+M4?The m4 command to run.  Defaults to \bm4\b.]
  [+TIDY?The HTML Tidy command to run.  Defaults to \btidy\b.]
  [+JAVA?The Java command used to run Validator.nu.  Defaults to \bjava\b.]
  [+VNU?Path to the Validator.nu jar file.]
  [+TEMP?Directory for temporary files.  Defaults to \b/var/tmp\b.]
  [+TIDY_OPTIONS?Options passed to HTML Tidy.]
  [+TIDY_TOLERATE_WARNINGS?If set to 1, Tidy warnings are ignored.]
}
[+FILES?The primary macro sets are:]
{
  [+html.m4?Various macros for constructing HTML boilerplate, inserting pictures, formatting tables, printable links, and more.]
  [+navigation.m4?Macros for constructing navigation menus, both drop-down and side-bar.]
  [+toc3.m4?Macros for creating structured documents with chapters, sections and subsections.]
}
[+?Import macros using the form \bm4_include(DIR_INCLUDE/html.m4)\b.]
[+EXIT STATUS?0 on success, non-0 on error.]
[+SEE ALSO?\bmd2web\b(1), \bmkthumbnails\b(1)]

files ...

[-author?Perette Barella <perette@deviousfish.com>]
'


# Options handed to every HTML Tidy run, ahead of the indent setting and
# the user's $TIDY_OPTIONS.
TIDY_STANDARD_OPTIONS='-q -utf8 --doctype auto --preserve-entities yes --anchor-as-name no'

# Settings that may be preset in the environment; an unset or empty value
# gets the default shown.
: "${TIDY_TOLERATE_WARNINGS:=0}"
: "${VERBOSE:=false}"
: "${TEMP:=/var/tmp}"

# show_tidy_errors - print Tidy's messages together with the source lines
# they refer to.
# Arguments: $1 - the file Tidy was run on
# Input:     Tidy's messages on stdin, one per line.  The second
#            whitespace-separated field of each message is taken as the
#            line number.
# Outputs:   for every message: an empty line, the message (through
#            'warning' when it contains "Warning", otherwise through
#            'failure'), then up to five non-blank source lines around the
#            reported line, with the reported line passed to 'important'.
function show_tidy_errors {
	typeset source="$1"
	# Load the file into an array, indexed by line number (from 1).
	# The subscripted name is quoted so it can never be taken as a
	# file-name pattern.  The extra test keeps a final line that lacks a
	# trailing newline.
	typeset -a sourcefile
	typeset -i line=1
	while read -r "sourcefile[line]" || [[ -n "${sourcefile[line]}" ]]
	do
		((line += 1))
	done < "$source"

	# Iterate over error messages from stdin.
	typeset -i line_number line_count=line count
	typeset start rest
	while read -r start line_number rest
	do
		print
		if [[ $rest == *Warning* ]]
		then
			warning "$source: $start $line_number $rest"
		else
			failure "$source: $start $line_number $rest"
		fi
		# Starting at the problem line, step back over 2 non-blank lines.
		# The element must be written ${sourcefile[line]}: without the
		# braces the subscript is plain text and the test is always true.
		((line = line_number))
		count=2
		while ((line > 1 && count > 0))
		do
			((line -= 1))
			[[ -n "${sourcefile[line]}" ]] && ((count -= 1))
		done
		# Display up to 5 non-blank lines, highlighting the problem one
		count=0
		while ((line < line_count && count < 5))
		do
			if [[ -n "${sourcefile[line]}" ]]
			then
				((count += 1))
				if ((line == line_number))
				then
					important "${sourcefile[line]}"
				else
					print -r -- "${sourcefile[line]}"
				fi
			fi
			((line += 1))
		done
	done
}

# compile_web_to_html - run one source file through m4, HTML Tidy and
# Validator.nu.
# Arguments: $1 - source (.web) file
#            $2 - destination (.html) file
# Globals:   reads M4, M4_OPTS, DEBUG, TIDY, TIDY_STANDARD_OPTIONS,
#            TIDY_INDENT, TIDY_OPTIONS, TIDY_TOLERATE_WARNINGS, JAVA, VNU,
#            VALIDATE_OPTS, IGNORE_VALIDATOR, TEMP and YEAR; the DIR_*
#            values are refreshed through calculate_web_paths.
# Files:     m4 output goes to a temporary file under $TEMP.  When Tidy
#            reports more than is tolerated, or when DEBUG is set, that
#            file is kept as "<destination>.mid"; otherwise it is removed.
#            The destination is removed whenever a non-zero status is
#            returned.
# Returns:   0 on success (or when a validator failure is being ignored),
#            otherwise the exit status of the tool that failed.  Exits the
#            shell with status 1 if a temporary file cannot be created.
function compile_web_to_html {
	typeset source="$1"
	typeset destination="$2"
	# Only the file name goes into the template: the destination may
	# carry directories that do not exist under $TEMP.
	typeset stem="${destination##*/}"
	typeset temporary errors status

	# Assign separately from typeset so a mktemp failure is seen, and keep
	# the X's at the end of the template, which every mktemp accepts.
	temporary=$(mktemp "${TEMP}/${stem}.XXXXXX") || exit 1
	errors=$(mktemp "${TEMP}/${stem}.err.XXXXXX") || {
		rm -f -- "$temporary"
		exit 1
	}
	# Safety net for exits part-way through; the normal path cleans up
	# explicitly at the bottom.
	trap "rm -f -- '$temporary' '$errors'" EXIT
	calculate_web_paths
	rm -f -- "$destination.mid"
	# Process file with M4, outputting to a temporary file.
	${M4} ${M4_OPTS} ${DEBUG} \
		-DFILE_SOURCE="$source" \
		-DFILE_DEST="$destination" \
		-DDIR_TOP="$DIR_TOP" \
		-DDIR_INCLUDE="$DIR_INCLUDE" \
		-DDIR_HERE="$DIR_OFFSET" \
		-D_YEAR="$YEAR" \
		"$source" > "$temporary"
	status=$?
	if ((status == 0))
	then
		# Validate the temporary file with Tidy, and
		# write a cleaned-up version to the target HTML file.
		${TIDY} ${TIDY_STANDARD_OPTIONS} ${TIDY_INDENT} ${TIDY_OPTIONS} -o "$destination" "$temporary" 2> "$errors"
		status=$?
		if ((status > TIDY_TOLERATE_WARNINGS))
		then
			# Keep the m4 output for examination and show what Tidy said.
			mv -- "$temporary" "$destination.mid"
			show_tidy_errors "$destination.mid" < "$errors"
		else
			[[ -n $DEBUG ]] && mv -- "$temporary" "$destination.mid"
			# Pass on whatever Tidy reported (tolerated warnings).
			cat "$errors"
			# Check the resulting file with Validator.Nu
			if ${JAVA} -jar ${VNU} ${VALIDATE_OPTS} "$destination"
			then
				status=0
			else
				status=$?
				# IGNORE_VALIDATOR holds the command name true or false.
				$IGNORE_VALIDATOR && status=0
			fi
		fi
	fi
	if ((status != 0))
	then
		rm -f -- "$destination"
	fi
	rm -f -- "$temporary" "$errors"
	return $status
}

# get_domain_name - print the site's domain name.
# Globals:  reads WEBBASE, arg0, M4 and M4_OPTS.
# Files:    requires $WEBBASE/siteinfo.m4 to exist; the value is produced
#           by running m4 on $WEBBASE/Include/readdomain.m4 with
#           DIR_WEBBASE defined.
# Outputs:  the domain name on stdout, with surrounding whitespace removed
#           and inner runs of whitespace collapsed; diagnostics on stderr.
# Returns:  0 when a non-empty name was printed, 1 otherwise.
function get_domain_name {
	typeset siteinfo="${WEBBASE}/siteinfo.m4"
	if [[ ! -f "$siteinfo" ]]
	then
		print -- "$arg0: $siteinfo: Not found, cannot determine domain." 1>&2
		return 1
	fi
	typeset name
	# Use the configured m4, like the other m4 invocations in this script.
	if name=$(${M4:-m4} ${M4_OPTS:--P} -DDIR_WEBBASE="$WEBBASE" "${WEBBASE}/Include/readdomain.m4")
	then
		# Unquoted on purpose: word splitting normalizes the whitespace.
		name=$(print -r -- $name)
		if [[ -n "$name" ]]
		then
			print -r -- "$name"
			return 0
		fi
	fi
	# Diagnostics belong on stderr so they never pass for a domain name.
	print -- "$arg0: $siteinfo: Unable to extract domain name from file." 1>&2
	return 1
}

# extract_data - run one of the "read<field>.m4" extractors against a page.
# Arguments: $1 - path of the source (.web) file
#            $2 - field to extract; selects $DIR_INCLUDE/read<field>.m4
# Globals:   reads M4, M4_OPTS, DEBUG and YEAR; the DIR_* values are
#            refreshed through calculate_web_paths.
# Outputs:   whatever m4 prints for the extractor.
# Returns:   m4's exit status, or 1 when a directory change fails.  Exits
#            the shell with status 1 when no readable extractor exists.
# The function works from inside the page's directory and changes back to
# the starting directory before returning.
function extract_data {
	typeset page="${1#./}" target="$2"
	typeset dir="$(dir_name "$page")"
	typeset origin="$PWD"
	typeset status

	cd "$dir" || return 1
	if ! calculate_web_paths
	then
		cd "$origin"
		return 1
	fi

	typeset extractor="$DIR_INCLUDE/read${target}.m4"
	if [[ ! -r "$extractor" || ! -f "$extractor" ]]
	then
		print -- "$target: Invalid extraction item." 1>&2
		exit 1
	fi
	typeset source="$(base_name "$page")"
	typeset destination="$(base_name "$source" .web).html"
	# Every -D precedes the input file: m4 applies definitions in command
	# line order, so one placed after the file is not in effect while that
	# file is processed.
	${M4} ${M4_OPTS} ${DEBUG} \
		-DFILE_SOURCE="$source" \
		-DFILE_DEST="$destination" \
		-DDIR_TOP="$DIR_TOP" \
		-DDIR_INCLUDE="$DIR_INCLUDE" \
		-DDIR_HERE="$DIR_OFFSET" \
		-D_YEAR="$YEAR" \
		"$extractor"
	status=$?
	cd "$origin" || return 1
	return $status
}

# Defaults for everything the command line can change.
use_color=true
extract=""
OUTPUT=""
TIDY_INDENT=""
M4QUOTE=false
DEBUG=""
YEAR="$(date '+%Y')"
IGNORE_VALIDATOR=false
# Remember the configured Java command, so "--validation on" can bring it
# back after "--validation off" has replaced it.
JAVA_COMMAND="$JAVA"

# Process command line options
while getopts -a "$arg0" "$USAGE" option
do
	case "$option" in
		c)	case "$OPTARG" in
			true|false)
				use_color="$OPTARG"
				;;
			*)
				print -- "$arg0: Bad color option, specify true or false." 1>&2
				exit 1
				;;
			esac
			;;
		e)	TIDY_TOLERATE_WARNINGS=1
			;;
		i)	TIDY_INDENT="--indent yes"
			;;
		o)	OUTPUT="$OPTARG"
			;;
		q)	M4QUOTE=true
			;;
		v)	# Validation is disabled by substituting 'true' for the
			# Java command, so the validator step always succeeds.
			case "$OPTARG" in
			ignore)
				IGNORE_VALIDATOR=true
				;;
			off|no|false)
				JAVA=true
				;;
			on|yes|true)
				JAVA="$JAVA_COMMAND"
				;;
			*)
				print -- "$arg0: --validation option: Invalid mode $OPTARG." 1>&2
				exit 1
				;;
			esac
			;;
		x)	extract="$OPTARG"
			;;
		Z)	# The first -Z also adds -d; every -Z adds one -t<macro>.
			DEBUG="${DEBUG:--d} -t$OPTARG"
			;;
	esac
done

shift $((OPTIND - 1))

if [[ -n "$OUTPUT" ]] && (($# > 1))
then
	print -- "$arg0: Specifying output is incompatible with multiple files." 1>&2
	exit 1
fi

# The validator jar is only needed when pages are compiled with validation
# enabled.  Checking after the options are known keeps the help text, data
# extraction and "--validation off" usable without it.
if [[ -z "$extract" && "$JAVA" != "true" && ! -r "$VNU" ]]
then
	print -- "$arg0: $VNU: Not found.  Required for validation." 1>&2
	exit 1
fi

colored_logging $use_color

# Extraction mode (-x): print the requested field for each file, then exit.
#   domain      - printed once, through get_domain_name
#   body, index - extractor output is passed through untouched
#   other       - output is trimmed and its whitespace collapsed; with -q
#                 it is also wrapped in m4 quotes
if [[ -n $extract ]]
then
	if [[ "$extract" == "domain" ]]
	then
		get_domain_name
		# This is top-level code, not a function: leave with exit.
		exit $?
	fi
	# The characters to trim, as real characters, so the bracket
	# expressions below do not depend on how the shell reads a backslash
	# inside [...].
	trim_chars=$' \t\r\n'
	for path in "$@"
	do
		if [[ $extract == body || $extract == index ]]
		then
			extract_data "$path" "$extract" || exit $?
		else
			value=$(extract_data "$path" "$extract") || exit $?
			value=${value##+([$trim_chars])}
			value=${value%%+([$trim_chars])}
			# $value is expanded unquoted so inner whitespace collapses;
			# globbing is switched off so that a value such as "*" is
			# printed rather than replaced by file names.
			set -o noglob
			if $M4QUOTE
			then
				print -r -- \`\`$value\'\'
			else
				print -r -- $value
			fi
			set +o noglob
		fi
	done
	exit 0
fi

# Process the arguments given: compile each file, stopping at the first
# failure with that failure's status.  Without -o the output is named
# after the input, in the same directory, with ".web" replaced by ".html".
for path in "$@"
do
	if [[ -z "$OUTPUT" ]]
	then
		dir=$(dir_name "$path")
		file=$(base_name "$path" .web)
		output="$dir/$file.html"
	else
		output="$OUTPUT"
	fi
	# Refuse to compile a file onto itself (for example when a .html file
	# is named by mistake): the output is written over, and on failure
	# removed, by compile_web_to_html.
	if [[ "$output" -ef "$path" ]]
	then
		print -- "$arg0: $path: Output would overwrite the source file." 1>&2
		exit 1
	fi
	compile_web_to_html "$path" "$output" || exit $?
done

