#!/bin/ksh
######################################################################
# Purpose:	mmdtool - a tool to get metadata, titles, and other
#		information from (Multi)markdown files.
# Copyright:	Copyright 2018-2026 Perette Barella.
#		All rights reserved.
######################################################################
# Revision-control keyword string; spliced into USAGE below as the
# version entry of the usage specification.
VERSION='$Id: mmdtool 312 2026-03-22 19:23:03Z perette $'  
                
# Name this script was invoked under (directory part removed); used as
# the prefix of diagnostics and as the program name in the usage text.
arg0="$(basename "$0")"
        
# Validate that ksh supports the modern/extended getopts format.
# Author: Perette Barella
# Copyright 2018 Devious Fish.  All rights reserved.
# $Id: modern_ksh_check 19 2018-07-28 23:40:39Z perette $
#
# The check hands getopts an extended-format option specification and
# the argument --abc, then looks at what ended up in the flag variable.
# A shell whose getopts understands the specification yields "12";
# anything else is reported on stderr and the script exits with status 1.
# Globals:	arg0 (read) - program name used to prefix the message.

function modern_ksh_check {
	typeset probe
	# Runs inside a command substitution so the probe's variables do
	# not disturb the real option parsing later in the script.
	probe="$(getopts '[-][12:abc]' flag --abc; print -- 0$flag)"
	if [[ "$probe" == "012" ]]
	then
		return 0
	fi
	print -- "$arg0${arg0+: }Outdated Korn shell." 1>&2
	exit 1
}
        
# Bail out before anything else if this shell cannot handle the
# extended getopts usage specification that option parsing relies on.
modern_ksh_check
        
# Usage specification handed to every `getopts -a "$arg0" "$USAGE"` call
# in this script; it defines the -h/--html-encode and -w/--wrap options
# and carries the help text.  The $'...' string is closed and reopened
# around $VERSION and $arg0 so their values are spliced into the text.
USAGE=$'
[-1?'$VERSION$']
[+NAME?'$arg0$' - extract portions of Markdown documents.]
[+DESCRIPTION?\b'$arg0$'\b extracts header entries, document titles, bodies and
headers in their entirety from Markdown and Multimarkdown documents.]
[h:html-encode?HTML-encode any entities in data.]
[w:wrap?Wrap extracted data with an HTML element.]:[element]
[+COMMANDS?The commands are allowed:]
{
  [+get?Get a named field from the document header.  Values spanning multiple lines will be combined into a single line.  When requesting \btitle\b, if it is not found in the header, a title at the top of the document will be used instead.]
  [+body?Return the document body with header removed.]
  [+strip?Synonym for \bbody\b.]
  [+header?Return the document header in entirety.]
  [+head?Synonym for \bheader\b.]
}
[+EXIT STATUS?0 on success, non-0 on error.]
[+SEE ALSO?\bmd2web\b(1)]

command parameter
command

[-author?Perette Barella <perette@deviousfish.com>]
'




######################################################################
# Function:	extract_value
# Purpose:	Retrieves a metadata value from the document on stdin.
#		Values continued over several header lines are joined
#		into one line with single spaces.
#		"Title" will alternately search for first # Title
#		or underlined value, looking no further than the first
#		paragraph after the header.
# Arguments:	$1 - the metadata field name (matched case-insensitively)
# Output:	The value on stdout, without a trailing newline.
# Returns:	0 if a value was found, 1 otherwise.
# Author:	Perette Barella
#---------------------------------------------------------------------
function extract_value
{
	typeset -l target="$1" lcname
	typeset name value lastline="" state=HEADER

	# -r keeps backslashes in the data intact; the || clause lets a
	# final line without a terminating newline be processed too.
	while read -r name value || [[ -n "$name" ]]
	do
		lcname="$name"

		if [[ "$state" == "VALUE" ]]
		then
			# After we've encountered target: value,
			# keep going until we find another Item: value
			# or the end of the header.
			[[ -z "$name" || "$lcname" != "${lcname%:}" ]] && return 0
			print -r -n -- " $name${value:+ $value}"
			continue
		fi

		if [[ "$target:" == "$lcname" ]]
		then
			# When we encounter target: value
			print -r -n -- "$value"
			state=VALUE
			continue
		fi

		if [[ "$target" == "title" ]]
		then
			# If we want a title, and encounter # First Heading
			if [[ "$name" == "#" ]]
			then
				# Drop the optional closing run of #s and
				# the whitespace that preceded it.
				while [[ "$value" == *"#" ]]
				do
					value="${value%"#"}"
				done
				value="${value%"${value##*[![:space:]]}"}"
				print -r -n -- "$value"
				return 0
			fi
			# If we want a title, and encounter something
			# underlined.  A rule with no text above it is
			# not a title, so it is passed over.
			if [[ -n "$lastline" && ( "$name" == "---"* || "$name" == "==="* ) ]]
			then
				print -r -n -- "$lastline"
				return 0
			fi
		fi

		# When we get to blank lines, stop,
		# except for title, then continue on one paragraph.
		lastline="$name${value:+ $value}"
		if [[ -z "$name" ]]
		then
			[[ "$target" != "title" ]] && return 1
			[[ "$state" == "CONTENT" ]] && return 1
			state=POSTHEADER
		elif [[ "$state" == "POSTHEADER" ]]
		then
			state=CONTENT
		fi
	done

	# Input ended while a value was being collected: it has already
	# been printed, so that is a success rather than "not found".
	[[ "$state" == "VALUE" ]] && return 0
	return 1
}
##### End of function extract_value #####






######################################################################
# Function:	strip_header
# Purpose:	Strip the header off a markdown document read from
#		stdin and write the remaining body to stdout.
#		With the header gone, multimarkdown renders the content
#		as HTML without header/footer/etc.
#		A document is taken to have a header when the first word
#		of its first line ends in a colon.  The header runs to
#		the first blank line; blank lines between header and
#		body are dropped as well.
# Returns:	0 when there is no body to copy, otherwise the exit
#		status of cat.
# Author:	Perette Barella
#---------------------------------------------------------------------
function strip_header
{
	typeset line word

	# Lines are read whole (IFS= and -r) so the first body line is
	# reproduced exactly, indentation and backslashes included.
	# Nothing at all on stdin: nothing to output.
	IFS= read -r line || [[ -n "$line" ]] || return 0

	# First word of the line: trim leading blanks, cut at next blank.
	word="${line#"${line%%[![:blank:]]*}"}"
	word="${word%%[[:blank:]]*}"

	if [[ "$word" == *: ]]
	then
		# There is a header: discard up to the first blank line...
		while [[ "$line" == *[![:blank:]]* ]]
		do
			IFS= read -r line || [[ -n "$line" ]] || return 0
		done
		# ...and then the blank lines separating it from the body.
		while [[ "$line" != *[![:blank:]]* ]]
		do
			IFS= read -r line || [[ -n "$line" ]] || return 0
		done
	fi

	# The line already consumed is the first line of the body;
	# cat copies whatever is left on stdin.
	print -r -- "$line"
	cat
}
##### End of function strip_header #####



######################################################################
# Function:	extract_header
# Purpose:	Extract the header off a markdown document read from
#		stdin and write it, line for line, to stdout.
#		A document is taken to have a header when the first word
#		of its first line ends in a colon; the header runs up to
#		(not including) the first blank line.
# Returns:	0 if a header was found and printed, 1 if the document
#		has no header (nothing is printed).
#---------------------------------------------------------------------
function extract_header {
	typeset line word

	# Read whole lines (IFS= and -r) so they are echoed verbatim.
	IFS= read -r line || [[ -n "$line" ]] || return 1

	# First word of the line: trim leading blanks, cut at next blank.
	word="${line#"${line%%[![:blank:]]*}"}"
	word="${word%%[[:blank:]]*}"
	[[ "$word" == *: ]] || return 1

	while [[ "$line" == *[![:blank:]]* ]]
	do
		print -r -- "$line"
		# Stop at end of input; an unterminated last line is
		# still handed to the next iteration.
		IFS= read -r line || [[ -n "$line" ]] || break
	done
	return 0
}
##### End of function extract_header #####



######################################################################
# Main program: parse options, validate the command line, then run
# the requested command against the document on stdin.
# Exit status:	0 on success; 1 when the requested data is not present
#		in the document; 2 on a usage error.
#---------------------------------------------------------------------
HTML=false
WRAP=""
while getopts -a "$arg0" "$USAGE" option
do
	case "$option" in
		h)	HTML=true ;;
		# Wrapping in an element also switches on HTML-encoding.
		w)	HTML=true ; WRAP="$OPTARG" ;;
		# Anything else is getopts flagging a bad option, which
		# it has already reported; do not carry on regardless.
		*)	exit 2 ;;
	esac
done
unset option
typeset status=0

shift $((OPTIND - 1))

######################################################################
# Function:	usage_exit
# Purpose:	Report a command-line error, show the usage summary
#		generated from USAGE, and exit with status 2.
# Arguments:	$1 - optional message, printed with the program name.
#---------------------------------------------------------------------
function usage_exit
{
	typeset option
	[[ -n "${1-}" ]] && print -r -- "$arg0: $1" 1>&2
	# getopts only prints the usage and returns; it does not end the
	# script, hence the explicit exit.  OPTIND is reset so getopts
	# starts on the "-?" argument rather than where option parsing
	# stopped -- NOTE(review): follows the usual ksh93 usage idiom;
	# confirm against the getopts documentation.
	OPTIND=0
	getopts -a "$arg0" "$USAGE" option "-?"
	exit 2
}

if (( $# < 1 || $# > 2 ))
then
	usage_exit
fi

case "$1" in
	get)
		[[ -n "${2-}" ]] || usage_exit "Must specify what to get."
		value="$(extract_value "$2")" || exit 1
		# Print snippets, with one carriage return at the end
		[[ -n "$WRAP" ]] && print -r -n -- "<$WRAP>"
		if $HTML
		then
			print -r -n -- "$value" |
			sed -E -e 's/&/\&amp;/g' -e 's/</\&lt;/g' -e 's/>/\&gt;/g' \
			    -e 's/"/\&quot;/g' -e "s/'/\\&apos;/g"
		else
			print -r -n -- "$value"
		fi
		[[ -n "$WRAP" ]] && print -r -n -- "</$WRAP>"
		print
		;;
	head|header)
		# Non-zero when the document has no header.
		extract_header || status=$?
		;;
	body|strip)
		strip_header || status=$?
		;;
	*)
		# A mistyped command must not look like success.
		usage_exit "Unknown command: $1"
		;;
esac

exit "$status"
