#!/bin/ksh
# Archiving script for network backups
# Author: Perette Barella
# Copyright 2018 Devious Fish.  All rights reserved.
VERSION='$Id: netarchive 114 2025-12-17 18:36:40Z perette $'

# Program name, used by getopts (-a) and in diagnostics.
arg0=$(basename "$0")

# Fallback option string for shells whose getopts only understands the
# classic short-option syntax.  It must list every option handled by the
# option loop, including -r, which takes an argument.
USAGE='nvapr:'

# Probe for ksh93-style self-documenting getopts: the probe declares a long
# option "abc" with value 12 and parses "--abc".  Only when that yields 12 are
# NAMEOPTS and the full usage/manual text below installed.
[[ $(getopts '[-][12:abc]' flag --abc; print -- 0$flag) == "012" ]] &&
	NAMEOPTS="-a $arg0" &&
	USAGE=$'
[-1?'$VERSION$']
[+NAME?netarchive - manage archival copies of network backups]
[+DESCRIPTION?\b\f?\f\b makes archival copies of backups on a remote
server using \brsync\b(1).  Old backups are progressively thinned as they
age.]
[+?All snapshots are retained for 24 hours.  Thereafter, a daily is retained
for a month, a weekly for a quarter, monthly for a year, and quarterly indefinitely.]
[a:all?Make snapshots of all backups present on server.]
[r:recycle?Number of expired snapshots retained for recycling.  Recycled snapshots are reused in the future, incurring only deltas to refresh and avoiding unnecessary unlinks/relinks.]#[count]
[p:purge?Purge only; do not make new snapshots.]
[v:verbose?Verbose output.]
[n:no-removal?Suppress removal of files.  Used with -v, this shows what
would be removed without actual removal.]
[+EXIT STATUS?0 on success, non-0 on error.]
[+SEE ALSO?\bnetbackup\b(1)]

backup-host

[-author?Perette Barella <perette@deviousfish.com>]
'

# Work out which flavour of date(1) is installed: a date that accepts -j is
# treated as the BSD variant, anything else is handled as GNU-style (-d).
if date -j >/dev/null 2>&1
then
	BSDDATE=true
else
	BSDDATE=false
fi

# adjusted_date DATE DAYS [FORMAT]
#
# Print DATE shifted by DAYS days (a negative DAYS moves back in time),
# rendered with the date(1) format string FORMAT (default "+%Y%m%d.%H%M").
#
# DATE is "YYYY-MM-DD HH:MM", optionally followed by a numeric zone offset
# (the callers in this file append the output of `date +%z`).  The BSD branch
# picks the fields out by fixed column position and does not look at anything
# after the minutes; the other branch hands the whole string to `date -d`.
#
# Globals:  BSDDATE (read) - "true" selects the `date -j` / `date -r` code path.
# Returns:  the exit status of the final date(1) call; 1 if the BSD branch
#           cannot convert DATE to epoch seconds.
function adjusted_date {
	typeset date="$1" format="${3:-+%Y%m%d.%H%M}"
	integer duration=$2 seconds
	if $BSDDATE
	then
		# Reassemble as month, day, hour, minute, year for `date -j`.
		seconds=$(date -j "${date:5:2}${date:8:2}${date:11:2}${date:14:2}${date:0:4}" '+%s') || return 1
		let "seconds += duration * 86400"
		date -r $seconds "$format"
	else
		# Pass the offset as an explicit sign plus magnitude.
		typeset sign=+
		((duration < 0)) && sign=- && let "duration = -duration"
		date -d "${date} $sign $duration days" "$format"
	fi
}


# date_test START OFFSET EXPECTED
#
# Single unit-test case for adjusted_date.  START and EXPECTED are given as
# "YYYY-MM-DD HH:MM"; OFFSET is a number of days.  The current zone offset
# (`date +%z`) is appended to START before the call, and EXPECTED is
# reformatted to "YYYYMMDD.HHMM" so it can be compared with adjusted_date's
# default output format.
#
# Outputs:  on mismatch, prints "<actual> != <expected>." to stdout.
# Returns:  0 when the result matches, 1 otherwise.
function date_test {
	typeset from="$1" days="$2" want="$3"
	typeset got normalized

	# "2018-07-19 17:02" -> "20180719.1702"
	normalized="${want:0:4}${want:5:2}${want:8:2}.${want:11:2}${want:14:2}"
	got=$(adjusted_date "$from$(date '+%z')" "$days")

	[[ "$got" == "$normalized" ]] && return 0

	print "$got != $normalized."
	return 1
}
	
# perform_unit_tests
#
# Runs the adjusted_date test cases with TZ forced to UTC and then exits the
# script; it never returns to the caller.  The exit status is 0 when every
# case passes (after printing "Tests passed."), otherwise the number (1-4) of
# the last case that failed.
function perform_unit_tests {
	typeset base='2018-07-18 17:02'
	integer status=0

	export TZ="UTC0"

	if ! date_test "$base" 1 '2018-07-19 17:02'
	then
		status=1
	fi
	if ! date_test "$base" -1 '2018-07-17 17:02'
	then
		status=2
	fi
	if ! date_test "$base" 31 '2018-08-18 17:02'
	then
		status=3
	fi
	if ! date_test "$base" -31 '2018-06-17 17:02'
	then
		status=4
	fi

	if (( status == 0 ))
	then
		print "Tests passed."
	fi
	exit $status
}


# find_expired_archives ARCHIVE
#
# Lists the entries of directory ARCHIVE on $SERVER and records, in the global
# array EXPIRED_ARCHIVES (from index 0, as "ARCHIVE/<name>"), the snapshots
# that the thinning schedule no longer needs.
#
# Snapshot names are expected to look like YYYYMMDD.HHMM and are compared as
# numbers.  Walking the sorted list from oldest to newest, each snapshot is
# assigned a minimum spacing based on its age:
#     older than 366 days -> 90 days      older than 90 days -> 30 days
#     older than 31 days  -> 7 days       older than 1 day   -> 1 day
# A snapshot that falls within that spacing of the last retained snapshot is
# marked expired; otherwise it becomes the new "last retained".  Scanning
# stops at the first snapshot newer than one day.
#
# Globals:  SERVER, VERBOSE, arg0 (read); EXPIRED_ARCHIVES (written).
# Returns:  0, including when the listing cannot be obtained (a message is
#           printed and nothing is marked expired); 1 only if no temporary
#           file could be created.
function find_expired_archives {
	# Modifies global EXPIRED_ARCHIVES
	typeset archive="$1"
	typeset file

	typeset now="$(date '+%Y-%m-%d %H:%M%z')"
	# Zone offset appended to reconstructed dates; computed once rather
	# than forking date(1) for every archive entry.
	typeset zone="$(date '+%z')"
	float yesterday=$(adjusted_date "$now" -1 '+%Y%m%d.%H%M')
	integer last_month=$(adjusted_date "$now" -31 '+%Y%m%d')
	integer last_quarter=$(adjusted_date "$now" -90 '+%Y%m%d')
	integer last_year=$(adjusted_date "$now" -366 '+%Y%m%d')
	integer count=0

	# Use an unpredictable name: /var/tmp is shared with other users.
	file=$(mktemp "/var/tmp/$arg0.XXXXXX") || {
		print "$arg0: unable to create temporary file" 1>&2
		return 1
	}

	# Fetch first, sort second, so a failed ssh/ls is noticed instead of
	# being hidden behind sort's exit status.
	if ssh "$SERVER" ls -1 "$archive" > "$file" && sort -o "$file" "$file"
	then
		$VERBOSE && print -- "Purging $archive:" 1>&2
		typeset arcdate last=20000101.0000 next
		integer interval
		while read -r arcdate
		do
			# Only names of exactly 8 digits, a dot and 4 digits are snapshots.
			if [[ "$arcdate" != {8}([0-9]).{4}([0-9]) ]]
			then
				[[ "$arcdate" != "in_progress" ]] &&
					print "$arcdate: cruft in archive history" 1>&2
				continue
			fi
			if (( arcdate < last_year ))
			then
				interval=90
			elif (( arcdate < last_quarter ))
			then
				interval=30
			elif (( arcdate < last_month ))
			then
				interval=7
			elif (( arcdate < yesterday ))
			then
				interval=1
			else
				$VERBOSE && print "$arcdate: Recent backups retained." 1>&2
				break
			fi
			# Earliest timestamp at which the next snapshot must be kept.
			next="$(adjusted_date "${last:0:4}-${last:4:2}-${last:6:2} ${last:9:2}:${last:11:2}$zone" $interval '+%Y%m%d.%H%M')" || break
			if (( arcdate < next ))
			then
				$VERBOSE && print -- "$arcdate: within $interval days of $last, is prunable" 1>&2
				EXPIRED_ARCHIVES[count++]="$archive/$arcdate"
			else
				$VERBOSE && print "$arcdate: more than $interval days from $last, retaining" 1>&2
				last="$arcdate"
			fi
		done < "$file"
	else
		print "$arg0: unable to list $archive on $SERVER; nothing marked for purging" 1>&2
	fi
	rm -f "$file"
	return 0
}




# perform_snapshot SOURCE TARGET
#
# Archives one backup on $SERVER and thins its history.
#   SOURCE  backup directory on the server (the caller passes "Backups/<name>/")
#   TARGET  archive directory on the server (the caller passes "Archive/<name>")
#
# Steps:
#   1. find_expired_archives fills the global EXPIRED_ARCHIVES for TARGET.
#   2. Unless in purge-only mode, a working directory TARGET/in_progress is
#      obtained: an existing one is reused, else the last expired snapshot is
#      renamed into place (recycled), else a new directory is created.
#   3. rsync runs on the server from SOURCE into the working directory, with
#      --link-dest pointing back at SOURCE; on success the directory is
#      renamed to TARGET/YYYYMMDD.HHMM.
#   4. If step 3 succeeded (or was skipped), expired snapshots are removed
#      from the end of the list until only RECYCLE remain.  With REMOVE=false
#      they are dropped from the list but left on the server.
#
# Globals:  SERVER, SNAPSHOT, VERBOSE, VERBOSEFLAG, REMOVE, RECYCLE, arg0
#           (read); EXPIRED_ARCHIVES (modified).
# Returns:  0 on success; the status of a failed remote "rm -rf"; 1 if the
#           working directory could not be created or the rsync/rename failed.
function perform_snapshot {
	typeset source="$1" target="$2"
	typeset snapshot="$target/in_progress"
	typeset now
	typeset -a rsync_command

	find_expired_archives "$target"
	integer expired_count=${#EXPIRED_ARCHIVES[@]}

	# Prep a directory for the snapshot
	if ! $SNAPSHOT
	then
		# Target not needed
		$VERBOSE && print "Purge-only mode: Skipping snapshot creation." 1>&2
	elif ssh "$SERVER" test -d "$snapshot"
	then
		# Reuse existing in-progress directory
		$VERBOSE && print "Reusing existing in_progress snapshot." 1>&2
	elif (( expired_count > 0 )) &&
	     ssh "$SERVER" mv "${EXPIRED_ARCHIVES[expired_count-1]}" "$snapshot"
	then
		$VERBOSE &&
			print "Recycling snapshot ${EXPIRED_ARCHIVES[expired_count-1]}" 1>&2
		# Quoted so the subscript is never treated as a glob pattern.
		unset 'EXPIRED_ARCHIVES[expired_count-1]'
	else
		$VERBOSE && print "Creating fresh snapshot." 1>&2
		ssh "$SERVER" mkdir -p "$snapshot" || return 1
	fi


	# Create or update the snapshot
	typeset snapshot_ok=false
	if $SNAPSHOT
	then
		# Built as an argument list and run directly: the names involved
		# may come from a remote directory listing, so they must never be
		# re-parsed by the local shell (no eval).
		# $VERBOSEFLAG is deliberately unquoted so it vanishes when empty.
		# --link-dest is relative to the destination directory, which is
		# two levels below TARGET's parent, hence "../../../" + SOURCE.
		rsync_command=(ssh "$SERVER" rsync $VERBOSEFLAG -a -H --fuzzy --delete-after --force --link-dest "../../../$source" "$source" "$snapshot")
		$VERBOSE && print -- "$arg0: Executing ${rsync_command[*]}" 1>&2
		if "${rsync_command[@]}"
		then
			now="$(date '+%Y%m%d.%H%M')"
			if ssh "$SERVER" mv "$snapshot" "$target/$now"
			then
				snapshot_ok=true
				$VERBOSE && print "$arg0: Created snapshot $now." 1>&2
			fi
		fi
	else
		snapshot_ok=true
	fi

	if $snapshot_ok
	then
		integer status=0
		# Trim the expired list from its end until only RECYCLE entries remain.
		while expired_count=${#EXPIRED_ARCHIVES[@]} ; (( expired_count > RECYCLE))
		do
			$VERBOSE && print "Removing expired archive: ${EXPIRED_ARCHIVES[expired_count-1]}" 1>&2
			if $REMOVE
			then
				ssh "$SERVER" rm -rf "${EXPIRED_ARCHIVES[expired_count-1]}" ||
					status=$?
			fi
			unset 'EXPIRED_ARCHIVES[expired_count-1]'
		done
		typeset retained
		for retained in "${EXPIRED_ARCHIVES[@]}"
		do
			$VERBOSE && print "Retained for recycling: $retained" 1>&2
		done
		return $status
	fi
	return 1
}


# Ensure numbers with decimal points are parsed correctly.
export LC_NUMERIC=C

# Option defaults.  The boolean settings hold the command names "true" or
# "false" so they can be executed directly as conditions.
VERBOSE=false
VERBOSEFLAG=""
REMOVE=true
ALL=false
SNAPSHOT=true
RECYCLE=0

# $NAMEOPTS is intentionally unquoted: it is either unset or "-a <name>".
while getopts $NAMEOPTS "$USAGE" option
do
	case "$option" in
	    a)
		ALL=true
		;;
	    n)
		REMOVE=false
		;;
	    r)	RECYCLE="$OPTARG"
		if ((RECYCLE < 0))
		then
			RECYCLE=0
		fi
		;;
	    p)
		SNAPSHOT=false
		;;
	    v)
		VERBOSE=true
		VERBOSEFLAG="--itemize-changes"
		;;
	    *)
		# Unknown option, missing argument, or a help request: getopts
		# has already printed its message, so stop instead of carrying
		# on with a run the user did not ask for.
		exit 1
		;;
	esac
done

# Exactly one operand, the backup host, is required.
shift $((OPTIND - 1))
if (( $# != 1 ))
then
	# Re-run getopts with --short to print the brief usage line.
	OPTIND=0
	getopts $NAMEOPTS "$USAGE" option --short
	exit 1
fi

SERVER="$1"

# Magic host name that runs the built-in self tests (which exit the script).
if [ "$SERVER" = "RUN_UNIT_TEST_NOW" ]
then
	perform_unit_tests
fi

if $ALL
then
	# Every entry under Archive on the server.  A failed listing is an
	# error, not an empty work list.
	ARCHIVES=$(ssh "$SERVER" ls -1 Archive) || {
		print "$arg0: unable to list archives on $SERVER" 1>&2
		exit 1
	}
else
	# Only this machine, by short host name.
	ARCHIVES=$(uname -n | cut -d. -f1)
fi

status=0
# Make a fresh copy of each of the archives, and date them.
# $ARCHIVES is split on whitespace on purpose: one name per word.
for arc in $ARCHIVES
do
	$VERBOSE && print "Processing $arc:" 1>&2

	# Create or reset a global variable
	unset EXPIRED_ARCHIVES
	typeset -a EXPIRED_ARCHIVES

	# Remember the most recent failure but keep processing the rest.
	perform_snapshot "Backups/$arc/" "Archive/$arc" || status=$?
done

exit $status

