#!/bin/ksh
# deduplicate - locate duplicate files and act on them
# Author: Perette Barella
# @(#) $Id: deduplicate 35 2018-09-13 20:31:44Z perette $

arg0=$(basename "$0")

# Open file with window manager
# Author: Perette Barella
# Copyright 2018 Devious Fish.  All rights reserved.
# $Id: launch_document 58 2020-09-27 15:55:59Z perette $

# Korn shell/zsh: find a utility that implements something
# Author: Perette Barella
# Copyright 2018 Devious Fish.  All rights reserved.
# $Id: find_implementation 58 2020-09-27 15:55:59Z perette $

function find_implementation {
	typeset impl utility
	for impl
	do
		utility="${impl%%[ 	]*}"
		if whence -p "$utility" >/dev/null 2>&1
		then
			print -- "$impl"
			return 0
		fi
	done
	return 1
}


# Parameters: List of documents or URLs to open.
# Returns: 0 on success, non-0 on error.
function launch_document {
	typeset opener document
	if [[ $(uname -s) == "Darwin" ]]
	then
		opener="open %s"
	else
		# Redirects prevent application X spooge all over the terminal
		if ! opener=$(find_implementation \
			"gnome-open %s >/dev/null 2>&1" \
			"xdg-open %s >/dev/null 2>&1" \
			"gvfs-open %s >/dev/null 2>&1")
		then
			print "$arg0: No file opening utility found for this platform."
			return 1
		fi
	fi
	for document
	do
		typeset cmd=$(printf "$opener" \'"$document"\')
		eval "$cmd"
		typeset result=$?
		# If the command fails and involves a redirect, perform it
		# again without the redirect to show the error.
		if (( result != 0 )) && [[ $opener == *">"* ]]
		then
			eval "${cmd%%">"*}"
		fi
		
	done
	return 0
}

# Korn shell/zsh require
# Author: Perette Barella
# Copyright 2018 Devious Fish.  All rights reserved.
# $Id: require 61 2021-08-09 18:49:07Z perette $

function require {
	typeset requirement
	integer result=0
	for requirement
	do
		if ! whence -p "${requirement%:*}" >/dev/null 2>&1
		then
			print -- "$arg0${arg0+: }${requirement#*:} not found; please install." 1>&2
			result=1
		fi
	done
	(( $result != 0 )) && exit $result
	return 0
}


function usage {
	print -- "$arg0: Duplicate file identifier and deduplication.
Usage: $arg0 [-h] [-i] [-l] directory ...
Options:
  -h       Display this extended help message.
  -l       Removes duplicates by hard linking identical files together.
  -i       Interactive mode: prompts user for action to take on each file." 1>&2
}

function user_interaction {
	join="Join"
	while true
	do
		[ "$1" -ef "$2" ] && print "Files are hard linked." && join=""
		select action in "Keep only $1" "Keep only $2" "Keep both" $join "View/Open"
		do
			if [ "$action" = "" ]
			then
				print "Unknown action."
				continue
			fi
			case "$action" in
				"Keep only $1")
					rm "$2"
					return 0 ;;
				"Keep only $2")
					rm "$1"
					return 0 ;;
				"Keep both")
					return 0 ;;
				"Join")
					ln "$1" "$2.$arg0.$$" &&
						mv -f "$2.$arg0.$$" "$2" &&
						return 0
					join=""
					;;
				View/Open)
					launch_document "$1"
					;;
				*)
					print "$arg0: Unknown action '$action'."
					;;
			esac
		done
	done
	return 0
}


LINK=false
INTERACTIVE=false

while getopts 'hli?' option
do
	case "$option" in
		h)	usage ;;
		l)	LINK=true ;;
		i)	INTERACTIVE=true ;;
	esac
done
shift $(($OPTIND - 1))

require realpath

last_checksum=""
last_size=""
last_filename=""
dupe_count=0
byte_count=0

for directory
do
	if [ -d "$directory" ]
	then
		find $(realpath "$directory") -type f -print0 | xargs -0 cksum
	elif [ ! -a "$directory" ]
	then
		print -- "$directory: Does not exist." 1>&2
	else
		print -- "$directory: Not a directory." 1>&2
	fi
done | sort | while read checksum size filename
do
	if [ "$last_checksum" = "$checksum" -a \
	     "$last_size" = "$size" -a \
	     \( ! "$last_filename" -ef "$filename" -o "$INTERACTIVE" = "true" \) ]
	then
		if cmp -s "$last_filename" "$filename"
		then
			print -- "$filename: duplicates $last_filename"
			if [ ! "$last_filename" -ef "$filename" ]
			then
				let "byte_count += size"
				let "dupe_count++"
				if $LINK
				then
					ln "$last_filename" "$filename.$arg0.$$" &&
						mv -f "$filename.$arg0.$$" "$filename"
				fi
			fi
			if $INTERACTIVE
			then
				user_interaction "$last_filename" "$filename" </dev/tty
			fi
		fi
	fi
	# Don't shift file data if it was deleted by interactive mode.
	if [ -f "$filename" ]
	then
		last_checksum="$checksum"
		last_size="$size"
		last_filename="$filename"
	fi
done

print -- "$arg0: $dupe_count duplicates wasting $byte_count bytes." 1>&2
if [ "$LINK" = "false" -a "$INTERACTIVE" = "false" ]
then
	print -- "$arg0: No changes made; run with -l option to hard-link files."
fi

