#!/bin/bash
#                       /usr/local/bin/doc2html
#  http://crystalfaeries.net/posix/bin/doc2html
# celeste:crystalfaery DOC2HTML 2020-07-16 05:28:14+00:00

# Invocation syntax:
# doc2html [TARGET_DIR] [absolute paths of files and/or directories]
# doc2html           	[absolute paths of files first, directories next]
#			(defaults to TARGET_DIR of ~/documents/html/)
# Invoked withOUT Arguments, Process ALL  FILES in ~/documents/docx/
# The option -i (for In-Place) will output into the same directory as source file(s):
# doc2html   -i	[absolute paths of files and/or directories]
help=12	# --help prints this many lines from the top of this file (the header above)

# Seed tidy's per-user configuration when ~/.tidyrc is missing or empty.
# The here-doc delimiter is quoted so the settings are written out verbatim.
[ -s "${HOME}/.tidyrc" ] || cat > "${HOME}/.tidyrc" << 'EOF'
clean: yes
drop-empty-paras: yes
drop-proprietary-attributes: yes
force-output: yes
indent: auto
input-encoding: utf8
join-classes: yes
join-styles: yes
output-encoding: utf8
output-html: yes
quiet: yes
show-body-only: yes
EOF

# Defaults that the option parsing below may override.
TARGET="${HOME}/documents/html/"	# default output directory
# INPLACE follows shell exit-status convention: 0 means "write next to each
# source file", any other value means "write under TARGET".
INPLACE=1

# Interpret the first argument.  When this section is done:
#   "$@"    holds only the source files/directories to convert,
#   TARGET  is the output directory (user supplied or the default),
#   INPLACE is 0 when -i/--inplace was given.
case "${1}" in
-v | --version)
	# Line 4 of this script carries the version stamp.
	tail -n +4 "$0" | head -n 1
	exit	$?
	;;
-h | --help)
	# The first ${help} lines of this script are the usage text.
	head -n "${help}" "$0"
	exit	$?
	;;
-i | --inplace)
	INPLACE=0			# output dir = input dir (per file)
	shift				# dispose the option, all remaining arguments are inputs
	;;
*)
	case	$# in
	0)	# Invoked withOUT arguments: supply the default target and source.
		# Done in-process rather than by re-exec'ing "$0", which fails
		# when the script is started as "bash doc2html" or is not
		# executable, and which treated the (not yet existing) default
		# target as a source directory on the very first run.
		TARGET="${HOME}/documents/html/"
		set -- "${HOME}/documents/docx/"
		;;
	*)
		if [ -d "${1}" ]
		then	# user specified TARGET_DIR
			TARGET="${1}"
			shift	# dispose outputdir argument, leaving only sources to convert
		else	# first argument is a source: fall back to the default output directory
			TARGET="${HOME}/documents/html/"
		fi
		;;
	esac
	;;
esac	# OK, all remaining arguments SHOULD BE absolute paths of source files and/or directories to convert
mkdir -p "${TARGET}"	|| exit 252	# make sure the target directory exists

# Convert every regular file named in "$@" (directories are searched
# recursively).  Each source gets its own output subdirectory named after the
# file's base name: beside the source when INPLACE is 0, otherwise under the
# output directory chosen during argument parsing.
# Exit codes: 251 = cannot create the per-file output directory,
#             250 = cannot enter it; otherwise the status of the last
#             command run for the last file processed.

START_DIR="${PWD}"					# relative sources are resolved against this
OUT_ROOT="${TARGET:-${HOME}/documents/html/}"	# remember the requested target; TARGET is reassigned per file

# Snapshot the full list of sources before converting anything, so output
# written below a searched directory is never picked up as a new input.
# NUL-delimited so paths containing spaces or glob characters stay intact.
SOURCES=()
while IFS= read -r -d '' FILE
do
    SOURCES+=("${FILE}")
done < <(find "$@" -type f -print0)

for FILE in "${SOURCES[@]}"
do
    # The loop changes directory for every file, so a relative path must be
    # anchored to the directory the script was started from.
    case "${FILE}" in
	/*)	;;
	*)	FILE="${START_DIR}/${FILE}" ;;
    esac
    printf 'FILE:\t%s\n' "${FILE}"			1>&2	# DEBUGGING INFO
    DIR="${FILE%/*}/"						# Directory name (with trailing slash)
    printf 'DIR:\t%s\n' "${DIR}"			1>&2	# DEBUGGING INFO
    NAME="${FILE##*/}"
    case "${NAME}" in
	?*.*)	# has an extension that is not just a leading dot
		BASE="${NAME%.*}"				# base name without extension
		EXT="${NAME##*.}"				# file's extension
		;;
	*)	# no extension (or a dot-file): keep the whole name as BASE
		BASE="${NAME}"
		EXT=""
		;;
    esac
    printf 'BASE:\t%s\n' "${BASE}"			1>&2
    printf 'EXT:\t%s\n' "${EXT}"			1>&2	# DEBUGGING INFO
    printf 'INPLACE: %s\n' "${INPLACE}"			1>&2	# DEBUGGING INFO
    case "${INPLACE}" in
	0)
		TARGET="${DIR}"		# output directory is input directory
		;;
	*)
		TARGET="${OUT_ROOT}"	# honour the TARGET_DIR resolved during argument parsing
		;;
    esac
    # An extensionless source converted in place would need an output
    # directory with the source's own name; skip it instead of aborting.
    if [ -e "${TARGET}/${BASE}" ] && [ ! -d "${TARGET}/${BASE}" ]
    then
	printf 'doc2html: skipping %s: %s exists and is not a directory\n' \
		"${FILE}" "${TARGET}/${BASE}"		1>&2
	continue
    fi
    mkdir -p	"${TARGET}"/"${BASE}" || exit 251	# subdirectory to group all the output files
    cd	  -P	"${TARGET}"/"${BASE}" || exit 250	# output everything here for THIS file
    printf 'WORKDIR: %s\n' "$(pwd)"			1>&2	# DEBUGGING INFO
    case "${EXT}" in
	pdf | PDF)
		pdftohtml -p -s -noframes -hidden -fmt png -nomerge -nodrm	"${FILE}"	\
					"${TARGET}"/"${BASE}"/"${BASE}".html
		/usr/local/bin/optimizeimg	.	# optimize the .png files we just extracted
		tidy -f	"${BASE}".err	"${TARGET}"/"${BASE}"/"${BASE}".html >	"${BASE}".htm
		firefox			"${TARGET}"/"${BASE}"/"${BASE}".html	"${BASE}".htm
		;;
	epub | EPUB)
		epub2html -o		"${TARGET}"/"${BASE}"			"${FILE}"
		/usr/local/bin/optimizeimg	.	# optimize the .png files we just extracted
		tidy -f	"${BASE}".err	"${TARGET}"/"${BASE}"/"${BASE}".html >	"${BASE}".htm
		firefox			"${TARGET}"/"${BASE}"/"${BASE}".html	"${BASE}".htm
		;;
	docx | DOCX)
		# Extract plain text first, then let abiword turn that text into HTML.
		docx2txt <	"${FILE}" >	"${BASE}".txt
		gvim				"${BASE}".txt		# view the results
		abiword	-t html	      --to-name="${BASE}".html	\
						"${BASE}".txt
		/usr/local/bin/optimizeimg	.	# optimize the .png files we just extracted
		firefox				"${BASE}".html		# view the results
		;;
	rtf | RTF)
		unrtf --quiet	"${FILE}" >	"${BASE}".html
		/usr/local/bin/optimizeimg	.	# optimize the .png files we just extracted
		firefox				"${BASE}".html		# view the results
		;;
	*)
		# Anything else: hand the file to abiword, tidy the result and
		# strip the generated class="cNN" attributes.
		abiword	-t html	      --to-name="${BASE}".html	"${FILE}"
		/usr/local/bin/optimizeimg	.	# optimize the .png files we just extracted
		tidy -f	"${BASE}".err		"${BASE}".html	|	\
		sed 's/ class="c[0-9]*"//g' >	"${BASE}".htm
		firefox				"${BASE}".htm		# view the results
		;;
    esac
done
exit $?
