#!/bin/bash

# Uses 'tidy' to find XML errors in HTML files

# Split words on newlines only, so that paths containing spaces are not
# broken apart by word splitting. The setting is exported as well.
IFS=$'\n'
export IFS

# Running tallies of examined files and of files with errors / warnings.
TOTAL_COUNT=0 ERR_COUNT=0 WARN_COUNT=0

# Reporting switches (1 = on, 0 = off).
SHOW_WARNINGS=1 SHOW_OK=1 VERBOSE=0

# Root directories to scan; empty until the command line has been read.
FILES=''

# Prints the usage summary to stderr and exits with status 2.
# Used both for -h and for options getopts does not recognise.
usage() {
	{
		printf "Usage: %s [-e|w] [-v] [-h] [root directory]\n\n" "${0##*/}"
		printf "      Uses 'tidy' command to find XML formatting errors in .html files.\n\n"
		printf "  -e  Show only files with errors\n"
		printf "  -w  Show only files with errors or warnings\n"
		printf "  -v  Verbose; show error and warning messages\n"
		printf "  -h  Help\n\n"
	} >&2
	exit 2
}

# Command-line options:
#   -e  report only files with errors (hides warnings and OK files)
#   -w  report files with errors or warnings (hides OK files)
#   -v  also print tidy's messages for each reported file
#   -h  show usage
while getopts 'ewvh' OPTION; do
	case "$OPTION" in
		e)	SHOW_WARNINGS=0
			SHOW_OK=0
			;;

		w)	SHOW_WARNINGS=1
			SHOW_OK=0
			;;

		v)	VERBOSE=1
			;;

		# getopts sets OPTION to a literal '?' for an unknown option; the
		# backslash keeps it from acting as a match-anything glob.
		h|\?)	usage
			;;
	esac
done
shift $((OPTIND - 1))

# Remaining arguments are the root directories. "$*" joins them with the
# first character of IFS into a single string.
FILES="$*"

# Default to the current directory when no root was given.
if [[ -z "$FILES" ]]; then
	FILES="."
fi

# Checks every .html file found below the root directories listed in $FILES
# (one root per line; "." when FILES is empty) with 'tidy -xml -e -q' and
# reports the result for each file.
#
# Globals read:    FILES, SHOW_WARNINGS, SHOW_OK, VERBOSE
# Globals written: TOTAL_COUNT, ERR_COUNT, WARN_COUNT
# Outputs:         "OK" lines on stdout; "WARNINGS!" / "ERRORS!" lines and,
#                  when VERBOSE is 1, tidy's messages on stderr
scan_html_files() {
	local root file tidy_out status
	local -a roots=()

	# Turn the newline-separated root list into an array so each root reaches
	# 'find' as exactly one argument, without globbing.
	while IFS= read -r root; do
		if [[ -n "$root" ]]; then
			roots+=("$root")
		fi
	done <<<"$FILES"
	if (( ${#roots[@]} == 0 )); then
		roots=(".")
	fi

	# NUL-delimited names survive any character a path may contain. The list
	# is read on fd 3 so stdin stays untouched for the commands in the loop.
	while IFS= read -r -d '' -u 3 file; do
		(( TOTAL_COUNT += 1 ))
		tidy_out=$(tidy -xml -e -q "$file" 2>&1)
		status=$?

		case "$status" in
			0)	# clean file
				if [[ "$SHOW_OK" == 1 ]]; then
					printf 'OK        %s\n' "$file"
				fi
				;;

			1)	# warnings only
				(( WARN_COUNT += 1 ))
				if [[ "$SHOW_WARNINGS" == 1 ]]; then
					printf 'WARNINGS! %s\n' "$file" >&2
					if [[ "$VERBOSE" == 1 ]]; then
						# Quoted so tidy's messages keep their line breaks.
						printf '%s\n\n' "$tidy_out" >&2
					fi
				fi
				;;

			2)	# errors
				(( ERR_COUNT += 1 ))
				printf 'ERRORS!   %s\n' "$file" >&2
				if [[ "$VERBOSE" == 1 ]]; then
					printf '%s\n\n' "$tidy_out" >&2
				fi
				;;

			*)	# Any other status (e.g. 127 when tidy is not installed) means
				# the file was not actually checked; never report it as OK.
				(( ERR_COUNT += 1 ))
				printf 'ERRORS!   %s (tidy exited with status %d)\n' "$file" "$status" >&2
				if [[ "$VERBOSE" == 1 ]]; then
					printf '%s\n\n' "$tidy_out" >&2
				fi
				;;
		esac
	done 3< <(find "${roots[@]}" -name '*.html' ! -type d -print0)
}

scan_html_files

# Prints the final tally: total number of files, then count and integer
# percentage of OK files, files with errors, and files with warnings only.
#
# Globals read:    TOTAL_COUNT, ERR_COUNT, WARN_COUNT
# Globals written: OK_COUNT, PERCENT_ERR, PERCENT_WARN, PERCENT_OK
# Outputs:         the summary table on stdout
print_summary() {
	# Arithmetic expansion turns unset or empty counters into 0.
	local total=$(( TOTAL_COUNT ))
	local errors=$(( ERR_COUNT ))
	local warnings=$(( WARN_COUNT ))

	OK_COUNT=$(( total - errors - warnings ))

	# With no files scanned every share is 0; dividing by the total would
	# abort with "division by 0".
	if (( total > 0 )); then
		PERCENT_ERR=$(( errors * 100 / total ))
		PERCENT_WARN=$(( warnings * 100 / total ))
		PERCENT_OK=$(( OK_COUNT * 100 / total ))
	else
		PERCENT_ERR=0
		PERCENT_WARN=0
		PERCENT_OK=0
	fi

	echo
	printf "               Total files: %4d\n" "$total"
	echo
	printf "                  OK files: %4d %3d%%\n" "$OK_COUNT" "$PERCENT_OK"
	printf "         Files with errors: %4d %3d%%\n" "$errors" "$PERCENT_ERR"
	printf "  Files with warnings only: %4d %3d%%\n" "$warnings" "$PERCENT_WARN"
	echo
}

print_summary
