# wd

#!/bin/sh
#
# Copyright (c) 2007, 2008 Oligem.com.  All rights reserved.
#
# Permission to use, copy, modify, and distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#

# wd, word diff

# usage: write the manual-style help text to standard error, then
# terminate the script with exit status 2.
usage()
{
	cat >&2 <<'EOF'
NAME
  wd - word differences
SYNOPSIS
  wd [-123] old_file new_file
OPTIONS
  -1	  inhibit output of deleted words
  -2	  inhibit output of inserted words
  -3	  inhibit output of common words
  -w ob	  mark beginning of old word
  -x oe	  mark end of old word
  -y nb	  mark beginning of new word
  -z ne	  mark end of new word
  -V	  print version and exit
AUTHORS
  Philippe Bergheaud and Marc Vertes
EOF

	exit 2
}

# Display switches, set to 1 by the -1, -2 and -3 options to inhibit the
# output of deleted, inserted and common words respectively.
opt1=0 opt2=0 opt3=0

# ANSI SGR colour sequences.  The ESC byte is produced by printf rather
# than stored literally in the script, so it cannot be lost when the
# file is copied, pasted or passed through a tool that strips control
# characters.
esc=$(printf '\033')
red="${esc}[01;31m"
blue="${esc}[01;34m"
green="${esc}[01;35m"
white="${esc}[00m"

# Default markers: optv colours the header line, optw/optx surround old
# words, opty/optz surround new words (overridable with -w -x -y -z).
optv=$green optw=$blue optx=$white opty=$red optz=$white

# parse_options: process the command-line options given as arguments.
#   -1 -2 -3	set opt1, opt2, opt3 to 1
#   -w -x -y -z	store the option argument in optw, optx, opty, optz
#   -V		print the version and exit
# Anything else (unknown option, missing argument) calls usage.
# On return OPTIND is the index of the first operand.
parse_options()
{
	OPTIND=1
	while getopts :123w:x:y:z:V opt
	do
		case $opt in
		[123])	eval "opt$opt=1" ;;
		# \$OPTARG is expanded by eval as part of the assignment,
		# so the marker text is stored verbatim and never parsed
		# as shell syntax (blanks, <, >, ;, $(...) are all safe).
		[wxyz])	eval "opt$opt=\$OPTARG" ;;
		V)	echo wd-0.5; exit ;;
		*)	usage ;;
		esac
	done
}

parse_options "$@"
shift $((OPTIND - 1))

# Exactly two operands must remain once the options have been removed.
[ $# -eq 2 ] || usage

# Normal orientation: first operand is the old file, second the new one.
old_file=$1 new_file=$2

if [ "$opt1" -eq 0 ] && [ "$opt2" -ne 0 ]
then
	# show old: swap files and colors, so that the words of the
	# first operand are processed as the new words and carry the
	# old-word markers
	old_file=$2 new_file=$1
	opt1=1 opt2=0
	opty=$optw optz=$optx
fi

# Scratch file receiving the old file split into one word per line.
# mktemp creates it atomically under an unpredictable name, which a
# fixed /tmp/name.$$ path cannot guarantee (symlink and clobber attacks).
old_word_file=$(mktemp "${TMPDIR:-/tmp}/old_word_file.XXXXXX") || exit 1
trap 'rm -f -- "$old_word_file"' EXIT
# Some sh implementations do not run the EXIT trap when killed by a
# signal; turning the signal into a plain exit makes the cleanup run.
trap 'exit 1' HUP INT TERM

# Turn every run of blanks into a newline, giving one word per line, then
# let diff compare the two word lists.  With an empty --line-format diff
# prints no text, only one line per changed group: "n first last" for a
# group of the new file, "o first last" for a group of the old file.
# The awk program below re-reads the original files (names passed with
# -v) and uses those word ranges to print them with markers.
# All expansions are quoted so that file names and markers containing
# blanks or glob characters reach tr, diff and awk unchanged.
tr -s '[:blank:]' '\n' <"$old_file" >"$old_word_file"
tr -s '[:blank:]' '\n' <"$new_file" |
diff -B --new-group-format='n %dF %dL
' --old-group-format='o %df %dl
' --line-format= "$old_word_file" - |
awk -v new_file="$new_file" -v old_file="$old_file" \
	-v opt1="$opt1" -v opt2="$opt2" -v opt3="$opt3" -v optv="$optv" \
	-v optw="$optw" -v optx="$optx" -v opty="$opty" -v optz="$optz" '
# print_tblank: output the blank held back in the global tblank, followed
# by blank1, or a single space in its place, then clear tblank and record
# file in the global prev_file.  Nothing is done when display is false.
#   file	file the upcoming word belongs to
#   blank1	blank to print after the pending one (may be empty)
#   display	false: return at once, leaving tblank and prev_file as is
#   resume	true when the caller continues in the middle of a line
function print_tblank(file, blank1, display, resume)
{
	if (!display)
		return

	if (tblank ~ /\n/) {
		# the pending blank contains a line break: it collapses to
		# one space only when switching files in the middle of a
		# line with no blank to print after it
		if (prev_file != file && resume && !blank1)
			printf(" ")
		else
			printf("%s%s", tblank, blank1)
	} else if (prev_file && prev_file != file)
		# switching files on the same line: a single space
		printf(" ")
	else
		printf("%s%s", tblank, blank1)

	tblank = ""
	prev_file = file
}

# print_word: read words from file until word number end has been read,
# printing those numbered begin to end when display is true.
#   file		name of the file the lines are read from
#   begin, end	first and last word index, counted over the whole file
#   blank		array filled with the blank runs of the current line
#   word		array filled with the words of the current line
#   display	false: words are read and counted but nothing is printed
#   bmark, emark	strings printed before and after each displayed word
# State kept in globals, per file: W[file] is the index of the last word
# read, i[file] and nf[file] are the position in and the size of word,
# w[file] is set on return to end + 1.  The blank following the last
# word of the range is not printed but kept in the global tblank.
function print_word(file, begin, end, blank, word, display, bmark, emark)
{
	while (W[file] != end) {
		if (i[file] > nf[file]) {
			# current line used up (or nothing read yet): fetch
			# the next one.  getline yields 0 at end of file and
			# -1 on error; both must stop the loop, otherwise an
			# unreadable file is retried forever.
			if ((getline line < file) <= 0) {
				print_tblank(file, "", display, 0)
				exit	# end of file, exit
			}
			# blank[1] is the leading blank of the line (empty
			# when the line starts with a word), then one entry
			# per blank run found after each word
			split(line, blank, "[^ 	]+")	# anti-split
			if (tblank || nf[file] != -1)
				print_tblank(file, blank[1], display, 0)

			if (!blank[1] || nf[file] == -1)
				i[file] = 1	# initialize the for loop
			else
				i[file] = 2	# line starts with blank
			nf[file] = split(line, word, "[ 	]+")
			if (nf[file] && word[nf[file]] == "")
				nf[file]--	# line ends with blank
		} else
			# continuing in the middle of an already read line
			print_tblank(file, "", display, 1)

		for (; i[file] <= nf[file]; i[file]++) {
			if (++W[file] < begin)
				continue	# before the range: count only
			if (display) {
				printf("%s%s%s", bmark, word[i[file]], emark)
				# the blank after word[i] is blank[i + 1],
				# or blank[i] when the line starts with a
				# blank (word[1] is then empty)
				if (W[file] == end)
					tblank = blank[i[file] + !blank[1]]
				else
					printf("%s", blank[i[file] + !blank[1]])
			}
			if (W[file] == end) {
				i[file]++
				break
			}
		}
		if (display && i[file] > nf[file]) {
			# the line is used up: its newline is either held
			# back with the pending blank or printed right away
			if (W[file] == end)
				tblank = tblank "\n"
			else {
				print ""
				tblank = ""
			}
		}
		if (W[file] == end)
			break
	}
	w[file] = end + 1
}

# same_group: handle words begin to end of the new file that are common
# to both files; they are displayed without markers (white).
function same_group(begin, end)
{
	# read the same number of words from the old file, printing nothing
	print_word(old_file, w[old_file], w[old_file] + (end - begin), oblank, oword, 0, "", "")

	# display the words from the new file, unless opt3 is set
	print_word(new_file, begin, end, nblank, nword, !opt3, "", "")
}

# old_group: handle words begin to end of the old file that are absent
# from the new file; they are displayed between optw and optx (blue by
# default), unless opt1 is set.
function old_group(begin, end)
{
	# words of the old file not yet handled and located before the
	# group are common words: handle as many words of the new file
	if (w[old_file] < begin)
		same_group(w[new_file], w[new_file] + (begin - w[old_file]) - 1)

	print_word(old_file, begin, end, oblank, oword, !opt1, optw, optx)
}

# new_group: handle words begin to end of the new file that are absent
# from the old file; they are displayed between opty and optz (red by
# default), unless opt2 is set.
function new_group(begin, end)
{
	# words of the new file not yet handled and located before the
	# group are common words
	if (w[new_file] < begin)
		same_group(w[new_file], begin - 1)

	print_word(new_file, begin, end, nblank, nword, !opt2, opty, optz)
}

BEGIN {
	# W[f]: index of the last word read from file f
	# w[f]: index of the next word of file f to be printed
	W[old_file] = 0; W[new_file] = 0
	w[old_file] = 1; w[new_file] = 1

	# i[f], nf[f]: state of the for loop on the lines of file f;
	# nf[f] stays at -1 until a first line has been read
	i[old_file] = 0; i[new_file] = 0
	nf[old_file] = -1; nf[new_file] = -1

	# blank and word arrays, for the old file then for the new file
	oblank[1] = ""; oword[1] = ""
	nblank[1] = ""; nword[1] = ""

	# trailing blank, replaced by a space after the last word
	# of an old group when followed by a word on the same line
	tblank = ""

	# file of the previously printed word
	prev_file = ""

	# header line naming the two files, each in its own marker
	printf("%s### wd %s%s %s%s%s\n", optv, optw, old_file, opty, new_file, optx)
}
# input lines starting with n describe a group of new words:
# display the common words located before it, then the new words
/^n/ {
	new_group($2, $3)
}

# input lines starting with o describe a group of old words:
# display the common words located before it, then the old words
/^o/ {
	old_group($2, $3)
}

END {
	# common words are hidden while old or new words are not:
	# end the last line of output
	if (opt3 && !(opt1 && opt2))
		print ""

	# display common words, up to the end of file (no word has
	# index -1, so print_word runs until the file is exhausted)
	print_word(new_file, w[new_file], -1, nblank, nword, !opt3, "", "")
}' |
if [ -t 1 ]	# last pipeline stage: is standard output a terminal?
then
	# page the result; +/ starts less with a search for the "[01"
	# that begins every colour marker
	less -CimqGrX -j9 -h0 +/'\[01'
else
	# not a terminal: pass the result through unchanged
	cat
fi
# force color reset: emit the SGR reset sequence explicitly.  printf is
# used because "echo -n" is not portable under /bin/sh and an empty
# string resets nothing.  This single reset, placed after both
# branches, also covers the exit from less.
printf '\033[00m'

# Copyright © 2008, Oligem
# html css