#!/usr/bin/perl
#
# $Id: dshbak 1240 2010-10-22 00:11:43Z mark.grondona $
# $Source$
# 
require 5.003;

use Getopt::Std;

getopts('c') or usage();

#
# Stdin consists of lines of the form "hostname: output...".
# Store these in a hash, keyed by hostname, of lists of lines.
#
while (<>) {
	($tag, $data) = m/^\s*(\S+?)\s*: ?(.*\n)$/;
	push(@{$lines{$tag}}, $data);
}

#
# If -c was specified, hosts with identical output are displayed as a list
# of hosts followed by one copy of the output.
#
if ($opt_c) {
	foreach $tag (sortn(keys %lines)) {		# look thru each host
		next if (!defined($lines{$tag}));	# skip deleted keys
		@identical = ();			# init list of matches
		foreach $tag2 (keys %lines) {	
			next if ($tag2 eq $tag);	# skip over myself
			if (cmp_list(\@{$lines{$tag}}, \@{$lines{$tag2}})) {
				push(@identical, $tag2);# equal?  stash match
				delete($lines{$tag2});	# delete data from hash
			}
		}
		print("----------------\n");		# header: list of hosts
		printf("%s\n", 				# plus myself
		    join(",", compress(sort(@identical, $tag)))); 
		print("----------------\n");
		foreach $data (@{$lines{$tag}}) {	# lines of data (once)
			print($data);
		}
	}
#
# If no -c, all hosts appear individually with their output.
#
} else {

	foreach $tag (sortn(keys %lines)) {
		print("----------------\n");		# header: one host
		print("$tag\n");
		print("----------------\n");
		foreach $data (@{$lines{$tag}}) {	# lines of data
			print($data);
		}
	}
}

#
# Compare two lists-o-strings
#	\@l1 (IN)	list1
#	\@l2 (IN)	list2
#	RETURN		1 if match, 0 if not
#
sub cmp_list
{
	my ($l1, $l2) = @_;
	my ($i, $retval);

	$retval = 1;

	if ($#{$l1} != $#{$l2}) {
		return 0;
	}
	for ($i = 0; $i <= $#{$l1} && $retval == 1; $i++) {
		if (!defined(${$l2}[$i]) || ${$l1}[$i] ne ${$l2}[$i]) {
			$retval = 0;
		}
	}

	return $retval;
}

sub usage
{
	printf STDERR ("Usage: dshbak [-c]\n");
}


sub compress 
{
	my %suffixes = ();
	my @list = ();
 
	#   Each suffix key points to a list of hostnames with corresponding
	#    suffix stripped off.
	push (@{$suffixes{$$_[1]}}, $$_[0]) 
	   for map { [/(.*?\d*)(\D*)$/] } sortn (@_);

	#
	#   For each suffix, run compress on hostnames without suffix, then
	#    reapply suffix name.
	for my $suffix (keys %suffixes) {
	    map { push (@list, "$_$suffix") } 
	        compress_inner (@{$suffixes{$suffix}}); 
	}

	local $"=",";
	return wantarray ?  @list : "@list";
}


sub compress_inner
{
	my %rng = comp(@_);
	my @list = ();

	local $"=",";

	@list = map {  $_ .
		      (@{$rng{$_}}>1 || ${$rng{$_}}[0] =~ /-/ ?
		                "[@{$rng{$_}}]" :
				 "@{$rng{$_}}"
		      )
	            } sort keys %rng;

	return wantarray ? @list : "@list";
}

#
#  Return the zeropadding, if any, of $n: 
#
sub zeropadding
{
   my ($n) = @_;
   return (($n =~ /^0/) and ($n ne "0")) ? length $n : 0;
}

sub comp
{
	my (%i) = ();
	my (%s) = ();

	# turn off warnings here to avoid perl complaints about 
	# uninitialized values for members of %i and %s
	local ($^W) = 0;


	for my $host (sortn (@_)) {
		my ($p, $n) = $host =~ /(.*?)(\d*)$/;
		my $zp = &zeropadding ($n);
		#
		#  $s{$p} is a reference to an array of arrays
		#   that indicate individual range elements of
		#   the form [ N_start, N_end]. If only one element
		#   is present then the range element is a singleton.
		#
		#  $i{$p}{$zp}${n} tracks the index of prefix $p and suffix $n
		#   with zero-padding $zp into the @{$s{$p}} array.

		#
		#  See if $n-1 exists in the $s{$p} array (with same zero-pad)
		#
		my $idx = $i{$p}{$zp}{$n-1};
		if (defined $idx) {
			#
			#  $n - 1 is already in array, so update END:
			#
			$s{$p}[$idx][1] = "$n";
			$i{$p}{$zp}{$n-0} = $idx;
		}
		else {
			#
			#   Otherwise, we create a new single entry
			#    and update $i{} (Use $n-0 to force a number)
			#
			push (@{$s{$p}}, [ $n ]);
			$i{$p}{$zp}{$n-0} = $#{$s{$p}};
		}
	}

	#
	#
	#  Now return $s{} as a hash of prefixes with a list of range elemts:
	#   e.g. $s{"host"} = [ "1-10", "25", "100-101" ]
	#
	for my $key (keys %s) {
		@{$s{$key}} =
			map { $#$_>0 ? "$$_[0]-$$_[$#$_]" : "$$_[0]" }  @{$s{$key}};
	}
	return %s;
}

# sortn:
#
# sort a group of alphanumeric strings by the last group of digits on
# those strings, if such exists (good for numerically suffixed host lists)
#
sub sortn
{
	map {$$_[0]} sort {($$a[1]||0)<=>($$b[1]||0)} map {[$_,/(\d*)$/]} @_;
}
