#!/bin/csh -f
#
# compare - build a table for comparison between group A and group B
#    -- one row for each pattern (defined by patfile)
#    -- columns for A and B
#    -- columns for ratio of B/A
#
# Part of
#              The STARFISH Parallel file-system simulator
#        (Simulation Tool for Advanced Research in File Systems)
# 
#                               David Kotz
#                           Dartmouth College
#                              Version 3.0
#                              October 1996
#                          dfk@cs.dartmouth.edu

# Usage text; echoed with :q so it is printed as one quoted word whenever
# the arguments are rejected.
set usage='usage: compare patfile dirA/groupnameA dirB/groupnameB\
 where a "groupname" will be used to select TWO directories from "dir". \
 For example: cache.nosort.contig might select \
   8.1280.cache.nosort.contig.16.0 8192.1280.cache.nosort.contig.16.0\
 if all configs in that directory are *.1280.*.16.0.'

# output of the runbin command: the directory searched for compare.awk
set P=`runbin`

# fixed, for now; these values are used to build the directory and
# data-file glob patterns further down
set nio=16 ncomp=16 ndisk=16

# exactly three arguments are required
if ($#argv != 3) then
    echo $usage:q
    exit 1
endif

# First argument: a readable file listing the patterns to compare.
# The name is quoted wherever it is used so that a path containing blanks
# or glob characters is tested and read as a single word.
set patfile="$1"
if (! -r "$patfile") then
    echo "$patfile" not found
    exit 1
endif
# the whitespace-separated words of the file become the pattern list
set pats=(`cat "$patfile"`)

# Arguments two and three each name a group as dir/groupname:
# :h keeps the part before the last slash, :t the part after it.
set A="$2"
set Adir="$A:h"
set Asel="$A:t"

set B="$3"
set Bdir="$B:h"
set Bsel="$B:t"

# Each argument must contain a slash and its directory part must exist;
# any failure is answered with the usage message.
if ("$A" !~ */* || ! -d "$Adir" || "$B" !~ */* || ! -d "$Bdir") then
    echo $usage:q
    exit 1
endif

# Locate the results directories of group A: configurations under $Adir
# whose names contain the selector and, later, ".$ndisk.".
echo looking for $Asel in $Adir
set Adirs=($Adir/*$Asel*.$ndisk.*/results)
# only one or two matches are acceptable
if ($#Adirs < 1 || $#Adirs > 2) then
    echo found $#Adirs not 1 or 2
    echo $Adirs
    exit 1
endif
if (! -d $Adirs[1]) then
    echo missing directory $Adirs[1]
    exit 1
endif
# the second entry is only indexed once we know it exists
if ($#Adirs > 1) then
    if (! -d $Adirs[2]) then
        echo missing directory $Adirs[2]
        exit 1
    endif
endif

# Same search and checks for group B.
echo looking for $Bsel in $Bdir
set Bdirs=($Bdir/*$Bsel*.$ndisk.*/results)
if ($#Bdirs < 1 || $#Bdirs > 2) then
    echo found $#Bdirs not 1 or 2
    echo $Bdirs
    exit 1
endif
if (! -d $Bdirs[1]) then
    echo missing directory $Bdirs[1]
    exit 1
endif
if ($#Bdirs > 1) then
    if (! -d $Bdirs[2]) then
        echo missing directory $Bdirs[2]
        exit 1
    endif
endif

# A and B are compared pairwise, so both must have the same number of
# results directories.
if ($#Adirs != $#Bdirs) then
    echo mismatch: found $#Adirs in $Adir and $#Bdirs in $Bdir
    echo $Adirs
    echo $Bdirs
    exit 1
endif

#######################################################
# now start pulling things together

# Scratch directory for this run; the shell's process id ($$) is part of
# the name.  All intermediate files live underneath it.
set tmp=/tmp/compare$$
set data=$tmp/data
set cvfile=$tmp/cv
set trials=$tmp/trials

# goto reawk

rm -rf $tmp
# from here on an interrupt jumps to the cleanup label
onintr cleanup
# Stop right away if the scratch directory cannot be created: every later
# step writes into it.
mkdir $tmp
if ($status != 0) then
    echo cannot create $tmp
    exit 1
endif

# put list of patterns in data.0 (the pattern file minus its empty lines)
sed '/^$/d' "$patfile" > $data.0

if ($#Adirs == 1) then
    set groups=(1)
    # single group: write a one-line data.2 so that the data.[0-2] glob
    # used when building the table still finds a third file
    echo > $data.2
else
    set groups=(1 2)
endif

# there are one or two dirs in Adirs and in Bdirs.  Work on each pair in turn.
# Every pattern appends exactly one row to $data.<group>: either the
# averages followed by the significance flag, or the filler "x x x no"
# when the pattern is excluded or skipped.
foreach group ($groups)
    # put the data for group G  in dataG
    set datafile=$data.$group
    echo "Comparing $Adirs[$group]"
    echo "with      $Bdirs[$group]"

    foreach pat ($pats)
        # we don't do 8-byte runs for these patterns
        if (($Adirs[$group] =~ */8.* || $Bdirs[$group] =~ */8.*)\
                  && ($pat =~ w?n || $pat == wrlw)) then
            echo x x x no >> $datafile
        else
            # make lists of the data files for A and B (one file per trial)
            set Adata=($Adirs[$group]/$pat.$nio.$ncomp.*.data)
            set Bdata=($Bdirs[$group]/$pat.$nio.$ncomp.*.data)

            if ($#Adata != $#Bdata) then
                echo Different number of trials:
                echo "  " $#Adata $Adirs[$group]/$pat
                echo "  " $#Bdata $Bdirs[$group]/$pat
                # fall back to trial 1 of each side
                set nonomatch
                set Adata=($Adirs[$group]/$pat.$nio.$ncomp.1.data)
                set Bdata=($Bdirs[$group]/$pat.$nio.$ncomp.1.data)
                unset nonomatch
                # The trial-1 names contain no wildcard, so each list holds
                # exactly one word whether or not the file exists; comparing
                # the counts can never detect a missing file.  Test the
                # files themselves instead.
                if (! -r "$Adata[1]" || ! -r "$Bdata[1]") then
                    echo Skipping.
                    echo x x x no >> $datafile
                    continue
                else
                    echo Using only trial 1 from each.
                endif
            endif

            # remember how many trials went into this comparison
            echo $#Adata >> $trials

            set a=$tmp/$pat.A
            set b=$tmp/$pat.B
            set both=$tmp/$pat.both

            # grab the tthruput for each trial of each case
            cat $Adata | colex 13 > $a
            cat $Bdata | colex 13 > $b
            # align the two cases, convert to MB/s, compute ratio
            abut $a $b | dm x1/1024. x2/1024. x2/x1 > $both

            # does the ratio represent statistically significant difference?
            # (with a single trial the test is not run and "yes" is assumed)
            if ($#Adata > 1) then
                set sig=`significant $both`
            else
                set sig=yes
            endif

            # put avgs in datafile, abutting "yes" or "no" from significant
            avg a < $both | dm s1 s2 s3 \'$sig[1]\' >> $datafile
            # put cv (of avg of ratios) in cvfile
            colex 3 < $both | avg c >> $cvfile
        endif
    end
end

###########################################
# build table.tex

reawk:

# Start table.tex afresh with a LaTeX preamble describing this run.
echo > table.tex
echo 'Computed table on' >> table.tex
date                     >> table.tex
echo '\\'                >> table.tex
echo ''                  >> table.tex
echo '{\bf Throughput in MB/s\\' >> table.tex
echo "A = $A\\" >> table.tex
echo ''                  >> table.tex
echo "B = $B}\\" >> table.tex
echo ''                  >> table.tex
echo "Nio = $nio\\"      >> table.tex
echo "Ncomp = $ncomp\\"  >> table.tex
echo "Ndisk = $ndisk\\"  >> table.tex
echo '\bigskip'          >> table.tex
echo ''                  >> table.tex

# Now, run it through awk to get it formatted right
# (data.0 holds the pattern names, data.1 and data.2 the per-group rows)
abut $data.[0-2] | nawk -f $P/compare.awk ngroups=$#groups >> table.tex

# $trials is only created once a pattern has actually been compared.  An
# empty pattern file, or a run in which every pattern was excluded or
# skipped, leaves it missing; report zero trials in that case instead of
# redirecting from a file that does not exist.
if (-r $trials) then
    set mintrials=`stats min < $trials`
    set maxtrials=`stats max < $trials`
else
    set mintrials=0
    set maxtrials=0
endif
echo '' >> table.tex
echo '{\small Number of trials: max '$maxtrials', min '$mintrials'.}\\' >> table.tex

# the variation and significance notes only apply when some comparison
# used more than one trial
if ($maxtrials > 1) then
    set maxcv=`stats max < $cvfile`
    echo '' >> table.tex
    echo '{\small Maximum coefficient of variation on average of ratios was '$maxcv'.}\\' >> table.tex
    echo '{\small Ratios in {\em italics\/} do not represent a statistically significant difference at the 95\% confidence level;\\ all others do.}\\' >> table.tex
endif

echo output is in table.tex

# save a local copy for debugging
#  rm -rf compare-data
#  cpdir $tmp compare-data

# Reached by falling off the end of a normal run and, because of
# "onintr cleanup", when the script is interrupted: remove the scratch
# directory.
cleanup:
rm -rf $tmp
