#!/bin/csh -f
#
# significant - decide whether two columns are significantly different
# 	Using a PAIRED-observations t-test, we compare column 1 vs
# column 2 of each data file, reporting whether the difference is
# significant at the 95% confidence level.  This is optimized for use
# with many files and limited to 1..5 lines per file.
#
# Usage line echoed when the script is run with no arguments (so at least
# one file is in fact required, despite the brackets).
set usage='usage: significant [file]...'
#
# Part of
#              The STARFISH Parallel file-system simulator
#        (Simulation Tool for Advanced Research in File Systems)
# 
#                               David Kotz
#                           Dartmouth College
#                              Version 3.0
#                              October 1996
#                          dfk@cs.dartmouth.edu

# At least one data file must be named; otherwise show the usage line
# and exit with a failure status.
if ($#argv < 1) then
	echo "$usage"
	exit 1
endif

# Critical t values for 95% confidence, 1..4 degrees of freedom.
# They can be regenerated with probdist, but that is slow, so the loop
# is left disabled:
# set t=()
# foreach df (1 2 3 4)
#     set t = ($t `probdist crit t $df .05`)
# end

# Precomputed result of the loop above; much faster!
# $t[i] is the critical value for i degrees of freedom.  The awk code in
# the main loop picks a value by its position in this list, so the order
# must be preserved.
set t=(12.706205 4.302653 3.182446 2.776445)

# For each file: take the per-line differences (column 1 - column 2),
# summarize them as "n mean var", and print one line per file:
#   yes <file>      the 95% confidence interval for the mean difference
#                   excludes zero (the columns differ significantly)
#   no <file>       the interval includes zero
#   unknown <file>  no summary could be computed, or n needs more degrees
#                   of freedom than the $t table provides
# With n <= 1 no interval can be formed, so any nonzero mean counts as
# "yes".
# $argv:q and the quoted "$file" keep file names containing blanks or
# glob characters intact.
foreach file ($argv:q)
    # The final dm replaces a negative variance with 0 so that sqrt()
    # below is never handed a negative number.
    set x=`dm x1-x2 < "$file" | stats n mean var | dm x1 x2 'if(x3<0)then 0 else x3'`
#    echo $file $x $t
    # An empty summary means the pipeline produced nothing; without this
    # check awk would mistake the first critical value for n.
    if ($#x == 0) then
	echo "unknown $file"
	continue
    endif
    # awk sees  n mean var  followed by the critical values, so the value
    # for n-1 degrees of freedom is field n+2.  If that field lies beyond
    # the end of the line, the table is too short for this n; report
    # "unknown" rather than testing with an empty (zero) critical value.
    echo $x $t | awk '{ \
    	n=$1; mean=$2; var=$3; \
    	if (n <= 1) { \
    	    if (mean+0 != 0) printf "yes"; else printf "no"; \
    	} else if (n+2 > NF) { \
    	    printf "unknown"; \
    	} else { \
    	    half = $(n+2) * sqrt(var/n); \
    	    lo = mean - half; \
    	    hi = mean + half; \
    	    if (lo * hi > 0) printf "yes"; else printf "no"; \
    	} \
    }'
    echo " $file"
end
