#!/usr/bin/perl

# Copyright 2010, by David A. Burton
# Cary, NC  USA
# +1 919-481-0149
# Email: http://www.burtonsys.com/email/
# Permission is hereby granted to use this function for any purpose,
# with or without modification, provided only that this copyright &
# permission notice is retained.

# You can use a "require" to pull this into another Perl program, like this:
#
#   @INC = ('.','..');
#   require "composite_sd.pl";
#
# It defines four functions:
#
# &composite_SD is a function to calculate the overall standard deviation of
# a collection of sample groups, given only the group standard deviations,
# means, and sample counts.
#
# &sample_SD is a simple function to calculate the standard deviation of a
# collection of samples.
#
# &sum and &avg (to calculate summation and simple average, respectively)
# are also defined.
#
# These functions are compatible both with 32-bit Perl 4.036, and with Perl 5.



if (!defined $debugmode) {
   $debugmode=0;  # set it >0 for debug prints
}


# Input is an array of samples; result is the summation.
sub sum {
   local( $sum ) = 0;
   foreach $i (@_) {  # note that $i is implicitly local to the loop
      $sum += $i;
   }
   return $sum;
}


# Input is an array of samples; result is the mean.
sub avg {
   if (-1 == $#_) { die "ERR: \&avg( empty list )\n"; }
   return &sum(@_) / (1+$#_)
}


# input is an array of samples; result is the standard deviation
sub sample_SD {
   local( @samples ) = @_;
   local( $mean ) = &avg( @samples );
   local( $sum_of_squared_deviations ) = 0;
   if ($#samples <= 0) {
      return 0;
   } else {
      foreach $i (@samples) {
         $sum_of_squared_deviations += (($i - $mean) * ($i - $mean));
      }
      return sqrt( $sum_of_squared_deviations / $#samples );
   }
}


# Calculate combined standard deviation via ANOVA (ANalysis Of VAriance)
# See:  http://www.burtonsys.com/climate/composite_standard_deviations.html
# Inputs are:
#   $G, the number of groups
#   @means, the array of $G group means
#   @SDs, the array of $G group standard deviations
#   $ncount or @ncounts -- number of samples in each group (can be scalar
#                          if all groups have same number of samples)
# Result is the overall standard deviation.
sub composite_SD {
   local( $G ) = shift @_;
   local( @means ) = splice( @_, 0, $G );
   local( @SDs ) = splice( @_, 0, $G );
   local( @ncounts );
   local( $i );
   if (0 == $#_) {
      for ($i=($G-1); $i >= 0; $i--) {
         $ncounts[$i] = $_[0];
      }
   } else {
      @ncounts = @_;
   }
   if (   ($G != ($#means + 1))
       || ($G != ($#SDs + 1))
       || ($G != ($#ncounts + 1)) ) {
      die "ERR: sub composite_SD called w/ wrong number of parameters [$G,$#means,$#SDs,$#ncounts]\n";
   }
   # Okay, we have the parameters, now.

   # calculate total number of samples, N, and grand mean, GM
   local($N) = &sum(@ncounts);  # total number of samples
   if ($N <= 1) {
      print "Warning: only $N samples, SD is incalculable\n";
      return -1;
   }
   local($GM) = 0;
   for ($i=0; $i<$G; $i++) {
      $GM += ($means[$i] * $ncounts[$i]);
   }
   $GM /= $N; # grand mean

   # calculate Error Sum of Squares
   local($ESS) = 0;
   for ($i=0; $i<$G; $i++) {
      $ESS += (($SDs[$i])**2) * ($ncounts[$i] - 1);
   }

   # calculate Total Group Sum of Squares
   local($TGSS) = 0;
   for ($i=0; $i<$G; $i++) {
      $TGSS += (($means[$i]-$GM)**2) * $ncounts[$i];
   }

   # calculate standard deviation as square root of grand variance
   local( $result ) = sqrt( ($ESS+$TGSS)/($N-1) );
   if ($debugmode > 0) { printf "dbg composite_SD: N = $N, GM = %4.3f, ESS = %5.1f, TGSS = %5.1f, SD = %4.3f\n", $GM, $ESS, $TGSS, $result; }
   return $result;
} #composite_SD


# return true value to 'require'
1;