#!/usr/bin/perl
# Copyright 2010, by David A. Burton
# Cary, NC USA
# +1 919-481-0149
# Email: http://www.burtonsys.com/email/
# Permission is hereby granted to use this function for any purpose,
# with or without modification, provided only that this copyright &
# permission notice is retained.
# You can use a "require" to pull this into another Perl program, like this:
#
# @INC = ('.','..');
# require "composite_sd.pl";
#
# It defines four functions:
#
# &composite_SD is a function to calculate the overall standard deviation of
# a collection of sample groups, given only the group standard deviations,
# means, and sample counts.
#
# &sample_SD is a simple function to calculate the standard deviation of a
# collection of samples.
#
# &sum and &avg (to calculate summation and simple average, respectively)
# are also defined.
#
# These functions are compatible both with 32-bit Perl 4.036, and with Perl 5.
if (!defined $debugmode) {
$debugmode=0; # set it >0 for debug prints
}
# Input is an array of samples; result is the summation.
sub sum {
local( $sum ) = 0;
foreach $i (@_) { # note that $i is implicitly local to the loop
$sum += $i;
}
return $sum;
}
# Input is an array of samples; result is the mean.
sub avg {
if (-1 == $#_) { die "ERR: \&avg( empty list )\n"; }
return &sum(@_) / (1+$#_)
}
# input is an array of samples; result is the standard deviation
sub sample_SD {
local( @samples ) = @_;
local( $mean ) = &avg( @samples );
local( $sum_of_squared_deviations ) = 0;
if ($#samples <= 0) {
return 0;
} else {
foreach $i (@samples) {
$sum_of_squared_deviations += (($i - $mean) * ($i - $mean));
}
return sqrt( $sum_of_squared_deviations / $#samples );
}
}
# Calculate combined standard deviation via ANOVA (ANalysis Of VAriance)
# See: http://www.burtonsys.com/climate/composite_standard_deviations.html
# Inputs are:
# $G, the number of groups
# @means, the array of $G group means
# @SDs, the array of $G group standard deviations
# $ncount or @ncounts -- number of samples in each group (can be scalar
# if all groups have same number of samples)
# Result is the overall standard deviation.
sub composite_SD {
local( $G ) = shift @_;
local( @means ) = splice( @_, 0, $G );
local( @SDs ) = splice( @_, 0, $G );
local( @ncounts );
local( $i );
if (0 == $#_) {
for ($i=($G-1); $i >= 0; $i--) {
$ncounts[$i] = $_[0];
}
} else {
@ncounts = @_;
}
if ( ($G != ($#means + 1))
|| ($G != ($#SDs + 1))
|| ($G != ($#ncounts + 1)) ) {
die "ERR: sub composite_SD called w/ wrong number of parameters [$G,$#means,$#SDs,$#ncounts]\n";
}
# Okay, we have the parameters, now.
# calculate total number of samples, N, and grand mean, GM
local($N) = &sum(@ncounts); # total number of samples
if ($N <= 1) {
print "Warning: only $N samples, SD is incalculable\n";
return -1;
}
local($GM) = 0;
for ($i=0; $i<$G; $i++) {
$GM += ($means[$i] * $ncounts[$i]);
}
$GM /= $N; # grand mean
# calculate Error Sum of Squares
local($ESS) = 0;
for ($i=0; $i<$G; $i++) {
$ESS += (($SDs[$i])**2) * ($ncounts[$i] - 1);
}
# calculate Total Group Sum of Squares
local($TGSS) = 0;
for ($i=0; $i<$G; $i++) {
$TGSS += (($means[$i]-$GM)**2) * $ncounts[$i];
}
# calculate standard deviation as square root of grand variance
local( $result ) = sqrt( ($ESS+$TGSS)/($N-1) );
if ($debugmode > 0) { printf "dbg composite_SD: N = $N, GM = %4.3f, ESS = %5.1f, TGSS = %5.1f, SD = %4.3f\n", $GM, $ESS, $TGSS, $result; }
return $result;
} #composite_SD
# return true value to 'require'
1;