Composite Standard Deviation source code

Contents:

  1. Perl
  2. Python
  3. PHP
  4. For MATLAB see Sebastian W's implementation.

 

Perl:   composite_sd.pl

#!/usr/bin/perl

# Copyright 2010, by David A. Burton
# Cary, NC  USA
# +1 919-481-0149
# Email: http://www.burtonsys.com/email/
# Permission is hereby granted to use this function for any purpose,
# with or without modification, provided only that this copyright &
# permission notice is retained.

# You can use a "require" to pull this into another Perl program, like this:
#
#   @INC = ('.','..');
#   require "composite_sd.pl";
#
# It defines four functions:
#
# &composite_SD is a function to calculate the overall standard deviation of
# a collection of sample groups, given only the group standard deviations,
# means, and sample counts.
#
# &sample_SD is a simple function to calculate the standard deviation of a
# collection of samples.
#
# &sum and &avg (to calculate summation and simple average, respectively)
# are also defined.
#
# These functions are compatible both with 32-bit Perl 4.036, and with Perl 5.



if (!defined $debugmode) {
   $debugmode=0;  # set it >0 for debug prints
}


# Input is an array of samples; result is the summation.
sub sum {
   local( $sum ) = 0;
   foreach $i (@_) {  # note that $i is implicitly local to the loop
      $sum += $i;
   }
   return $sum;
}


# Input is an array of samples; result is the mean.
sub avg {
   if (-1 == $#_) { die "ERR: \&avg( empty list )\n"; }
   return &sum(@_) / (1+$#_)
}


# input is an array of samples; result is the standard deviation
sub sample_SD {
   local( @samples ) = @_;
   local( $mean ) = &avg( @samples );
   local( $sum_of_squared_deviations ) = 0;
   if ($#samples <= 0) {
      return 0;
   } else {
      foreach $i (@samples) {
         $sum_of_squared_deviations += (($i - $mean) * ($i - $mean));
      }
      return sqrt( $sum_of_squared_deviations / $#samples );
   }
}


# Calculate combined standard deviation via ANOVA (ANalysis Of VAriance)
# See:  http://www.burtonsys.com/climate/composite_standard_deviations.html
# Inputs are:
#   $G, the number of groups
#   @means, the array of $G group means
#   @SDs, the array of $G group standard deviations
#   $ncount or @ncounts -- number of samples in each group (can be scalar
#                          if all groups have same number of samples)
# Result is the overall standard deviation.
sub composite_SD {
   local( $G ) = shift @_;
   local( @means ) = splice( @_, 0, $G );
   local( @SDs ) = splice( @_, 0, $G );
   local( @ncounts );
   local( $i );
   if (0 == $#_) {
      for ($i=($G-1); $i >= 0; $i--) {
         $ncounts[$i] = $_[0];
      }
   } else {
      @ncounts = @_;
   }
   if (   ($G != ($#means + 1))
       || ($G != ($#SDs + 1))
       || ($G != ($#ncounts + 1)) ) {
      die "ERR: sub composite_SD called w/ wrong number of parameters [$G,$#means,$#SDs,$#ncounts]\n";
   }
   # Okay, we have the parameters, now.

   # calculate total number of samples, N, and grand mean, GM
   local($N) = &sum(@ncounts);  # total number of samples
   if ($N <= 1) {
      print "Warning: only $N samples, SD is incalculable\n";
      return -1;
   }
   local($GM) = 0;
   for ($i=0; $i<$G; $i++) {
      $GM += ($means[$i] * $ncounts[$i]);
   }
   $GM /= $N; # grand mean

   # calculate Error Sum of Squares
   local($ESS) = 0;
   for ($i=0; $i<$G; $i++) {
      $ESS += (($SDs[$i])**2) * ($ncounts[$i] - 1);
   }

   # calculate Total Group Sum of Squares
   local($TGSS) = 0;
   for ($i=0; $i<$G; $i++) {
      $TGSS += (($means[$i]-$GM)**2) * $ncounts[$i];
   }

   # calculate standard deviation as square root of grand variance
   local( $result ) = sqrt( ($ESS+$TGSS)/($N-1) );
   if ($debugmode > 0) { printf "dbg composite_SD: N = $N, GM = %4.3f, ESS = %5.1f, TGSS = %5.1f, SD = %4.3f\n", $GM, $ESS, $TGSS, $result; }
   return $result;
} #composite_SD


# return true value to 'require'
1;

 


Python:   composite_sd.py

#!/usr/bin/python3

'''
composite_sd.py defines three functions:

  composite_SD is a function to calculate the overall standard deviation of
    a collection of sample groups, given only the group standard deviations,
    means, and sample counts.

  sample_SD is a simple function to calculate the standard deviation of a
    collection of samples.

  avg (to calculate the simple average) is also defined.

These functions are compatible with Python 2.6, Python 2.7, and Python 3.*.

Copyright 2010, 2013, by David A. Burton
Cary, NC  USA
+1 919-481-0149
Email: http://www.burtonsys.com/email/
Permission is hereby granted to use this function for any purpose,
with or without modification, provided only that this copyright &
permission notice is retained.

Note: if what you really want is a "running" calculation of variance and/or
standard deviation, see this article about the Welford Method:
http://www.johndcook.com/standard_deviation.html
(That's from the American statistician John Cook, not the Australian
climate activist John Cook.)
'''

from __future__ import print_function, division  # requires python 2.6 or later (2.7 or later preferred)
import math

__all__ = ['iterable', 'avg', 'sample_SD', 'composite_SD']


def iterable(obj):
    '''True iff obj is iterable: a list, tuple, or string.'''
    return hasattr(obj, '__contains__')


def avg(samples):
    if len(samples) >= 1:
        return sum(samples) / len(samples)
    return float('nan')


def sample_SD(samples):
    '''input is an array of samples; result is the standard deviation'''
    mean = avg(samples)
    sum_of_squared_deviations = 0;
    sd = 0
    if len(samples) >= 2:
        for datum in samples:
            sum_of_squared_deviations += ((datum - mean) * (datum - mean));
        sd = math.sqrt(sum_of_squared_deviations / (len(samples)-1) );
    return sd


def composite_SD(means, SDs, ncounts):
    '''Calculate combined standard deviation via ANOVA (ANalysis Of VAriance)
       See:  http://www.burtonsys.com/climate/composite_standard_deviations.html
       Inputs are:
         means, the array of group means
         SDs, the array of group standard deviations
         ncounts, number of samples in each group (can be scalar
                  if all groups have same number of samples)
       Result is the overall standard deviation.
    '''
    G = len(means)  # number of groups
    if G != len(SDs):
        raise Exception('inconsistent list lengths')
    if not iterable(ncounts):
        ncounts = [ncounts] * G  # convert scalar ncounts to array
    elif G != len(ncounts):
        raise Exception('wrong ncounts list length')

    # calculate total number of samples, N, and grand mean, GM
    N = sum(ncounts)  # total number of samples
    if N <= 1:
        raise Exception("Warning: only " + str(N) + " samples, SD is incalculable")
    GM = 0.0
    for i in range(G):
        GM += means[i] * ncounts[i]
    GM /= N  # grand mean

    # calculate Error Sum of Squares
    ESS = 0.0
    for i in range(G):
        ESS += ((SDs[i])**2) * (ncounts[i] - 1)

    # calculate Total Group Sum of Squares
    TGSS = 0.0
    for i in range(G):
        TGSS += ((means[i]-GM)**2) * ncounts[i]

    # calculate standard deviation as square root of grand variance
    result = math.sqrt((ESS+TGSS)/(N-1))
    return result


# samples = range(10)
# print('avg=', avg(samples))
# sd = sample_SD(samples)
# print('sd=', sd)
# pt1 = [0,1,2]
# pt2 = [3,4]
# pt3 = [5,6,7,8,9]
# means = [avg(pt1), avg(pt2), avg(pt3)]
# SDs = [sample_SD(pt1), sample_SD(pt2), sample_SD(pt3)]
# ncounts = [len(pt1), len(pt2), len(pt3)]
# sd2 = composite_SD(means, SDs, ncounts)
# print('sd2=', sd2)
#
# samples = range(9)
# print('avg=', avg(samples))
# sd = sample_SD(samples)
# print('sd=', sd)
# pt1 = [0,1,2]
# pt2 = [3,4,5]
# pt3 = [6,7,8]
# means = [avg(pt1), avg(pt2), avg(pt3)]
# SDs = [sample_SD(pt1), sample_SD(pt2), sample_SD(pt3)]
# ncounts = 3
# sd2 = composite_SD(means, SDs, ncounts)
# print('sd2=', sd2)

 


PHP:   composite_sd.php.txt

Date: Mon, Jun 22, 2015 at 1:05 PM
Subject: Composite standard deviations - PHP Code
 
Hi Dave,
 
I converted the python code which is attached into PHP code. I tested and it gives a good composite SD number. You are free to copy or do as you wish as you provided the original python source.
 
Here is the function which I have added as part of a statistics class.
 
Cheers, Eric
 
Eric Jennings, Sr. Systems Analyst
Hampton Affiliates
//---------------------------------------------------------------------------------------------
// This function returns a composite standard deviation.
// Calculate combined standard deviation via ANOVA (ANalysis Of VAriance)
//   See:  http://www.burtonsys.com/climate/composite_standard_deviations.html
//   Inputs are:
//     Means, the array of group means
//     SDs, the array of group standard deviations
//     nCounts, number of samples in each group (can be scalar
//              if all groups have same number of samples)
// Return: Result is the overall standard deviation.
//
// Copyright 2010, 2013, by David A. Burton; Copyright 2015, by Eric Jennings.
// Contact by email: http://www.burtonsys.com/email/
// Permission is hereby granted to use this function for any purpose,
// with or without modification, provided only that this copyright and
// permission notice is retained.
//
// Note: if what you really want is a "running" calculation of variance and/or
// standard deviation, see this article about the Welford Method:
// http://www.johndcook.com/standard_deviation.html
//
public function composite_SD( $Means, $SDs, $nCounts ){

    // number of groups
    $G = count( $Means );
    if( $G != count($SDs)){
        $this->Msg .= 'inconsistent list lengths';
        return 0;
    }
    if( $G != count($nCounts)){
        $this->Msg .= 'wrong nCounts list length';
        return 0;
    }

    // calculate total number of samples, N, and grand mean, GM
    $N = array_sum($nCounts);  // total number of samples
    if( $N <= 1 ){
        $this->Msg .= "Warning: only $N samples, SD is incalculable";
    }
    $GM = 0.0;
    for( $i = 0; $i < $G; $i++ ){
        $GM += $Means[$i] * $nCounts[$i];
    }
    $GM /= $N;  // grand mean

    // calculate Error Sum of Squares
    $ESS = 0.0;
    for( $i = 0; $i < $G; $i++){
        $ESS += (pow($SDs[$i], 2)) * ($nCounts[$i] - 1);
    }

    // calculate Total Group Sum of Squares
    $TGSS = 0.0;
    for( $i = 0; $i < $G; $i++ ){
        $TGSS += (pow($Means[$i]-$GM, 2)) * $nCounts[$i];
    }

    // calculate standard deviation as square root of grand variance
    $result = sqrt(($ESS+$TGSS)/($N-1));
    return $result;
}