5 * Compute running mean, variance, and extrema of a stream of numbers.
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License along
18 * with this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20 * http://www.gnu.org/copyleft/gpl.html
23 * @author Ori Livneh <ori@wikimedia.org>
26 namespace RunningStat;
28 // Needed due to PHP non-bug <https://bugs.php.net/bug.php?id=49828>.
29 define( 'RUNNINGSTAT_NEGATIVE_INF', -INF );
32 * Represents a running summary of a stream of numbers.
34 * RunningStat instances are accumulator-like objects that provide a set of
35 * continuously-updated summary statistics for a stream of numbers, without
36 * requiring that each value be stored. The measures it provides are the
37 * arithmetic mean, variance, standard deviation, and extrema (min and max);
38 * together they describe the central tendency and statistical dispersion of a
41 * One RunningStat instance can be merged into another; the resultant
42 * RunningStat has the state it would have had if it had accumulated each
43 * individual point. This allows data to be summarized in parallel and in
44 * stages without loss of fidelity.
46 * Based on a C++ implementation by John D. Cook:
47 * <http://www.johndcook.com/standard_deviation.html>
48 * <http://www.johndcook.com/skewness_kurtosis.html>
50 * The in-line documentation for this class incorporates content from the
51 * English Wikipedia articles "Variance", "Algorithms for calculating
52 * variance", and "Standard deviation".
56 /** @var int Number of samples. **/
59 /** @var float The first moment (or mean, or expected value). **/
62 /** @var float The second central moment (or variance). **/
65 /** @var float The least value in the set. **/
68 /** @var float The greatest value in the set. **/
69 public $max = RUNNINGSTAT_NEGATIVE_INF;
72 * Count the number of accumulated values.
75 public function getCount() {
80 * Add a number to the data set.
81 * @param int|float $x Value to add
83 public function addObservation( $x ) {
86 $this->min = min( $this->min, $x );
87 $this->max = max( $this->max, $x );
91 $delta = $x - $this->m1;
92 $delta_n = $delta / $this->n;
93 $this->m1 += $delta_n;
94 $this->m2 += $delta * $delta_n * $n1;
98 * Get the mean, or expected value.
100 * The arithmetic mean is the sum of all measurements divided by the number
101 * of observations in the data set.
105 public function getMean() {
110 * Get the estimated variance.
112 * Variance measures how far a set of numbers is spread out. A small
113 * variance indicates that the data points tend to be very close to the
114 * mean (and hence to each other), while a high variance indicates that the
115 * data points are very spread out from the mean and from each other.
117 * @return float Estimated variance
119 public function getVariance() {
120 if ( $this->n === 0 ) {
121 // The variance of the empty set is undefined.
123 } elseif ( $this->n === 1 ) {
126 return $this->m2 / ( $this->n - 1.0 );
131 * Get the estimated standard deviation.
133 * The standard deviation of a statistical population is the square root of
134 * its variance. It shows how much variation from the mean exists. In
135 * addition to expressing the variability of a population, the standard
136 * deviation is commonly used to measure confidence in statistical conclusions.
138 * @return float Estimated standard deviation
140 public function getStdDev() {
141 return sqrt( $this->getVariance() );
145 * Merge another RunningStat instance into this instance.
147 * This instance then has the state it would have had if all the data had
148 * been accumulated by it alone.
150 * @param RunningStat RunningStat instance to merge into this one
152 public function merge( RunningStat $other ) {
153 // If the other RunningStat is empty, there's nothing to do.
154 if ( $other->n === 0 ) {
158 // If this RunningStat is empty, copy values from other RunningStat.
159 if ( $this->n === 0 ) {
160 $this->n = $other->n;
161 $this->m1 = $other->m1;
162 $this->m2 = $other->m2;
163 $this->min = $other->min;
164 $this->max = $other->max;
168 $n = $this->n + $other->n;
169 $delta = $other->m1 - $this->m1;
170 $delta2 = $delta * $delta;
172 $this->m1 = ( ( $this->n * $this->m1 ) + ( $other->n * $other->m1 ) ) / $n;
173 $this->m2 = $this->m2 + $other->m2 + ( $delta2 * $this->n * $other->n / $n );
174 $this->min = min( $this->min, $other->min );
175 $this->max = max( $this->max, $other->max );