001/* ===========================================================
002 * JFreeChart : a free chart library for the Java(tm) platform
003 * ===========================================================
004 *
005 * (C) Copyright 2000-2013, by Object Refinery Limited and Contributors.
006 *
007 * Project Info:  http://www.jfree.org/jfreechart/index.html
008 *
009 * This library is free software; you can redistribute it and/or modify it
010 * under the terms of the GNU Lesser General Public License as published by
011 * the Free Software Foundation; either version 2.1 of the License, or
012 * (at your option) any later version.
013 *
014 * This library is distributed in the hope that it will be useful, but
015 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
016 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
017 * License for more details.
018 *
019 * You should have received a copy of the GNU Lesser General Public
020 * License along with this library; if not, write to the Free Software
021 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301,
022 * USA.
023 *
024 * [Oracle and Java are registered trademarks of Oracle and/or its affiliates. 
025 * Other names may be trademarks of their respective owners.]
026 *
027 * ---------------
028 * Statistics.java
029 * ---------------
030 * (C) Copyright 2000-2013, by Matthew Wright and Contributors.
031 *
032 * Original Author:  Matthew Wright;
033 * Contributor(s):   David Gilbert (for Object Refinery Limited);
034 *
035 * Changes (from 08-Nov-2001)
036 * --------------------------
037 * 08-Nov-2001 : Added standard header and tidied Javadoc comments (DG);
038 *               Moved from JFreeChart to package com.jrefinery.data.* in
039 *               JCommon class library (DG);
040 * 24-Jun-2002 : Removed unnecessary local variable (DG);
041 * 07-Oct-2002 : Fixed errors reported by Checkstyle (DG);
042 * 26-May-2004 : Moved calculateMean() method from BoxAndWhiskerCalculator (DG);
043 * 02-Jun-2004 : Fixed bug in calculateMedian() method (DG);
044 * 11-Jan-2005 : Removed deprecated code in preparation for the 1.0.0
045 *               release (DG);
046 * 02-Jul-2013 : Use ParamChecks (DG);
047 *
048 */
049
050package org.jfree.data.statistics;
051
052import java.util.ArrayList;
053import java.util.Collection;
054import java.util.Collections;
055import java.util.Iterator;
056import java.util.List;
057import org.jfree.chart.util.ParamChecks;
058
059/**
060 * A utility class that provides some common statistical functions.
061 */
062public abstract class Statistics {
063
064    /**
065     * Returns the mean of an array of numbers.  This is equivalent to calling
066     * <code>calculateMean(values, true)</code>.
067     *
068     * @param values  the values (<code>null</code> not permitted).
069     *
070     * @return The mean.
071     */
072    public static double calculateMean(Number[] values) {
073        return calculateMean(values, true);
074    }
075
076    /**
077     * Returns the mean of an array of numbers.
078     *
079     * @param values  the values (<code>null</code> not permitted).
080     * @param includeNullAndNaN  a flag that controls whether or not
081     *     <code>null</code> and <code>Double.NaN</code> values are included
082     *     in the calculation (if either is present in the array, the result is
083     *     {@link Double#NaN}).
084     *
085     * @return The mean.
086     *
087     * @since 1.0.3
088     */
089    public static double calculateMean(Number[] values,
090            boolean includeNullAndNaN) {
091
092        ParamChecks.nullNotPermitted(values, "values");
093        double sum = 0.0;
094        double current;
095        int counter = 0;
096        for (int i = 0; i < values.length; i++) {
097            // treat nulls the same as NaNs
098            if (values[i] != null) {
099                current = values[i].doubleValue();
100            }
101            else {
102                current = Double.NaN;
103            }
104            // calculate the sum and count
105            if (includeNullAndNaN || !Double.isNaN(current)) {
106                sum = sum + current;
107                counter++;
108            }
109        }
110        double result = (sum / counter);
111        return result;
112    }
113
114    /**
115     * Returns the mean of a collection of <code>Number</code> objects.
116     *
117     * @param values  the values (<code>null</code> not permitted).
118     *
119     * @return The mean.
120     */
121    public static double calculateMean(Collection values) {
122        return calculateMean(values, true);
123    }
124
125    /**
126     * Returns the mean of a collection of <code>Number</code> objects.
127     *
128     * @param values  the values (<code>null</code> not permitted).
129     * @param includeNullAndNaN  a flag that controls whether or not
130     *     <code>null</code> and <code>Double.NaN</code> values are included
131     *     in the calculation (if either is present in the array, the result is
132     *     {@link Double#NaN}).
133     *
134     * @return The mean.
135     *
136     * @since 1.0.3
137     */
138    public static double calculateMean(Collection values,
139            boolean includeNullAndNaN) {
140
141        ParamChecks.nullNotPermitted(values, "values");
142        int count = 0;
143        double total = 0.0;
144        Iterator iterator = values.iterator();
145        while (iterator.hasNext()) {
146            Object object = iterator.next();
147            if (object == null) {
148                if (includeNullAndNaN) {
149                    return Double.NaN;
150                }
151            }
152            else {
153                if (object instanceof Number) {
154                    Number number = (Number) object;
155                    double value = number.doubleValue();
156                    if (Double.isNaN(value)) {
157                        if (includeNullAndNaN) {
158                            return Double.NaN;
159                        }
160                    }
161                    else {
162                        total = total + number.doubleValue();
163                        count = count + 1;
164                    }
165                }
166            }
167        }
168        return total / count;
169    }
170
171    /**
172     * Calculates the median for a list of values (<code>Number</code> objects).
173     * The list of values will be copied, and the copy sorted, before
174     * calculating the median.  To avoid this step (if your list of values
175     * is already sorted), use the {@link #calculateMedian(List, boolean)}
176     * method.
177     *
178     * @param values  the values (<code>null</code> permitted).
179     *
180     * @return The median.
181     */
182    public static double calculateMedian(List values) {
183        return calculateMedian(values, true);
184    }
185
186    /**
187     * Calculates the median for a list of values (<code>Number</code> objects).
188     * If <code>copyAndSort</code> is <code>false</code>, the list is assumed
189     * to be presorted in ascending order by value.
190     *
191     * @param values  the values (<code>null</code> permitted).
192     * @param copyAndSort  a flag that controls whether the list of values is
193     *                     copied and sorted.
194     *
195     * @return The median.
196     */
197    public static double calculateMedian(List values, boolean copyAndSort) {
198
199        double result = Double.NaN;
200        if (values != null) {
201            if (copyAndSort) {
202                int itemCount = values.size();
203                List copy = new ArrayList(itemCount);
204                for (int i = 0; i < itemCount; i++) {
205                    copy.add(i, values.get(i));
206                }
207                Collections.sort(copy);
208                values = copy;
209            }
210            int count = values.size();
211            if (count > 0) {
212                if (count % 2 == 1) {
213                    if (count > 1) {
214                        Number value = (Number) values.get((count - 1) / 2);
215                        result = value.doubleValue();
216                    }
217                    else {
218                        Number value = (Number) values.get(0);
219                        result = value.doubleValue();
220                    }
221                }
222                else {
223                    Number value1 = (Number) values.get(count / 2 - 1);
224                    Number value2 = (Number) values.get(count / 2);
225                    result = (value1.doubleValue() + value2.doubleValue())
226                             / 2.0;
227                }
228            }
229        }
230        return result;
231    }
232
233    /**
234     * Calculates the median for a sublist within a list of values
235     * (<code>Number</code> objects).
236     *
237     * @param values  the values, in any order (<code>null</code> not
238     *                permitted).
239     * @param start  the start index.
240     * @param end  the end index.
241     *
242     * @return The median.
243     */
244    public static double calculateMedian(List values, int start, int end) {
245        return calculateMedian(values, start, end, true);
246    }
247
248    /**
249     * Calculates the median for a sublist within a list of values
250     * (<code>Number</code> objects).  The entire list will be sorted if the
251     * <code>ascending</code< argument is <code>false</code>.
252     *
253     * @param values  the values (<code>null</code> not permitted).
254     * @param start  the start index.
255     * @param end  the end index.
256     * @param copyAndSort  a flag that that controls whether the list of values
257     *                     is copied and sorted.
258     *
259     * @return The median.
260     */
261    public static double calculateMedian(List values, int start, int end,
262                                         boolean copyAndSort) {
263
264        double result = Double.NaN;
265        if (copyAndSort) {
266            List working = new ArrayList(end - start + 1);
267            for (int i = start; i <= end; i++) {
268                working.add(values.get(i));
269            }
270            Collections.sort(working);
271            result = calculateMedian(working, false);
272        }
273        else {
274            int count = end - start + 1;
275            if (count > 0) {
276                if (count % 2 == 1) {
277                    if (count > 1) {
278                        Number value
279                            = (Number) values.get(start + (count - 1) / 2);
280                        result = value.doubleValue();
281                    }
282                    else {
283                        Number value = (Number) values.get(start);
284                        result = value.doubleValue();
285                    }
286                }
287                else {
288                    Number value1 = (Number) values.get(start + count / 2 - 1);
289                    Number value2 = (Number) values.get(start + count / 2);
290                    result
291                        = (value1.doubleValue() + value2.doubleValue()) / 2.0;
292                }
293            }
294        }
295        return result;
296
297    }
298
299    /**
300     * Returns the standard deviation of a set of numbers.
301     *
302     * @param data  the data (<code>null</code> or zero length array not
303     *     permitted).
304     *
305     * @return The standard deviation of a set of numbers.
306     */
307    public static double getStdDev(Number[] data) {
308        ParamChecks.nullNotPermitted(data, "data");
309        if (data.length == 0) {
310            throw new IllegalArgumentException("Zero length 'data' array.");
311        }
312        double avg = calculateMean(data);
313        double sum = 0.0;
314
315        for (int counter = 0; counter < data.length; counter++) {
316            double diff = data[counter].doubleValue() - avg;
317            sum = sum + diff * diff;
318        }
319        return Math.sqrt(sum / (data.length - 1));
320    }
321
322    /**
323     * Fits a straight line to a set of (x, y) data, returning the slope and
324     * intercept.
325     *
326     * @param xData  the x-data (<code>null</code> not permitted).
327     * @param yData  the y-data (<code>null</code> not permitted).
328     *
329     * @return A double array with the intercept in [0] and the slope in [1].
330     */
331    public static double[] getLinearFit(Number[] xData, Number[] yData) {
332
333        ParamChecks.nullNotPermitted(xData, "xData");
334        ParamChecks.nullNotPermitted(yData, "yData");
335        if (xData.length != yData.length) {
336            throw new IllegalArgumentException(
337                "Statistics.getLinearFit(): array lengths must be equal.");
338        }
339
340        double[] result = new double[2];
341        // slope
342        result[1] = getSlope(xData, yData);
343        // intercept
344        result[0] = calculateMean(yData) - result[1] * calculateMean(xData);
345
346        return result;
347
348    }
349
350    /**
351     * Finds the slope of a regression line using least squares.
352     *
353     * @param xData  the x-values (<code>null</code> not permitted).
354     * @param yData  the y-values (<code>null</code> not permitted).
355     *
356     * @return The slope.
357     */
358    public static double getSlope(Number[] xData, Number[] yData) {
359        ParamChecks.nullNotPermitted(xData, "xData");
360        ParamChecks.nullNotPermitted(yData, "yData");
361        if (xData.length != yData.length) {
362            throw new IllegalArgumentException("Array lengths must be equal.");
363        }
364
365        // ********* stat function for linear slope ********
366        // y = a + bx
367        // a = ybar - b * xbar
368        //     sum(x * y) - (sum (x) * sum(y)) / n
369        // b = ------------------------------------
370        //     sum (x^2) - (sum(x)^2 / n
371        // *************************************************
372
373        // sum of x, x^2, x * y, y
374        double sx = 0.0, sxx = 0.0, sxy = 0.0, sy = 0.0;
375        int counter;
376        for (counter = 0; counter < xData.length; counter++) {
377            sx = sx + xData[counter].doubleValue();
378            sxx = sxx + Math.pow(xData[counter].doubleValue(), 2);
379            sxy = sxy + yData[counter].doubleValue()
380                      * xData[counter].doubleValue();
381            sy = sy + yData[counter].doubleValue();
382        }
383        return (sxy - (sx * sy) / counter) / (sxx - (sx * sx) / counter);
384
385    }
386
387    /**
388     * Calculates the correlation between two datasets.  Both arrays should
389     * contain the same number of items.  Null values are treated as zero.
390     * <P>
391     * Information about the correlation calculation was obtained from:
392     *
393     * http://trochim.human.cornell.edu/kb/statcorr.htm
394     *
395     * @param data1  the first dataset.
396     * @param data2  the second dataset.
397     *
398     * @return The correlation.
399     */
400    public static double getCorrelation(Number[] data1, Number[] data2) {
401        ParamChecks.nullNotPermitted(data1, "data1");
402        ParamChecks.nullNotPermitted(data2, "data2");
403        if (data1.length != data2.length) {
404            throw new IllegalArgumentException(
405                "'data1' and 'data2' arrays must have same length."
406            );
407        }
408        int n = data1.length;
409        double sumX = 0.0;
410        double sumY = 0.0;
411        double sumX2 = 0.0;
412        double sumY2 = 0.0;
413        double sumXY = 0.0;
414        for (int i = 0; i < n; i++) {
415            double x = 0.0;
416            if (data1[i] != null) {
417                x = data1[i].doubleValue();
418            }
419            double y = 0.0;
420            if (data2[i] != null) {
421                y = data2[i].doubleValue();
422            }
423            sumX = sumX + x;
424            sumY = sumY + y;
425            sumXY = sumXY + (x * y);
426            sumX2 = sumX2 + (x * x);
427            sumY2 = sumY2 + (y * y);
428        }
429        return (n * sumXY - sumX * sumY) / Math.pow((n * sumX2 - sumX * sumX)
430                * (n * sumY2 - sumY * sumY), 0.5);
431    }
432
433    /**
434     * Returns a data set for a moving average on the data set passed in.
435     *
436     * @param xData  an array of the x data.
437     * @param yData  an array of the y data.
438     * @param period  the number of data points to average
439     *
440     * @return A double[][] the length of the data set in the first dimension,
441     *         with two doubles for x and y in the second dimension
442     */
443    public static double[][] getMovingAverage(Number[] xData, Number[] yData,
444            int period) {
445
446        // check arguments...
447        if (xData.length != yData.length) {
448            throw new IllegalArgumentException("Array lengths must be equal.");
449        }
450
451        if (period > xData.length) {
452            throw new IllegalArgumentException(
453                "Period can't be longer than dataset.");
454        }
455
456        double[][] result = new double[xData.length - period][2];
457        for (int i = 0; i < result.length; i++) {
458            result[i][0] = xData[i + period].doubleValue();
459            // holds the moving average sum
460            double sum = 0.0;
461            for (int j = 0; j < period; j++) {
462                sum += yData[i + j].doubleValue();
463            }
464            sum = sum / period;
465            result[i][1] = sum;
466        }
467        return result;
468
469    }
470
471}