import { mean, deviation } from 'd3';

/**
 * Sample [correlation](https://en.wikipedia.org/wiki/Correlation_and_dependence)
 * of two equally sized datasets: their sample covariance divided by the
 * deviation of each dataset (as computed by d3's `deviation`).
 *
 * @param {Array<number>} x first input
 * @param {Array<number>} y second input
 * @throws {Error} if x and y do not have equal lengths (raised by excelCovariance)
 * @throws {Error} if x or y have length of one or less (raised by excelCovariance)
 * @returns {number} sample correlation; not a finite number when either
 * dataset has a deviation of 0
 * @example
 * excelCorrelation([1, 2, 3, 4, 5, 6], [2, 2, 3, 4, 5, 60]).toFixed(2); // => '0.69'
 */
function excelCorrelation(x: number[], y: number[]) {
    // The covariance is computed first on purpose: it validates both inputs
    // and throws before any deviation is requested.
    const covariance = excelCovariance(x, y);

    // Divide by one deviation at a time. A deviation of 0 turns the matching
    // step into a division by zero, which yields NaN when the numerator is
    // exactly 0 and +/-Infinity otherwise.
    const scaledByX = covariance / deviation(x)!;
    return scaledByX / deviation(y)!;
}

export default excelCorrelation;

/**
 * Sample [covariance](https://en.wikipedia.org/wiki/Covariance) of two
 * datasets, i.e. a measure of how strongly they move together.
 * Both datasets are given as arrays of numbers and are paired up by index.
 *
 * @param {Array<number>} x a sample of two or more data points
 * @param {Array<number>} y a sample of two or more data points
 * @throws {Error} if x and y do not have equal lengths
 * @throws {Error} if x or y have length of one or less
 * @returns {number} sample covariance
 * @example
 * excelCovariance([1, 2, 3, 4, 5, 6], [6, 5, 4, 3, 2, 1]); // => -3.5
 */
export function excelCovariance(x: number[], y: number[]) {
    const n = x.length;

    // Values are paired by index, so every x needs a matching y.
    if (n !== y.length) {
        throw new Error('excelCovariance requires samples with equal lengths');
    }

    // The divisor below is n - 1, so a single pair (or none) is not enough.
    if (n < 2) {
        throw new Error('excelCovariance requires at least two data points in each sample');
    }

    // Centre each dataset on its own mean so that only the movement around
    // the mean matters: [1, 2, 3] and [2, 3, 4] co-vary exactly like
    // [1, 2, 3] and [1, 2, 3], despite the different absolute values.
    const xCenter = mean(x)!;
    const yCenter = mean(y)!;

    // A pair adds to the total when both values sit on the same side of
    // their means and subtracts from it when they sit on opposite sides.
    let crossProducts = 0;
    for (let k = 0; k < n; k += 1) {
        const dx = x[k] - xCenter;
        const dy = y[k] - yCenter;
        crossProducts += dx * dy;
    }

    // Bessel's correction: divide by n - 1 rather than n, accounting for the
    // degree of freedom lost by estimating the means from the sample itself.
    return crossProducts / (n - 1);
}
