Module: Numo::GSL::Stats
- Defined in:
- ext/numo/gsl/stats/gsl_stats.c
Class Method Summary collapse
-
.absdev(data[], axis: nil, keepdims: false) ⇒ Numo::DFloat
This function computes the absolute deviation from the mean of data, a dataset of length n with stride stride.
-
.absdev_m(data[], mean, axis: nil, keepdims: false) ⇒ Numo::DFloat
This function computes the absolute deviation of the dataset data relative to the given value of mean.
-
.correlation(data1[], data2[], axis: nil, keepdims: false) ⇒ Numo::DFloat
This function efficiently computes the Pearson correlation coefficient between the datasets data1 and data2 which must both be of the same length n.
-
.covariance(data1[], data2[], axis: nil, keepdims: false) ⇒ Numo::DFloat
This function computes the covariance of the datasets data1 and data2 which must both be of the same length n.
-
.covariance_m(data1[], data2[], mean1, mean2, axis: nil, keepdims: false) ⇒ Numo::DFloat
This function computes the covariance of the datasets data1 and data2 using the given values of the means, mean1 and mean2.
-
.kurtosis(data[], axis: nil, keepdims: false) ⇒ Numo::DFloat
This function computes the kurtosis of data, a dataset of length n with stride stride.
-
.kurtosis_m_sd(data[], mean, sd, axis: nil, keepdims: false) ⇒ Numo::DFloat
This function computes the kurtosis of the dataset data using the given values of the mean mean and standard deviation sd.
-
.lag1_autocorrelation(data[], axis: nil, keepdims: false) ⇒ Numo::DFloat
This function computes the lag-1 autocorrelation of the dataset data.
-
.lag1_autocorrelation_m(data[], mean, axis: nil, keepdims: false) ⇒ Numo::DFloat
This function computes the lag-1 autocorrelation of the dataset data using the given value of the mean mean.
-
.max(data[], axis: nil, keepdims: false) ⇒ Numo::DFloat
This function returns the maximum value in data, a dataset of length n with stride stride.
-
.max_index(*args) ⇒ Object
This function returns the index of the maximum value in data, a dataset of length n with stride stride.
-
.mean(data[], axis: nil, keepdims: false) ⇒ Numo::DFloat
This function returns the arithmetic mean of data, a dataset of length n with stride stride.
-
.median_from_sorted_data(sorted_data[], axis: nil, keepdims: false) ⇒ Numo::DFloat
This function returns the median value of sorted_data, a dataset of length n with stride stride.
-
.min(data[], axis: nil, keepdims: false) ⇒ Numo::DFloat
This function returns the minimum value in data, a dataset of length n with stride stride.
-
.min_index(*args) ⇒ Object
This function returns the index of the minimum value in data, a dataset of length n with stride stride.
-
.minmax(data[], axis: nil, keepdims: false) ⇒ [Numo::DFloat, Numo::DFloat]
This function finds both the minimum and maximum values min, max in data in a single pass.
-
.minmax_index(*args) ⇒ Object
This function returns the indexes min_index, max_index of the minimum and maximum values in data in a single pass.
-
.quantile_from_sorted_data(sorted_data[], f, axis: nil, keepdims: false) ⇒ Numo::DFloat
This function returns a quantile value of sorted_data, a double-precision array of length n with stride stride.
-
.sd(data[], axis: nil, keepdims: false) ⇒ Numo::DFloat
The standard deviation is defined as the square root of the variance.
-
.sd_m(data[], mean, axis: nil, keepdims: false) ⇒ Numo::DFloat
The standard deviation is defined as the square root of the variance.
-
.sd_with_fixed_mean(data[], mean, axis: nil, keepdims: false) ⇒ Numo::DFloat
This function calculates the standard deviation of data for a fixed population mean mean.
-
.skew(data[], axis: nil, keepdims: false) ⇒ Numo::DFloat
This function computes the skewness of data, a dataset of length n with stride stride.
-
.skew_m_sd(data[], mean, sd, axis: nil, keepdims: false) ⇒ Numo::DFloat
This function computes the skewness of the dataset data using the given values of the mean mean and standard deviation sd.
-
.spearman(data1[], data2[], work[], axis: nil, keepdims: false) ⇒ Numo::DFloat
This function computes the Spearman rank correlation coefficient between the datasets data1 and data2 which must both be of the same length n.
-
.tss(data[], axis: nil, keepdims: false) ⇒ Numo::DFloat
These functions return the total sum of squares (TSS) of data about the mean.
-
.tss_m(data[], mean, axis: nil, keepdims: false) ⇒ Numo::DFloat
These functions return the total sum of squares (TSS) of data about the mean.
-
.variance(data[], axis: nil, keepdims: false) ⇒ Numo::DFloat
This function returns the estimated, or sample, variance of data, a dataset of length n with stride stride.
-
.variance_m(data[], mean, axis: nil, keepdims: false) ⇒ Numo::DFloat
This function returns the sample variance of data relative to the given value of mean.
-
.variance_with_fixed_mean(data[], mean, axis: nil, keepdims: false) ⇒ Numo::DFloat
This function computes an unbiased estimate of the variance of data when the population mean mean of the underlying distribution is known a priori.
-
.wabsdev(w[], data[], axis: nil, keepdims: false) ⇒ Numo::DFloat
This function computes the weighted absolute deviation from the weighted mean of data.
-
.wabsdev_m(w[], data[], wmean, axis: nil, keepdims: false) ⇒ Numo::DFloat
This function computes the absolute deviation of the weighted dataset data about the given weighted mean wmean.
-
.wkurtosis(w[], data[], axis: nil, keepdims: false) ⇒ Numo::DFloat
This function computes the weighted kurtosis of the dataset data.
-
.wkurtosis_m_sd(w[], data[], wmean, wsd, axis: nil, keepdims: false) ⇒ Numo::DFloat
This function computes the weighted kurtosis of the dataset data using the given values of the weighted mean and weighted standard deviation, wmean and wsd.
-
.wmean(w[], data[], axis: nil, keepdims: false) ⇒ Numo::DFloat
This function returns the weighted mean of the dataset data with stride stride and length n, using the set of weights w with stride wstride and length n.
-
.wsd(w[], data[], axis: nil, keepdims: false) ⇒ Numo::DFloat
The standard deviation is defined as the square root of the variance.
-
.wsd_m(w[], data[], wmean, axis: nil, keepdims: false) ⇒ Numo::DFloat
This function returns the square root of the corresponding variance function gsl_stats_wvariance_m above.
-
.wsd_with_fixed_mean(w[], data[], mean, axis: nil, keepdims: false) ⇒ Numo::DFloat
The standard deviation is defined as the square root of the variance.
-
.wskew(w[], data[], axis: nil, keepdims: false) ⇒ Numo::DFloat
This function computes the weighted skewness of the dataset data.
-
.wskew_m_sd(w[], data[], wmean, wsd, axis: nil, keepdims: false) ⇒ Numo::DFloat
This function computes the weighted skewness of the dataset data using the given values of the weighted mean and weighted standard deviation, wmean and wsd.
-
.wtss(w[], data[], axis: nil, keepdims: false) ⇒ Numo::DFloat
These functions return the weighted total sum of squares (TSS) of data about the weighted mean.
-
.wtss_m(w[], data[], wmean, axis: nil, keepdims: false) ⇒ Numo::DFloat
These functions return the weighted total sum of squares (TSS) of data about the weighted mean.
-
.wvariance(w[], data[], axis: nil, keepdims: false) ⇒ Numo::DFloat
This function returns the estimated variance of the dataset data with stride stride and length n, using the set of weights w with stride wstride and length n.
-
.wvariance_m(w[], data[], wmean, axis: nil, keepdims: false) ⇒ Numo::DFloat
This function returns the estimated variance of the weighted dataset data using the given weighted mean wmean.
-
.wvariance_with_fixed_mean(w[], data[], mean, axis: nil, keepdims: false) ⇒ Numo::DFloat
This function computes an unbiased estimate of the variance of the weighted dataset data when the population mean mean of the underlying distribution is known a priori.
Class Method Details
.absdev(data[], axis: nil, keepdims: false) ⇒ Numo::DFloat
This function computes the absolute deviation from the mean of data, a dataset of length n with stride stride. The absolute deviation from the mean is defined as,
absdev = (1/N) \sum | x_i - \Hat\mu |
where x_i are the elements of the dataset data. The absolute deviation from the mean provides a more robust measure of the width of a distribution than the variance. This function computes the mean of data via a call to gsl_stats_mean.
505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 |
# File 'ext/numo/gsl/stats/gsl_stats.c', line 505
/*
 * Numo::GSL::Stats.absdev(data, axis: nil, keepdims: false)
 *
 * Ruby entry point for the absolute deviation from the mean.  Validates the
 * argument count, builds the reduction spec from the trailing arguments and
 * runs iter_stats_s_absdev through na_ndloop.
 *
 *   argv[0]   - data array (declared as cDF in the loop definition)
 *   argv[1..] - forwarded to nary_reduce_dimension (axis:/keepdims: per the
 *               documented method signature)
 *
 * Raises ArgumentError when no argument is supplied.
 */
static VALUE
stats_s_absdev(int argc, VALUE *argv, VALUE mod)
{
    ndfunc_arg_in_t in_args[2] = {{cDF,0},{sym_reduce,0}};
    ndfunc_arg_out_t out_args[1] = {{cDF,0}};
    ndfunc_t loop_def = {
        iter_stats_s_absdev,
        STRIDE_LOOP_NIP|NDF_FLAT_REDUCE|NDF_EXTRACT,
        2, 1, in_args, out_args
    };
    VALUE reduce_spec;

    if (argc<1) {
        rb_raise(rb_eArgError,"wrong number of argument (%d for >=1)",argc);
    }

    /* Everything after the data array describes the reduction. */
    reduce_spec = nary_reduce_dimension(argc-1, argv+1, 1, argv, &loop_def, 0);
    return na_ndloop(&loop_def, 2, argv[0], reduce_spec);
}
|
.absdev_m(data[], mean, axis: nil, keepdims: false) ⇒ Numo::DFloat
This function computes the absolute deviation of the dataset data relative to the given value of mean,
absdev = (1/N) \sum | x_i - mean |
This function is useful if you have already computed the mean of data (and want to avoid recomputing it), or wish to calculate the absolute deviation relative to another value (such as zero, or the median).
555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 |
# File 'ext/numo/gsl/stats/gsl_stats.c', line 555
/*
 * Numo::GSL::Stats.absdev_m(data, mean, axis: nil, keepdims: false)
 *
 * Ruby entry point for the absolute deviation relative to a caller-supplied
 * mean.  The mean is converted with NUM2DBL and handed to the iterator
 * iter_stats_s_absdev_m as the option block of na_ndloop3.
 *
 *   argv[0]   - data array (declared as cDF in the loop definition)
 *   argv[1]   - mean, converted to double
 *   argv[2..] - forwarded to nary_reduce_dimension (axis:/keepdims: per the
 *               documented method signature)
 *
 * Raises ArgumentError when fewer than two arguments are supplied.
 */
static VALUE
stats_s_absdev_m(int argc, VALUE *argv, VALUE mod)
{
    ndfunc_arg_in_t in_args[2] = {{cDF,0},{sym_reduce,0}};
    ndfunc_arg_out_t out_args[1] = {{cDF,0}};
    ndfunc_t loop_def = {
        iter_stats_s_absdev_m,
        STRIDE_LOOP_NIP|NDF_FLAT_REDUCE|NDF_EXTRACT,
        2, 1, in_args, out_args
    };
    double params[1];
    VALUE reduce_spec;

    if (argc<2) {
        rb_raise(rb_eArgError,"wrong number of argument (%d for >=2)",argc);
    }

    params[0] = NUM2DBL(argv[1]);   /* mean */
    reduce_spec = nary_reduce_dimension(argc-2, argv+2, 1, argv, &loop_def, 0);
    return na_ndloop3(&loop_def, params, 2, argv[0], reduce_spec);
}
|
.correlation(data1[], data2[], axis: nil, keepdims: false) ⇒ Numo::DFloat
This function efficiently computes the Pearson correlation coefficient between the datasets data1 and data2 which must both be of the same length n. r = cov(x, y) / (\Hat\sigma_x \Hat\sigma_y) = {1/(n-1) \sum (x_i - \Hat x) (y_i - \Hat y) \over \sqrt{1/(n-1) \sum (x_i - \Hat x)^2} \sqrt{1/(n-1) \sum (y_i - \Hat y)^2}}
1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 |
# File 'ext/numo/gsl/stats/gsl_stats.c', line 1009
/*
 * Numo::GSL::Stats.correlation(data1, data2, axis: nil, keepdims: false)
 *
 * Ruby entry point for the Pearson correlation coefficient of two datasets.
 * Validates the argument count, builds the reduction spec over both arrays
 * and runs iter_stats_s_correlation through na_ndloop.
 *
 *   argv[0], argv[1] - the two data arrays (both declared as cDF)
 *   argv[2..]        - forwarded to nary_reduce_dimension (axis:/keepdims:
 *                      per the documented method signature)
 *
 * Raises ArgumentError when fewer than two arguments are supplied.
 */
static VALUE
stats_s_correlation(int argc, VALUE *argv, VALUE mod)
{
    ndfunc_arg_in_t in_args[3] = {{cDF,0},{cDF,0},{sym_reduce,0}};
    ndfunc_arg_out_t out_args[1] = {{cDF,0}};
    ndfunc_t loop_def = {
        iter_stats_s_correlation,
        STRIDE_LOOP_NIP|NDF_FLAT_REDUCE|NDF_EXTRACT,
        3, 1, in_args, out_args
    };
    VALUE reduce_spec;

    if (argc<2) {
        rb_raise(rb_eArgError,"wrong number of argument (%d for >=2)",argc);
    }

    /* Both leading arrays take part in resolving the reduction. */
    reduce_spec = nary_reduce_dimension(argc-2, argv+2, 2, argv, &loop_def, 0);
    return na_ndloop(&loop_def, 3, argv[0], argv[1], reduce_spec);
}
|
.covariance(data1[], data2[], axis: nil, keepdims: false) ⇒ Numo::DFloat
This function computes the covariance of the datasets data1 and data2 which must both be of the same length n.
covar = (1/(n - 1)) \sum_{i = 1}^{n} (x_i - \Hat x) (y_i - \Hat y)
904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 |
# File 'ext/numo/gsl/stats/gsl_stats.c', line 904
/*
 * Numo::GSL::Stats.covariance(data1, data2, axis: nil, keepdims: false)
 *
 * Ruby entry point for the covariance of two datasets.  Validates the
 * argument count, builds the reduction spec over both arrays and runs
 * iter_stats_s_covariance through na_ndloop.
 *
 *   argv[0], argv[1] - the two data arrays (both declared as cDF)
 *   argv[2..]        - forwarded to nary_reduce_dimension (axis:/keepdims:
 *                      per the documented method signature)
 *
 * Raises ArgumentError when fewer than two arguments are supplied.
 */
static VALUE
stats_s_covariance(int argc, VALUE *argv, VALUE mod)
{
    ndfunc_arg_in_t in_args[3] = {{cDF,0},{cDF,0},{sym_reduce,0}};
    ndfunc_arg_out_t out_args[1] = {{cDF,0}};
    ndfunc_t loop_def = {
        iter_stats_s_covariance,
        STRIDE_LOOP_NIP|NDF_FLAT_REDUCE|NDF_EXTRACT,
        3, 1, in_args, out_args
    };
    VALUE reduce_spec;

    if (argc<2) {
        rb_raise(rb_eArgError,"wrong number of argument (%d for >=2)",argc);
    }

    /* Both leading arrays take part in resolving the reduction. */
    reduce_spec = nary_reduce_dimension(argc-2, argv+2, 2, argv, &loop_def, 0);
    return na_ndloop(&loop_def, 3, argv[0], argv[1], reduce_spec);
}
|
.covariance_m(data1[], data2[], mean1, mean2, axis: nil, keepdims: false) ⇒ Numo::DFloat
This function computes the covariance of the datasets data1 and data2 using the given values of the means, mean1 and mean2. This is useful if you have already computed the means of data1 and data2 and want to avoid recomputing them.
954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 |
# File 'ext/numo/gsl/stats/gsl_stats.c', line 954
/*
 * Numo::GSL::Stats.covariance_m(data1, data2, mean1, mean2,
 *                               axis: nil, keepdims: false)
 *
 * Ruby entry point for the covariance of two datasets using caller-supplied
 * means.  Both means are converted with NUM2DBL and handed to the iterator
 * iter_stats_s_covariance_m as the option block of na_ndloop3.
 *
 *   argv[0], argv[1] - the two data arrays (both declared as cDF)
 *   argv[2], argv[3] - mean1 and mean2, converted to double
 *   argv[4..]        - forwarded to nary_reduce_dimension (axis:/keepdims:
 *                      per the documented method signature)
 *
 * Raises ArgumentError when fewer than four arguments are supplied.
 */
static VALUE
stats_s_covariance_m(int argc, VALUE *argv, VALUE mod)
{
    ndfunc_arg_in_t in_args[3] = {{cDF,0},{cDF,0},{sym_reduce,0}};
    ndfunc_arg_out_t out_args[1] = {{cDF,0}};
    ndfunc_t loop_def = {
        iter_stats_s_covariance_m,
        STRIDE_LOOP_NIP|NDF_FLAT_REDUCE|NDF_EXTRACT,
        3, 1, in_args, out_args
    };
    double params[2];
    VALUE reduce_spec;

    if (argc<4) {
        rb_raise(rb_eArgError,"wrong number of argument (%d for >=4)",argc);
    }

    params[0] = NUM2DBL(argv[2]);   /* mean1 */
    params[1] = NUM2DBL(argv[3]);   /* mean2 */
    reduce_spec = nary_reduce_dimension(argc-4, argv+4, 2, argv, &loop_def, 0);
    return na_ndloop3(&loop_def, params, 3, argv[0], argv[1], reduce_spec);
}
|
.kurtosis(data[], axis: nil, keepdims: false) ⇒ Numo::DFloat
This function computes the kurtosis of data, a dataset of length n with stride stride. The kurtosis is defined as,
kurtosis = ((1/N) \sum ((x_i - \Hat\mu)/\Hat\sigma)^4) - 3
The kurtosis measures how sharply peaked a distribution is, relative to its width. The kurtosis is normalized to zero for a Gaussian distribution.
711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 |
# File 'ext/numo/gsl/stats/gsl_stats.c', line 711
/*
 * Numo::GSL::Stats.kurtosis(data, axis: nil, keepdims: false)
 *
 * Ruby entry point for the kurtosis of a dataset.  Validates the argument
 * count, builds the reduction spec from the trailing arguments and runs
 * iter_stats_s_kurtosis through na_ndloop.
 *
 *   argv[0]   - data array (declared as cDF in the loop definition)
 *   argv[1..] - forwarded to nary_reduce_dimension (axis:/keepdims: per the
 *               documented method signature)
 *
 * Raises ArgumentError when no argument is supplied.
 */
static VALUE
stats_s_kurtosis(int argc, VALUE *argv, VALUE mod)
{
    ndfunc_arg_in_t in_args[2] = {{cDF,0},{sym_reduce,0}};
    ndfunc_arg_out_t out_args[1] = {{cDF,0}};
    ndfunc_t loop_def = {
        iter_stats_s_kurtosis,
        STRIDE_LOOP_NIP|NDF_FLAT_REDUCE|NDF_EXTRACT,
        2, 1, in_args, out_args
    };
    VALUE reduce_spec;

    if (argc<1) {
        rb_raise(rb_eArgError,"wrong number of argument (%d for >=1)",argc);
    }

    /* Everything after the data array describes the reduction. */
    reduce_spec = nary_reduce_dimension(argc-1, argv+1, 1, argv, &loop_def, 0);
    return na_ndloop(&loop_def, 2, argv[0], reduce_spec);
}
|
.kurtosis_m_sd(data[], mean, sd, axis: nil, keepdims: false) ⇒ Numo::DFloat
This function computes the kurtosis of the dataset data using the given values of the mean mean and standard deviation sd,
kurtosis = ((1/N) \sum ((x_i - mean)/sd)^4) - 3
This function is useful if you have already computed the mean and standard deviation of data and want to avoid recomputing them.
760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 |
# File 'ext/numo/gsl/stats/gsl_stats.c', line 760
/*
 * Numo::GSL::Stats.kurtosis_m_sd(data, mean, sd, axis: nil, keepdims: false)
 *
 * Ruby entry point for the kurtosis of a dataset using a caller-supplied
 * mean and standard deviation.  Both scalars are converted with NUM2DBL and
 * handed to iter_stats_s_kurtosis_m_sd as the option block of na_ndloop3.
 *
 *   argv[0]   - data array (declared as cDF in the loop definition)
 *   argv[1]   - mean, converted to double
 *   argv[2]   - sd, converted to double
 *   argv[3..] - forwarded to nary_reduce_dimension (axis:/keepdims: per the
 *               documented method signature)
 *
 * Raises ArgumentError when fewer than three arguments are supplied.
 */
static VALUE
stats_s_kurtosis_m_sd(int argc, VALUE *argv, VALUE mod)
{
    ndfunc_arg_in_t in_args[2] = {{cDF,0},{sym_reduce,0}};
    ndfunc_arg_out_t out_args[1] = {{cDF,0}};
    ndfunc_t loop_def = {
        iter_stats_s_kurtosis_m_sd,
        STRIDE_LOOP_NIP|NDF_FLAT_REDUCE|NDF_EXTRACT,
        2, 1, in_args, out_args
    };
    double params[2];
    VALUE reduce_spec;

    if (argc<3) {
        rb_raise(rb_eArgError,"wrong number of argument (%d for >=3)",argc);
    }

    params[0] = NUM2DBL(argv[1]);   /* mean */
    params[1] = NUM2DBL(argv[2]);   /* sd */
    reduce_spec = nary_reduce_dimension(argc-3, argv+3, 1, argv, &loop_def, 0);
    return na_ndloop3(&loop_def, params, 2, argv[0], reduce_spec);
}
|
.lag1_autocorrelation(data[], axis: nil, keepdims: false) ⇒ Numo::DFloat
This function computes the lag-1 autocorrelation of the dataset data.
a_1 = {\sum_{i = 2}^{n} (x_{i} - \Hat\mu) (x_{i-1} - \Hat\mu) \over \sum_{i = 1}^{n} (x_{i} - \Hat\mu) (x_{i} - \Hat\mu)}
810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 |
# File 'ext/numo/gsl/stats/gsl_stats.c', line 810
/*
 * Numo::GSL::Stats.lag1_autocorrelation(data, axis: nil, keepdims: false)
 *
 * Ruby entry point for the lag-1 autocorrelation of a dataset.  Validates
 * the argument count, builds the reduction spec from the trailing arguments
 * and runs iter_stats_s_lag1_autocorrelation through na_ndloop.
 *
 *   argv[0]   - data array (declared as cDF in the loop definition)
 *   argv[1..] - forwarded to nary_reduce_dimension (axis:/keepdims: per the
 *               documented method signature)
 *
 * Raises ArgumentError when no argument is supplied.
 */
static VALUE
stats_s_lag1_autocorrelation(int argc, VALUE *argv, VALUE mod)
{
    ndfunc_arg_in_t in_args[2] = {{cDF,0},{sym_reduce,0}};
    ndfunc_arg_out_t out_args[1] = {{cDF,0}};
    ndfunc_t loop_def = {
        iter_stats_s_lag1_autocorrelation,
        STRIDE_LOOP_NIP|NDF_FLAT_REDUCE|NDF_EXTRACT,
        2, 1, in_args, out_args
    };
    VALUE reduce_spec;

    if (argc<1) {
        rb_raise(rb_eArgError,"wrong number of argument (%d for >=1)",argc);
    }

    /* Everything after the data array describes the reduction. */
    reduce_spec = nary_reduce_dimension(argc-1, argv+1, 1, argv, &loop_def, 0);
    return na_ndloop(&loop_def, 2, argv[0], reduce_spec);
}
|
.lag1_autocorrelation_m(data[], mean, axis: nil, keepdims: false) ⇒ Numo::DFloat
This function computes the lag-1 autocorrelation of the dataset data using the given value of the mean mean.
854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 |
# File 'ext/numo/gsl/stats/gsl_stats.c', line 854
/*
 * Numo::GSL::Stats.lag1_autocorrelation_m(data, mean,
 *                                         axis: nil, keepdims: false)
 *
 * Ruby entry point for the lag-1 autocorrelation using a caller-supplied
 * mean.  The mean is converted with NUM2DBL and handed to
 * iter_stats_s_lag1_autocorrelation_m as the option block of na_ndloop3.
 *
 *   argv[0]   - data array (declared as cDF in the loop definition)
 *   argv[1]   - mean, converted to double
 *   argv[2..] - forwarded to nary_reduce_dimension (axis:/keepdims: per the
 *               documented method signature)
 *
 * Raises ArgumentError when fewer than two arguments are supplied.
 */
static VALUE
stats_s_lag1_autocorrelation_m(int argc, VALUE *argv, VALUE mod)
{
    ndfunc_arg_in_t in_args[2] = {{cDF,0},{sym_reduce,0}};
    ndfunc_arg_out_t out_args[1] = {{cDF,0}};
    ndfunc_t loop_def = {
        iter_stats_s_lag1_autocorrelation_m,
        STRIDE_LOOP_NIP|NDF_FLAT_REDUCE|NDF_EXTRACT,
        2, 1, in_args, out_args
    };
    double params[1];
    VALUE reduce_spec;

    if (argc<2) {
        rb_raise(rb_eArgError,"wrong number of argument (%d for >=2)",argc);
    }

    params[0] = NUM2DBL(argv[1]);   /* mean */
    reduce_spec = nary_reduce_dimension(argc-2, argv+2, 1, argv, &loop_def, 0);
    return na_ndloop3(&loop_def, params, 2, argv[0], reduce_spec);
}
|
.max(data[], axis: nil, keepdims: false) ⇒ Numo::DFloat
This function returns the maximum value in data, a dataset of length n with stride stride. The maximum value is defined as the value of the element x_i which satisfies x_i >= x_j for all j.
If you want instead to find the element with the largest absolute magnitude you will need to apply fabs or abs to your data before calling this function.
1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 |
# File 'ext/numo/gsl/stats/gsl_stats.c', line 1868
/*
 * Numo::GSL::Stats.max(data, axis: nil, keepdims: false)
 *
 * Ruby entry point for the maximum value of a dataset.  Validates the
 * argument count, builds the reduction spec from the trailing arguments and
 * runs iter_stats_s_max through na_ndloop.
 *
 *   argv[0]   - data array (declared as cDF in the loop definition)
 *   argv[1..] - forwarded to nary_reduce_dimension (axis:/keepdims: per the
 *               documented method signature)
 *
 * Raises ArgumentError when no argument is supplied.
 */
static VALUE
stats_s_max(int argc, VALUE *argv, VALUE mod)
{
    ndfunc_arg_in_t in_args[2] = {{cDF,0},{sym_reduce,0}};
    ndfunc_arg_out_t out_args[1] = {{cDF,0}};
    ndfunc_t loop_def = {
        iter_stats_s_max,
        STRIDE_LOOP_NIP|NDF_FLAT_REDUCE|NDF_EXTRACT,
        2, 1, in_args, out_args
    };
    VALUE reduce_spec;

    if (argc<1) {
        rb_raise(rb_eArgError,"wrong number of argument (%d for >=1)",argc);
    }

    /* Everything after the data array describes the reduction. */
    reduce_spec = nary_reduce_dimension(argc-1, argv+1, 1, argv, &loop_def, 0);
    return na_ndloop(&loop_def, 2, argv[0], reduce_spec);
}
|
.max_index(*args) ⇒ Object
This function returns the index of the maximum value in data, a dataset of length n with stride stride. The maximum value is defined as the value of the element x_i which satisfies x_i >= x_j for all j. When there are several equal maximum elements then the first one is chosen. @overload max_index() => Integer @overload max_index(axis:nil, keepdims:false) => Integer or Numo::Int32/64
@param [Numo::DFloat] data[] @return [Integer, Numo::Int32, Numo::Int64] index of the maximum value @param [Numeric,Array,Range] axis (keyword) Axes along which the operation is performed. @param [TrueClass] keepdims (keyword) If true, the reduced axes are left in the result array as dimensions with size one.
2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 |
# File 'ext/numo/gsl/stats/gsl_stats.c', line 2027
/*
 * Numo::GSL::Stats.max_index(data, axis: nil, keepdims: false)
 *
 * Ruby entry point for the index of the maximum value of a dataset.
 *
 * A 0-dimensional input short-circuits to the Integer 0.  Otherwise an
 * index array with the same shape as the data is created, its `seq` method
 * is invoked, and the data/index pair is reduced with the 32- or 64-bit
 * iterator depending on the element count.
 *
 *   argv[0]   - data array (declared as cDF in the loop definition)
 *   argv[1..] - forwarded to nary_reduce_dimension (axis:/keepdims: per the
 *               documented method signature)
 *
 * Raises ArgumentError when no argument is supplied.
 */
static VALUE
stats_s_max_index(int argc, VALUE *argv, VALUE mod)
{
    ndfunc_arg_in_t in_args[3] = {{cDF,0},{Qnil,0},{sym_reduce,0}};
    ndfunc_arg_out_t out_args[1] = {{0,0,0}};
    ndfunc_t loop_def = {
        0, STRIDE_LOOP_NIP|NDF_FLAT_REDUCE|NDF_EXTRACT,
        3, 1, in_args, out_args
    };
    narray_t *data_na;
    VALUE index_class, positions, reduce_spec;
    int needs_64bit;

    if (argc<1) {
        rb_raise(rb_eArgError,"wrong number of argument (%d for >=1)",argc);
    }

    GetNArray(argv[0],data_na);
    if (data_na->ndim==0) {
        return INT2FIX(0);
    }

    /*
     * NOTE(review): the cut-over is the unsigned 32-bit maximum while the
     * narrow index class is the signed Int32 -- confirm element counts above
     * INT32_MAX cannot overflow the 32-bit path.
     */
    needs_64bit = data_na->size > (~(u_int32_t)0);
    index_class = needs_64bit ? numo_cInt64 : numo_cInt32;

    out_args[0].type = index_class;
    positions = rb_narray_new(index_class, data_na->ndim, data_na->shape);
    loop_def.func = needs_64bit ? iter_stats_s_max_index_index64
                                : iter_stats_s_max_index_index32;

    /* Presumably fills the index array with 0,1,2,... -- TODO confirm. */
    rb_funcall(positions, rb_intern("seq"), 0);

    reduce_spec = nary_reduce_dimension(argc-1, argv+1, 1, argv, &loop_def, 0);
    return na_ndloop(&loop_def, 3, argv[0], positions, reduce_spec);
}
|
.mean(data[], axis: nil, keepdims: false) ⇒ Numo::DFloat
This function returns the arithmetic mean of data, a dataset of length n with stride stride. The arithmetic mean, or sample mean, is denoted by \Hat\mu and defined as,
\Hat\mu = (1/N) \sum x_i
where x_i are the elements of the dataset data. For samples drawn from a gaussian distribution the variance of \Hat\mu is \sigma^2 / N.
62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
# File 'ext/numo/gsl/stats/gsl_stats.c', line 62
/*
 * Numo::GSL::Stats.mean(data, axis: nil, keepdims: false)
 *
 * Ruby entry point for the arithmetic mean of a dataset.  Validates the
 * argument count, builds the reduction spec from the trailing arguments and
 * runs iter_stats_s_mean through na_ndloop.
 *
 *   argv[0]   - data array (declared as cDF in the loop definition)
 *   argv[1..] - forwarded to nary_reduce_dimension (axis:/keepdims: per the
 *               documented method signature)
 *
 * Raises ArgumentError when no argument is supplied.
 */
static VALUE
stats_s_mean(int argc, VALUE *argv, VALUE mod)
{
    ndfunc_arg_in_t in_args[2] = {{cDF,0},{sym_reduce,0}};
    ndfunc_arg_out_t out_args[1] = {{cDF,0}};
    ndfunc_t loop_def = {
        iter_stats_s_mean,
        STRIDE_LOOP_NIP|NDF_FLAT_REDUCE|NDF_EXTRACT,
        2, 1, in_args, out_args
    };
    VALUE reduce_spec;

    if (argc<1) {
        rb_raise(rb_eArgError,"wrong number of argument (%d for >=1)",argc);
    }

    /* Everything after the data array describes the reduction. */
    reduce_spec = nary_reduce_dimension(argc-1, argv+1, 1, argv, &loop_def, 0);
    return na_ndloop(&loop_def, 2, argv[0], reduce_spec);
}
|
.median_from_sorted_data(sorted_data[], axis: nil, keepdims: false) ⇒ Numo::DFloat
This function returns the median value of sorted_data, a dataset of length n with stride stride. The elements of the array must be in ascending numerical order. There are no checks to see whether the data are sorted, so the function gsl_sort should always be used first.
When the dataset has an odd number of elements the median is the value of element (n-1)/2. When the dataset has an even number of elements the median is the mean of the two nearest middle values, elements (n-1)/2 and n/2. Since the algorithm for computing the median involves interpolation this function always returns a floating-point number, even for integer data types.
2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 |
# File 'ext/numo/gsl/stats/gsl_stats.c', line 2272
/*
 * Numo::GSL::Stats.median_from_sorted_data(sorted_data,
 *                                          axis: nil, keepdims: false)
 *
 * Ruby entry point for the median of an already-sorted dataset.  No
 * sortedness check is made here; the array is passed straight to
 * iter_stats_s_median_from_sorted_data through na_ndloop.
 *
 *   argv[0]   - sorted data array (declared as cDF in the loop definition)
 *   argv[1..] - forwarded to nary_reduce_dimension (axis:/keepdims: per the
 *               documented method signature)
 *
 * Raises ArgumentError when no argument is supplied.
 */
static VALUE
stats_s_median_from_sorted_data(int argc, VALUE *argv, VALUE mod)
{
    ndfunc_arg_in_t in_args[2] = {{cDF,0},{sym_reduce,0}};
    ndfunc_arg_out_t out_args[1] = {{cDF,0}};
    ndfunc_t loop_def = {
        iter_stats_s_median_from_sorted_data,
        STRIDE_LOOP_NIP|NDF_FLAT_REDUCE|NDF_EXTRACT,
        2, 1, in_args, out_args
    };
    VALUE reduce_spec;

    if (argc<1) {
        rb_raise(rb_eArgError,"wrong number of argument (%d for >=1)",argc);
    }

    /* Everything after the data array describes the reduction. */
    reduce_spec = nary_reduce_dimension(argc-1, argv+1, 1, argv, &loop_def, 0);
    return na_ndloop(&loop_def, 2, argv[0], reduce_spec);
}
|
.min(data[], axis: nil, keepdims: false) ⇒ Numo::DFloat
This function returns the minimum value in data, a dataset of length n with stride stride. The minimum value is defined as the value of the element x_i which satisfies x_i <= x_j for all j.
If you want instead to find the element with the smallest absolute magnitude you will need to apply fabs or abs to your data before calling this function.
1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 |
# File 'ext/numo/gsl/stats/gsl_stats.c', line 1916
/*
 * Numo::GSL::Stats.min(data, axis: nil, keepdims: false)
 *
 * Ruby entry point for the minimum value of a dataset.  Validates the
 * argument count, builds the reduction spec from the trailing arguments and
 * runs iter_stats_s_min through na_ndloop.
 *
 *   argv[0]   - data array (declared as cDF in the loop definition)
 *   argv[1..] - forwarded to nary_reduce_dimension (axis:/keepdims: per the
 *               documented method signature)
 *
 * Raises ArgumentError when no argument is supplied.
 */
static VALUE
stats_s_min(int argc, VALUE *argv, VALUE mod)
{
    ndfunc_arg_in_t in_args[2] = {{cDF,0},{sym_reduce,0}};
    ndfunc_arg_out_t out_args[1] = {{cDF,0}};
    ndfunc_t loop_def = {
        iter_stats_s_min,
        STRIDE_LOOP_NIP|NDF_FLAT_REDUCE|NDF_EXTRACT,
        2, 1, in_args, out_args
    };
    VALUE reduce_spec;

    if (argc<1) {
        rb_raise(rb_eArgError,"wrong number of argument (%d for >=1)",argc);
    }

    /* Everything after the data array describes the reduction. */
    reduce_spec = nary_reduce_dimension(argc-1, argv+1, 1, argv, &loop_def, 0);
    return na_ndloop(&loop_def, 2, argv[0], reduce_spec);
}
|
.min_index(*args) ⇒ Object
This function returns the index of the minimum value in data, a dataset of length n with stride stride. The minimum value is defined as the value of the element x_i which satisfies x_i <= x_j for all j. When there are several equal minimum elements then the first one is chosen. @overload min_index() => Integer @overload min_index(axis:nil, keepdims:false) => Integer or Numo::Int32/64
@param [Numo::DFloat] data[] @return [Integer, Numo::Int32, Numo::Int64] index of the minimum value @param [Numeric,Array,Range] axis (keyword) Axes along which the operation is performed. @param [TrueClass] keepdims (keyword) If true, the reduced axes are left in the result array as dimensions with size one.
2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 |
# File 'ext/numo/gsl/stats/gsl_stats.c', line 2112
/*
 * Numo::GSL::Stats.min_index(data, axis: nil, keepdims: false)
 *
 * Ruby entry point for the index of the minimum value of a dataset.
 *
 * A 0-dimensional input short-circuits to the Integer 0.  Otherwise an
 * index array with the same shape as the data is created, its `seq` method
 * is invoked, and the data/index pair is reduced with the 32- or 64-bit
 * iterator depending on the element count.
 *
 *   argv[0]   - data array (declared as cDF in the loop definition)
 *   argv[1..] - forwarded to nary_reduce_dimension (axis:/keepdims: per the
 *               documented method signature)
 *
 * Raises ArgumentError when no argument is supplied.
 */
static VALUE
stats_s_min_index(int argc, VALUE *argv, VALUE mod)
{
    ndfunc_arg_in_t in_args[3] = {{cDF,0},{Qnil,0},{sym_reduce,0}};
    ndfunc_arg_out_t out_args[1] = {{0,0,0}};
    ndfunc_t loop_def = {
        0, STRIDE_LOOP_NIP|NDF_FLAT_REDUCE|NDF_EXTRACT,
        3, 1, in_args, out_args
    };
    narray_t *data_na;
    VALUE index_class, positions, reduce_spec;
    int needs_64bit;

    if (argc<1) {
        rb_raise(rb_eArgError,"wrong number of argument (%d for >=1)",argc);
    }

    GetNArray(argv[0],data_na);
    if (data_na->ndim==0) {
        return INT2FIX(0);
    }

    /*
     * NOTE(review): the cut-over is the unsigned 32-bit maximum while the
     * narrow index class is the signed Int32 -- confirm element counts above
     * INT32_MAX cannot overflow the 32-bit path.
     */
    needs_64bit = data_na->size > (~(u_int32_t)0);
    index_class = needs_64bit ? numo_cInt64 : numo_cInt32;

    out_args[0].type = index_class;
    positions = rb_narray_new(index_class, data_na->ndim, data_na->shape);
    loop_def.func = needs_64bit ? iter_stats_s_min_index_index64
                                : iter_stats_s_min_index_index32;

    /* Presumably fills the index array with 0,1,2,... -- TODO confirm. */
    rb_funcall(positions, rb_intern("seq"), 0);

    reduce_spec = nary_reduce_dimension(argc-1, argv+1, 1, argv, &loop_def, 0);
    return na_ndloop(&loop_def, 3, argv[0], positions, reduce_spec);
}
|
.minmax(data[], axis: nil, keepdims: false) ⇒ [Numo::DFloat, Numo::DFloat]
This function finds both the minimum and maximum values min, max in data in a single pass.
1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 |
# File 'ext/numo/gsl/stats/gsl_stats.c', line 1958
/*
 * Numo::GSL::Stats.minmax(data, axis: nil, keepdims: false)
 *
 * Ruby entry point that finds the minimum and maximum of a dataset in one
 * pass.  The loop definition declares two cDF outputs (min, max per the
 * method documentation) and runs iter_stats_s_minmax through na_ndloop.
 *
 *   argv[0]   - data array (declared as cDF in the loop definition)
 *   argv[1..] - forwarded to nary_reduce_dimension (axis:/keepdims: per the
 *               documented method signature)
 *
 * Raises ArgumentError when no argument is supplied.
 */
static VALUE
stats_s_minmax(int argc, VALUE *argv, VALUE mod)
{
    ndfunc_arg_in_t in_args[2] = {{cDF,0},{sym_reduce,0}};
    ndfunc_arg_out_t out_args[2] = {{cDF,0},{cDF,0}};
    ndfunc_t loop_def = {
        iter_stats_s_minmax,
        STRIDE_LOOP_NIP|NDF_FLAT_REDUCE|NDF_EXTRACT,
        2, 2, in_args, out_args
    };
    VALUE reduce_spec;

    if (argc<1) {
        rb_raise(rb_eArgError,"wrong number of argument (%d for >=1)",argc);
    }

    /* Everything after the data array describes the reduction. */
    reduce_spec = nary_reduce_dimension(argc-1, argv+1, 1, argv, &loop_def, 0);
    return na_ndloop(&loop_def, 2, argv[0], reduce_spec);
}
|
.minmax_index(*args) ⇒ Object
This function returns the indexes min_index, max_index of the minimum and maximum values in data in a single pass. @overload minmax_index() => [Integer, Integer] @overload minmax_index(axis:nil, keepdims:false) => 2-element array of Integer or Numo::Int32/64
@param [Numo::DFloat] data[] @return [Array] two-element array [min_index, max_index], each an Integer or Numo::Int32/Int64 @param [Numeric,Array,Range] axis (keyword) Axes along which the operation is performed. @param [TrueClass] keepdims (keyword) If true, the reduced axes are left in the result array as dimensions with size one.
2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 |
# File 'ext/numo/gsl/stats/gsl_stats.c', line 2201
/*
 * Numo::GSL::Stats.minmax_index(data, axis: nil, keepdims: false)
 *
 * Ruby entry point that returns the indexes of the minimum and maximum
 * values of a dataset in one pass.
 *
 * A 0-dimensional input short-circuits to the pair [0, 0], so the result is
 * a [min_index, max_index] pair on every path, as the method documentation
 * promises.  Otherwise an index array with the same shape as the data is
 * created, its `seq` method is invoked, and the data/index pair is reduced
 * with the 32- or 64-bit iterator depending on the element count.
 *
 *   argv[0]   - data array (declared as cDF in the loop definition)
 *   argv[1..] - forwarded to nary_reduce_dimension (axis:/keepdims: per the
 *               documented method signature)
 *
 * Raises ArgumentError when no argument is supplied.
 */
static VALUE
stats_s_minmax_index(int argc, VALUE *argv, VALUE mod)
{
    narray_t *na;
    VALUE idx, reduce;
    ndfunc_arg_in_t ain[3] = {{cDF,0},{Qnil,0},{sym_reduce,0}};
    ndfunc_arg_out_t aout[2] = {{0,0,0},{0,0,0}};
    ndfunc_t ndf = { 0, STRIDE_LOOP_NIP|NDF_FLAT_REDUCE|NDF_EXTRACT,
                     3,2, ain,aout };

    if (argc<1) {
        rb_raise(rb_eArgError,"wrong number of argument (%d for >=1)",argc);
    }

    GetNArray(argv[0],na);
    if (na->ndim==0) {
        /* A scalar is both its own minimum and maximum: return a pair,
           not a bare Integer, so callers can always destructure it. */
        return rb_assoc_new(INT2FIX(0), INT2FIX(0));
    }

    /*
     * NOTE(review): the cut-over is the unsigned 32-bit maximum while the
     * narrow index class is the signed Int32 -- confirm element counts above
     * INT32_MAX cannot overflow the 32-bit path.
     */
    if (na->size > (~(u_int32_t)0)) {
        aout[0].type = numo_cInt64;
        aout[1].type = numo_cInt64;
        idx = rb_narray_new(numo_cInt64, na->ndim, na->shape);
        ndf.func = iter_stats_s_minmax_index_index64;
    } else {
        aout[0].type = numo_cInt32;
        aout[1].type = numo_cInt32;
        idx = rb_narray_new(numo_cInt32, na->ndim, na->shape);
        ndf.func = iter_stats_s_minmax_index_index32;
    }

    /* Presumably fills the index array with 0,1,2,... -- TODO confirm. */
    rb_funcall(idx, rb_intern("seq"), 0);

    reduce = nary_reduce_dimension(argc-1, argv+1, 1, argv, &ndf, 0);
    return na_ndloop(&ndf, 3, argv[0], idx, reduce);
}
|
.quantile_from_sorted_data(sorted_data[], f, axis: nil, keepdims: false) ⇒ Numo::DFloat
This function returns a quantile value of sorted_data, a double-precision array of length n with stride stride. The elements of the array must be in ascending numerical order. The quantile is determined by the f, a fraction between 0 and 1. For example, to compute the value of the 75th percentile f should have the value 0.75.
There are no checks to see whether the data are sorted, so the function gsl_sort should always be used first.
The quantile is found by interpolation, using the formula
quantile = (1 - \delta) x_i + \delta x_{i+1}
where i is floor((n - 1)f) and \delta is (n-1)f - i.
Thus the minimum value of the array (data[0*stride]) is given by f equal to zero, the maximum value (data[(n-1)*stride]) is given by f equal to one and the median value is given by f equal to 0.5. Since the algorithm for computing quantiles involves interpolation this function always returns a floating-point number, even for integer data types.
2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 |
# File 'ext/numo/gsl/stats/gsl_stats.c', line 2336
/*
 * Numo::GSL::Stats.quantile_from_sorted_data(sorted_data, f,
 *                                            axis: nil, keepdims: false)
 *
 * Ruby entry point for a quantile of an already-sorted dataset.  The
 * fraction f is converted with NUM2DBL and handed to
 * iter_stats_s_quantile_from_sorted_data as the option block of na_ndloop3.
 * No sortedness or range check is made here.
 *
 *   argv[0]   - sorted data array (declared as cDF in the loop definition)
 *   argv[1]   - f, converted to double
 *   argv[2..] - forwarded to nary_reduce_dimension (axis:/keepdims: per the
 *               documented method signature)
 *
 * Raises ArgumentError when fewer than two arguments are supplied.
 */
static VALUE
stats_s_quantile_from_sorted_data(int argc, VALUE *argv, VALUE mod)
{
    ndfunc_arg_in_t in_args[2] = {{cDF,0},{sym_reduce,0}};
    ndfunc_arg_out_t out_args[1] = {{cDF,0}};
    ndfunc_t loop_def = {
        iter_stats_s_quantile_from_sorted_data,
        STRIDE_LOOP_NIP|NDF_FLAT_REDUCE|NDF_EXTRACT,
        2, 1, in_args, out_args
    };
    double params[1];
    VALUE reduce_spec;

    if (argc<2) {
        rb_raise(rb_eArgError,"wrong number of argument (%d for >=2)",argc);
    }

    params[0] = NUM2DBL(argv[1]);   /* f */
    reduce_spec = nary_reduce_dimension(argc-2, argv+2, 1, argv, &loop_def, 0);
    return na_ndloop3(&loop_def, params, 2, argv[0], reduce_spec);
}
|
.sd(data[], axis: nil, keepdims: false) ⇒ Numo::DFloat
The standard deviation is defined as the square root of the variance. These functions return the square root of the corresponding variance functions above.
210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 |
# File 'ext/numo/gsl/stats/gsl_stats.c', line 210
/*
 * Numo::GSL::Stats.sd(data, axis: nil, keepdims: false)
 *
 * Ruby entry point for the standard deviation of a dataset.  Validates the
 * argument count, builds the reduction spec from the trailing arguments and
 * runs iter_stats_s_sd through na_ndloop.
 *
 *   argv[0]   - data array (declared as cDF in the loop definition)
 *   argv[1..] - forwarded to nary_reduce_dimension (axis:/keepdims: per the
 *               documented method signature)
 *
 * Raises ArgumentError when no argument is supplied.
 */
static VALUE
stats_s_sd(int argc, VALUE *argv, VALUE mod)
{
    ndfunc_arg_in_t in_args[2] = {{cDF,0},{sym_reduce,0}};
    ndfunc_arg_out_t out_args[1] = {{cDF,0}};
    ndfunc_t loop_def = {
        iter_stats_s_sd,
        STRIDE_LOOP_NIP|NDF_FLAT_REDUCE|NDF_EXTRACT,
        2, 1, in_args, out_args
    };
    VALUE reduce_spec;

    if (argc<1) {
        rb_raise(rb_eArgError,"wrong number of argument (%d for >=1)",argc);
    }

    /* Everything after the data array describes the reduction. */
    reduce_spec = nary_reduce_dimension(argc-1, argv+1, 1, argv, &loop_def, 0);
    return na_ndloop(&loop_def, 2, argv[0], reduce_spec);
}
|
.sd_m(data[], mean, axis: nil, keepdims: false) ⇒ Numo::DFloat
The standard deviation is defined as the square root of the variance. These functions return the square root of the corresponding variance functions above.
254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 |
# File 'ext/numo/gsl/stats/gsl_stats.c', line 254
/*
 * Numo::GSL::Stats.sd_m(data, mean, axis: nil, keepdims: false)
 *
 * Ruby entry point for the standard deviation using a caller-supplied mean.
 * The mean is converted with NUM2DBL and handed to iter_stats_s_sd_m as the
 * option block of na_ndloop3.
 *
 *   argv[0]   - data array (declared as cDF in the loop definition)
 *   argv[1]   - mean, converted to double
 *   argv[2..] - forwarded to nary_reduce_dimension (axis:/keepdims: per the
 *               documented method signature)
 *
 * Raises ArgumentError when fewer than two arguments are supplied.
 */
static VALUE
stats_s_sd_m(int argc, VALUE *argv, VALUE mod)
{
    ndfunc_arg_in_t in_args[2] = {{cDF,0},{sym_reduce,0}};
    ndfunc_arg_out_t out_args[1] = {{cDF,0}};
    ndfunc_t loop_def = {
        iter_stats_s_sd_m,
        STRIDE_LOOP_NIP|NDF_FLAT_REDUCE|NDF_EXTRACT,
        2, 1, in_args, out_args
    };
    double params[1];
    VALUE reduce_spec;

    if (argc<2) {
        rb_raise(rb_eArgError,"wrong number of argument (%d for >=2)",argc);
    }

    params[0] = NUM2DBL(argv[1]);   /* mean */
    reduce_spec = nary_reduce_dimension(argc-2, argv+2, 1, argv, &loop_def, 0);
    return na_ndloop3(&loop_def, params, 2, argv[0], reduce_spec);
}
|
.sd_with_fixed_mean(data[], mean, axis: nil, keepdims: false) ⇒ Numo::DFloat
This function calculates the standard deviation of data for a fixed population mean mean. The result is the square root of the corresponding variance function.
451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 |
# File 'ext/numo/gsl/stats/gsl_stats.c', line 451
/*
 * Numo::GSL::Stats.sd_with_fixed_mean(data, mean, axis: nil, keepdims: false)
 *
 * Ruby entry point for the standard deviation about a fixed population
 * mean.  The mean is converted with NUM2DBL and handed to
 * iter_stats_s_sd_with_fixed_mean as the option block of na_ndloop3.
 *
 *   argv[0]   - data array (declared as cDF in the loop definition)
 *   argv[1]   - mean, converted to double
 *   argv[2..] - forwarded to nary_reduce_dimension (axis:/keepdims: per the
 *               documented method signature)
 *
 * Raises ArgumentError when fewer than two arguments are supplied.
 */
static VALUE
stats_s_sd_with_fixed_mean(int argc, VALUE *argv, VALUE mod)
{
    ndfunc_arg_in_t in_args[2] = {{cDF,0},{sym_reduce,0}};
    ndfunc_arg_out_t out_args[1] = {{cDF,0}};
    ndfunc_t loop_def = {
        iter_stats_s_sd_with_fixed_mean,
        STRIDE_LOOP_NIP|NDF_FLAT_REDUCE|NDF_EXTRACT,
        2, 1, in_args, out_args
    };
    double params[1];
    VALUE reduce_spec;

    if (argc<2) {
        rb_raise(rb_eArgError,"wrong number of argument (%d for >=2)",argc);
    }

    params[0] = NUM2DBL(argv[1]);   /* mean */
    reduce_spec = nary_reduce_dimension(argc-2, argv+2, 1, argv, &loop_def, 0);
    return na_ndloop3(&loop_def, params, 2, argv[0], reduce_spec);
}
|
.skew(data[], axis: nil, keepdims: false) ⇒ Numo::DFloat
This function computes the skewness of data, a dataset of length n with stride stride. The skewness is defined as,
skew = (1/N) \sum ((x_i - \Hat\mu)/\Hat\sigma)^3
where x_i are the elements of the dataset data. The skewness measures the asymmetry of the tails of a distribution.
The function computes the mean and estimated standard deviation of data via calls to gsl_stats_mean and gsl_stats_sd.
609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 |
# File 'ext/numo/gsl/stats/gsl_stats.c', line 609
/*
 * Implements Numo::GSL::Stats.skew(data, ...).
 *
 * argv[0] : dataset, declared as a cDF input for the loop.
 * argv[1..] : forwarded to nary_reduce_dimension() to build the reduce spec.
 *
 * Raises ArgumentError when called without a dataset.
 * `mod` is not used.
 */
static VALUE
stats_s_skew(int argc, VALUE *argv, VALUE mod)
{
    ndfunc_arg_in_t inputs[2] = {{cDF,0},{sym_reduce,0}};
    ndfunc_arg_out_t outputs[1] = {{cDF,0}};
    ndfunc_t loop_fn = { iter_stats_s_skew, STRIDE_LOOP_NIP|NDF_FLAT_REDUCE|NDF_EXTRACT,
                         2, 1, inputs, outputs };
    VALUE axes;

    if (argc < 1) {
        rb_raise(rb_eArgError,"wrong number of argument (%d for >=1)",argc);
    }

    /* Everything after the dataset describes the reduction. */
    axes = nary_reduce_dimension(argc - 1, argv + 1, 1, argv, &loop_fn, 0);
    return na_ndloop(&loop_fn, 2, argv[0], axes);
}
|
.skew_m_sd(data[], mean, sd, axis: nil, keepdims: false) ⇒ Numo::DFloat
This function computes the skewness of the dataset data using the given values of the mean mean and standard deviation sd,
skew = (1/N) \sum ((x_i - mean)/sd)^3
These functions are useful if you have already computed the mean and standard deviation of data and want to avoid recomputing them.
658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 |
# File 'ext/numo/gsl/stats/gsl_stats.c', line 658
/*
 * Implements Numo::GSL::Stats.skew_m_sd(data, mean, sd, ...).
 *
 * argv[0] : dataset, declared as a cDF input for the loop.
 * argv[1] : mean, stored in slot 0 of the option block.
 * argv[2] : sd, stored in slot 1 of the option block.
 * argv[3..] : forwarded to nary_reduce_dimension() to build the reduce spec.
 *
 * Raises ArgumentError when fewer than three arguments are supplied.
 * `mod` is not used.
 */
static VALUE
stats_s_skew_m_sd(int argc, VALUE *argv, VALUE mod)
{
    ndfunc_arg_in_t inputs[2] = {{cDF,0},{sym_reduce,0}};
    ndfunc_arg_out_t outputs[1] = {{cDF,0}};
    ndfunc_t loop_fn = { iter_stats_s_skew_m_sd, STRIDE_LOOP_NIP|NDF_FLAT_REDUCE|NDF_EXTRACT,
                         2, 1, inputs, outputs };
    double params[2];
    VALUE axes;

    if (argc < 3) {
        rb_raise(rb_eArgError,"wrong number of argument (%d for >=3)",argc);
    }

    /* Scalars are converted in argument order, before the loop is set up. */
    params[0] = NUM2DBL(argv[1]);
    params[1] = NUM2DBL(argv[2]);

    axes = nary_reduce_dimension(argc - 3, argv + 3, 1, argv, &loop_fn, 0);
    return na_ndloop3(&loop_fn, params, 2, argv[0], axes);
}
|
.spearman(data1[], data2[], axis: nil, keepdims: false) ⇒ Numo::DFloat
This function computes the Spearman rank correlation coefficient between the datasets data1 and data2 which must both be of the same length n. The additional workspace of size 2*n that the underlying routine requires in work is allocated internally by this binding, so it is not passed as an argument. The Spearman rank correlation between vectors x and y is equivalent to the Pearson correlation between the ranked vectors x_R and y_R, where ranks are defined to be the average of the positions of an element in the ascending order of the values.
1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 |
# File 'ext/numo/gsl/stats/gsl_stats.c', line 1061
/*
 * Implements Numo::GSL::Stats.spearman(data1, data2, ...).
 *
 * argv[0], argv[1] : the two datasets, both declared as cDF loop inputs.
 * argv[2..] : forwarded to nary_reduce_dimension() to build the reduce spec.
 *
 * A scratch buffer of 2 * (element count of argv[0]) doubles is allocated
 * here and handed to the iterator as its option block; it is released
 * after the loop returns.
 *
 * Raises ArgumentError when fewer than two arguments are supplied.
 * `mod` is not used.
 *
 * NOTE(review): the buffer is sized from argv[0] alone. If the loop can run
 * over more elements than argv[0] holds (e.g. when argv[1] is larger), the
 * buffer may be too small -- confirm against the iterator; see the todo below.
 */
static VALUE
stats_s_spearman(int argc, VALUE *argv, VALUE mod)
{
    ndfunc_arg_in_t inputs[3] = {{cDF,0},{cDF,0},{sym_reduce,0}};
    ndfunc_arg_out_t outputs[1] = {{cDF,0}};
    ndfunc_t loop_fn = { iter_stats_s_spearman, STRIDE_LOOP_NIP|NDF_FLAT_REDUCE|NDF_EXTRACT,
                         3, 1, inputs, outputs };
    narray_t *first;
    VALUE work_holder = 0;
    double *work;
    VALUE axes;
    VALUE result;

    if (argc < 2) {
        rb_raise(rb_eArgError,"wrong number of argument (%d for >=2)",argc);
    }

    GetNArray(argv[0], first);
    work = ALLOCV_N(double, work_holder, first->size*2); // todo: get loop size

    axes = nary_reduce_dimension(argc - 2, argv + 2, 2, argv, &loop_fn, 0);
    result = na_ndloop3(&loop_fn, work, 3, argv[0], argv[1], axes);

    /* Release the scratch buffer only once the loop has finished with it. */
    ALLOCV_END(work_holder);
    return result;
}
|
.tss(data[], axis: nil, keepdims: false) ⇒ Numo::DFloat
These functions return the total sum of squares (TSS) of data about the mean. For gsl_stats_tss_m the user-supplied value of mean is used, and for gsl_stats_tss it is computed using gsl_stats_mean.
TSS = \sum (x_i - mean)^2
304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 |
# File 'ext/numo/gsl/stats/gsl_stats.c', line 304
/*
 * Implements Numo::GSL::Stats.tss(data, ...).
 *
 * argv[0] : dataset, declared as a cDF input for the loop.
 * argv[1..] : forwarded to nary_reduce_dimension() to build the reduce spec.
 *
 * Raises ArgumentError when called without a dataset.
 * `mod` is not used.
 */
static VALUE
stats_s_tss(int argc, VALUE *argv, VALUE mod)
{
    ndfunc_arg_in_t inputs[2] = {{cDF,0},{sym_reduce,0}};
    ndfunc_arg_out_t outputs[1] = {{cDF,0}};
    ndfunc_t loop_fn = { iter_stats_s_tss, STRIDE_LOOP_NIP|NDF_FLAT_REDUCE|NDF_EXTRACT,
                         2, 1, inputs, outputs };
    VALUE axes;

    if (argc < 1) {
        rb_raise(rb_eArgError,"wrong number of argument (%d for >=1)",argc);
    }

    /* Everything after the dataset describes the reduction. */
    axes = nary_reduce_dimension(argc - 1, argv + 1, 1, argv, &loop_fn, 0);
    return na_ndloop(&loop_fn, 2, argv[0], axes);
}
|
.tss_m(data[], mean, axis: nil, keepdims: false) ⇒ Numo::DFloat
These functions return the total sum of squares (TSS) of data about the mean. For gsl_stats_tss_m the user-supplied value of mean is used, and for gsl_stats_tss it is computed using gsl_stats_mean.
TSS = \sum (x_i - mean)^2
351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 |
# File 'ext/numo/gsl/stats/gsl_stats.c', line 351
/*
 * Implements Numo::GSL::Stats.tss_m(data, mean, ...).
 *
 * argv[0] : dataset, declared as a cDF input for the loop.
 * argv[1] : mean, converted with NUM2DBL and handed to the iterator.
 * argv[2..] : forwarded to nary_reduce_dimension() to build the reduce spec.
 *
 * Raises ArgumentError when fewer than two arguments are supplied.
 * `mod` is not used.
 */
static VALUE
stats_s_tss_m(int argc, VALUE *argv, VALUE mod)
{
    ndfunc_arg_in_t inputs[2] = {{cDF,0},{sym_reduce,0}};
    ndfunc_arg_out_t outputs[1] = {{cDF,0}};
    ndfunc_t loop_fn = { iter_stats_s_tss_m, STRIDE_LOOP_NIP|NDF_FLAT_REDUCE|NDF_EXTRACT,
                         2, 1, inputs, outputs };
    double mean;
    VALUE axes;

    if (argc < 2) {
        rb_raise(rb_eArgError,"wrong number of argument (%d for >=2)",argc);
    }

    mean = NUM2DBL(argv[1]);
    axes = nary_reduce_dimension(argc - 2, argv + 2, 1, argv, &loop_fn, 0);

    /* The iterator receives a pointer to the mean as its option block. */
    return na_ndloop3(&loop_fn, &mean, 2, argv[0], axes);
}
|
.variance(data[], axis: nil, keepdims: false) ⇒ Numo::DFloat
This function returns the estimated, or sample, variance of data, a dataset of length n with stride stride. The estimated variance is denoted by \Hat\sigma^2 and is defined by,
\Hat\sigma^2 = (1/(N-1)) \sum (x_i - \Hat\mu)^2
where x_i are the elements of the dataset data. Note that the normalization factor of 1/(N-1) results from the derivation of \Hat\sigma^2 as an unbiased estimator of the population variance \sigma^2. For samples drawn from a Gaussian distribution the variance of \Hat\sigma^2 itself is 2 \sigma^4 / N.
This function computes the mean via a call to gsl_stats_mean. If you have already computed the mean then you can pass it directly to gsl_stats_variance_m.
117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 |
# File 'ext/numo/gsl/stats/gsl_stats.c', line 117
/*
 * Implements Numo::GSL::Stats.variance(data, ...).
 *
 * argv[0] : dataset, declared as a cDF input for the loop.
 * argv[1..] : forwarded to nary_reduce_dimension() to build the reduce spec.
 *
 * Raises ArgumentError when called without a dataset.
 * `mod` is not used.
 */
static VALUE
stats_s_variance(int argc, VALUE *argv, VALUE mod)
{
    ndfunc_arg_in_t inputs[2] = {{cDF,0},{sym_reduce,0}};
    ndfunc_arg_out_t outputs[1] = {{cDF,0}};
    ndfunc_t loop_fn = { iter_stats_s_variance, STRIDE_LOOP_NIP|NDF_FLAT_REDUCE|NDF_EXTRACT,
                         2, 1, inputs, outputs };
    VALUE axes;

    if (argc < 1) {
        rb_raise(rb_eArgError,"wrong number of argument (%d for >=1)",argc);
    }

    /* Everything after the dataset describes the reduction. */
    axes = nary_reduce_dimension(argc - 1, argv + 1, 1, argv, &loop_fn, 0);
    return na_ndloop(&loop_fn, 2, argv[0], axes);
}
|
.variance_m(data[], mean, axis: nil, keepdims: false) ⇒ Numo::DFloat
This function returns the sample variance of data relative to the given value of mean. The function is computed with \Hat\mu replaced by the value of mean that you supply,
\Hat\sigma^2 = (1/(N-1)) \sum (x_i - mean)^2
163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 |
# File 'ext/numo/gsl/stats/gsl_stats.c', line 163
/*
 * Implements Numo::GSL::Stats.variance_m(data, mean, ...).
 *
 * argv[0] : dataset, declared as a cDF input for the loop.
 * argv[1] : mean, converted with NUM2DBL and handed to the iterator.
 * argv[2..] : forwarded to nary_reduce_dimension() to build the reduce spec.
 *
 * Raises ArgumentError when fewer than two arguments are supplied.
 * `mod` is not used.
 */
static VALUE
stats_s_variance_m(int argc, VALUE *argv, VALUE mod)
{
    ndfunc_arg_in_t inputs[2] = {{cDF,0},{sym_reduce,0}};
    ndfunc_arg_out_t outputs[1] = {{cDF,0}};
    ndfunc_t loop_fn = { iter_stats_s_variance_m, STRIDE_LOOP_NIP|NDF_FLAT_REDUCE|NDF_EXTRACT,
                         2, 1, inputs, outputs };
    double mean;
    VALUE axes;

    if (argc < 2) {
        rb_raise(rb_eArgError,"wrong number of argument (%d for >=2)",argc);
    }

    mean = NUM2DBL(argv[1]);
    axes = nary_reduce_dimension(argc - 2, argv + 2, 1, argv, &loop_fn, 0);

    /* The iterator receives a pointer to the mean as its option block. */
    return na_ndloop3(&loop_fn, &mean, 2, argv[0], axes);
}
|
.variance_with_fixed_mean(data[], mean, axis: nil, keepdims: false) ⇒ Numo::DFloat
This function computes an unbiased estimate of the variance of data when the population mean mean of the underlying distribution is known a priori. In this case the estimator for the variance uses the factor 1/N and the sample mean \Hat\mu is replaced by the known population mean \mu,
\Hat\sigma^2 = (1/N) \sum (x_i - \mu)^2
403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 |
# File 'ext/numo/gsl/stats/gsl_stats.c', line 403
/*
 * Implements Numo::GSL::Stats.variance_with_fixed_mean(data, mean, ...).
 *
 * argv[0] : dataset, declared as a cDF input for the loop.
 * argv[1] : mean, converted with NUM2DBL and handed to the iterator.
 * argv[2..] : forwarded to nary_reduce_dimension() to build the reduce spec.
 *
 * Raises ArgumentError when fewer than two arguments are supplied.
 * `mod` is not used.
 */
static VALUE
stats_s_variance_with_fixed_mean(int argc, VALUE *argv, VALUE mod)
{
    ndfunc_arg_in_t inputs[2] = {{cDF,0},{sym_reduce,0}};
    ndfunc_arg_out_t outputs[1] = {{cDF,0}};
    ndfunc_t loop_fn = { iter_stats_s_variance_with_fixed_mean, STRIDE_LOOP_NIP|NDF_FLAT_REDUCE|NDF_EXTRACT,
                         2, 1, inputs, outputs };
    double mean;
    VALUE axes;

    if (argc < 2) {
        rb_raise(rb_eArgError,"wrong number of argument (%d for >=2)",argc);
    }

    mean = NUM2DBL(argv[1]);
    axes = nary_reduce_dimension(argc - 2, argv + 2, 1, argv, &loop_fn, 0);

    /* The iterator receives a pointer to the mean as its option block. */
    return na_ndloop3(&loop_fn, &mean, 2, argv[0], axes);
}
|
.wabsdev(w[], data[], axis: nil, keepdims: false) ⇒ Numo::DFloat
This function computes the weighted absolute deviation from the weighted mean of data. The absolute deviation from the mean is defined as,
absdev = (\sum w_i |x_i - \Hat\mu|) / (\sum w_i)
1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 |
# File 'ext/numo/gsl/stats/gsl_stats.c', line 1571
/*
 * Implements Numo::GSL::Stats.wabsdev(w, data, ...).
 *
 * argv[0] : weights, declared as a cDF input for the loop.
 * argv[1] : dataset, declared as a cDF input for the loop.
 * argv[2..] : forwarded to nary_reduce_dimension() to build the reduce spec.
 *
 * Raises ArgumentError when fewer than two arguments are supplied.
 * `mod` is not used.
 */
static VALUE
stats_s_wabsdev(int argc, VALUE *argv, VALUE mod)
{
    ndfunc_arg_in_t inputs[3] = {{cDF,0},{cDF,0},{sym_reduce,0}};
    ndfunc_arg_out_t outputs[1] = {{cDF,0}};
    ndfunc_t loop_fn = { iter_stats_s_wabsdev, STRIDE_LOOP_NIP|NDF_FLAT_REDUCE|NDF_EXTRACT,
                         3, 1, inputs, outputs };
    VALUE axes;

    if (argc < 2) {
        rb_raise(rb_eArgError,"wrong number of argument (%d for >=2)",argc);
    }

    /* Both leading arrays take part in working out the reduction. */
    axes = nary_reduce_dimension(argc - 2, argv + 2, 2, argv, &loop_fn, 0);
    return na_ndloop(&loop_fn, 3, argv[0], argv[1], axes);
}
|
.wabsdev_m(w[], data[], wmean, axis: nil, keepdims: false) ⇒ Numo::DFloat
This function computes the absolute deviation of the weighted dataset data about the given weighted mean wmean.
1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 |
# File 'ext/numo/gsl/stats/gsl_stats.c', line 1618
/*
 * Implements Numo::GSL::Stats.wabsdev_m(w, data, wmean, ...).
 *
 * argv[0] : weights, declared as a cDF input for the loop.
 * argv[1] : dataset, declared as a cDF input for the loop.
 * argv[2] : wmean, converted with NUM2DBL and handed to the iterator.
 * argv[3..] : forwarded to nary_reduce_dimension() to build the reduce spec.
 *
 * Raises ArgumentError when fewer than three arguments are supplied.
 * `mod` is not used.
 */
static VALUE
stats_s_wabsdev_m(int argc, VALUE *argv, VALUE mod)
{
    ndfunc_arg_in_t inputs[3] = {{cDF,0},{cDF,0},{sym_reduce,0}};
    ndfunc_arg_out_t outputs[1] = {{cDF,0}};
    ndfunc_t loop_fn = { iter_stats_s_wabsdev_m, STRIDE_LOOP_NIP|NDF_FLAT_REDUCE|NDF_EXTRACT,
                         3, 1, inputs, outputs };
    double wmean;
    VALUE axes;

    if (argc < 3) {
        rb_raise(rb_eArgError,"wrong number of argument (%d for >=3)",argc);
    }

    wmean = NUM2DBL(argv[2]);
    axes = nary_reduce_dimension(argc - 3, argv + 3, 2, argv, &loop_fn, 0);

    /* The iterator receives a pointer to wmean as its option block. */
    return na_ndloop3(&loop_fn, &wmean, 3, argv[0], argv[1], axes);
}
|
.wkurtosis(w[], data[], axis: nil, keepdims: false) ⇒ Numo::DFloat
This function computes the weighted kurtosis of the dataset data.
kurtosis = ((\sum w_i ((x_i - \Hat x)/\Hat \sigma)^4) / (\sum w_i)) - 3
1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 |
# File 'ext/numo/gsl/stats/gsl_stats.c', line 1766
/*
 * Implements Numo::GSL::Stats.wkurtosis(w, data, ...).
 *
 * argv[0] : weights, declared as a cDF input for the loop.
 * argv[1] : dataset, declared as a cDF input for the loop.
 * argv[2..] : forwarded to nary_reduce_dimension() to build the reduce spec.
 *
 * Raises ArgumentError when fewer than two arguments are supplied.
 * `mod` is not used.
 */
static VALUE
stats_s_wkurtosis(int argc, VALUE *argv, VALUE mod)
{
    ndfunc_arg_in_t inputs[3] = {{cDF,0},{cDF,0},{sym_reduce,0}};
    ndfunc_arg_out_t outputs[1] = {{cDF,0}};
    ndfunc_t loop_fn = { iter_stats_s_wkurtosis, STRIDE_LOOP_NIP|NDF_FLAT_REDUCE|NDF_EXTRACT,
                         3, 1, inputs, outputs };
    VALUE axes;

    if (argc < 2) {
        rb_raise(rb_eArgError,"wrong number of argument (%d for >=2)",argc);
    }

    /* Both leading arrays take part in working out the reduction. */
    axes = nary_reduce_dimension(argc - 2, argv + 2, 2, argv, &loop_fn, 0);
    return na_ndloop(&loop_fn, 3, argv[0], argv[1], axes);
}
|
.wkurtosis_m_sd(w[], data[], wmean, wsd, axis: nil, keepdims: false) ⇒ Numo::DFloat
This function computes the weighted kurtosis of the dataset data using the given values of the weighted mean and weighted standard deviation, wmean and wsd.
1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 |
# File 'ext/numo/gsl/stats/gsl_stats.c', line 1815
/*
 * Implements Numo::GSL::Stats.wkurtosis_m_sd(w, data, wmean, wsd, ...).
 *
 * argv[0] : weights, declared as a cDF input for the loop.
 * argv[1] : dataset, declared as a cDF input for the loop.
 * argv[2] : wmean, stored in slot 0 of the option block.
 * argv[3] : wsd, stored in slot 1 of the option block.
 * argv[4..] : forwarded to nary_reduce_dimension() to build the reduce spec.
 *
 * Raises ArgumentError when fewer than four arguments are supplied.
 * `mod` is not used.
 */
static VALUE
stats_s_wkurtosis_m_sd(int argc, VALUE *argv, VALUE mod)
{
    ndfunc_arg_in_t inputs[3] = {{cDF,0},{cDF,0},{sym_reduce,0}};
    ndfunc_arg_out_t outputs[1] = {{cDF,0}};
    ndfunc_t loop_fn = { iter_stats_s_wkurtosis_m_sd, STRIDE_LOOP_NIP|NDF_FLAT_REDUCE|NDF_EXTRACT,
                         3, 1, inputs, outputs };
    double params[2];
    VALUE axes;

    if (argc < 4) {
        rb_raise(rb_eArgError,"wrong number of argument (%d for >=4)",argc);
    }

    /* Scalars are converted in argument order, before the loop is set up. */
    params[0] = NUM2DBL(argv[2]);
    params[1] = NUM2DBL(argv[3]);

    axes = nary_reduce_dimension(argc - 4, argv + 4, 2, argv, &loop_fn, 0);
    return na_ndloop3(&loop_fn, params, 3, argv[0], argv[1], axes);
}
|
.wmean(w[], data[], axis: nil, keepdims: false) ⇒ Numo::DFloat
This function returns the weighted mean of the dataset data with stride stride and length n, using the set of weights w with stride wstride and length n. The weighted mean is defined as,
\Hat\mu = (\sum w_i x_i) / (\sum w_i)
1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 |
# File 'ext/numo/gsl/stats/gsl_stats.c', line 1115
/*
 * Implements Numo::GSL::Stats.wmean(w, data, ...).
 *
 * argv[0] : weights, declared as a cDF input for the loop.
 * argv[1] : dataset, declared as a cDF input for the loop.
 * argv[2..] : forwarded to nary_reduce_dimension() to build the reduce spec.
 *
 * Raises ArgumentError when fewer than two arguments are supplied.
 * `mod` is not used.
 */
static VALUE
stats_s_wmean(int argc, VALUE *argv, VALUE mod)
{
    ndfunc_arg_in_t inputs[3] = {{cDF,0},{cDF,0},{sym_reduce,0}};
    ndfunc_arg_out_t outputs[1] = {{cDF,0}};
    ndfunc_t loop_fn = { iter_stats_s_wmean, STRIDE_LOOP_NIP|NDF_FLAT_REDUCE|NDF_EXTRACT,
                         3, 1, inputs, outputs };
    VALUE axes;

    if (argc < 2) {
        rb_raise(rb_eArgError,"wrong number of argument (%d for >=2)",argc);
    }

    /* Both leading arrays take part in working out the reduction. */
    axes = nary_reduce_dimension(argc - 2, argv + 2, 2, argv, &loop_fn, 0);
    return na_ndloop(&loop_fn, 3, argv[0], argv[1], axes);
}
|
.wsd(w[], data[], axis: nil, keepdims: false) ⇒ Numo::DFloat
The standard deviation is defined as the square root of the variance. This function returns the square root of the corresponding variance function gsl_stats_wvariance above.
1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 |
# File 'ext/numo/gsl/stats/gsl_stats.c', line 1265
/*
 * Implements Numo::GSL::Stats.wsd(w, data, ...).
 *
 * argv[0] : weights, declared as a cDF input for the loop.
 * argv[1] : dataset, declared as a cDF input for the loop.
 * argv[2..] : forwarded to nary_reduce_dimension() to build the reduce spec.
 *
 * Raises ArgumentError when fewer than two arguments are supplied.
 * `mod` is not used.
 */
static VALUE
stats_s_wsd(int argc, VALUE *argv, VALUE mod)
{
    ndfunc_arg_in_t inputs[3] = {{cDF,0},{cDF,0},{sym_reduce,0}};
    ndfunc_arg_out_t outputs[1] = {{cDF,0}};
    ndfunc_t loop_fn = { iter_stats_s_wsd, STRIDE_LOOP_NIP|NDF_FLAT_REDUCE|NDF_EXTRACT,
                         3, 1, inputs, outputs };
    VALUE axes;

    if (argc < 2) {
        rb_raise(rb_eArgError,"wrong number of argument (%d for >=2)",argc);
    }

    /* Both leading arrays take part in working out the reduction. */
    axes = nary_reduce_dimension(argc - 2, argv + 2, 2, argv, &loop_fn, 0);
    return na_ndloop(&loop_fn, 3, argv[0], argv[1], axes);
}
|
.wsd_m(w[], data[], wmean, axis: nil, keepdims: false) ⇒ Numo::DFloat
This function returns the square root of the corresponding variance function gsl_stats_wvariance_m above.
1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 |
# File 'ext/numo/gsl/stats/gsl_stats.c', line 1312
/*
 * Implements Numo::GSL::Stats.wsd_m(w, data, wmean, ...).
 *
 * argv[0] : weights, declared as a cDF input for the loop.
 * argv[1] : dataset, declared as a cDF input for the loop.
 * argv[2] : wmean, converted with NUM2DBL and handed to the iterator.
 * argv[3..] : forwarded to nary_reduce_dimension() to build the reduce spec.
 *
 * Raises ArgumentError when fewer than three arguments are supplied.
 * `mod` is not used.
 */
static VALUE
stats_s_wsd_m(int argc, VALUE *argv, VALUE mod)
{
    ndfunc_arg_in_t inputs[3] = {{cDF,0},{cDF,0},{sym_reduce,0}};
    ndfunc_arg_out_t outputs[1] = {{cDF,0}};
    ndfunc_t loop_fn = { iter_stats_s_wsd_m, STRIDE_LOOP_NIP|NDF_FLAT_REDUCE|NDF_EXTRACT,
                         3, 1, inputs, outputs };
    double wmean;
    VALUE axes;

    if (argc < 3) {
        rb_raise(rb_eArgError,"wrong number of argument (%d for >=3)",argc);
    }

    wmean = NUM2DBL(argv[2]);
    axes = nary_reduce_dimension(argc - 3, argv + 3, 2, argv, &loop_fn, 0);

    /* The iterator receives a pointer to wmean as its option block. */
    return na_ndloop3(&loop_fn, &wmean, 3, argv[0], argv[1], axes);
}
|
.wsd_with_fixed_mean(w[], data[], mean, axis: nil, keepdims: false) ⇒ Numo::DFloat
The standard deviation is defined as the square root of the variance. This function returns the square root of the corresponding variance function above.
1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 |
# File 'ext/numo/gsl/stats/gsl_stats.c', line 1418
/*
 * Implements Numo::GSL::Stats.wsd_with_fixed_mean(w, data, mean, ...).
 *
 * argv[0] : weights, declared as a cDF input for the loop.
 * argv[1] : dataset, declared as a cDF input for the loop.
 * argv[2] : mean, converted with NUM2DBL and handed to the iterator.
 * argv[3..] : forwarded to nary_reduce_dimension() to build the reduce spec.
 *
 * Raises ArgumentError when fewer than three arguments are supplied.
 * `mod` is not used.
 */
static VALUE
stats_s_wsd_with_fixed_mean(int argc, VALUE *argv, VALUE mod)
{
    ndfunc_arg_in_t inputs[3] = {{cDF,0},{cDF,0},{sym_reduce,0}};
    ndfunc_arg_out_t outputs[1] = {{cDF,0}};
    ndfunc_t loop_fn = { iter_stats_s_wsd_with_fixed_mean, STRIDE_LOOP_NIP|NDF_FLAT_REDUCE|NDF_EXTRACT,
                         3, 1, inputs, outputs };
    double mean;
    VALUE axes;

    if (argc < 3) {
        rb_raise(rb_eArgError,"wrong number of argument (%d for >=3)",argc);
    }

    mean = NUM2DBL(argv[2]);
    axes = nary_reduce_dimension(argc - 3, argv + 3, 2, argv, &loop_fn, 0);

    /* The iterator receives a pointer to the mean as its option block. */
    return na_ndloop3(&loop_fn, &mean, 3, argv[0], argv[1], axes);
}
|
.wskew(w[], data[], axis: nil, keepdims: false) ⇒ Numo::DFloat
This function computes the weighted skewness of the dataset data.
skew = (\sum w_i ((x_i - \Hat x)/\Hat \sigma)^3) / (\sum w_i)
1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 |
# File 'ext/numo/gsl/stats/gsl_stats.c', line 1667
/*
 * Implements Numo::GSL::Stats.wskew(w, data, ...).
 *
 * argv[0] : weights, declared as a cDF input for the loop.
 * argv[1] : dataset, declared as a cDF input for the loop.
 * argv[2..] : forwarded to nary_reduce_dimension() to build the reduce spec.
 *
 * Raises ArgumentError when fewer than two arguments are supplied.
 * `mod` is not used.
 */
static VALUE
stats_s_wskew(int argc, VALUE *argv, VALUE mod)
{
    ndfunc_arg_in_t inputs[3] = {{cDF,0},{cDF,0},{sym_reduce,0}};
    ndfunc_arg_out_t outputs[1] = {{cDF,0}};
    ndfunc_t loop_fn = { iter_stats_s_wskew, STRIDE_LOOP_NIP|NDF_FLAT_REDUCE|NDF_EXTRACT,
                         3, 1, inputs, outputs };
    VALUE axes;

    if (argc < 2) {
        rb_raise(rb_eArgError,"wrong number of argument (%d for >=2)",argc);
    }

    /* Both leading arrays take part in working out the reduction. */
    axes = nary_reduce_dimension(argc - 2, argv + 2, 2, argv, &loop_fn, 0);
    return na_ndloop(&loop_fn, 3, argv[0], argv[1], axes);
}
|
.wskew_m_sd(w[], data[], wmean, wsd, axis: nil, keepdims: false) ⇒ Numo::DFloat
This function computes the weighted skewness of the dataset data using the given values of the weighted mean and weighted standard deviation, wmean and wsd.
1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 |
# File 'ext/numo/gsl/stats/gsl_stats.c', line 1716
/*
 * Implements Numo::GSL::Stats.wskew_m_sd(w, data, wmean, wsd, ...).
 *
 * argv[0] : weights, declared as a cDF input for the loop.
 * argv[1] : dataset, declared as a cDF input for the loop.
 * argv[2] : wmean, stored in slot 0 of the option block.
 * argv[3] : wsd, stored in slot 1 of the option block.
 * argv[4..] : forwarded to nary_reduce_dimension() to build the reduce spec.
 *
 * Raises ArgumentError when fewer than four arguments are supplied.
 * `mod` is not used.
 */
static VALUE
stats_s_wskew_m_sd(int argc, VALUE *argv, VALUE mod)
{
    ndfunc_arg_in_t inputs[3] = {{cDF,0},{cDF,0},{sym_reduce,0}};
    ndfunc_arg_out_t outputs[1] = {{cDF,0}};
    ndfunc_t loop_fn = { iter_stats_s_wskew_m_sd, STRIDE_LOOP_NIP|NDF_FLAT_REDUCE|NDF_EXTRACT,
                         3, 1, inputs, outputs };
    double params[2];
    VALUE axes;

    if (argc < 4) {
        rb_raise(rb_eArgError,"wrong number of argument (%d for >=4)",argc);
    }

    /* Scalars are converted in argument order, before the loop is set up. */
    params[0] = NUM2DBL(argv[2]);
    params[1] = NUM2DBL(argv[3]);

    axes = nary_reduce_dimension(argc - 4, argv + 4, 2, argv, &loop_fn, 0);
    return na_ndloop3(&loop_fn, params, 3, argv[0], argv[1], axes);
}
|
.wtss(w[], data[], axis: nil, keepdims: false) ⇒ Numo::DFloat
These functions return the weighted total sum of squares (TSS) of data about the weighted mean. For gsl_stats_wtss_m the user-supplied value of wmean is used, and for gsl_stats_wtss it is computed using gsl_stats_wmean.
TSS = \sum w_i (x_i - wmean)^2
1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 |
# File 'ext/numo/gsl/stats/gsl_stats.c', line 1470
/*
 * Implements Numo::GSL::Stats.wtss(w, data, ...).
 *
 * argv[0] : weights, declared as a cDF input for the loop.
 * argv[1] : dataset, declared as a cDF input for the loop.
 * argv[2..] : forwarded to nary_reduce_dimension() to build the reduce spec.
 *
 * Raises ArgumentError when fewer than two arguments are supplied.
 * `mod` is not used.
 */
static VALUE
stats_s_wtss(int argc, VALUE *argv, VALUE mod)
{
    ndfunc_arg_in_t inputs[3] = {{cDF,0},{cDF,0},{sym_reduce,0}};
    ndfunc_arg_out_t outputs[1] = {{cDF,0}};
    ndfunc_t loop_fn = { iter_stats_s_wtss, STRIDE_LOOP_NIP|NDF_FLAT_REDUCE|NDF_EXTRACT,
                         3, 1, inputs, outputs };
    VALUE axes;

    if (argc < 2) {
        rb_raise(rb_eArgError,"wrong number of argument (%d for >=2)",argc);
    }

    /* Both leading arrays take part in working out the reduction. */
    axes = nary_reduce_dimension(argc - 2, argv + 2, 2, argv, &loop_fn, 0);
    return na_ndloop(&loop_fn, 3, argv[0], argv[1], axes);
}
|
.wtss_m(w[], data[], wmean, axis: nil, keepdims: false) ⇒ Numo::DFloat
These functions return the weighted total sum of squares (TSS) of data about the weighted mean. For gsl_stats_wtss_m the user-supplied value of wmean is used, and for gsl_stats_wtss it is computed using gsl_stats_wmean.
TSS = \sum w_i (x_i - wmean)^2
1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 |
# File 'ext/numo/gsl/stats/gsl_stats.c', line 1521
/*
 * Implements Numo::GSL::Stats.wtss_m(w, data, wmean, ...).
 *
 * argv[0] : weights, declared as a cDF input for the loop.
 * argv[1] : dataset, declared as a cDF input for the loop.
 * argv[2] : wmean, converted with NUM2DBL and handed to the iterator.
 * argv[3..] : forwarded to nary_reduce_dimension() to build the reduce spec.
 *
 * Raises ArgumentError when fewer than three arguments are supplied.
 * `mod` is not used.
 */
static VALUE
stats_s_wtss_m(int argc, VALUE *argv, VALUE mod)
{
    ndfunc_arg_in_t inputs[3] = {{cDF,0},{cDF,0},{sym_reduce,0}};
    ndfunc_arg_out_t outputs[1] = {{cDF,0}};
    ndfunc_t loop_fn = { iter_stats_s_wtss_m, STRIDE_LOOP_NIP|NDF_FLAT_REDUCE|NDF_EXTRACT,
                         3, 1, inputs, outputs };
    double wmean;
    VALUE axes;

    if (argc < 3) {
        rb_raise(rb_eArgError,"wrong number of argument (%d for >=3)",argc);
    }

    wmean = NUM2DBL(argv[2]);
    axes = nary_reduce_dimension(argc - 3, argv + 3, 2, argv, &loop_fn, 0);

    /* The iterator receives a pointer to wmean as its option block. */
    return na_ndloop3(&loop_fn, &wmean, 3, argv[0], argv[1], axes);
}
|
.wvariance(w[], data[], axis: nil, keepdims: false) ⇒ Numo::DFloat
This function returns the estimated variance of the dataset data with stride stride and length n, using the set of weights w with stride wstride and length n. The estimated variance of a weighted dataset is calculated as,
\Hat\sigma^2 = ((\sum w_i)/((\sum w_i)^2 - \sum (w_i^2))) \sum w_i (x_i - \Hat\mu)^2
Note that this expression reduces to an unweighted variance with the familiar 1/(N-1) factor when there are N equal non-zero weights.
1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 |
# File 'ext/numo/gsl/stats/gsl_stats.c', line 1169
/*
 * Implements Numo::GSL::Stats.wvariance(w, data, ...).
 *
 * argv[0] : weights, declared as a cDF input for the loop.
 * argv[1] : dataset, declared as a cDF input for the loop.
 * argv[2..] : forwarded to nary_reduce_dimension() to build the reduce spec.
 *
 * Raises ArgumentError when fewer than two arguments are supplied.
 * `mod` is not used.
 */
static VALUE
stats_s_wvariance(int argc, VALUE *argv, VALUE mod)
{
    ndfunc_arg_in_t inputs[3] = {{cDF,0},{cDF,0},{sym_reduce,0}};
    ndfunc_arg_out_t outputs[1] = {{cDF,0}};
    ndfunc_t loop_fn = { iter_stats_s_wvariance, STRIDE_LOOP_NIP|NDF_FLAT_REDUCE|NDF_EXTRACT,
                         3, 1, inputs, outputs };
    VALUE axes;

    if (argc < 2) {
        rb_raise(rb_eArgError,"wrong number of argument (%d for >=2)",argc);
    }

    /* Both leading arrays take part in working out the reduction. */
    axes = nary_reduce_dimension(argc - 2, argv + 2, 2, argv, &loop_fn, 0);
    return na_ndloop(&loop_fn, 3, argv[0], argv[1], axes);
}
|
.wvariance_m(w[], data[], wmean, axis: nil, keepdims: false) ⇒ Numo::DFloat
This function returns the estimated variance of the weighted dataset data using the given weighted mean wmean.
1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 |
# File 'ext/numo/gsl/stats/gsl_stats.c', line 1216
/*
 * Implements Numo::GSL::Stats.wvariance_m(w, data, wmean, ...).
 *
 * argv[0] : weights, declared as a cDF input for the loop.
 * argv[1] : dataset, declared as a cDF input for the loop.
 * argv[2] : wmean, converted with NUM2DBL and handed to the iterator.
 * argv[3..] : forwarded to nary_reduce_dimension() to build the reduce spec.
 *
 * Raises ArgumentError when fewer than three arguments are supplied.
 * `mod` is not used.
 */
static VALUE
stats_s_wvariance_m(int argc, VALUE *argv, VALUE mod)
{
    ndfunc_arg_in_t inputs[3] = {{cDF,0},{cDF,0},{sym_reduce,0}};
    ndfunc_arg_out_t outputs[1] = {{cDF,0}};
    ndfunc_t loop_fn = { iter_stats_s_wvariance_m, STRIDE_LOOP_NIP|NDF_FLAT_REDUCE|NDF_EXTRACT,
                         3, 1, inputs, outputs };
    double wmean;
    VALUE axes;

    if (argc < 3) {
        rb_raise(rb_eArgError,"wrong number of argument (%d for >=3)",argc);
    }

    wmean = NUM2DBL(argv[2]);
    axes = nary_reduce_dimension(argc - 3, argv + 3, 2, argv, &loop_fn, 0);

    /* The iterator receives a pointer to wmean as its option block. */
    return na_ndloop3(&loop_fn, &wmean, 3, argv[0], argv[1], axes);
}
|
.wvariance_with_fixed_mean(w[], data[], mean, axis: nil, keepdims: false) ⇒ Numo::DFloat
This function computes an unbiased estimate of the variance of the weighted dataset data when the population mean mean of the underlying distribution is known a priori. In this case the estimator for the variance replaces the sample mean \Hat\mu by the known population mean \mu,
\Hat\sigma^2 = (\sum w_i (x_i - \mu)^2) / (\sum w_i)
1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 |
# File 'ext/numo/gsl/stats/gsl_stats.c', line 1367
/*
 * Implements Numo::GSL::Stats.wvariance_with_fixed_mean(w, data, mean, ...).
 *
 * argv[0] : weights, declared as a cDF input for the loop.
 * argv[1] : dataset, declared as a cDF input for the loop.
 * argv[2] : mean, converted with NUM2DBL and handed to the iterator.
 * argv[3..] : forwarded to nary_reduce_dimension() to build the reduce spec.
 *
 * Raises ArgumentError when fewer than three arguments are supplied.
 * `mod` is not used.
 */
static VALUE
stats_s_wvariance_with_fixed_mean(int argc, VALUE *argv, VALUE mod)
{
    ndfunc_arg_in_t inputs[3] = {{cDF,0},{cDF,0},{sym_reduce,0}};
    ndfunc_arg_out_t outputs[1] = {{cDF,0}};
    ndfunc_t loop_fn = { iter_stats_s_wvariance_with_fixed_mean, STRIDE_LOOP_NIP|NDF_FLAT_REDUCE|NDF_EXTRACT,
                         3, 1, inputs, outputs };
    double mean;
    VALUE axes;

    if (argc < 3) {
        rb_raise(rb_eArgError,"wrong number of argument (%d for >=3)",argc);
    }

    mean = NUM2DBL(argv[2]);
    axes = nary_reduce_dimension(argc - 3, argv + 3, 2, argv, &loop_fn, 0);

    /* The iterator receives a pointer to the mean as its option block. */
    return na_ndloop3(&loop_fn, &mean, 3, argv[0], argv[1], axes);
}
|