Statistics for MySQL  0.9
corr.c
Go to the documentation of this file.
00001 /* corr.c (Pearson correlation coefficient) */
00002 
00003 /***********************************************************************
00004 *  This code is part of Statistics for MySQL.
00005 *
00006 *  Copyright (C) 2011 Heinrich Schuchardt (xypron.glpk@gmx.de)
00007 *
00008 *  Licensed under the Apache License, Version 2.0 (the "License");
00009 *  you may not use this file except in compliance with the License.
00010 *  You may obtain a copy of the License at
00011 *
00012 *      http://www.apache.org/licenses/LICENSE-2.0
00013 *
00014 *  Unless required by applicable law or agreed to in writing, software
00015 *  distributed under the License is distributed on an "AS IS" BASIS,
00016 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00017 *  See the License for the specific language governing permissions and
00018 *  limitations under the License.
00019 ***********************************************************************/
00020 
00030 #include "sqlstat.h"
00031 
/**
 * Running totals kept for one group while corr() is being aggregated.
 *
 * Each row contributes a weight w together with a pair (x, y); the sums
 * below are the weighted moments from which the coefficient is derived.
 */
struct corr_storage {
  /** number of arguments the function was called with */
  int argc;
  /** sum of the weights w */
  double count;
  /** sum of w * x */
  double sumX;
  /** sum of w * x * x */
  double sumXX;
  /** sum of w * x * y */
  double sumXY;
  /** sum of w * y */
  double sumY;
  /** sum of w * y * y */
  double sumYY;
};
00044 
00057 my_bool corr_init(UDF_INIT *initid, UDF_ARGS *args, char *message) {
00058   struct corr_storage * data;
00059   
00060   if (args->arg_count < 2 || args->arg_count > 3) {
00061     strcpy(message,"corr() requires two or three arguments");
00062     return 1;
00063   }
00064   args->arg_type[0] = REAL_RESULT;
00065   args->arg_type[1] = REAL_RESULT;
00066   if (args->arg_count > 2) {
00067     args->arg_type[2] = REAL_RESULT;
00068   }
00069 
00070   data = (struct corr_storage *) malloc( sizeof(struct corr_storage));
00071   if (data == NULL) {
00072     strcpy(message,"Couldn't allocate memory");
00073     return 1;
00074   }
00075   data->argc = args->arg_count;
00076   
00077   initid->maybe_null = 1;
00078   initid->decimals   = NOT_FIXED_DEC;
00079   initid->max_length = 13 + initid->decimals;
00080   initid->ptr        = (char *) data;
00081   initid->const_item = 0;
00082   
00083   return 0;
00084 }
00085 
00097 void corr_reset(UDF_INIT *initid, UDF_ARGS *args, char *is_null, char *error) {
00098   corr_clear(initid, is_null, error);
00099   corr_add(initid, args, is_null, error);
00100 }
00101 
00111 void corr_clear(UDF_INIT *initid, char *is_null, char *error) {
00112   struct corr_storage *data;
00113   data = (struct corr_storage *) initid->ptr;
00114   data->count = 0;
00115   data->sumX  = 0;
00116   data->sumXX = 0;
00117   data->sumXY = 0;
00118   data->sumY  = 0;
00119   data->sumYY = 0;
00120 }
00121 
00132 void corr_add(UDF_INIT *initid, UDF_ARGS *args, char *is_null, char *error) {
00133   struct corr_storage * data;
00134   double w;
00135   double x;
00136   double y;
00137   
00138   if (!args->args[0] || !args->args[1]) {
00139     return;
00140   }
00141   data = (struct corr_storage *) initid->ptr;
00142   if (data->argc > 2) {
00143     if (!args->args[2]) {
00144       return;
00145     }
00146     w = *((double*) args->args[2]);
00147   } else {
00148     w = 1.;
00149   }
00150   x = *((double*) args->args[0]);
00151   y = *((double*) args->args[1]);
00152   
00153   data->count  += w;
00154   data->sumX   += w * x;
00155   data->sumXX  += w * x * x;
00156   data->sumXY  += w * x * y;
00157   data->sumY   += w * y;
00158   data->sumYY  += w * y * y;
00159 }
00160 
00170 double corr(UDF_INIT *initid, UDF_ARGS *args, char *is_null, char *error) {
00171   struct corr_storage * data;
00172   double ret;
00173   double nx;
00174   double ny;
00175 
00176   data = (struct corr_storage *) initid->ptr;
00177 
00178   nx = data->count * data->sumXX - data->sumX * data->sumX;
00179   ny = data->count * data->sumYY - data->sumY * data->sumY;
00180   
00181   if (nx <= 0 || ny <= 0) {
00182     *is_null = 1;
00183     return 0;
00184   }
00185   
00186   ret = (data->count * data->sumXY - data->sumX * data->sumY)
00187       / sqrt(nx * ny);
00188   
00189   return ret;
00190 }
00191 
00199 void corr_deinit(UDF_INIT *initid) {
00200   if (initid->ptr) {
00201     free(initid->ptr);
00202   }
00203 }
 All Classes Files Functions Variables Typedefs Defines