![]() |
Statistics for MySQL
0.9
|
00001 /* corr.c (Pearson correlation coeffient) */ 00002 00003 /*********************************************************************** 00004 * This code is part of Statistics for MySQL. 00005 * 00006 * Copyright (C) 2011 Heinrich Schuchardt (xypron.glpk@gmx.de) 00007 * 00008 * Licensed under the Apache License, Version 2.0 (the "License"); 00009 * you may not use this file except in compliance with the License. 00010 * You may obtain a copy of the License at 00011 * 00012 * http://www.apache.org/licenses/LICENSE-2.0 00013 * 00014 * Unless required by applicable law or agreed to in writing, software 00015 * distributed under the License is distributed on an "AS IS" BASIS, 00016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00017 * See the License for the specific language governing permissions and 00018 * limitations under the License. 00019 ***********************************************************************/ 00020 00030 #include "sqlstat.h" 00031 00035 struct corr_storage { 00036 int argc; 00037 double count; 00038 double sumX; 00039 double sumXX; 00040 double sumXY; 00041 double sumY; 00042 double sumYY; 00043 }; 00044 00057 my_bool corr_init(UDF_INIT *initid, UDF_ARGS *args, char *message) { 00058 struct corr_storage * data; 00059 00060 if (args->arg_count < 2 || args->arg_count > 3) { 00061 strcpy(message,"corr() requires two or three arguments"); 00062 return 1; 00063 } 00064 args->arg_type[0] = REAL_RESULT; 00065 args->arg_type[1] = REAL_RESULT; 00066 if (args->arg_count > 2) { 00067 args->arg_type[2] = REAL_RESULT; 00068 } 00069 00070 data = (struct corr_storage *) malloc( sizeof(struct corr_storage)); 00071 if (data == NULL) { 00072 strcpy(message,"Couldn't allocate memory"); 00073 return 1; 00074 } 00075 data->argc = args->arg_count; 00076 00077 initid->maybe_null = 1; 00078 initid->decimals = NOT_FIXED_DEC; 00079 initid->max_length = 13 + initid->decimals; 00080 initid->ptr = (char *) data; 00081 initid->const_item = 0; 00082 00083 return 0; 00084 } 00085 00097 void corr_reset(UDF_INIT *initid, UDF_ARGS *args, char *is_null, char *error) { 00098 corr_clear(initid, is_null, error); 00099 corr_add(initid, args, is_null, error); 00100 } 00101 00111 void corr_clear(UDF_INIT *initid, char *is_null, char *error) { 00112 struct corr_storage *data; 00113 data = (struct corr_storage *) initid->ptr; 00114 data->count = 0; 00115 data->sumX = 0; 00116 data->sumXX = 0; 00117 data->sumXY = 0; 00118 data->sumY = 0; 00119 data->sumYY = 0; 00120 } 00121 00132 void corr_add(UDF_INIT *initid, UDF_ARGS *args, char *is_null, char *error) { 00133 struct corr_storage * data; 00134 double w; 00135 double x; 00136 double y; 00137 00138 if (!args->args[0] || !args->args[1]) { 00139 return; 00140 } 00141 data = (struct corr_storage *) initid->ptr; 00142 if (data->argc > 2) { 00143 if (!args->args[2]) { 00144 return; 00145 } 00146 w = *((double*) args->args[2]); 00147 } else { 00148 w = 1.; 00149 } 00150 x = *((double*) args->args[0]); 00151 y = *((double*) args->args[1]); 00152 00153 data->count += w; 00154 data->sumX += w * x; 00155 data->sumXX += w * x * x; 00156 data->sumXY += w * x * y; 00157 data->sumY += w * y; 00158 data->sumYY += w * y * y; 00159 } 00160 00170 double corr(UDF_INIT *initid, UDF_ARGS *args, char *is_null, char *error) { 00171 struct corr_storage * data; 00172 double ret; 00173 double nx; 00174 double ny; 00175 00176 data = (struct corr_storage *) initid->ptr; 00177 00178 nx = data->count * data->sumXX - data->sumX * data->sumX; 00179 ny = data->count * data->sumYY - data->sumY * data->sumY; 00180 00181 if (nx <= 0 || ny <= 0) { 00182 *is_null = 1; 00183 return 0; 00184 } 00185 00186 ret = (data->count * data->sumXY - data->sumX * data->sumY) 00187 / sqrt(nx * ny); 00188 00189 return ret; 00190 } 00191 00199 void corr_deinit(UDF_INIT *initid) { 00200 if (initid->ptr) { 00201 free(initid->ptr); 00202 } 00203 }