diff --git a/dht/dht.c b/dht/dht.c index 97b60224..0ddabaea 100644 --- a/dht/dht.c +++ b/dht/dht.c @@ -55,7 +55,7 @@ gsl_dht_alloc (size_t size) GSL_ERROR_VAL("could not allocate memory for j", GSL_ENOMEM, 0); } - t->Jjj = (double *)malloc(size*(size+1)/2 * sizeof(double)); + t->Jjj = (double *)malloc(size*size*sizeof(double)); if(t->Jjj == 0) { free(t->j); @@ -106,7 +106,6 @@ gsl_dht_new (size_t size, double nu, double xmax) return 0; status = gsl_dht_init(dht, nu, xmax); - if (status) return 0; @@ -122,10 +121,11 @@ gsl_dht_init(gsl_dht * t, double nu, double xmax) GSL_ERROR ("nu is negative", GSL_EDOM); } else { - size_t n, m; + size_t n, m, i; int stat_bz = GSL_SUCCESS; int stat_J = 0; double jN; + double *tmp; if(nu != t->nu) { /* Recalculate Bessel zeros if necessary. */ @@ -147,14 +147,43 @@ gsl_dht_init(gsl_dht * t, double nu, double xmax) /* J_nu(j[n] j[m] / j[N]) = Jjj[n(n-1)/2 + m - 1], 1 <= n,m <= size */ + + /* allocate temporary array */ + tmp = (double *)malloc(t->size*(t->size+1)/2 * sizeof(double)); + if(tmp == 0) { + gsl_dht_free(t); + GSL_ERROR_VAL("could not allocate memory for tmp", GSL_ENOMEM, 0); + } + + /* compute necessary values of J, storing on temporary array */ for(n=1; nsize+1; n++) { for(m=1; m<=n; m++) { double arg = t->j[n] * t->j[m] / jN; gsl_sf_result J; stat_J += gsl_sf_bessel_Jnu_e(nu, arg, &J); - t->Jjj[n*(n-1)/2 + m - 1] = J.val; + tmp[n*(n-1)/2 + m - 1] = J.val; + } + } + + /* copy data into Jjj such that it can be read sequentially by gsl_dht_apply */ + for(m=0; msize; m++) { + for(i=0; isize; i++) { + size_t m_local, n_local; + int coord; + if(i < m) { + m_local = i; + n_local = m; + } + else { + m_local = m; + n_local = i; + } + coord = n_local*(n_local+1)/2 + m_local; + t->Jjj[m * t->size + i] = tmp[coord]; } } + /* free temporary array */ + free(tmp); if(stat_J != 0) { GSL_ERROR("error computing bessel function", GSL_EFAILED); @@ -191,32 +220,15 @@ void gsl_dht_free(gsl_dht * t) int gsl_dht_apply(const gsl_dht * t, double * f_in, double * f_out) { - const double jN = t->j[t->size + 1]; - const double r = t->xmax / jN; - size_t m; - size_t i; + double const jN = t->j[t->size + 1]; + double const r = t->xmax / jN; + size_t m, i, l; + l = 0; for(m=0; msize; m++) { double sum = 0.0; - double Y; - for(i=0; isize; i++) { - /* Need to find max and min so that we - * address the symmetric Jjj matrix properly. - * FIXME: we can presumably optimize this - * by just running over the elements of Jjj - * in a deterministic manner. - */ - size_t m_local; - size_t n_local; - if(i < m) { - m_local = i; - n_local = m; - } - else { - m_local = m; - n_local = i; - } - Y = t->Jjj[n_local*(n_local+1)/2 + m_local] / t->J2[i+1]; + for(i=0; isize; i++, l++) { + double const Y = t->Jjj[l] / t->J2[i+1]; sum += Y * f_in[i]; } f_out[m] = sum * 2.0 * r*r;