|
6 | 6 | // |
7 | 7 | //===----------------------------------------------------------------------===// |
8 | 8 |
|
| 9 | +#include <clc/float/definitions.h> |
| 10 | +#include <clc/math/clc_ep.h> |
9 | 11 | #include <clc/math/clc_fabs.h> |
10 | 12 | #include <clc/math/clc_fma.h> |
| 13 | +#include <clc/math/clc_frexp.h> |
| 14 | +#include <clc/math/clc_ldexp.h> |
11 | 15 | #include <clc/math/clc_mad.h> |
12 | 16 | #include <clc/math/math.h> |
13 | 17 | #include <clc/relational/clc_isinf.h> |
@@ -176,129 +180,52 @@ __clc_log(float x) |
176 | 180 |
|
177 | 181 | _CLC_OVERLOAD _CLC_DEF double |
178 | 182 | #if defined(COMPILING_LOG2) |
179 | | -__clc_log2(double x) |
| 183 | +__clc_log2(double a) |
180 | 184 | #elif defined(COMPILING_LOG10) |
181 | | -__clc_log10(double x) |
| 185 | +__clc_log10(double a) |
182 | 186 | #else |
183 | | -__clc_log(double x) |
| 187 | +__clc_log(double a) |
184 | 188 | #endif |
185 | 189 | { |
186 | | - |
187 | | -#ifndef COMPILING_LOG2 |
188 | | - // log2_lead and log2_tail sum to an extra-precise version of ln(2) |
189 | | - const double log2_lead = 6.93147122859954833984e-01; /* 0x3fe62e42e0000000 */ |
190 | | - const double log2_tail = 5.76999904754328540596e-08; /* 0x3e6efa39ef35793c */ |
191 | | -#endif |
192 | | - |
193 | | -#if defined(COMPILING_LOG10) |
194 | | - // log10e_lead and log10e_tail sum to an extra-precision version of log10(e) |
195 | | - // (19 bits in lead) |
196 | | - const double log10e_lead = |
197 | | - 4.34293746948242187500e-01; /* 0x3fdbcb7800000000 */ |
198 | | - const double log10e_tail = |
199 | | - 7.3495500964015109100644e-7; /* 0x3ea8a93728719535 */ |
200 | | -#elif defined(COMPILING_LOG2) |
201 | | - // log2e_lead and log2e_tail sum to an extra-precision version of log2(e) (19 |
202 | | - // bits in lead) |
203 | | - const double log2e_lead = 1.44269180297851562500E+00; /* 0x3FF7154400000000 */ |
204 | | - const double log2e_tail = 3.23791044778235969970E-06; /* 0x3ECB295C17F0BBBE */ |
205 | | -#endif |
206 | | - |
207 | | - // log_thresh1 = 9.39412117004394531250e-1 = 0x3fee0faa00000000 |
208 | | - // log_thresh2 = 1.06449508666992187500 = 0x3ff1082c00000000 |
209 | | - const double log_thresh1 = 0x1.e0faap-1; |
210 | | - const double log_thresh2 = 0x1.1082cp+0; |
211 | | - |
212 | | - bool is_near = x >= log_thresh1 && x <= log_thresh2; |
213 | | - |
214 | | - // Near 1 code |
215 | | - double r = x - 1.0; |
216 | | - double u = r / (2.0 + r); |
217 | | - double correction = r * u; |
218 | | - u = u + u; |
219 | | - double v = u * u; |
220 | | - double r1 = r; |
221 | | - |
222 | | - const double ca_1 = 8.33333333333317923934e-02; /* 0x3fb55555555554e6 */ |
223 | | - const double ca_2 = 1.25000000037717509602e-02; /* 0x3f89999999bac6d4 */ |
224 | | - const double ca_3 = 2.23213998791944806202e-03; /* 0x3f62492307f1519f */ |
225 | | - const double ca_4 = 4.34887777707614552256e-04; /* 0x3f3c8034c85dfff0 */ |
226 | | - |
227 | | - double r2 = __clc_fma( |
228 | | - u * v, __clc_fma(v, __clc_fma(v, __clc_fma(v, ca_4, ca_3), ca_2), ca_1), |
229 | | - -correction); |
230 | | - |
231 | | -#if defined(COMPILING_LOG10) |
232 | | - r = r1; |
233 | | - r1 = __clc_as_double(__clc_as_ulong(r1) & 0xffffffff00000000); |
234 | | - r2 = r2 + (r - r1); |
235 | | - double ret_near = __clc_fma( |
236 | | - log10e_lead, r1, |
237 | | - __clc_fma(log10e_lead, r2, __clc_fma(log10e_tail, r1, log10e_tail * r2))); |
238 | | -#elif defined(COMPILING_LOG2) |
239 | | - r = r1; |
240 | | - r1 = __clc_as_double(__clc_as_ulong(r1) & 0xffffffff00000000); |
241 | | - r2 = r2 + (r - r1); |
242 | | - double ret_near = __clc_fma( |
243 | | - log2e_lead, r1, |
244 | | - __clc_fma(log2e_lead, r2, __clc_fma(log2e_tail, r1, log2e_tail * r2))); |
| 190 | + int a_exp; |
| 191 | + double m = __clc_frexp(a, &a_exp); |
| 192 | + int b = m < (2.0 / 3.0); |
| 193 | + m = __clc_ldexp(m, b); |
| 194 | + int e = a_exp - b; |
| 195 | + |
| 196 | + double2 x = __clc_ep_div(m - 1.0, __clc_ep_fast_add(1.0, m)); |
| 197 | + double s = x.hi * x.hi; |
| 198 | + double p = __clc_mad(s, __clc_mad(s, __clc_mad(s, |
| 199 | + __clc_mad(s, __clc_mad(s, __clc_mad(s, 0x1.3ab76bf559e2bp-3, 0x1.385386b47b09ap-3), |
| 200 | + 0x1.7474dd7f4df2ep-3), 0x1.c71c016291751p-3), |
| 201 | + 0x1.249249b27acf1p-2), 0x1.99999998ef7b6p-2), 0x1.5555555555780p-1); |
| 202 | + double2 r = __clc_ep_fast_add(__clc_ep_ldexp(x, 1), s * x.hi * p); |
| 203 | + |
| 204 | +#if defined COMPILING_LOG2 |
| 205 | + r = __clc_ep_add( |
| 206 | + (double)e, |
| 207 | + __clc_ep_mul( |
| 208 | + __clc_ep_make_pair(0x1.71547652b82fep+0, 0x1.777d0ffda0d24p-56), r)); |
| 209 | +#elif defined COMPILING_LOG10 |
| 210 | + r = __clc_ep_add( |
| 211 | + __clc_ep_mul( |
| 212 | + __clc_ep_make_pair(0x1.34413509f79ffp-2, -0x1.9dc1da994fd21p-59), |
| 213 | + (double)e), |
| 214 | + __clc_ep_mul( |
| 215 | + __clc_ep_make_pair(0x1.bcb7b1526e50ep-2, 0x1.95355baaafad3p-57), r)); |
245 | 216 | #else |
246 | | - double ret_near = r1 + r2; |
| 217 | + r = __clc_ep_add(__clc_ep_mul(__clc_ep_make_pair(0x1.62e42fefa39efp-1, |
| 218 | + 0x1.abc9e3b39803fp-56), |
| 219 | + (double)e), |
| 220 | + r); |
247 | 221 | #endif |
248 | 222 |
|
249 | | - // This is the far from 1 code |
250 | | - |
251 | | - // Deal with subnormal |
252 | | - ulong ux = __clc_as_ulong(x); |
253 | | - ulong uxs = |
254 | | - __clc_as_ulong(__clc_as_double(0x03d0000000000000UL | ux) - 0x1.0p-962); |
255 | | - int c = ux < IMPBIT_DP64; |
256 | | - ux = c ? uxs : ux; |
257 | | - int expadjust = c ? 60 : 0; |
258 | | - |
259 | | - int xexp = ((__clc_as_int2(ux).hi >> 20) & 0x7ff) - EXPBIAS_DP64 - expadjust; |
260 | | - double f = __clc_as_double(HALFEXPBITS_DP64 | (ux & MANTBITS_DP64)); |
261 | | - int index = __clc_as_int2(ux).hi >> 13; |
262 | | - index = ((0x80 | (index & 0x7e)) >> 1) + (index & 0x1); |
263 | | - |
264 | | - double z1 = __CLC_USE_TABLE(ln_tbl_lo, index - 64); |
265 | | - double q = __CLC_USE_TABLE(ln_tbl_hi, index - 64); |
266 | | - |
267 | | - double f1 = index * 0x1.0p-7; |
268 | | - double f2 = f - f1; |
269 | | - u = f2 / __clc_fma(f2, 0.5, f1); |
270 | | - v = u * u; |
271 | | - |
272 | | - const double cb_1 = 8.33333333333333593622e-02; /* 0x3fb5555555555557 */ |
273 | | - const double cb_2 = 1.24999999978138668903e-02; /* 0x3f89999999865ede */ |
274 | | - const double cb_3 = 2.23219810758559851206e-03; /* 0x3f6249423bd94741 */ |
275 | | - |
276 | | - double poly = v * __clc_fma(v, __clc_fma(v, cb_3, cb_2), cb_1); |
277 | | - double z2 = q + __clc_fma(u, poly, u); |
278 | | - |
279 | | - double dxexp = (double)xexp; |
280 | | -#if defined(COMPILING_LOG10) |
281 | | - // Add xexp * log(2) to z1,z2 to get log(x) |
282 | | - r1 = __clc_fma(dxexp, log2_lead, z1); |
283 | | - r2 = __clc_fma(dxexp, log2_tail, z2); |
284 | | - double ret_far = __clc_fma( |
285 | | - log10e_lead, r1, |
286 | | - __clc_fma(log10e_lead, r2, __clc_fma(log10e_tail, r1, log10e_tail * r2))); |
287 | | -#elif defined(COMPILING_LOG2) |
288 | | - r1 = __clc_fma(log2e_lead, z1, dxexp); |
289 | | - r2 = __clc_fma(log2e_lead, z2, __clc_fma(log2e_tail, z1, log2e_tail * z2)); |
290 | | - double ret_far = r1 + r2; |
291 | | -#else |
292 | | - r1 = __clc_fma(dxexp, log2_lead, z1); |
293 | | - r2 = __clc_fma(dxexp, log2_tail, z2); |
294 | | - double ret_far = r1 + r2; |
295 | | -#endif |
| 223 | + double ret = r.hi; |
296 | 224 |
|
297 | | - double ret = is_near ? ret_near : ret_far; |
| 225 | + ret = __clc_isinf(a) ? a : ret; |
| 226 | + ret = a < 0.0 ? DBL_NAN : ret; |
| 227 | + ret = a == 0.0 ? -INFINITY : ret; |
298 | 228 |
|
299 | | - ret = __clc_isinf(x) ? __clc_as_double(PINFBITPATT_DP64) : ret; |
300 | | - ret = (__clc_isnan(x) | (x < 0.0)) ? __clc_as_double(QNANBITPATT_DP64) : ret; |
301 | | - ret = x == 0.0 ? __clc_as_double(NINFBITPATT_DP64) : ret; |
302 | 229 | return ret; |
303 | 230 | } |
304 | 231 |
|
|
0 commit comments