/*
* LIBOIL - Library of Optimized Inner Loops
* Copyright (c) 2004 David A. Schleef <ds@schleef.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <liboil/liboil.h>
#include <liboil/liboilprofile.h>
#include <stdio.h>
#include <stdlib.h>
/* Number of float samples in the src/dest buffers that main allocates and processes. */
#define N 1000
/* Plain C loop implementation of the 4-coefficient polynomial; defined below. */
static void taylor4_f32_ref (float *dest, float *src, float *tmp, float *a, int n);
/* Same polynomial assembled from liboil array calls; defined below. */
static void taylor4_f32_oil (float *dest, float *src, float *tmp, float *a, int n);
/*
 * Benchmark driver.
 *
 * Fills src with 0..N-1, sets all four polynomial coefficients to 1, then
 * runs taylor4_f32_ref and taylor4_f32_oil ten times each inside an
 * OilProfile start/stop pair.  After each batch it prints the two values
 * reported by oil_profile_get_ave_std followed by the first ten results.
 *
 * Returns 0 on success, or EXIT_FAILURE if a work buffer cannot be
 * allocated.
 */
int main(int argc, char *argv[])
{
  float *dest;
  float *src;
  float *tmp;
  float a[4];
  int i;
  OilProfile prof;
  double ave, std;

  /* Command-line arguments are not used by this example. */
  (void) argc;
  (void) argv;

  oil_init();

  src = malloc(N * sizeof *src);
  dest = malloc(N * sizeof *dest);
  /* Scratch space: three N-element work arrays for taylor4_f32_oil. */
  tmp = malloc(3 * N * sizeof *tmp);
  if (src == NULL || dest == NULL || tmp == NULL) {
    fprintf(stderr, "out of memory\n");
    /* free(NULL) is a no-op, so no per-pointer checks are needed. */
    free(tmp);
    free(dest);
    free(src);
    return EXIT_FAILURE;
  }

  for (i = 0; i < N; i++) {
    src[i] = i;
  }

  a[0] = 1;
  a[1] = 1;
  a[2] = 1;
  a[3] = 1;

  /* Time the plain C reference. */
  oil_profile_init(&prof);
  for (i = 0; i < 10; i++) {
    oil_profile_start(&prof);
    taylor4_f32_ref (dest, src, tmp, a, N);
    oil_profile_stop(&prof);
  }
  oil_profile_get_ave_std (&prof, &ave, &std);
  printf("ref: %10.4g %10.4g\n", ave, std);
  for (i = 0; i < 10; i++) {
    printf("%g\n", dest[i]);
  }

  /* Time the liboil-based version; dest is overwritten. */
  oil_profile_init(&prof);
  for (i = 0; i < 10; i++) {
    oil_profile_start(&prof);
    taylor4_f32_oil (dest, src, tmp, a, N);
    oil_profile_stop(&prof);
  }
  oil_profile_get_ave_std (&prof, &ave, &std);
  printf("oil: %10.4g %10.4g\n", ave, std);
  for (i = 0; i < 10; i++) {
    printf("%g\n", dest[i]);
  }

  free(tmp);
  free(dest);
  free(src);
  return 0;
}
/*
 * Reference polynomial evaluation in plain C.
 *
 * For each of the n input samples x = src[k], stores
 *   a[0] + a[1]*x + a[2]*x*x + a[3]*x*x*x
 * into dest[k], accumulating the terms into the output element one at a
 * time.  The tmp argument is accepted for signature parity with the liboil
 * variant and is not touched.
 */
static void
taylor4_f32_ref (float *dest, float *src, float *tmp, float *a, int n)
{
  float *out = dest;
  float *in = src;
  int remaining;

  (void) tmp;

  for (remaining = n; remaining > 0; remaining--, in++, out++) {
    float sample = *in;

    /* Constant term first, then add the linear, square and cubic terms. */
    *out = a[0];
    *out += a[1] * sample;
    *out += a[2] * sample * sample;
    *out += a[3] * sample * sample * sample;
  }
}
/*
 * Polynomial evaluation built from liboil array calls.
 *
 * Intended to produce, for each of the n samples x = src[i],
 *   dest[i] = a[0] + a[1]*x + a[2]*x^2 + a[3]*x^3
 * (the same polynomial as taylor4_f32_ref; rounding may differ because the
 * operations are grouped differently).
 *
 * dest - output array, n floats
 * src  - input array, n floats
 * tmp  - caller-provided scratch space of at least 3*n floats
 * a    - the four coefficients a[0]..a[3]
 * n    - number of samples; nothing is done when n <= 0
 */
static void
taylor4_f32_oil (float *dest, float *src, float *tmp, float *a, int n)
{
  float *tmp1;
  float *tmp2;
  float *tmp3;

  if (n <= 0) {
    return;
  }

  /*
   * Split the scratch buffer by the actual element count rather than a
   * file-level constant, so the three work arrays never overlap whatever
   * length the caller passes.
   */
  tmp1 = tmp;
  tmp2 = tmp + n;
  tmp3 = tmp + 2 * n;

  /* tmp1 = a[1]*x + a[0] */
  oil_scalarmultiply_f32_ns (tmp1, src, a+1, n);
  oil_scalaradd_f32_ns (tmp1, tmp1, a, n);

  /* tmp2 = x^2; tmp1 += a[2]*x^2 */
  oil_multiply_f32 (tmp2, src, src, n);
  oil_scalarmultiply_f32_ns (tmp3, tmp2, a+2, n);
  oil_add_f32 (tmp1, tmp1, tmp3, n);

  /* tmp2 = x^3; dest = tmp1 + a[3]*x^3 */
  oil_multiply_f32 (tmp2, tmp2, src, n);
  oil_scalarmultiply_f32_ns (tmp3, tmp2, a+3, n);
  oil_add_f32 (dest, tmp1, tmp3, n);
}
/* syntax highlighted by Code2HTML, v. 0.9.1 */