/*
* LIBOIL - Library of Optimized Inner Loops
* Copyright (c) 2006 David A. Schleef <ds@schleef.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <stdio.h>
#include <math.h>
#include <liboil/liboil.h>
#include <liboil/liboilprototype.h>
#include <liboil/liboiltest.h>
#include <liboil/liboilcpu.h>
#include <liboil/liboilrandom.h>
#define OFFSET 100
void mmx_engine_test(void);
void sse2_engine_test(void);
int main (int argc, char *argv[])
{
uint32_t cpu_flags;
oil_init ();
cpu_flags = oil_cpu_get_flags ();
#ifdef HAVE_GCC_ASM
#if defined(HAVE_I386) || defined(HAVE_AMD64)
if (cpu_flags & OIL_IMPL_FLAG_MMX) {
mmx_engine_test();
} else {
printf("No MMX.\n");
}
if (cpu_flags & OIL_IMPL_FLAG_SSE2) {
sse2_engine_test();
} else {
printf("No SSE2.\n");
}
#endif
#endif
return 0;
}
int sci_sprint_alt(char *s,double x,double y)
{
int errsig;
int maxsig;
int sigfigs;
double mantissa;
double error;
double mindigit;
errsig = floor(log10(fabs(y)));
maxsig = floor(log10(fabs(x)));
mindigit = pow(10,errsig);
if(maxsig<errsig)maxsig=errsig;
sigfigs = maxsig-errsig+2;
mantissa = x*pow(10,-maxsig);
error = y*pow(10,-errsig+1);
if(isnan(x)){
return sprintf(s,"%g",x);
}
if(errsig==1 && maxsig<4 && maxsig>1){
return sprintf(s,"%0.0f(%2.0f)",x,error);
}
if(maxsig<=0 && maxsig>=-2){
return sprintf(s,"%0.*f(%2.0f)",sigfigs-1-maxsig,
mantissa*pow(10,maxsig),error);
}
return sprintf(s,"%0.*f(%2.0f)e%d",sigfigs-1,mantissa,error,maxsig);
}
#ifdef HAVE_GCC_ASM
#if defined(HAVE_I386) || defined(HAVE_AMD64)
void mmx_engine_test(void)
{
OilProfile prof;
double ave, std;
char s[40];
int i;
#define CHECK_LATENCY(insn) \
oil_profile_init (&prof); \
for(i=0;i<10;i++) { \
oil_profile_start(&prof); \
asm volatile ( \
" mov $1000, %%ecx\n" \
".p2align 4,,15\n" \
"1:\n" \
" " #insn " %%mm0, %%mm1\n" \
" " #insn " %%mm1, %%mm2\n" \
" " #insn " %%mm2, %%mm3\n" \
" " #insn " %%mm3, %%mm0\n" \
" decl %%ecx\n" \
" jne 1b\n" \
" emms\n" \
:::"ecx"); \
oil_profile_stop(&prof); \
} \
oil_profile_get_ave_std(&prof, &ave, &std); \
ave -= OFFSET; \
sci_sprint_alt(s,ave/4000,std/4000); \
printf("latency of " #insn ": %s\n", s);
CHECK_LATENCY(packssdw)
CHECK_LATENCY(packsswb)
CHECK_LATENCY(packuswb)
CHECK_LATENCY(paddb)
CHECK_LATENCY(paddd)
CHECK_LATENCY(paddsb)
CHECK_LATENCY(paddsw)
CHECK_LATENCY(paddusb)
CHECK_LATENCY(paddusw)
CHECK_LATENCY(paddw)
CHECK_LATENCY(pand)
CHECK_LATENCY(pandn)
CHECK_LATENCY(pcmpeqb)
CHECK_LATENCY(pcmpeqd)
CHECK_LATENCY(pcmpeqw)
CHECK_LATENCY(pcmpgtb)
CHECK_LATENCY(pcmpgtd)
CHECK_LATENCY(pcmpgtw)
CHECK_LATENCY(pmaddwd)
CHECK_LATENCY(pmulhw)
CHECK_LATENCY(pmullw)
//CHECK_LATENCY(pmulhuw)
CHECK_LATENCY(por)
CHECK_LATENCY(pslld)
CHECK_LATENCY(psllq)
CHECK_LATENCY(psllw)
CHECK_LATENCY(psrad)
CHECK_LATENCY(psraw)
CHECK_LATENCY(psrld)
CHECK_LATENCY(psrlq)
CHECK_LATENCY(psrlw)
CHECK_LATENCY(psubb)
CHECK_LATENCY(psubd)
CHECK_LATENCY(psubsb)
CHECK_LATENCY(psubsw)
CHECK_LATENCY(psubusb)
CHECK_LATENCY(psubusw)
CHECK_LATENCY(psubw)
CHECK_LATENCY(punpckhbw)
CHECK_LATENCY(punpckhdq)
CHECK_LATENCY(punpckhwd)
CHECK_LATENCY(punpcklbw)
CHECK_LATENCY(punpckldq)
CHECK_LATENCY(punpcklwd)
CHECK_LATENCY(pxor)
CHECK_LATENCY(psadbw)
#define CHECK_THROUGHPUT(insn) \
oil_profile_init (&prof); \
for(i=0;i<10;i++) { \
oil_profile_start(&prof); \
asm volatile ( \
" mov $1000, %%ecx\n" \
".p2align 4,,15\n" \
"1:\n" \
" " #insn " %%mm0, %%mm1\n" \
" " #insn " %%mm0, %%mm2\n" \
" " #insn " %%mm0, %%mm3\n" \
" " #insn " %%mm0, %%mm4\n" \
" " #insn " %%mm0, %%mm1\n" \
" " #insn " %%mm0, %%mm2\n" \
" " #insn " %%mm0, %%mm3\n" \
" " #insn " %%mm0, %%mm4\n" \
" " #insn " %%mm0, %%mm1\n" \
" " #insn " %%mm0, %%mm2\n" \
" " #insn " %%mm0, %%mm3\n" \
" " #insn " %%mm0, %%mm4\n" \
" " #insn " %%mm0, %%mm1\n" \
" " #insn " %%mm0, %%mm2\n" \
" " #insn " %%mm0, %%mm3\n" \
" " #insn " %%mm0, %%mm4\n" \
" decl %%ecx\n" \
" jne 1b\n" \
" emms\n" \
:::"ecx"); \
oil_profile_stop(&prof); \
} \
oil_profile_get_ave_std(&prof, &ave, &std); \
ave -= OFFSET; \
sci_sprint_alt(s,ave/16000,std/16000); \
printf("throughput of " #insn ": %s\n", s);
CHECK_THROUGHPUT(packssdw)
CHECK_THROUGHPUT(packsswb)
CHECK_THROUGHPUT(packuswb)
CHECK_THROUGHPUT(paddb)
CHECK_THROUGHPUT(paddd)
CHECK_THROUGHPUT(paddsb)
CHECK_THROUGHPUT(paddsw)
CHECK_THROUGHPUT(paddusb)
CHECK_THROUGHPUT(paddusw)
CHECK_THROUGHPUT(paddw)
CHECK_THROUGHPUT(pand)
CHECK_THROUGHPUT(pandn)
CHECK_THROUGHPUT(pcmpeqb)
CHECK_THROUGHPUT(pcmpeqd)
CHECK_THROUGHPUT(pcmpeqw)
CHECK_THROUGHPUT(pcmpgtb)
CHECK_THROUGHPUT(pcmpgtd)
CHECK_THROUGHPUT(pcmpgtw)
CHECK_THROUGHPUT(pmaddwd)
CHECK_THROUGHPUT(pmulhw)
CHECK_THROUGHPUT(pmullw)
#if 0
/* pmulhuw requires mmxext */
CHECK_THROUGHPUT(pmulhuw)
#endif
CHECK_THROUGHPUT(por)
CHECK_THROUGHPUT(pslld)
CHECK_THROUGHPUT(psllq)
CHECK_THROUGHPUT(psllw)
CHECK_THROUGHPUT(psrad)
CHECK_THROUGHPUT(psraw)
CHECK_THROUGHPUT(psrld)
CHECK_THROUGHPUT(psrlq)
CHECK_THROUGHPUT(psrlw)
CHECK_THROUGHPUT(psubb)
CHECK_THROUGHPUT(psubd)
CHECK_THROUGHPUT(psubsb)
CHECK_THROUGHPUT(psubsw)
CHECK_THROUGHPUT(psubusb)
CHECK_THROUGHPUT(psubusw)
CHECK_THROUGHPUT(psubw)
CHECK_THROUGHPUT(punpckhbw)
CHECK_THROUGHPUT(punpckhdq)
CHECK_THROUGHPUT(punpckhwd)
CHECK_THROUGHPUT(punpcklbw)
CHECK_THROUGHPUT(punpckldq)
CHECK_THROUGHPUT(punpcklwd)
CHECK_THROUGHPUT(pxor)
CHECK_THROUGHPUT(psadbw)
#undef CHECK_LATENCY
#undef CHECK_THROUGHPUT
}
void sse2_engine_test(void)
{
OilProfile prof;
double ave, std;
char s[40];
int i;
#define CHECK_LATENCY(insn) \
oil_profile_init (&prof); \
for(i=0;i<10;i++) { \
oil_profile_start(&prof); \
asm volatile ( \
" mov $1000, %%ecx\n" \
".p2align 4,,15\n" \
"1:\n" \
" " #insn " %%xmm0, %%xmm1\n" \
" " #insn " %%xmm1, %%xmm2\n" \
" " #insn " %%xmm2, %%xmm3\n" \
" " #insn " %%xmm3, %%xmm0\n" \
" decl %%ecx\n" \
" jne 1b\n" \
:::"ecx"); \
oil_profile_stop(&prof); \
} \
oil_profile_get_ave_std(&prof, &ave, &std); \
ave -= OFFSET; \
sci_sprint_alt(s,ave/4000,std/4000); \
printf("latency of " #insn ": %s\n", s);
CHECK_LATENCY(packssdw)
CHECK_LATENCY(packsswb)
CHECK_LATENCY(packuswb)
CHECK_LATENCY(paddb)
CHECK_LATENCY(paddd)
CHECK_LATENCY(paddsb)
CHECK_LATENCY(paddsw)
CHECK_LATENCY(paddusb)
CHECK_LATENCY(paddusw)
CHECK_LATENCY(paddw)
CHECK_LATENCY(pand)
CHECK_LATENCY(pandn)
CHECK_LATENCY(pcmpeqb)
CHECK_LATENCY(pcmpeqd)
CHECK_LATENCY(pcmpeqw)
CHECK_LATENCY(pcmpgtb)
CHECK_LATENCY(pcmpgtd)
CHECK_LATENCY(pcmpgtw)
CHECK_LATENCY(pmaddwd)
CHECK_LATENCY(pmulhw)
CHECK_LATENCY(pmullw)
#if 0
/* pmulhuw requires mmxext */
CHECK_LATENCY(pmulhuw)
#endif
CHECK_LATENCY(por)
CHECK_LATENCY(pslld)
CHECK_LATENCY(psllq)
CHECK_LATENCY(psllw)
CHECK_LATENCY(psrad)
CHECK_LATENCY(psraw)
CHECK_LATENCY(psrld)
CHECK_LATENCY(psrlq)
CHECK_LATENCY(psrlw)
CHECK_LATENCY(psubb)
CHECK_LATENCY(psubd)
CHECK_LATENCY(psubsb)
CHECK_LATENCY(psubsw)
CHECK_LATENCY(psubusb)
CHECK_LATENCY(psubusw)
CHECK_LATENCY(psubw)
CHECK_LATENCY(punpckhbw)
CHECK_LATENCY(punpckhdq)
CHECK_LATENCY(punpckhwd)
CHECK_LATENCY(punpcklbw)
CHECK_LATENCY(punpckldq)
CHECK_LATENCY(punpcklwd)
CHECK_LATENCY(pxor)
#define CHECK_THROUGHPUT(insn) \
oil_profile_init (&prof); \
for(i=0;i<10;i++) { \
oil_profile_start(&prof); \
asm volatile ( \
" mov $1000, %%ecx\n" \
".p2align 4,,15\n" \
"1:\n" \
" " #insn " %%xmm0, %%xmm1\n" \
" " #insn " %%xmm0, %%xmm2\n" \
" " #insn " %%xmm0, %%xmm3\n" \
" " #insn " %%xmm0, %%xmm4\n" \
" " #insn " %%xmm0, %%xmm5\n" \
" " #insn " %%xmm0, %%xmm6\n" \
" " #insn " %%xmm0, %%xmm7\n" \
" " #insn " %%xmm0, %%xmm1\n" \
" " #insn " %%xmm0, %%xmm2\n" \
" " #insn " %%xmm0, %%xmm3\n" \
" " #insn " %%xmm0, %%xmm4\n" \
" " #insn " %%xmm0, %%xmm5\n" \
" " #insn " %%xmm0, %%xmm6\n" \
" " #insn " %%xmm0, %%xmm7\n" \
" decl %%ecx\n" \
" jne 1b\n" \
:::"ecx"); \
oil_profile_stop(&prof); \
} \
oil_profile_get_ave_std(&prof, &ave, &std); \
ave -= OFFSET; \
sci_sprint_alt(s,ave/14000,std/14000); \
printf("throughput of " #insn ": %s\n", s);
CHECK_THROUGHPUT(packssdw)
CHECK_THROUGHPUT(packsswb)
CHECK_THROUGHPUT(packuswb)
CHECK_THROUGHPUT(paddb)
CHECK_THROUGHPUT(paddd)
CHECK_THROUGHPUT(paddsb)
CHECK_THROUGHPUT(paddsw)
CHECK_THROUGHPUT(paddusb)
CHECK_THROUGHPUT(paddusw)
CHECK_THROUGHPUT(paddw)
CHECK_THROUGHPUT(pand)
CHECK_THROUGHPUT(pandn)
CHECK_THROUGHPUT(pcmpeqb)
CHECK_THROUGHPUT(pcmpeqd)
CHECK_THROUGHPUT(pcmpeqw)
CHECK_THROUGHPUT(pcmpgtb)
CHECK_THROUGHPUT(pcmpgtd)
CHECK_THROUGHPUT(pcmpgtw)
CHECK_THROUGHPUT(pmaddwd)
CHECK_THROUGHPUT(pmulhw)
CHECK_THROUGHPUT(pmullw)
CHECK_THROUGHPUT(pmulhuw)
CHECK_THROUGHPUT(por)
CHECK_THROUGHPUT(pslld)
CHECK_THROUGHPUT(psllq)
CHECK_THROUGHPUT(psllw)
CHECK_THROUGHPUT(psrad)
CHECK_THROUGHPUT(psraw)
CHECK_THROUGHPUT(psrld)
CHECK_THROUGHPUT(psrlq)
CHECK_THROUGHPUT(psrlw)
CHECK_THROUGHPUT(psubb)
CHECK_THROUGHPUT(psubd)
CHECK_THROUGHPUT(psubsb)
CHECK_THROUGHPUT(psubsw)
CHECK_THROUGHPUT(psubusb)
CHECK_THROUGHPUT(psubusw)
CHECK_THROUGHPUT(psubw)
CHECK_THROUGHPUT(punpckhbw)
CHECK_THROUGHPUT(punpckhdq)
CHECK_THROUGHPUT(punpckhwd)
CHECK_THROUGHPUT(punpcklbw)
CHECK_THROUGHPUT(punpckldq)
CHECK_THROUGHPUT(punpcklwd)
CHECK_THROUGHPUT(pxor)
}
#endif
#endif
syntax highlighted by Code2HTML, v. 0.9.1