From 8ea64d4fe8878b2fa48a598c2d0178fd9a532aa0 Mon Sep 17 00:00:00 2001 From: "Ryan Hinton on tce-virtual.corral" Date: Tue, 15 Dec 2015 18:57:04 -0700 Subject: [PATCH 01/14] initial test files for double-precision support --- .../bintools/Compiler/data/doubles.adf | 1307 +++++++++++++++++ .../bintools/Compiler/data/doubles.cc | 118 ++ .../Compiler/tcetest_double_support.sh | 16 + 3 files changed, 1441 insertions(+) create mode 100644 testsuite/systemtest/bintools/Compiler/data/doubles.adf create mode 100644 testsuite/systemtest/bintools/Compiler/data/doubles.cc create mode 100644 testsuite/systemtest/bintools/Compiler/tcetest_double_support.sh diff --git a/testsuite/systemtest/bintools/Compiler/data/doubles.adf b/testsuite/systemtest/bintools/Compiler/data/doubles.adf new file mode 100644 index 0000000000..5b1db08c79 --- /dev/null +++ b/testsuite/systemtest/bintools/Compiler/data/doubles.adf @@ -0,0 +1,1307 @@ + + + + + 32 + + + + + + + BOOL + 0 + + + + + + + BOOL + 0 + + + + + + + BOOL + 1 + + + + + + + BOOL + 1 + + + + + + + + zero + 32 + + + + + 64 + + + + + + + BOOL + 0 + + + + + + + BOOL + 0 + + + + + + + BOOL + 1 + + + + + + + BOOL + 1 + + + + + + + + zero + 0 + + + + + + B1 + seg1 + + + + + + B1 + seg1 + + + + + + B1 + seg1 + + + + + + B1 + seg1 + + + + + + B1 + seg1 + + + + + + B1 + seg1 + + + B64 + seg1 + + + + + + B1 + seg1 + + + B64 + seg1 + + + + + + B1 + seg1 + + + + + + B1 + seg1 + + + + + + B1 + seg1 + + + + + + B1 + seg1 + + + + + + B1 + seg1 + + + + + + B1 + seg1 + + + + + + B64 + seg1 + + + + + + B64 + seg1 + + + + + + B64 + seg1 + + + + + + B1 + seg1 + + + + + + B64 + seg1 + + + + + + B64 + seg1 + + + + + + B64 + seg1 + + + B1 + seg1 + + + + + + B64 + seg1 + + + B1 + seg1 + + + + + + lsu_i1 + 32 + + + + + lsu_o1 + 32 + + + lsu_i2 + 32 + + + ldw + in1t + out1 + + + 0 + 1 + + + 2 + 1 + + + + + ldq + in1t + out1 + + + 0 + 1 + + + 2 + 1 + + + + + ldh + in1t + out1 + + + 0 + 1 + + + 2 + 1 + + + + + stw + in1t + in2 + + + 0 + 1 + + + 0 + 1 + + + + + stq + in1t + in2 + + + 0 + 1 + + + 0 + 1 + + + + + sth + in1t + in2 + + + 0 + 1 + + + 0 + 1 + + + + + ldqu + in1t + out1 + + + 0 + 1 + + + 2 + 1 + + + + + ldhu + in1t + out1 + + + 0 + 1 + + + 2 + 1 + + + + data + + + + + ALU_i1 + 32 + + + + + ALU_i2 + 32 + + + ALU_o1 + 32 + + + add + in1t + in2 + out1 + + + 0 + 1 + + + 0 + 1 + + + 0 + 1 + + + + + and + in1t + in2 + out1 + + + 0 + 1 + + + 0 + 1 + + + 0 + 1 + + + + + eq + in1t + in2 + out1 + + + 0 + 1 + + + 0 + 1 + + + 0 + 1 + + + + + gt + in1t + in2 + out1 + + + 0 + 1 + + + 0 + 1 + + + 0 + 1 + + + + + gtu + in1t + in2 + out1 + + + 0 + 1 + + + 0 + 1 + + + 0 + 1 + + + + + ior + in1t + in2 + out1 + + + 0 + 1 + + + 0 + 1 + + + 0 + 1 + + + + + shl + in2 + in1t + out1 + + + 0 + 1 + + + 0 + 1 + + + 0 + 1 + + + + + shr + in2 + in1t + out1 + + + 0 + 1 + + + 0 + 1 + + + 0 + 1 + + + + + shru + in2 + in1t + out1 + + + 0 + 1 + + + 0 + 1 + + + 0 + 1 + + + + + sub + in1t + in2 + out1 + + + 0 + 1 + + + 0 + 1 + + + 0 + 1 + + + + + sxhw + in1t + out1 + + + 0 + 1 + + + 0 + 1 + + + + + sxqw + in1t + out1 + + + 0 + 1 + + + 0 + 1 + + + + + xor + in1t + in2 + out1 + + + 0 + 1 + + + 0 + 1 + + + 0 + 1 + + + + + + + + + FPU_i1 + 64 + + + FPU_i2 + 64 + + + + + FPU_o1 + 64 + + + absd + i2 + out + + + 0 + 1 + + + 0 + 1 + + + + + addd + i1 + i2 + out + + + 0 + 1 + + + 0 + 1 + + + 0 + 1 + + + + + divd + i1 + i2 + out + + + 0 + 1 + + + 0 + 1 + + + 0 + 1 + + + + + eqd + i1 + i2 + out + + + 0 + 1 + + + 0 + 1 + + + 0 + 1 + + + + + equd + i1 + i2 + out + + + 0 + 1 + + + 0 + 1 + + + 0 + 1 + + + + + ged + i1 + i2 + out + + + 0 + 1 + + + 0 + 1 + + + 0 + 1 + + + + + geud + i1 + i2 + out + + + 0 + 1 + + + 0 + 1 + + + 0 + 1 + + + + + gtd + i1 + i2 + out + + + 0 + 1 + + + 0 + 1 + + + 0 + 1 + + + + + gtud + i1 + i2 + out + + + 0 + 1 + + + 0 + 1 + + + 0 + 1 + + + + + invsqrtd + i2 + out + + + 0 + 1 + + + 0 + 1 + + + + + led + i1 + i2 + out + + + 0 + 1 + + + 0 + 1 + + + 0 + 1 + + + + + leud + i1 + i2 + out + + + 0 + 1 + + + 0 + 1 + + + 0 + 1 + + + + + ltd + i1 + i2 + out + + + 0 + 1 + + + 0 + 1 + + + 0 + 1 + + + + + ltud + i1 + i2 + out + + + 0 + 1 + + + 0 + 1 + + + 0 + 1 + + + + + maxd + i1 + i2 + out + + + 0 + 1 + + + 0 + 1 + + + 0 + 1 + + + + + mind + i1 + i2 + out + + + 0 + 1 + + + 0 + 1 + + + 0 + 1 + + + + + muld + i1 + i2 + out + + + 0 + 1 + + + 0 + 1 + + + 0 + 1 + + + + + ned + i1 + i2 + out + + + 0 + 1 + + + 0 + 1 + + + 0 + 1 + + + + + negd + i2 + out + + + 0 + 1 + + + 0 + 1 + + + + + neud + i1 + i2 + out + + + 0 + 1 + + + 0 + 1 + + + 0 + 1 + + + + + ordd + i1 + i2 + out + + + 0 + 1 + + + 0 + 1 + + + 0 + 1 + + + + + sqrtd + i2 + out + + + 0 + 1 + + + 0 + 1 + + + + + squared + i2 + out + + + 0 + 1 + + + 0 + 1 + + + + + subd + i1 + i2 + out + + + 0 + 1 + + + 0 + 1 + + + 0 + 1 + + + + + uordd + i1 + i2 + out + + + 0 + 1 + + + 0 + 1 + + + 0 + 1 + + + + + + + + + LSDU_i1 + 32 + + + + + LSDU_i2 + 64 + + + LSDU_o1 + 64 + + + ldd + adr + do + + + 0 + 1 + + + 2 + 1 + + + + + std + adr + di + + + 0 + 1 + + + 0 + 1 + + + + data + + + + normal + 8 + 32 + 1 + 1 + + RF_i1 + + + RF_o1 + + + + + normal + 2 + 1 + 1 + 1 + + bool_i1 + + + bool_o1 + + + + + normal + 8 + 64 + 0 + 0 + + RF64_o1 + + + RF64_i1 + + + + + 8 + 0 + 65535 + 0 + + + + 8 + 0 + 65535 + + + + 8 + 0 + 65535 + 0 + 1 + + + + + gcu_i1 + 32 + + + + + gcu_i2 + gcu_o1 + 32 + + ra + + jump + pc + + + 0 + 1 + + + + + call + pc + + + 0 + 1 + + + + instructions + 3 + 1 + + + diff --git a/testsuite/systemtest/bintools/Compiler/data/doubles.cc b/testsuite/systemtest/bintools/Compiler/data/doubles.cc new file mode 100644 index 0000000000..f3d8a65c84 --- /dev/null +++ b/testsuite/systemtest/bintools/Compiler/data/doubles.cc @@ -0,0 +1,118 @@ +/* + * Test code for double-precision operations/operands. + */ + +//#include +#include +#include +#include "lwpr.h" +#include "tceops.h" + + +// check storing double data to DATA section +volatile double data64[16] = { 1.0, -2.0, 2.0/3.0, 10.0, + 11.0, 12.0, 12.0+5.0, 0.9, //tanh(0.9), + 1.2, 3.1, -4.0-2.0, 4.4, //atan2(-4.0, 3.0), + 2.3, 4.5, 1.1*4.9, 4.5};//pow(1.3, 4.5)}; + + +// helper function until TUT comes back from Christmas... +#define my_fabs(val) (((val)>=0) ? (val) : (-(val))) +//inline double my_fabs(double val) +//{ +// if (val >= 0.0) +// return val; +// else +// return -val; +//} + +// helper function, check for approximate equality, update success +void check_approx_eql(int &succ, double desired, double actual, const char *msg, double tol=1e-15) +{ + double norm = actual; + double val_abs; + + // handle normalization, don't want to divide by zero + //if (fabs(desired) < tol) { // newlib making a mess of this + //if (desired < tol) { // newlib making a mess of this + if (my_fabs(desired) < tol) { // newlib making a mess of this + succ = 0; + } + //if (fabs(desired) < tol) { // newlib making a mess of this + //_TCE_ABSD(desired, val_abs); + //if (val_abs < tol) { + // norm = 1.0; + //} + +//compiler: // check for approximately equal within tolerance +//compiler: //if (fabs(actual - desired)/actual > tol) { +//compiler: _TCE_ABSD(actual - desired, val_abs); +//compiler: if (val_abs / actual > tol) { +//compiler: lwpr_print_str((char*)msg); +//compiler: succ = 0; +//compiler: } +} + + +// main test function +int main() +{ + int succ = 1; + double a, b; + + // quick, simple tests + check_approx_eql(succ, 1.567, fabs(-1.567), "fabs (absd) failed"); + + // test that memory stores are correct + a = 2.0; + b = 3.0; + check_approx_eql(succ, a/b, data64[2], "stored divide result failed"); + a = 12.0; + b = 5.0; + check_approx_eql(succ, a+b, data64[6], "stored add result failed"); + a = -4.0; + b = 2.0; + check_approx_eql(succ, a-b, data64[10], "stored subtract result failed"); + a = 1.1; + b = 4.9; + check_approx_eql(succ, a*b, data64[14], "stored multiply result failed"); + + a = data64[14]; + b = data64[15]; + + a = 1.4; + + data64[2] = a + b; + data64[3] = a - b; + data64[4] = a * b; + data64[5] = a / b; + + + return !succ; +} + + +/* FPU opcode list: + * absd + * negd + * addd + * subd + * muld + * divd + * squared + * sqrtd + * invsqrtd + * eqd equd + * ged geud + * gtd gtud + * led leud + * ltd ltud + * ned neud + * ordd uordd + * + * FPU doesn't contain LDD/STD or MACD. The LSDU handles LDD/STD. + * + * Found instruction MSUD, not in FPU, not sure what it does. ..UD is probably + * "unordered double". Tried to add it anyway, ProDe says I have the wrong + * number/type of operands or something like that. + */ diff --git a/testsuite/systemtest/bintools/Compiler/tcetest_double_support.sh b/testsuite/systemtest/bintools/Compiler/tcetest_double_support.sh new file mode 100644 index 0000000000..4ba4b4a8e5 --- /dev/null +++ b/testsuite/systemtest/bintools/Compiler/tcetest_double_support.sh @@ -0,0 +1,16 @@ +#!/bin/sh +### TCE TESTCASE +### title: Test double-precision operations and operands + +#old:mach=data/multi_addrspace.adf +#old:src=data/multi_addrspace.c +mach=data/doubles.adf +src=data/doubles.cc +program=$(mktemp tmpXXXXXX) + +#set -x +tcecc -a $mach -O3 $src -o $program && \ +ttasim -a $mach -p $program --no-debugmode + +rm -f $program + From ade0afff419070323d6d48f54ed157653844129a Mon Sep 17 00:00:00 2001 From: "Ryan Hinton on tce-virtual.corral" Date: Tue, 15 Dec 2015 18:57:52 -0700 Subject: [PATCH 02/14] port of changes from bzr, see [https://code.launchpad.net/~iobass/tce/doubles/+merge/245458] for details --- .gitignore | 4 +- tce/INSTALL | 11 + .../newlib/libc/include/machine/ieeefp.h | 1 - tce/opset/base/base.cc | 530 +++++++++++++----- tce/opset/base/base.opp | 468 +++++++++++++++- .../applibs/LLVMBackend/TCETargetMachine.cc | 2 +- tce/src/applibs/LLVMBackend/TDGen.cc | 299 ++++++++-- .../LLVMBackend/plugin/TCEDAGToDAGISel.cc | 5 +- .../LLVMBackend/plugin/TCEFrameInfo.hh | 2 +- .../LLVMBackend/plugin/TCEISelLowering.cc | 27 +- .../LLVMBackend/plugin/TCEInstrInfo.cc | 3 + .../LLVMBackend/plugin/TCEInstrInfo.td | 24 +- .../plugin/TCETargetMachinePlugin.cc | 6 + .../Algorithms/BBSchedulerController.hh | 2 +- .../Algorithms/BUBasicBlockScheduler.cc | 2 +- .../Algorithms/BasicBlockScheduler.cc | 4 +- .../BypassingBUBasicBlockScheduler.cc | 4 +- .../Scheduler/Algorithms/RegisterCopyAdder.cc | 6 +- .../Scheduler/Algorithms/RegisterRenamer.cc | 6 +- .../DDG/DataDependenceGraph.cc | 4 +- .../ExecutionPipelineResource.cc | 8 +- tce/tools/scripts/install_llvm_3.5.sh | 13 +- tce/tools/scripts/install_llvm_3.7.sh | 10 +- 23 files changed, 1203 insertions(+), 238 deletions(-) diff --git a/.gitignore b/.gitignore index 549df932ce..5b1fd70afe 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ *.o +*.so src/**/Makefile *.dot lastresults.csv @@ -8,6 +9,7 @@ vgcore* core.* core *.lo +*.a *.la **/runner tce/Makefile @@ -35,7 +37,7 @@ tce/*/Makefile tce/src/tools/tce_version_string.h tce/newlib-1.17.0/tce-llvm RE:.*/.deps -RE:.*/.libs +**/.libs tce/test/Makefile.in RE:tce/src/.*/Makefile RE:tce/test/.*/Makefile.in diff --git a/tce/INSTALL b/tce/INSTALL index 90e56a1a6a..4f5e3a7f7b 100644 --- a/tce/INSTALL +++ b/tce/INSTALL @@ -1,3 +1,14 @@ +Quick start +=========== + +Quick installation instructions are included for several operating systems. If +you're running a Debian-based Linux system (e.g. Ubuntu), follow the +instructions in INSTALL.Debian. Similarly, for RedHat-related distributions +(e.g. CentOS), follow the instructions in INSTALL.RedHat. + +The remainder of this document goes into more detail on the dependencies, +versions, and installation procedure for a Unix-style operating system. + Operating system ================ diff --git a/tce/newlib-1.17.0/newlib/libc/include/machine/ieeefp.h b/tce/newlib-1.17.0/newlib/libc/include/machine/ieeefp.h index 0760896b84..d46ba26bc6 100644 --- a/tce/newlib-1.17.0/newlib/libc/include/machine/ieeefp.h +++ b/tce/newlib-1.17.0/newlib/libc/include/machine/ieeefp.h @@ -324,7 +324,6 @@ #ifdef __TCE__ #define __IEEE_BIG_ENDIAN -#define _DOUBLE_IS_32BITS /* This define is used but never defined anywhere.. maybe it's not needed at all #define NO_LONG_DOUBLE diff --git a/tce/opset/base/base.cc b/tce/opset/base/base.cc index d57d8a439f..4f69a414a8 100644 --- a/tce/opset/base/base.cc +++ b/tce/opset/base/base.cc @@ -493,26 +493,31 @@ END_TRIGGER; END_OPERATION(NOT) ////////////////////////////////////////////////////////////////////////////// -// NEQF - arithmetic negation, floating-point +// NEGF - arithmetic negation, single-precision floating-point ////////////////////////////////////////////////////////////////////////////// OPERATION(NEGF) TRIGGER - if (BWIDTH(1) == FLT_WORD_SIZE) { - FloatWord in = FLT(1); - in = -in; - IO(2) = in; - } else if (BWIDTH(1) == DBL_WORD_SIZE) { - DoubleWord in = DBL(1); - in = -in; - IO(2) = in; - } else { - abortWithError("bit width of operand erronous"); - } + FloatWord in = FLT(1); + in = -in; + IO(2) = in; END_TRIGGER; END_OPERATION(NEGF) +////////////////////////////////////////////////////////////////////////////// +// NEGD - arithmetic negation, double-precision floating-point +////////////////////////////////////////////////////////////////////////////// +OPERATION(NEGD) + +TRIGGER + DoubleWord in = DBL(1); + in = -in; + IO(2) = in; +END_TRIGGER; + +END_OPERATION(NEGD) + ////////////////////////////////////////////////////////////////////////////// // MUL - integer multiply ////////////////////////////////////////////////////////////////////////////// @@ -598,13 +603,7 @@ END_OPERATION(MODU) OPERATION(ADDF) TRIGGER - if (BWIDTH(1) == FLT_WORD_SIZE && BWIDTH(2) == FLT_WORD_SIZE) { - IO(3) = FLT(1) + FLT(2); - } else if (BWIDTH(1) == DBL_WORD_SIZE && BWIDTH(2) == DBL_WORD_SIZE) { - IO(3) = DBL(1) + DBL(2); - } else { - abortWithError("bit widths of operands erronous"); - } + IO(3) = FLT(1) + FLT(2); END_TRIGGER; END_OPERATION(ADDF) @@ -615,13 +614,7 @@ END_OPERATION(ADDF) OPERATION(SUBF) TRIGGER - if (BWIDTH(1) == FLT_WORD_SIZE && BWIDTH(2) == FLT_WORD_SIZE) { - IO(3) = FLT(1) - FLT(2); - } else if (BWIDTH(1) == DBL_WORD_SIZE && BWIDTH(2) == DBL_WORD_SIZE) { - IO(3) = DBL(1) - DBL(2); - } else { - abortWithError("bit widths of operands erronous"); - } + IO(3) = FLT(1) - FLT(2); END_TRIGGER; END_OPERATION(SUBF) @@ -632,13 +625,7 @@ END_OPERATION(SUBF) OPERATION(MULF) TRIGGER - if (BWIDTH(1) == FLT_WORD_SIZE && BWIDTH(2) == FLT_WORD_SIZE) { - IO(3) = FLT(1) * FLT(2); - } else if (BWIDTH(1) == DBL_WORD_SIZE && BWIDTH(2) == DBL_WORD_SIZE) { - IO(3) = DBL(1) * DBL(2); - } else { - abortWithError("bit widths of operands erronous"); - } + IO(3) = FLT(1) * FLT(2); END_TRIGGER; END_OPERATION(MULF) @@ -649,13 +636,7 @@ END_OPERATION(MULF) OPERATION(DIVF) TRIGGER - if (BWIDTH(1) == FLT_WORD_SIZE && BWIDTH(2) == FLT_WORD_SIZE) { - IO(3) = FLT(1) / FLT(2); - } else if (BWIDTH(1) == DBL_WORD_SIZE && BWIDTH(2) == DBL_WORD_SIZE) { - IO(3) = DBL(1) / DBL(2); - } else { - abortWithError("bit widths of operands erronous"); - } + IO(3) = FLT(1) / FLT(2); END_TRIGGER; END_OPERATION(DIVF) @@ -666,13 +647,7 @@ END_OPERATION(DIVF) OPERATION(EQF) TRIGGER - if (BWIDTH(1) == FLT_WORD_SIZE && BWIDTH(2) == FLT_WORD_SIZE) { - IO(3) = (!isnan(FLT(1)) && !isnan(FLT(2)) && FLT(1) == FLT(2)) ? 1 : 0; - } else if (BWIDTH(1) == DBL_WORD_SIZE && BWIDTH(2) == DBL_WORD_SIZE) { - IO(3) = (!isnan(DBL(1)) && !isnan(DBL(2)) && DBL(1) == DBL(2)) ? 1 : 0; - } else { - abortWithError("bit widths of operands erronous"); - } + IO(3) = (!isnan(FLT(1)) && !isnan(FLT(2)) && FLT(1) == FLT(2)) ? 1 : 0; END_TRIGGER; END_OPERATION(EQF) @@ -684,13 +659,7 @@ END_OPERATION(EQF) OPERATION(EQUF) TRIGGER - if (BWIDTH(1) == FLT_WORD_SIZE && BWIDTH(2) == FLT_WORD_SIZE) { - IO(3) = (isnan(FLT(1)) || isnan(FLT(2)) || FLT(1) == FLT(2)) ? 1 : 0; - } else if (BWIDTH(1) == DBL_WORD_SIZE && BWIDTH(2) == DBL_WORD_SIZE) { - IO(3) = (isnan(DBL(1)) || isnan(DBL(2)) || DBL(1) == DBL(2)) ? 1 : 0; - } else { - abortWithError("bit widths of operands erronous"); - } + IO(3) = (isnan(FLT(1)) || isnan(FLT(2)) || FLT(1) == FLT(2)) ? 1 : 0; END_TRIGGER; END_OPERATION(EQUF) @@ -702,13 +671,7 @@ END_OPERATION(EQUF) OPERATION(NEF) TRIGGER - if (BWIDTH(1) == FLT_WORD_SIZE && BWIDTH(2) == FLT_WORD_SIZE) { - IO(3) = (!isnan(FLT(1)) && !isnan(FLT(2)) && FLT(1) != FLT(2)) ? 1 : 0; - } else if (BWIDTH(1) == DBL_WORD_SIZE && BWIDTH(2) == DBL_WORD_SIZE) { - IO(3) = (!isnan(DBL(1)) && !isnan(DBL(2)) && DBL(1) != DBL(2)) ? 1 : 0; - } else { - abortWithError("bit widths of operands erronous"); - } + IO(3) = (!isnan(FLT(1)) && !isnan(FLT(2)) && FLT(1) != FLT(2)) ? 1 : 0; END_TRIGGER; END_OPERATION(NEF) @@ -720,13 +683,7 @@ END_OPERATION(NEF) OPERATION(NEUF) TRIGGER - if (BWIDTH(1) == FLT_WORD_SIZE && BWIDTH(2) == FLT_WORD_SIZE) { - IO(3) = (isnan(FLT(1)) || isnan(FLT(2)) || FLT(1) != FLT(2)) ? 1 : 0; - } else if (BWIDTH(1) == DBL_WORD_SIZE && BWIDTH(2) == DBL_WORD_SIZE) { - IO(3) = (isnan(DBL(1)) || isnan(DBL(2)) || DBL(1) != DBL(2)) ? 1 : 0; - } else { - abortWithError("bit widths of operands erronous"); - } + IO(3) = (isnan(FLT(1)) || isnan(FLT(2)) || FLT(1) != FLT(2)) ? 1 : 0; END_TRIGGER; END_OPERATION(NEUF) @@ -737,13 +694,7 @@ END_OPERATION(NEUF) OPERATION(GTF) TRIGGER - if (BWIDTH(1) == FLT_WORD_SIZE && BWIDTH(2) == FLT_WORD_SIZE) { - IO(3) = (!isnan(FLT(1)) && !isnan(FLT(2)) && FLT(1) > FLT(2)) ? 1 : 0; - } else if (BWIDTH(1) == DBL_WORD_SIZE && BWIDTH(2) == DBL_WORD_SIZE) { - IO(3) = (!isnan(DBL(1)) && !isnan(DBL(2)) && DBL(1) > DBL(2)) ? 1 : 0; - } else { - abortWithError("bit widths of operands erronous"); - } + IO(3) = (!isnan(FLT(1)) && !isnan(FLT(2)) && FLT(1) > FLT(2)) ? 1 : 0; END_TRIGGER; END_OPERATION(GTF) @@ -754,13 +705,7 @@ END_OPERATION(GTF) OPERATION(GTUF) TRIGGER - if (BWIDTH(1) == FLT_WORD_SIZE && BWIDTH(2) == FLT_WORD_SIZE) { - IO(3) = (isnan(FLT(1)) || isnan(FLT(2)) || FLT(1) > FLT(2)) ? 1 : 0; - } else if (BWIDTH(1) == DBL_WORD_SIZE && BWIDTH(2) == DBL_WORD_SIZE) { - IO(3) = (isnan(DBL(1)) || isnan(DBL(2)) || DBL(1) > DBL(2)) ? 1 : 0; - } else { - abortWithError("bit widths of operands erronous"); - } + IO(3) = (isnan(FLT(1)) || isnan(FLT(2)) || FLT(1) > FLT(2)) ? 1 : 0; END_TRIGGER; END_OPERATION(GTUF) @@ -771,13 +716,7 @@ END_OPERATION(GTUF) OPERATION(GEF) TRIGGER - if (BWIDTH(1) == FLT_WORD_SIZE && BWIDTH(2) == FLT_WORD_SIZE) { - IO(3) = (!isnan(FLT(1)) && !isnan(FLT(2)) && FLT(1) >= FLT(2)) ? 1 : 0; - } else if (BWIDTH(1) == DBL_WORD_SIZE && BWIDTH(2) == DBL_WORD_SIZE) { - IO(3) = (!isnan(DBL(1)) && !isnan(DBL(2)) && DBL(1) >= DBL(2)) ? 1 : 0; - } else { - abortWithError("bit widths of operands erronous"); - } + IO(3) = (!isnan(FLT(1)) && !isnan(FLT(2)) && FLT(1) >= FLT(2)) ? 1 : 0; END_TRIGGER; END_OPERATION(GEF) @@ -788,13 +727,7 @@ END_OPERATION(GEF) OPERATION(GEUF) TRIGGER - if (BWIDTH(1) == FLT_WORD_SIZE && BWIDTH(2) == FLT_WORD_SIZE) { - IO(3) = (isnan(FLT(1)) || isnan(FLT(2)) || FLT(1) >= FLT(2)) ? 1 : 0; - } else if (BWIDTH(1) == DBL_WORD_SIZE && BWIDTH(2) == DBL_WORD_SIZE) { - IO(3) = (isnan(DBL(1)) || isnan(DBL(2)) || DBL(1) >= DBL(2)) ? 1 : 0; - } else { - abortWithError("bit widths of operands erronous"); - } + IO(3) = (isnan(FLT(1)) || isnan(FLT(2)) || FLT(1) >= FLT(2)) ? 1 : 0; END_TRIGGER; END_OPERATION(GEUF) @@ -805,13 +738,7 @@ END_OPERATION(GEUF) OPERATION(LTF) TRIGGER - if (BWIDTH(1) == FLT_WORD_SIZE && BWIDTH(2) == FLT_WORD_SIZE) { - IO(3) = (!isnan(FLT(1)) && !isnan(FLT(2)) && FLT(1) < FLT(2)) ? 1 : 0; - } else if (BWIDTH(1) == DBL_WORD_SIZE && BWIDTH(2) == DBL_WORD_SIZE) { - IO(3) = (!isnan(DBL(1)) && !isnan(DBL(2)) && DBL(1) < DBL(2)) ? 1 : 0; - } else { - abortWithError("bit widths of operands erronous"); - } + IO(3) = (!isnan(FLT(1)) && !isnan(FLT(2)) && FLT(1) < FLT(2)) ? 1 : 0; END_TRIGGER; END_OPERATION(LTF) @@ -823,13 +750,7 @@ END_OPERATION(LTF) OPERATION(LTUF) TRIGGER - if (BWIDTH(1) == FLT_WORD_SIZE && BWIDTH(2) == FLT_WORD_SIZE) { - IO(3) = (isnan(FLT(1)) || isnan(FLT(2)) || FLT(1) < FLT(2)) ? 1 : 0; - } else if (BWIDTH(1) == DBL_WORD_SIZE && BWIDTH(2) == DBL_WORD_SIZE) { - IO(3) = (isnan(DBL(1)) || isnan(DBL(2)) || DBL(1) < DBL(2)) ? 1 : 0; - } else { - abortWithError("bit widths of operands erronous"); - } + IO(3) = (isnan(FLT(1)) || isnan(FLT(2)) || FLT(1) < FLT(2)) ? 1 : 0; END_TRIGGER; END_OPERATION(LTUF) @@ -840,13 +761,7 @@ END_OPERATION(LTUF) OPERATION(LEF) TRIGGER - if (BWIDTH(1) == FLT_WORD_SIZE && BWIDTH(2) == FLT_WORD_SIZE) { - IO(3) = (!isnan(FLT(1)) && !isnan(FLT(2)) && FLT(1) <= FLT(2)) ? 1 : 0; - } else if (BWIDTH(1) == DBL_WORD_SIZE && BWIDTH(2) == DBL_WORD_SIZE) { - IO(3) = (!isnan(DBL(1)) && !isnan(DBL(2)) && DBL(1) <= DBL(2)) ? 1 : 0; - } else { - abortWithError("bit widths of operands erronous"); - } + IO(3) = (!isnan(FLT(1)) && !isnan(FLT(2)) && FLT(1) <= FLT(2)) ? 1 : 0; END_TRIGGER; END_OPERATION(LEF) @@ -857,13 +772,7 @@ END_OPERATION(LEF) OPERATION(LEUF) TRIGGER - if (BWIDTH(1) == FLT_WORD_SIZE && BWIDTH(2) == FLT_WORD_SIZE) { - IO(3) = (isnan(FLT(1)) || isnan(FLT(2)) || FLT(1) <= FLT(2)) ? 1 : 0; - } else if (BWIDTH(1) == DBL_WORD_SIZE && BWIDTH(2) == DBL_WORD_SIZE) { - IO(3) = (isnan(DBL(1)) || isnan(DBL(2)) || DBL(1) <= DBL(2)) ? 1 : 0; - } else { - abortWithError("bit widths of operands erronous"); - } + IO(3) = (isnan(FLT(1)) || isnan(FLT(2)) || FLT(1) <= FLT(2)) ? 1 : 0; END_TRIGGER; END_OPERATION(LEUF) @@ -875,13 +784,7 @@ END_OPERATION(LEUF) OPERATION(ORDF) TRIGGER - if (BWIDTH(1) == FLT_WORD_SIZE && BWIDTH(2) == FLT_WORD_SIZE) { - IO(3) = (!isnan(FLT(1)) && !isnan(FLT(2))) ? 1 : 0; - } else if (BWIDTH(1) == DBL_WORD_SIZE && BWIDTH(2) == DBL_WORD_SIZE) { - IO(3) = (!isnan(DBL(1)) && !isnan(DBL(2))) ? 1 : 0; - } else { - abortWithError("bit widths of operands erronous"); - } + IO(3) = (!isnan(FLT(1)) && !isnan(FLT(2))) ? 1 : 0; END_TRIGGER; END_OPERATION(ORDF) @@ -893,17 +796,217 @@ END_OPERATION(ORDF) OPERATION(UORDF) TRIGGER - if (BWIDTH(1) == FLT_WORD_SIZE && BWIDTH(2) == FLT_WORD_SIZE) { - IO(3) = (isnan(FLT(1)) || isnan(FLT(2))) ? 1 : 0; - } else if (BWIDTH(1) == DBL_WORD_SIZE && BWIDTH(2) == DBL_WORD_SIZE) { - IO(3) = (isnan(DBL(1)) || isnan(DBL(2))) ? 1 : 0; - } else { - abortWithError("bit widths of operands erronous"); - } + IO(3) = (isnan(FLT(1)) || isnan(FLT(2))) ? 1 : 0; END_TRIGGER; END_OPERATION(UORDF) + +////////////////////////////////////////////////////////////////////////////// +// ADDD - double-precision floating-point add +////////////////////////////////////////////////////////////////////////////// +OPERATION(ADDD) + +TRIGGER + IO(3) = DBL(1) + DBL(2); +END_TRIGGER; + +END_OPERATION(ADDD) + +////////////////////////////////////////////////////////////////////////////// +// SUBD - double-precision floating-point subtract +////////////////////////////////////////////////////////////////////////////// +OPERATION(SUBD) + +TRIGGER + IO(3) = DBL(1) - DBL(2); +END_TRIGGER; + +END_OPERATION(SUBD) + +////////////////////////////////////////////////////////////////////////////// +// MULD - double-precision floating-point multiply +////////////////////////////////////////////////////////////////////////////// +OPERATION(MULD) + +TRIGGER + IO(3) = DBL(1) * DBL(2); +END_TRIGGER; + +END_OPERATION(MULD) + +////////////////////////////////////////////////////////////////////////////// +// DIVD - double-precision floating-point divide +////////////////////////////////////////////////////////////////////////////// +OPERATION(DIVD) + +TRIGGER + IO(3) = DBL(1) / DBL(2); +END_TRIGGER; + +END_OPERATION(DIVD) + + +////////////////////////////////////////////////////////////////////////////// +// EQD - double-precision floating-point compare equal (ordered) +////////////////////////////////////////////////////////////////////////////// +OPERATION(EQD) + +TRIGGER + IO(3) = (!isnan(DBL(1)) && !isnan(DBL(2)) && DBL(1) == DBL(2)) ? 1 : 0; +END_TRIGGER; + +END_OPERATION(EQD) + + +////////////////////////////////////////////////////////////////////////////// +// EQUD - double-precision floating-point compare equal (unordered) +////////////////////////////////////////////////////////////////////////////// +OPERATION(EQUD) + +TRIGGER + IO(3) = (isnan(DBL(1)) || isnan(DBL(2)) || DBL(1) == DBL(2)) ? 1 : 0; +END_TRIGGER; + +END_OPERATION(EQUD) + + +////////////////////////////////////////////////////////////////////////////// +// NED - double-precision floating-point compare not equal (ordered) +////////////////////////////////////////////////////////////////////////////// +OPERATION(NED) + +TRIGGER + IO(3) = (!isnan(DBL(1)) && !isnan(DBL(2)) && DBL(1) != DBL(2)) ? 1 : 0; +END_TRIGGER; + +END_OPERATION(NED) + + +////////////////////////////////////////////////////////////////////////////// +// NEUD - floating-point compare not equal (unordered) +////////////////////////////////////////////////////////////////////////////// +OPERATION(NEUD) + +TRIGGER + IO(3) = (isnan(DBL(1)) || isnan(DBL(2)) || DBL(1) != DBL(2)) ? 1 : 0; +END_TRIGGER; + +END_OPERATION(NEUD) + +////////////////////////////////////////////////////////////////////////////// +// GTD - double-precision floating-point compare greater (ordered) +////////////////////////////////////////////////////////////////////////////// +OPERATION(GTD) + +TRIGGER + IO(3) = (!isnan(DBL(1)) && !isnan(DBL(2)) && DBL(1) > DBL(2)) ? 1 : 0; +END_TRIGGER; + +END_OPERATION(GTD) + +////////////////////////////////////////////////////////////////////////////// +// GTUD - double-precision floating-point compare greater (unordered) +////////////////////////////////////////////////////////////////////////////// +OPERATION(GTUD) + +TRIGGER + IO(3) = (isnan(DBL(1)) || isnan(DBL(2)) || DBL(1) > DBL(2)) ? 1 : 0; +END_TRIGGER; + +END_OPERATION(GTUD) + +////////////////////////////////////////////////////////////////////////////// +// GED - double-precision floating-point compare greater or equal +////////////////////////////////////////////////////////////////////////////// +OPERATION(GED) + +TRIGGER + IO(3) = (!isnan(DBL(1)) && !isnan(DBL(2)) && DBL(1) >= DBL(2)) ? 1 : 0; +END_TRIGGER; + +END_OPERATION(GED) + +////////////////////////////////////////////////////////////////////////////// +// GEUD - double-precision floating-point compare greater or equal (unordered) +////////////////////////////////////////////////////////////////////////////// +OPERATION(GEUD) + +TRIGGER + IO(3) = (isnan(DBL(1)) || isnan(DBL(2)) || DBL(1) >= DBL(2)) ? 1 : 0; +END_TRIGGER; + +END_OPERATION(GEUD) + +////////////////////////////////////////////////////////////////////////////// +// LTD - double-precision floating-point compare lower (ordered) +////////////////////////////////////////////////////////////////////////////// +OPERATION(LTD) + +TRIGGER + IO(3) = (!isnan(DBL(1)) && !isnan(DBL(2)) && DBL(1) < DBL(2)) ? 1 : 0; +END_TRIGGER; + +END_OPERATION(LTD) + + +////////////////////////////////////////////////////////////////////////////// +// LTUD - double-precision floating-point compare lower (unordered) +////////////////////////////////////////////////////////////////////////////// +OPERATION(LTUD) + +TRIGGER + IO(3) = (isnan(DBL(1)) || isnan(DBL(2)) || DBL(1) < DBL(2)) ? 1 : 0; +END_TRIGGER; + +END_OPERATION(LTUD) + +////////////////////////////////////////////////////////////////////////////// +// LED - double-precision floating-point compare lower or equal +////////////////////////////////////////////////////////////////////////////// +OPERATION(LED) + +TRIGGER + IO(3) = (!isnan(DBL(1)) && !isnan(DBL(2)) && DBL(1) <= DBL(2)) ? 1 : 0; +END_TRIGGER; + +END_OPERATION(LED) + +////////////////////////////////////////////////////////////////////////////// +// LEUD - double-precision floating-point compare lower or equal (unordered) +////////////////////////////////////////////////////////////////////////////// +OPERATION(LEUD) + +TRIGGER + IO(3) = (isnan(DBL(1)) || isnan(DBL(2)) || DBL(1) <= DBL(2)) ? 1 : 0; +END_TRIGGER; + +END_OPERATION(LEUD) + + +////////////////////////////////////////////////////////////////////////////// +// ORDD - double-precision floating-point order check +////////////////////////////////////////////////////////////////////////////// +OPERATION(ORDD) + +TRIGGER + IO(3) = (!isnan(DBL(1)) && !isnan(DBL(2))) ? 1 : 0; +END_TRIGGER; + +END_OPERATION(ORDD) + + +////////////////////////////////////////////////////////////////////////////// +// UORDD - floating-point unorder check +////////////////////////////////////////////////////////////////////////////// +OPERATION(UORDD) + +TRIGGER + IO(3) = (isnan(DBL(1)) || isnan(DBL(2))) ? 1 : 0; +END_TRIGGER; + +END_OPERATION(UORDD) + ////////////////////////////////////////////////////////////////////////////// // CFI - convert floating-point to integer ////////////////////////////////////////////////////////////////////////////// @@ -956,6 +1059,58 @@ END_TRIGGER; END_OPERATION(CIFU) +////////////////////////////////////////////////////////////////////////////// +// CDI - convert double-precision floating-point to integer +////////////////////////////////////////////////////////////////////////////// +OPERATION(CDI) + +TRIGGER + DoubleWord in = DBL(1); + SIntWord out = static_cast(in); + IO(2) = out; +END_TRIGGER; + +END_OPERATION(CDI) + +////////////////////////////////////////////////////////////////////////////// +// CDIU - convert double-precision floating-point to unsigned integer +////////////////////////////////////////////////////////////////////////////// +OPERATION(CDIU) + +TRIGGER + DoubleWord in = DBL(1); + UIntWord out = static_cast(in); + IO(2) = out; +END_TRIGGER; + +END_OPERATION(CDIU) + +////////////////////////////////////////////////////////////////////////////// +// CID - convert integer to floating-point (i2d) +////////////////////////////////////////////////////////////////////////////// +// NOTE: can't be tested whether the output really is double... +OPERATION(CID) + +TRIGGER + SIntWord in = INT(1); + IO(2) = static_cast(in); +END_TRIGGER; + +END_OPERATION(CID) + +////////////////////////////////////////////////////////////////////////////// +// CIDU - convert unsigned integer to floating-point (i2d) +////////////////////////////////////////////////////////////////////////////// +// NOTE: can't be tested whether the output really is double... +OPERATION(CIDU) + +TRIGGER + UIntWord in = UINT(1); + IO(2) = static_cast(in); +END_TRIGGER; + +END_OPERATION(CIDU) + ////////////////////////////////////////////////////////////////////////////// // CFD - convert single precision floating-point to double ////////////////////////////////////////////////////////////////////////////// @@ -1060,6 +1215,19 @@ END_TRIGGER; END_OPERATION(CIHU) +////////////////////////////////////////////////////////////////////////////// +// INVSQRTD - double-precision floating-point inverse square root +////////////////////////////////////////////////////////////////////////////// +OPERATION(INVSQRTD) + +TRIGGER + DoubleWord in = DBL(1); + DoubleWord result(1.0 / sqrt(in)); + IO(2) = result; +END_TRIGGER; + +END_OPERATION(INVSQRTD) + ////////////////////////////////////////////////////////////////////////////// // INVSQRTH - half-float inverse square root ////////////////////////////////////////////////////////////////////////////// @@ -1133,27 +1301,17 @@ END_TRIGGER; END_OPERATION(ABS) ////////////////////////////////////////////////////////////////////////////// -// ABSF - floating-point absolute value +// ABSF - single-precision floating-point absolute value ////////////////////////////////////////////////////////////////////////////// OPERATION(ABSF) TRIGGER - if (BWIDTH(1) == FLT_WORD_SIZE) { - FloatWord in1 = FLT(1); - if (in1 < 0) { - in1 = -in1; - } - IO(2) = in1; - } else if (BWIDTH(1) == DBL_WORD_SIZE) { - DoubleWord in = DBL(1); - if (in < 0) { - in = -in; - } - IO(2) = in; - } else { - abortWithError("bit width of operand erronous"); + FloatWord in1 = FLT(1); + if (in1 < 0) { + in1 = -in1; } + IO(2) = in1; END_TRIGGER; END_OPERATION(ABSF) @@ -1208,6 +1366,72 @@ END_TRIGGER; END_OPERATION(MINF) +////////////////////////////////////////////////////////////////////////////// +// ABSD - double-precision floating-point absolute value +////////////////////////////////////////////////////////////////////////////// + +OPERATION(ABSD) + +TRIGGER + DoubleWord in = DBL(1); + if (in < 0) { + in = -in; + } + IO(2) = in; +END_TRIGGER; + +END_OPERATION(ABSD) + +////////////////////////////////////////////////////////////////////////////// +// SQRTD - floating-point square root +////////////////////////////////////////////////////////////////////////////// + +OPERATION(SQRTD) + +TRIGGER + IO(2) = sqrt(DBL(1)); +END_TRIGGER; + +END_OPERATION(SQRTD) + +////////////////////////////////////////////////////////////////////////////// +// MAXD - higher of two floating-point values +////////////////////////////////////////////////////////////////////////////// + +OPERATION(MAXD) + +TRIGGER + DoubleWord in1 = DBL(1); + DoubleWord in2 = DBL(2); + if( in2 > in1 ) { + IO(3) = in2; + } + else { + IO(3) = in1; + } +END_TRIGGER; + +END_OPERATION(MAXD) + +////////////////////////////////////////////////////////////////////////////// +// MIND - lower of two floating-point values +////////////////////////////////////////////////////////////////////////////// + +OPERATION(MIND) + +TRIGGER + DoubleWord in1 = DBL(1); + DoubleWord in2 = DBL(2); + if( in2 < in1 ) { + IO(3) = in2; + } + else { + IO(3) = in1; + } +END_TRIGGER; + +END_OPERATION(MIND) + ////////////////////////////////////////////////////////////////////////////// // LDQU - load 1 mimimum addressable unit from memory (unsigned) ////////////////////////////////////////////////////////////////////////////// diff --git a/tce/opset/base/base.opp b/tce/opset/base/base.opp index dd44826ef8..aae7eb9a92 100644 --- a/tce/opset/base/base.opp +++ b/tce/opset/base/base.opp @@ -672,6 +672,23 @@ Output returns '1' if input 1 is greater than input 2 and otherwise returns '0'. + + NEGD + + 1 + 1 + + + + EXEC_OPERATION(negd, IO(1), IO(2)); + + + SimValue tmp; + EXEC_OPERATION(subd, IO(1), IO(1), tmp); + EXEC_OPERATION(subd, tmp, IO(1), IO(2)); + + + NEGF @@ -712,6 +729,26 @@ Output returns '1' if input 1 is greater than input 2 and otherwise returns '0'. + + MAXD + + 2 + 1 + + + + + + + MIND + + 2 + 1 + + + + + MAXF @@ -813,6 +850,300 @@ Output returns '1' if input 1 is greater than input 2 and otherwise returns '0'. + + ADDD + + 2 + 1 + + + + + + + + + + + + + EXEC_OPERATION(addd, IO(1), IO(2), IO(3)); + + + + + SUBD + + 2 + 1 + + + + + EXEC_OPERATION(subd, IO(1), IO(2), IO(3)); + + + + + MULD + + 2 + 1 + + + + + + + + + + + + + EXEC_OPERATION(muld, IO(1), IO(2), IO(3)); + + + + + DIVD + + 2 + 1 + + + + + EXEC_OPERATION(divd, IO(1), IO(2), IO(3)); + + + + + EQD + + 2 + 1 + + + + + + + + + + + + + EXEC_OPERATION(eqd, IO(1), IO(2), IO(3)); + + + + + NED + + 2 + 1 + + + + + + + + + + + + + EXEC_OPERATION(ned, IO(1), IO(2), IO(3)); + + + SimValue t1,t2; + EXEC_OPERATION(gtd, IO(1), IO(2), t1); + EXEC_OPERATION(gtd, IO(2), IO(1), t2); + EXEC_OPERATION(ior, t1, t2, IO(3)); + + + + + GTD + + 2 + 1 + + + + + EXEC_OPERATION(gtd, IO(1), IO(2), IO(3)); + + + + + GED + + 2 + 1 + + + + + EXEC_OPERATION(ged, IO(1), IO(2), IO(3)); + + + SimValue t1,t2; + EXEC_OPERATION(gtd, IO(1), IO(2), t1); + EXEC_OPERATION(eqd, IO(1), IO(2), t2); + EXEC_OPERATION(ior, t1, t2, IO(3)); + + + + + LTD + + 2 + 1 + + + + + EXEC_OPERATION(ltd, IO(1), IO(2), IO(3)); + + + EXEC_OPERATION(ged, IO(2), IO(1), IO(3)); + + + + + LED + + 2 + 1 + + + + + EXEC_OPERATION(led, IO(1), IO(2), IO(3)); + + + SimValue t1,t2; + EXEC_OPERATION(eqd, IO(1), IO(2), t1); + EXEC_OPERATION(gtd, IO(2), IO(1), t2); + EXEC_OPERATION(ior, t1, t2, IO(3)); + + + + + EQUD + + 2 + 1 + + + + + + + + + + + + + EXEC_OPERATION(eqd, IO(1), IO(2), IO(3)); + + + + + NEUD + + 2 + 1 + + + + + + + + + + + + + SimValue temp1; + EXEC_OPERATION(eqd, IO(1), IO(2), temp1); + EXEC_OPERATION(xor, temp1, 1, IO(3)); + + + + + GTUD + + 2 + 1 + + + + + EXEC_OPERATION(gtd, IO(1), IO(2), IO(3)); + + + + + GEUD + + 2 + 1 + + + + + SimValue temp1, temp2; + EXEC_OPERATION(gtd, IO(1), IO(2), temp1); + EXEC_OPERATION(eqd, IO(1), IO(2), temp2); + EXEC_OPERATION(ior, temp1, temp2, IO(3)); + + + + + LTUD + + 2 + 1 + + + + + SimValue temp1, temp2, temp3; + EXEC_OPERATION(gtd, IO(1), IO(2), temp1); + EXEC_OPERATION(eqd, IO(1), IO(2), temp2); + EXEC_OPERATION(ior, temp1, temp2, temp3); + EXEC_OPERATION(xor, temp3, 1, IO(3)); + + + + + LEUD + + 2 + 1 + + + + + SimValue temp1; + EXEC_OPERATION(gtd, IO(1), IO(2), temp1); + EXEC_OPERATION(xor,temp1,1, IO(3)); + + + SimValue t1,t2; + EXEC_OPERATION(eqd, IO(1), IO(2), t1); + EXEC_OPERATION(gtd, IO(2), IO(1), t2); + EXEC_OPERATION(ior, t1, t2, IO(3)); + + + ADDF @@ -973,7 +1304,7 @@ Output returns '1' if input 1 is greater than input 2 and otherwise returns '0'. EXEC_OPERATION(ltf, IO(1), IO(2), IO(3)); - EXEC_OPERATION(gtf, IO(2), IO(1), IO(3)); + EXEC_OPERATION(gef, IO(2), IO(1), IO(3)); @@ -1330,6 +1661,54 @@ EXEC_OPERATION(xor,temp1,1, IO(3)); + + MACD + + 3 + 1 + + + + + + + + + + + + + + SimValue t1; + EXEC_OPERATION(muld, IO(2), IO(3), t1); + EXEC_OPERATION(addd, IO(1), t1, IO(4)); + + + + + MSUD + + 3 + 1 + + + + + + + + + + + + + + SimValue t1; + EXEC_OPERATION(muld, IO(2), IO(3), t1); + EXEC_OPERATION(subd, IO(1), t1, IO(4)); + + + MAC @@ -1726,6 +2105,18 @@ EXEC_OPERATION(xor,temp1,1, IO(3)); + + ABSD + + 1 + 1 + + + + EXEC_OPERATION(absd, IO(1), IO(2)); + + + ABSF @@ -2011,6 +2402,51 @@ The simulation behavior definition assumes 100MHz clock frequency, which can be + + ORDD + Returns 1 if floating point input operands are ordered. i.e. neither of the input operands is NaN. + + + 2 + 1 + + + + + + + + + + + + + SimValue temp1, temp2; + EXEC_OPERATION(eqd, IO(1), IO(1), temp1); + EXEC_OPERATION(eqd, IO(2), IO(2), temp2); + EXEC_OPERATION(and, temp1, temp2, IO(3)); + + + + + UORDD + Returns 1 if the two floating point operands are unordered. i.e. isNaN(io1) || isNan(io2). + + + 2 + 1 + + + + + SimValue temp1, temp2, temp3; + EXEC_OPERATION(eqd, IO(1), IO(1), temp1); + EXEC_OPERATION(eqd, IO(2), IO(2), temp2); + EXEC_OPERATION(and, temp1, temp2, temp3); + EXEC_OPERATION(xor, temp3, 1, IO(3)); + + + ORDF Returns 1 if floating point input operands are ordered. i.e. neither of the input operands is NaN. @@ -2060,6 +2496,15 @@ EXEC_OPERATION(xor, temp3, 1, IO(3)); + + SQRTD + Performs a square root of a double precision floating point. + 1 + 1 + + + + SQRTF Performs a square root of a single precision floating point. For example, the MIPS R4000 FPU implements it. @@ -2086,6 +2531,15 @@ EXEC_OPERATION(xor, temp3, 1, IO(3)); + + INVSQRTD + Performs a inverse square root of a double-precision floating point. + 1 + 1 + + + + INVSQRTH Performs a inverse square root of a half-precision floating point. @@ -2096,6 +2550,18 @@ EXEC_OPERATION(xor, temp3, 1, IO(3)); + + SQUARED + Computes the square of a double-precision floating point. + 1 + 1 + + + + EXEC_OPERATION(muld, IO(1), IO(1), IO(2)); + + + SQUAREH Computes the square of a half-precision floating point. diff --git a/tce/src/applibs/LLVMBackend/TCETargetMachine.cc b/tce/src/applibs/LLVMBackend/TCETargetMachine.cc index 358c36237e..465281fbb6 100644 --- a/tce/src/applibs/LLVMBackend/TCETargetMachine.cc +++ b/tce/src/applibs/LLVMBackend/TCETargetMachine.cc @@ -192,7 +192,7 @@ TCETargetMachine::setTargetMachinePlugin(TCETargetMachinePlugin& plugin) { dataLayoutStr += "-i64:32:32"; dataLayoutStr += "-f16:16:16"; dataLayoutStr += "-f32:32:32"; - dataLayoutStr += "-f64:32:64"; + dataLayoutStr += "-f64:64:64"; dataLayoutStr += "-v64:32:64"; dataLayoutStr += "-v128:32:128"; dataLayoutStr += "-v256:32:256"; diff --git a/tce/src/applibs/LLVMBackend/TDGen.cc b/tce/src/applibs/LLVMBackend/TDGen.cc index e6cb9ba56a..3fed0f0ce8 100644 --- a/tce/src/applibs/LLVMBackend/TDGen.cc +++ b/tce/src/applibs/LLVMBackend/TDGen.cc @@ -231,6 +231,7 @@ TDGen::writeRegisterInfo(std::ostream& o) write1bitRegisterInfo(o); //write16bitRegisterInfo(o); write32bitRegisterInfo(o); + write64bitRegisterInfo(o); writeVectorRegisterInfo(o); return true; @@ -248,6 +249,13 @@ TDGen::writeRegisterClasses(std::ostream& o) { << "}" << std::endl; } + o << "class R16 aliases> : TCEReg {" + << "}" << std::endl; + if (hasExIntRegs_) { + o << "class R16_Ex aliases> : R16 {}" + << std::endl; + } + o << "class R32 aliases> : TCEReg {" << "}" << std::endl; if (hasExIntRegs_) { @@ -255,13 +263,13 @@ TDGen::writeRegisterClasses(std::ostream& o) { << std::endl; } - o << "class R16 aliases> : TCEReg {" + o << "class R64 aliases> : TCEReg {" << "}" << std::endl; if (hasExIntRegs_) { - o << "class R16_Ex aliases> : R16 {}" + o << "class R64_Ex aliases> : R64 {}" << std::endl; } - + for (int i = 0; i <= highestLaneInt_; i++) { o << "class R32_L_" << i << " aliases> : R32{}" << std::endl; } @@ -604,7 +612,6 @@ TDGen::write32bitRegisterInfo(std::ostream& o) { o << ")>;" << std::endl; } } - o << std::endl; } @@ -688,22 +695,21 @@ TDGen::write64bitRegisterInfo(std::ostream& o) { if (regs64bit_.size() < 1) { RegInfo reg = { "dummy64", 0 }; - writeRegisterDef(o, reg, "DIRES0", "Ri64", "", RESERVED); + writeRegisterDef(o, reg, "DIRES0", "R64", "", RESERVED); i64regs = "DIRES0"; } else { - - writeRegisterDef(o, regs64bit_[0], "DIRES0", "Ri64", "", RESERVED); + writeRegisterDef(o, regs64bit_[0], "DIRES0", "R64", "", RESERVED); for (unsigned i = 1; i < regs64bit_.size(); i++) { std::string regName = "DI" + Conversion::toString(i); i64regs += regName; i64regs += ", "; - writeRegisterDef(o, regs64bit_[i], regName, "Ri64", "", GPR); + writeRegisterDef(o, regs64bit_[i], regName, "R64", "", GPR); } i64regs += "DIRES0"; } o << std::endl - << "def I64Regs : RegisterClass<\"TCE\", [i64], 32, (add " // DIRES + << "def R64Regs : RegisterClass<\"TCE\", [i64,f64], 32, (add " // DIRES << i64regs << ")> ;" << std::endl; @@ -711,23 +717,21 @@ TDGen::write64bitRegisterInfo(std::ostream& o) { if (regs64bit_.size() < 1) { RegInfo reg = { "dummy64", 0 }; - writeRegisterDef(o, reg, "DRES0", "Rf64", "", RESERVED); + writeRegisterDef(o, reg, "DRES0", "R64", "", RESERVED); f64regs = "DRES0"; } else { - writeRegisterDef( - o, regs64bit_[0], "DRES0", "Ri64", "DIRES0", RESERVED); + writeRegisterDef(o, regs64bit_[0], "DRES0", "R64", "DIRES0", RESERVED); for (unsigned i = 1; i < regs64bit_.size(); i++) { std::string regName = "D" + Conversion::toString(i); std::string aliasName = "DI" + Conversion::toString(i); f64regs += regName; f64regs += ", "; - writeRegisterDef( - o, regs64bit_[i], regName, "Rf64", aliasName, GPR); + writeRegisterDef(o, regs64bit_[i], regName, "R64", aliasName, GPR); } f64regs += "DRES0"; } o << std::endl - << "def F64Regs : RegisterClass<\"TCE\", [f64], 32, (add " + << "def R64FPRegs : RegisterClass<\"TCE\", [f64], 32, (add " << f64regs << ")>;" << std::endl; } @@ -887,6 +891,7 @@ TDGen::writeVectorRegisterInfo( << "HFPRegs : RegisterClass<\"TCE\", [v" << vectorWidth << "f16], " << 32 * vectorWidth << ", (add V" << vectorWidth << "R32DUMMY)> ;" << std::endl << std::endl;*/ + //RWH: need something here for f64? } else { o << "def " << regClassBase << "_L_" << i << "Regs : RegisterClass<\"TCE\", [v" << vectorWidth @@ -1045,6 +1050,11 @@ TDGen::writeInstrInfo(std::ostream& os) { } } + opNames_["LDDdr"] = "LDD"; + opNames_["LDDdi"] = "LDD"; + opNames_["STDdr"] = "STD"; + opNames_["STDdi"] = "STD"; + opNames_["LDWfr"] = "LDW"; opNames_["LDWfi"] = "LDW"; opNames_["STWfr"] = "STW"; @@ -1379,6 +1389,7 @@ TDGen::writeBackendCode(std::ostream& o) { if (opName == "sxhw") hasSXHW = true; if (opName == "sxqw") hasSXQW = true; if (opName == "sqrtf") hasSQRTF = true; + //RWH: add more cases here for double-precision ops? } } @@ -1433,15 +1444,15 @@ TDGen::writeTopLevelTD(std::ostream& o) { * a * b = Boolean/predicate register * c - * d - * e + * d = Float64 register + * e = immediate float64 * f = Float32 register * g * h = Float16 register * i = Immediate integer * j = immediate boolean - * k = immediate float? - * l = immediate float16? + * k = immediate float32 + * l = immediate float16 * m = float2 vec? * n = float4 vec? * o = float8 vec? @@ -1501,6 +1512,10 @@ TDGen::writeOperationDefs( return; } + if (op.name() == "CDI" || op.name() == "CDIU") { + writeOperationDef(o, op, "rd", attrs, skipPattern); + return; + } // rotations are allways n x n -> n bits. if (op.name() == "ROTL" || op.name() == "ROTR" || @@ -1537,9 +1552,18 @@ TDGen::writeOperationDefs( writeOperationDef(o, op, "bjjb", attrs, skipPattern); writeOperationDef(o, op, "bjbb", attrs, skipPattern); writeOperationDef(o, op, "bbjb", attrs, skipPattern); - // TODO: what about floating-point values? + writeOperationDef(o, op, "dddb", attrs, skipPattern); + writeOperationDef(o, op, "deeb", attrs, skipPattern); + writeOperationDef(o, op, "dedb", attrs, skipPattern); + writeOperationDef(o, op, "ddeb", attrs, skipPattern); writeOperationDef(o, op, "fffb", attrs, skipPattern); + writeOperationDef(o, op, "fkkb", attrs, skipPattern); + writeOperationDef(o, op, "fkfb", attrs, skipPattern); + writeOperationDef(o, op, "ffkb", attrs, skipPattern); writeOperationDef(o, op, "hhhb", attrs, skipPattern); + writeOperationDef(o, op, "hllb", attrs, skipPattern); + writeOperationDef(o, op, "hlhb", attrs, skipPattern); + writeOperationDef(o, op, "hhlb", attrs, skipPattern); hasSelect_ = true; return; @@ -1549,7 +1573,6 @@ TDGen::writeOperationDefs( if (op.numberOfInputs() == 2 && op.numberOfOutputs() == 0) { Operand& operand1 = op.operand(1); Operand& operand2 = op.operand(2); - // TODO: add an else branch here for float immediates if ((operand1.type() == Operand::UINT_WORD || operand1.type() == Operand::SINT_WORD || operand1.type() == Operand::RAW_DATA) && @@ -1558,6 +1581,15 @@ TDGen::writeOperationDefs( operand2.type() == Operand::RAW_DATA)) { writeOperationDef(o, op, "ii", attrs, skipPattern); + } else if (operand1.type() == Operand::DOUBLE_WORD && + operand2.type() == Operand::DOUBLE_WORD) { + writeOperationDef(o, op, "ee", attrs, skipPattern); + } else if (operand1.type() == Operand::FLOAT_WORD && + operand2.type() == Operand::FLOAT_WORD) { + writeOperationDef(o, op, "kk", attrs, skipPattern); + } else if (operand1.type() == Operand::HALF_FLOAT_WORD && + operand2.type() == Operand::HALF_FLOAT_WORD) { + writeOperationDef(o, op, "ll", attrs, skipPattern); } } @@ -1565,9 +1597,6 @@ TDGen::writeOperationDefs( // this the ordinary def // then try with immediates. - // TODO: this should be 2^n loop instead of n loop, to get - // all permutations. - writeOperationDefs(o, op, operandTypes, attrs, skipPattern); // then with boolean outs, and vector versions. @@ -1584,8 +1613,11 @@ TDGen::writeOperationDefs( // create vector versions. // TODO: no half vectors yet, pending f16v4..f16v16 types in llvm. + // Also double vectors not implemented yet. if (outOperand.type() != Operand::HALF_FLOAT_WORD && - op.operand(1).type() != Operand::HALF_FLOAT_WORD) { + outOperand.type() != Operand::DOUBLE_WORD && + op.operand(1).type() != Operand::HALF_FLOAT_WORD && + op.operand(1).type() != Operand::DOUBLE_WORD) { for (int i = 0, w = 2; i < 3; i++, w<<=1) { char floatChar = 'm' + i; char intChar = 'v' + i; @@ -1620,6 +1652,8 @@ TDGen::writeOperationDefs( // first without imms. writeOperationDef(o, op, operandTypes, attrs, skipPattern, backendPrefix); + // TODO: this should be 2^n loop instead of n loop, to get + // all permutations. for (int i = 0; i < op.numberOfInputs(); i++) { bool canSwap = false; for (int j = i+1 ; j < op.numberOfInputs(); j++) { @@ -1642,6 +1676,9 @@ TDGen::writeOperationDefs( case 'b': c = 'j'; break; + case 'd': + c = 'e'; + break; case 'f': c = 'k'; break; @@ -1907,7 +1944,7 @@ TDGen::writeOperationDef( /** * Checks whether operand is integer or float type. * - * @return 'r' for integer, 'f' for float + * @return letter from operand type table, e.g. 'r' for integer, 'f' for float */ char TDGen::operandChar(Operand& operand) { @@ -1915,9 +1952,11 @@ TDGen::operandChar(Operand& operand) { return 'b'; } else if (operand.type() == Operand::HALF_FLOAT_WORD) { return 'h'; + } else if (operand.type() == Operand::DOUBLE_WORD) { + return 'd'; } else if (operand.type() != Operand::UINT_WORD && - operand.type() != Operand::SINT_WORD && - operand.type() != Operand::RAW_DATA) { + operand.type() != Operand::SINT_WORD && + operand.type() != Operand::RAW_DATA) { return 'f'; } else { return 'r'; @@ -1988,7 +2027,7 @@ TDGen::writeEmulationPattern( char inputType = operandChar(op.operand(i+1)); if (immInput == i+1) { // float imm operands not allowed - if (inputType == 'f' || inputType == 'h') { + if (inputType == 'f' || inputType == 'h' || inputType == 'd') { ok = false; break; } else { @@ -2011,12 +2050,19 @@ TDGen::writeEmulationPattern( op.name() == "LEF" || op.name() == "LEUF" || op.name() == "GTF" || op.name() == "GTUF" || op.name() == "NEF" || op.name() == "NEUF" || + op.name() == "LTD" || op.name() == "LTUD" || + op.name() == "EQD" || op.name() == "EQUD" || + op.name() == "GED" || op.name() == "GEUD" || + op.name() == "LED" || op.name() == "LEUD" || + op.name() == "GTD" || op.name() == "GTUD" || + op.name() == "NED" || op.name() == "NEUD" || op.name() == "EQ" || op.name() == "NE" || op.name() == "GE" ||op.name() == "GEU" || op.name() == "GT" || op.name() == "GTU" || op.name() == "LE" || op.name() == "LEU" || op.name() == "LT" || op.name() == "LTU" || - op.name() == "ORDF" || op.name() == "UORDF") { + op.name() == "ORDF" || op.name() == "UORDF" || + op.name() == "ORDD" || op.name() == "UORDD") { std::string boolOperandTypes = operandTypes; boolOperandTypes[0] = 'b'; o << "def : Pat<(" << match1.str() << "), " @@ -2095,6 +2141,40 @@ TDGen::llvmOperationPattern(const Operation& op, char operandType) { if (opName == "negf") return "fneg %1%"; if (opName == "sqrtf") return "fsqrt %1%"; + if (opName == "eqd") return "setoeq %1%, %2%"; + if (opName == "ned") return "setone %1%, %2%"; + if (opName == "ltd") return "setolt %1%, %2%"; + if (opName == "led") return "setole %1%, %2%"; + if (opName == "gtd") return "setogt %1%, %2%"; + if (opName == "ged") return "setoge %1%, %2%"; + + if (opName == "equd") return "setueq %1%, %2%"; + if (opName == "neud") return "setune %1%, %2%"; + if (opName == "ltud") return "setult %1%, %2%"; + if (opName == "leud") return "setule %1%, %2%"; + if (opName == "gtud") return "setugt %1%, %2%"; + if (opName == "geud") return "setuge %1%, %2%"; + + if (opName == "ordd") return "seto %1%, %2%"; + if (opName == "uordd") return "setuo %1%, %2%"; + + if (opName == "addd") return "fadd %1%, %2%"; + if (opName == "subd") return "fsub %1%, %2%"; + if (opName == "muld") return "fmul %1%, %2%"; + if (opName == "divd") return "fdiv %1%, %2%"; + if (opName == "absd") return "fabs %1%"; + if (opName == "negd") return "fneg %1%"; + if (opName == "sqrtd") return "fsqrt %1%"; + + //RWH: check + if (opName == "cid") return "sint_to_fp %1%"; + if (opName == "cdi") return "fp_to_sint %1%"; + if (opName == "cidu") return "uint_to_fp %1%"; + if (opName == "cdiu") return "fp_to_uint %1%"; + + if (opName == "cdf") return "fround %1%";//fptrunc %1% to float + if (opName == "cfd") return "fextend %1%";//fpext %1% to double + if (opName == "cif") return "sint_to_fp %1%"; if (opName == "cfi") return "fp_to_sint %1%"; if (opName == "cifu") return "uint_to_fp %1%"; @@ -2142,12 +2222,12 @@ TDGen::llvmOperationPattern(const Operation& op, char operandType) { if (opName == "ldh") return "sextloadi16 %1%"; if (opName == "ldhu") return "zextloadi16 %1%"; if (opName == "ldw") return "load %1%"; - //if (opName == "ldd") return "load"; + if (opName == "ldd") return "load %1%";//rwh: check if (opName == "stq") return "truncstorei8 %2%, %1%"; if (opName == "sth") return "truncstorei16 %2%, %1%"; if (opName == "stw") return "store %2%, %1%"; - //if (opName == "std") return "load"; + if (opName == "std") return "store %2%, %1%";//rwh: check if (opName == "sxhw") { switch (operandType) { @@ -2251,6 +2331,40 @@ TDGen::llvmOperationName(const Operation& op) { if (opName == "negf") return "fneg"; if (opName == "sqrtf") return "fsqrt"; + if (opName == "eqd") return "setoeq"; + if (opName == "ned") return "setone"; + if (opName == "ltd") return "setolt"; + if (opName == "led") return "setole"; + if (opName == "gtd") return "setogt"; + if (opName == "ged") return "setoge"; + + if (opName == "equd") return "setueq"; + if (opName == "neud") return "setune"; + if (opName == "ltud") return "setult"; + if (opName == "leud") return "setule"; + if (opName == "gtud") return "setugt"; + if (opName == "geud") return "setuge"; + + if (opName == "ordd") return "seto"; + if (opName == "uordd") return "setuo"; + + if (opName == "addd") return "fadd"; + if (opName == "subd") return "fsub"; + if (opName == "muld") return "fmul"; + if (opName == "divd") return "fdiv"; + if (opName == "absd") return "fabs"; + if (opName == "negd") return "fneg"; + if (opName == "sqrtd") return "fsqrt"; + + //RWH: check + if (opName == "cid") return "sint_to_fp"; + if (opName == "cdi") return "fp_to_sint"; + if (opName == "cidu") return "uint_to_fp"; + if (opName == "cdiu") return "fp_to_uint"; + + if (opName == "cdf") return "fround";//fptrunc to float + if (opName == "cfd") return "fextend";//fpext to double + if (opName == "cif") return "sint_to_fp"; if (opName == "cfi") return "fp_to_sint"; if (opName == "cifu") return "uint_to_fp"; @@ -2259,17 +2373,51 @@ TDGen::llvmOperationName(const Operation& op) { if (opName == "cfh") return "fround"; if (opName == "chf") return "fextend"; + if (opName == "cih") return "sint_to_fp"; + if (opName == "chi") return "fp_to_sint"; + if (opName == "cihu") return "uint_to_fp"; + if (opName == "chiu") return "fp_to_uint"; + + if (opName == "neuh") return "setune"; + if (opName == "eqh") return "setoeq"; + if (opName == "neh") return "setone"; + if (opName == "lth") return "setolt"; + if (opName == "leh") return "setole"; + if (opName == "gth") return "setogt"; + if (opName == "geh") return "setoge"; + + if (opName == "ordh") return "seto"; + if (opName == "uordh") return "setuo"; + + if (opName == "addh") return "fadd"; + if (opName == "subh") return "fsub"; + if (opName == "mulh") return "fmul"; + if (opName == "divh") return "fdiv"; + if (opName == "absh") return "fabs"; + if (opName == "negh") return "fneg"; + if (opName == "sqrth") return "fsqrt"; + + if (opName == "cih") return "sint_to_fp"; + if (opName == "chi") return "fp_to_sint"; + if (opName == "cihu") return "uint_to_fp"; + if (opName == "chiu") return "fp_to_uint"; + + if (opName == "csh") return "sint_to_fp"; + if (opName == "cshu") return "uint_to_fp"; + if (opName == "chs") return "fp_to_sint"; + if (opName == "chsu") return "fp_to_uint"; + if (opName == "ldq") return "sextloadi8"; if (opName == "ldqu") return "zextloadi8"; if (opName == "ldh") return "sextloadi16"; if (opName == "ldhu") return "zextloadi16"; if (opName == "ldw") return "load"; - //if (opName == "ldd") return "load"; + if (opName == "ldd") return "load";//RWH: check if (opName == "stq") return "truncstorei8"; if (opName == "sth") return "truncstorei16"; if (opName == "stw") return "store"; - //if (opName == "std") return "load"; + if (opName == "std") return "store"; if (opName == "sxhw") return "sext_inreg"; if (opName == "sxqw") return "sext_inreg"; @@ -2314,6 +2462,8 @@ TDGen::operationCanBeMatched( if (llvmOperationPattern(op,'r') != "") { return true; } + //RWH: should probably lookup operand type instead of assuming 'r' in call + //to llvmOperationPattern std::set useSet; if (recursionCycleCheck != NULL) { @@ -2591,11 +2741,15 @@ TDGen::constantNodeString( case 'b': return "(i1 " + Conversion::toString(node.value()) + ")"; case 'h': + case 'l': return "(f16 " + Conversion::toString(node.value()) + ")"; // TODO: f16 vectors not yet implemented case 'f': case 'k': return "(f32 " + Conversion::toString(node.value()) + ")"; + case 'd': + case 'e': + return "(f64 " + Conversion::toString(node.value()) + ")"; case 'm': return ("(v2f32 (build_vector (f32 " + Conversion::toString(node.value()) + @@ -2752,12 +2906,17 @@ TDGen::emulatingOpNodeLLVMName( } else { if (dynamic_cast( &(dag.tailNode(edge)))) { - if (operand.type() == Operand::SINT_WORD || - operand.type() == Operand::UINT_WORD || - operand.type() == Operand::RAW_DATA) { - operationName += 'i'; - } else { + if (operand.type() == Operand::BOOL) { + operationName += 'j'; + } else if (operand.type() == Operand::HALF_FLOAT_WORD) { + operationName += 'l'; + } else if (operand.type() == Operand::FLOAT_WORD) { operationName += 'k'; + } else if (operand.type() == Operand::DOUBLE_WORD) { + operationName += 'e'; + } else { + // should be a 32b integer register (signed, unsigned, or raw) + operationName += 'i'; } } else { TerminalNode* t = @@ -2999,7 +3158,7 @@ TDGen::operandToString( } } else if (operand.type() == Operand::DOUBLE_WORD) { // TODO: immediate check?? - return "F64Regs:$op" + Conversion::toString(idx); + return "R64FPRegs:$op" + Conversion::toString(idx); } else { assert(false && "Unknown operand type."); } @@ -3107,9 +3266,7 @@ TDGen::canBeImmediate( void TDGen::generateLoadStoreCopyGenerator(std::ostream& os) { - // vector store/load generation code - - TCEString prefix = "&"; // address of -operator + TCEString prefix = "&"; // address-of operator TCEString rcpf = "RegsRegClass"; TCEString rapf = "TCE::RARegRegClass"; @@ -3140,6 +3297,10 @@ TDGen::generateLoadStoreCopyGenerator(std::ostream& os) { os << "\tif (rc == " << prefix << "TCE::" << ri->first << "HFP" << rcpf << ") return TCE::STHhr;" << std::endl; } + if (ri->first.find("R64") == 0) { + os << "\tif (rc == " << prefix << "TCE::" << ri->first + << "FP" << rcpf << ") return TCE::STDdr;" << std::endl; + } } if (opNames_.find("STW2vr") != opNames_.end()) { @@ -3234,6 +3395,10 @@ TDGen::generateLoadStoreCopyGenerator(std::ostream& os) { os << "\tif (rc == " << prefix << "TCE::" << ri->first << "HFP" << rcpf << ") return TCE::LDHhr;" << std::endl; } + if (ri->first.find("R64") == 0) { + os << "\tif (rc == " << prefix << "TCE::" << ri->first + << "FP" << rcpf << ") return TCE::LDDdr;" << std::endl; + } } if (opNames_.find("LDW2vr") != opNames_.end()) { @@ -3313,6 +3478,9 @@ TDGen::createMinMaxGenerator(std::ostream& os) { if (opNames_.find("MINFfff") != opNames_.end()) { os << "if (vt == MVT::f32) return TCE::MINFfff;" << std::endl; } + if (opNames_.find("MINDddd") != opNames_.end()) { + os << "if (vt == MVT::f64) return TCE::MINDddd;" << std::endl; + } os << "\treturn -1; " << std::endl << "}" << std::endl; // MAX @@ -3324,22 +3492,41 @@ TDGen::createMinMaxGenerator(std::ostream& os) { if (opNames_.find("MAXFfff") != opNames_.end()) { os << "if (vt == MVT::f32) return TCE::MAXFfff;" << std::endl; } + if (opNames_.find("MAXDddd") != opNames_.end()) { + os << "if (vt == MVT::f64) return TCE::MAXDddd;" << std::endl; + } os << "\treturn -1; " << std::endl << "}" << std::endl; - // MINU + // MINU: unsigned for integer, unordered for float os << "int GeneratedTCEPlugin::getMinuOpcode(SDNode* n) const {" << std::endl; if (opNames_.find("MINUrrr") != opNames_.end()) { os << "\tEVT vt = n->getOperand(1).getValueType();" << std::endl; os << "if (vt == MVT::i32) return TCE::MINUrrr;" << std::endl; } + if (opNames_.find("MINUFfff") != opNames_.end()) { + os << "\tEVT vt = n->getOperand(1).getValueType();" << std::endl; + os << "if (vt == MVT::f32) return TCE::MINUFfff;" << std::endl; + } + if (opNames_.find("MINUDddd") != opNames_.end()) { + os << "\tEVT vt = n->getOperand(1).getValueType();" << std::endl; + os << "if (vt == MVT::f64) return TCE::MINUDddd;" << std::endl; + } os << "\treturn -1; " << std::endl << "}" << std::endl; - // MAXU + // MAXU: unsigned for integer, unordered for float os << "int GeneratedTCEPlugin::getMaxuOpcode(SDNode* n) const {" << std::endl; if (opNames_.find("MAXUrrr") != opNames_.end()) { os << "\tEVT vt = n->getOperand(1).getValueType();" << std::endl; os << "if (vt == MVT::i32) return TCE::MAXUrrr;" << std::endl; } + if (opNames_.find("MAXUFfff") != opNames_.end()) { + os << "\tEVT vt = n->getOperand(1).getValueType();" << std::endl; + os << "if (vt == MVT::f32) return TCE::MAXUFfff;" << std::endl; + } + if (opNames_.find("MAXUDddd") != opNames_.end()) { + os << "\tEVT vt = n->getOperand(1).getValueType();" << std::endl; + os << "if (vt == MVT::f64) return TCE::MAXUDddd;" << std::endl; + } os << "\treturn -1; " << std::endl << "}" << std::endl; } @@ -3362,6 +3549,7 @@ void TDGen::createShortExtLoadPatterns(std::ostream& os) { << "def : Pat<(i32 (extloadi16 ADDRri:$src)), (LDHri ADDRri:$src)>;" << std::endl; } +//RWH: need load patterns for 64b? void TDGen::writeCallingConv(std::ostream& os) { @@ -3371,9 +3559,13 @@ TDGen::writeCallingConv(std::ostream& os) { os << "def RetCC_TCE : CallingConv<[" << std::endl << " CCIfType<[i1], CCPromoteToType>," << std::endl << " CCIfType<[i32], CCAssignToReg<[IRES0]>>," << std::endl + << " CCIfType<[i64], CCAssignToReg<[DIRES0]>>," << std::endl << " CCIfType<[f32], CCAssignToReg<[IRES0]>>," << std::endl + << " CCIfType<[f64], CCAssignToReg<[DRES0]>>," << std::endl << " CCAssignToStack<4, 4>" << std::endl << "]>;" << std::endl << std::endl; + //RWH: //TODO: need to figure out function return value second column for + // f64 os << "// Function argument value types." << std::endl; os << @@ -3382,14 +3574,14 @@ TDGen::writeCallingConv(std::ostream& os) { " CCIfType<[i32], CCAssignToReg<[IRES0]>>," << std::endl; os << - " // Integer values get stored in stack slots that are 4 bytes in " + " // Integer and float32 values get stored in stack slots that are 4 bytes in " << std::endl << " // size and 4-byte aligned." << std::endl << " CCIfType<[i32, f32], CCAssignToStack<4, 4>>," << std::endl << - " // Integer values get stored in stack slots that are 8 bytes in" + " // Long integer and float64 values get stored in stack slots that are 8 bytes in" << std::endl << " // size and 8-byte aligned." << std::endl << - " CCIfType<[f64], CCAssignToStack<8, 8>>" << std::endl << + " CCIfType<[i64, f64], CCAssignToStack<8, 8>>" << std::endl << "]>;" << std::endl; } @@ -3464,6 +3656,9 @@ void TDGen::createSelectPatterns(std::ostream& os) { << "(XORbbj R1Regs:$c, 1))>;" << std::endl << std::endl; + //RWH: not defining for fp64 due to the coctail of instructions + // necessary, esp. with immediates, which aren't supported by the + // current instruction patterns. os << "def : Pat<(f32 (select R1Regs:$c, R32FPRegs:$t,R32FPRegs:$f))," << "(IORfff (ANDfff R32FPRegs:$t, (SUBfir 0, (ANDext R1Regs:$c, 1)))," << "(ANDfff R32FPRegs:$f, (SUBfri (ANDext R1Regs:$c,1),1)))>;" @@ -3482,6 +3677,7 @@ void TDGen::createSelectPatterns(std::ostream& os) { opNames_["SELECT_I32ir"] = "CMOV_SELECT"; opNames_["SELECT_I32ri"] = "CMOV_SELECT"; opNames_["SELECT_I32ii"] = "CMOV_SELECT"; + opNames_["SELECT_F64"] = "CMOV_SELECT"; opNames_["SELECT_F32"] = "CMOV_SELECT"; opNames_["SELECT_F16"] = "CMOV_SELECT"; @@ -3548,6 +3744,13 @@ void TDGen::createSelectPatterns(std::ostream& os) { << "(select R32IRegs:$c, (i32 imm:$T), (i32 imm:$F)))]>;" << std::endl << std::endl + << "def SELECT_F64 : InstTCE<(outs R64FPRegs:$dst)," + << "(ins R1Regs:$c, R64FPRegs:$T, R64FPRegs:$F)," + << "\"# SELECT_F64 PSEUDO!\"," + << "[(set R64FPRegs:$dst," + << "(select R1Regs:$c, R64FPRegs:$T, R64FPRegs:$F))]>;" + << std::endl << std::endl + << "def SELECT_F32 : InstTCE<(outs R32FPRegs:$dst)," << "(ins R1Regs:$c, R32FPRegs:$T, R32FPRegs:$F)," << "\"# SELECT_F32 PSEUDO!\"," diff --git a/tce/src/applibs/LLVMBackend/plugin/TCEDAGToDAGISel.cc b/tce/src/applibs/LLVMBackend/plugin/TCEDAGToDAGISel.cc index df6069a75e..816a1210a5 100644 --- a/tce/src/applibs/LLVMBackend/plugin/TCEDAGToDAGISel.cc +++ b/tce/src/applibs/LLVMBackend/plugin/TCEDAGToDAGISel.cc @@ -166,7 +166,10 @@ TCEDAGToDAGISel::Select(SDNode* n) { int opc; ISD::CondCode cc = cast( node2->getOperand(2))->get(); - + + //TODO://RWH: review for i64/f64, looks like + //getMinOpcode call should suffice if correct opcodes + //are included in the machine switch (cc) { case ISD::SETLT: case ISD::SETLE: diff --git a/tce/src/applibs/LLVMBackend/plugin/TCEFrameInfo.hh b/tce/src/applibs/LLVMBackend/plugin/TCEFrameInfo.hh index aefaf770c3..29e45c7643 100644 --- a/tce/src/applibs/LLVMBackend/plugin/TCEFrameInfo.hh +++ b/tce/src/applibs/LLVMBackend/plugin/TCEFrameInfo.hh @@ -44,7 +44,7 @@ namespace llvm { * ON EVERY LLVM UPDATE CHECK THESE INTERFACES VERY CAREFULLY * FROM include/llvm/Target/TargetFrameInfo.h * - * Compiler doesn warn or give error if parameter lists are changed. + * Compiler doesn't warn or give error if parameter lists are changed. * Many times also base class implementation works, but does not do * very good job. */ diff --git a/tce/src/applibs/LLVMBackend/plugin/TCEISelLowering.cc b/tce/src/applibs/LLVMBackend/plugin/TCEISelLowering.cc index a1ee250c05..f5a116ce38 100644 --- a/tce/src/applibs/LLVMBackend/plugin/TCEISelLowering.cc +++ b/tce/src/applibs/LLVMBackend/plugin/TCEISelLowering.cc @@ -390,6 +390,8 @@ TCETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } else if (sType == MVT::f16 || sType == MVT::f32) { ObjSize = 4; ValToStore = Val; + //TODO: inconsistent with argument counting code that allows f16 and + //f32 to be passed in registers. } else if (sType == MVT::i64 || sType == MVT::f64) { ObjSize = 8; ValToStore = Val; // Whole thing is passed in memory. @@ -422,7 +424,7 @@ TCETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Build a sequence of copy-to-reg nodes chained together with token // chain and flag operands which copy the outgoing args into registers. - // The InFlag in necessary since all emited instructions must be + // The InFlag is necessary since all emited instructions must be // stuck together. SDValue InFlag; @@ -476,8 +478,10 @@ TCETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Copy all of the result registers out of their specified physreg. (only one rv reg) for (unsigned i = 0; i != RVLocs.size(); ++i) { - unsigned Reg = RVLocs[i].getLocReg(); - + unsigned Reg = RVLocs[i].getLocReg(); + MVT sType = RVLocs[i].getValVT().SimpleTy; + + //TODO://RWH:: add code for handling f64 return values Chain = DAG.getCopyFromReg(Chain, dl, Reg, RVLocs[i].getValVT(), InFlag).getValue(1); InFlag = Chain.getValue(2); @@ -515,8 +519,9 @@ TCETargetLowering::TCETargetLowering( addRegisterClass(MVT::i1, &TCE::R1RegsRegClass); addRegisterClass(MVT::i32, &TCE::R32IRegsRegClass); - addRegisterClass(MVT::f32, &TCE::R32FPRegsRegClass); addRegisterClass(MVT::f16, &TCE::R32HFPRegsRegClass); + addRegisterClass(MVT::f32, &TCE::R32FPRegsRegClass); + addRegisterClass(MVT::f64, &TCE::R64FPRegsRegClass); if (opts->useVectorBackend()) { switch (tm_.maxVectorSize()) { @@ -640,6 +645,11 @@ TCETargetLowering::TCETargetLowering( setOperationAction(ISD::BR_CC, MVT::i1, Expand); setOperationAction(ISD::BR_CC, MVT::i32, Expand); setOperationAction(ISD::BR_CC, MVT::f32, Expand); +<<<<<<< TREE +======= + setOperationAction(ISD::BR_CC, MVT::f64, Expand); +#endif +>>>>>>> MERGE-SOURCE setOperationAction(ISD::MULHU, MVT::i32, Expand); setOperationAction(ISD::MULHS, MVT::i32, Expand); setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand); @@ -671,10 +681,15 @@ TCETargetLowering::TCETargetLowering( setOperationAction(ISD::UDIVREM, MVT::i32, Expand); setTruncStoreAction(MVT::f32, MVT::f16, Expand); + setTruncStoreAction(MVT::f64, MVT::f16, Expand); + setTruncStoreAction(MVT::f64, MVT::f32, Expand); #ifdef LLVM_OLDER_THAN_3_6 setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); #else setLoadExtAction(ISD::EXTLOAD, MVT::f16, MVT::f32, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f16, MVT::f64, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f64, Expand); #endif setOperationAction(ISD::ADDE, MVT::i32, Expand); @@ -684,7 +699,6 @@ TCETargetLowering::TCETargetLowering( setOperationAction(ISD::ADDE, MVT::i8, Expand); setOperationAction(ISD::ADDC, MVT::i8, Expand); - setStackPointerRegisterToSaveRestore(TCE::SP); // Set missing operations that can be emulated with emulation function @@ -939,6 +953,8 @@ TCETargetLowering::getRegForInlineAsmConstraint( case 'f': if (VT == MVT::f32) { return std::make_pair(0U, &TCE::R32FPRegsRegClass); + } else if (VT == MVT::f64) { + return std::make_pair(0U, &TCE::R64FPRegsRegClass); } } } @@ -949,7 +965,6 @@ TCETargetLowering::getRegForInlineAsmConstraint( #endif } - std::vector TCETargetLowering:: getRegClassForInlineAsmConstraint(const std::string &Constraint, EVT VT) const { diff --git a/tce/src/applibs/LLVMBackend/plugin/TCEInstrInfo.cc b/tce/src/applibs/LLVMBackend/plugin/TCEInstrInfo.cc index 522b5151c0..781b77606f 100644 --- a/tce/src/applibs/LLVMBackend/plugin/TCEInstrInfo.cc +++ b/tce/src/applibs/LLVMBackend/plugin/TCEInstrInfo.cc @@ -247,6 +247,9 @@ void TCEInstrInfo::copyPhysReg( } else if (TCE::R32IRegsRegClass.contains(destReg, srcReg)) { BuildMI(mbb, mbbi, dl, get(TCE::MOVI32rr), destReg) .addReg(srcReg, getKillRegState(killSrc)); + } else if (TCE::R64FPRegsRegClass.contains(destReg, srcReg)) { + BuildMI(mbb, mbbi, dl, get(TCE::MOVF64dd), destReg) + .addReg(srcReg, getKillRegState(killSrc)); } else if (TCE::R32FPRegsRegClass.contains(destReg, srcReg)) { BuildMI(mbb, mbbi, dl, get(TCE::MOVF32ff), destReg) .addReg(srcReg, getKillRegState(killSrc)); diff --git a/tce/src/applibs/LLVMBackend/plugin/TCEInstrInfo.td b/tce/src/applibs/LLVMBackend/plugin/TCEInstrInfo.td index abdbe87be3..09eddee760 100644 --- a/tce/src/applibs/LLVMBackend/plugin/TCEInstrInfo.td +++ b/tce/src/applibs/LLVMBackend/plugin/TCEInstrInfo.td @@ -39,6 +39,7 @@ SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>; def TCEftoi : SDNode<"TCEISD::FTOI", SDTTCEFTOI>; def TCEitof : SDNode<"TCEISD::ITOF", SDTTCEITOF>; +//RWH: need anything here for F64? // These are target-independent nodes, but have target-specific formats. def SDT_TCECallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>; @@ -142,6 +143,9 @@ def PRED_FALSE_MOVI32ri : InstTCE<(outs R32IRegs:$dst), (ins R1Regs:$pred, i32imm:$src), "$src -> $dst;", []>; +def MOVF64de : InstTCE<(outs R64FPRegs:$dst), (ins f64imm:$val), + "$val -> $dst;", + [(set R64FPRegs:$dst, (f64 fpimm:$val))]>; def MOVF32fk : InstTCE<(outs R32FPRegs:$dst), (ins f32imm:$val), "$val -> $dst;", [(set R32FPRegs:$dst, (f32 fpimm:$val))]>; @@ -151,6 +155,7 @@ def MOVF16hk : InstTCE<(outs R32HFPRegs:$dst), (ins f16imm:$val), } // TODO: are these really ever used for anything? propably not? +// RWH: add a double version? doesn't fit in R32.... let isReMaterializable = 1, isMoveImm = 1 in { def MOVF32fi : InstTCE<(outs R32FPRegs:$dst), (ins i32imm:$src), "$src -> $dst;", @@ -211,6 +216,11 @@ def MOVF32ff : InstTCE<(outs R32FPRegs:$dst), (ins R32FPRegs:$src), "$src -> $dst;", []>; +//RWH: add moves to/from I32 or I64? +def MOVF64dd : InstTCE<(outs R64FPRegs:$dst), (ins R64FPRegs:$src), + "$src -> $dst;", + []>; + def MOVF16hh : InstTCE<(outs R32HFPRegs:$dst), (ins R32HFPRegs:$src), "$src -> $dst;", []>; @@ -337,6 +347,18 @@ def: Pat<(brind (i32 R32Regs:$dst)), (TCEBRIND R32Regs:$dst)>; // FP stores/loads + def LDDdr : InstTCE<(outs R64FPRegs:$op2), + (ins MEMrr:$op1), "", [(set R64FPRegs:$op2, + (load ADDRrr:$op1))]>; + + def LDDdi : InstTCE<(outs R64FPRegs:$op2), + (ins MEMri:$op1), "", [(set R64FPRegs:$op2, + (load ADDRri:$op1))]>; + + def STDdr : InstTCE<(outs), (ins MEMrr:$op1, R64FPRegs:$op2), "", + [(store R64FPRegs:$op2, ADDRrr:$op1)]>; + def STDdi : InstTCE<(outs), (ins MEMri:$op1, R64FPRegs:$op2), "", + [(store R64FPRegs:$op2, ADDRri:$op1)]>; def LDWfr : InstTCE<(outs R32FPRegs:$op2), (ins MEMrr:$op1), "", [(set R32FPRegs:$op2, @@ -723,7 +745,7 @@ def : Pat <(v8f32 (scalar_to_vector (f32 R32FPRegs:$e))), subvector2_0)), subvector4_0))>; -// build done with inseet_subreg +// build done with insert_subreg def : Pat <(v2i32 (build_vector (i32 R32IRegs:$e1), R32IRegs:$e2)), (v2i32 (INSERT_SUBREG (v2i32 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), diff --git a/tce/src/applibs/LLVMBackend/plugin/TCETargetMachinePlugin.cc b/tce/src/applibs/LLVMBackend/plugin/TCETargetMachinePlugin.cc index d1bd9a754f..e0c95fde83 100644 --- a/tce/src/applibs/LLVMBackend/plugin/TCETargetMachinePlugin.cc +++ b/tce/src/applibs/LLVMBackend/plugin/TCETargetMachinePlugin.cc @@ -310,6 +310,12 @@ GeneratedTCEPlugin::operationName(unsigned opc) const { if (opc == TCE::MOVFI16rh) return MOVE; if (opc == TCE::MOVIF16hr) return MOVE; if (opc == TCE::MOVF16hi) return MOVE; + if (opc == TCE::MOVF64dd) return MOVE; + //if (opc == TCE::MOVF64d?) return MOVE; //TODO: move from 64b integer immediate + if (opc == TCE::MOVF64de) return MOVE; + //if (opc == TCE::MOVI64I1rr) return MOVE; + //if (opc == TCE::MOVFI64?d) return MOVE; //TODO: move to 64b integer register + //if (opc == TCE::MOVIF64d?) return MOVE; //TODO: move from 64b integer register if (opc == TCE::INLINEASM) return INLINEASM; diff --git a/tce/src/applibs/Scheduler/Algorithms/BBSchedulerController.hh b/tce/src/applibs/Scheduler/Algorithms/BBSchedulerController.hh index 67863d251e..fcfa849178 100644 --- a/tce/src/applibs/Scheduler/Algorithms/BBSchedulerController.hh +++ b/tce/src/applibs/Scheduler/Algorithms/BBSchedulerController.hh @@ -57,7 +57,7 @@ namespace TTAProgram { * A class that control operation of a basic block scheduler. * * This class handles calling other optimization passes and tranferring data - * for the actual BB scheduler and loop scheudlers etc. + * for the actual BB scheduler and loop schedulers etc. */ class BBSchedulerController : public BasicBlockPass, public ControlFlowGraphPass, public ProcedurePass, diff --git a/tce/src/applibs/Scheduler/Algorithms/BUBasicBlockScheduler.cc b/tce/src/applibs/Scheduler/Algorithms/BUBasicBlockScheduler.cc index c3079c08ec..8cf2508c5f 100644 --- a/tce/src/applibs/Scheduler/Algorithms/BUBasicBlockScheduler.cc +++ b/tce/src/applibs/Scheduler/Algorithms/BUBasicBlockScheduler.cc @@ -737,7 +737,7 @@ BUBasicBlockScheduler::scheduleMove( latestFromTrigger, ddg_->latestCycle( moveNode, INT_MAX, true)); // TODO: 0 or INT_MAX - // rename if can and may alow scheuduling later. + // rename if can and may alow scheduling later. if (minRenamedEC > ddgCycle) { minRenamedEC = rm_->latestCycle(minRenamedEC, moveNode); if (minRenamedEC > ddgCycle) { diff --git a/tce/src/applibs/Scheduler/Algorithms/BasicBlockScheduler.cc b/tce/src/applibs/Scheduler/Algorithms/BasicBlockScheduler.cc index 1ab82b5d8e..526aad4c62 100644 --- a/tce/src/applibs/Scheduler/Algorithms/BasicBlockScheduler.cc +++ b/tce/src/applibs/Scheduler/Algorithms/BasicBlockScheduler.cc @@ -816,7 +816,7 @@ BasicBlockScheduler::scheduleMove( sourceReadyCycle, ddg_->earliestCycle( moveNode, INT_MAX, true, true)); // TODO: 0 or INT_MAX - // rename if can and may alow scheuduling earlier. + // rename if can and may allow scheduling earlier. if (renamer_ != NULL && minRenamedEC < ddgCycle) { minRenamedEC = rm_->earliestCycle(minRenamedEC, moveNode); if (minRenamedEC < ddgCycle) { @@ -1051,7 +1051,7 @@ BasicBlockScheduler::scheduleInputOperandTempMoves(MoveNode& operandMove, MoveNo } /** - * Prints DDG to a dot file before and after scheudling + * Prints DDG to a dot file before and after scheduling * * @param ddg to print * @param name operation name for ddg diff --git a/tce/src/applibs/Scheduler/Algorithms/BypassingBUBasicBlockScheduler.cc b/tce/src/applibs/Scheduler/Algorithms/BypassingBUBasicBlockScheduler.cc index d35aa97463..e0548c8300 100644 --- a/tce/src/applibs/Scheduler/Algorithms/BypassingBUBasicBlockScheduler.cc +++ b/tce/src/applibs/Scheduler/Algorithms/BypassingBUBasicBlockScheduler.cc @@ -629,7 +629,7 @@ BypassingBUBasicBlockScheduler::scheduleMoveBU( endCycle -= targetMachine_->controlUnit()->delaySlots(); } - // if regcpy is unscheudled it must be rescheduled + // if regcpy is unscheduled it must be rescheduled if (t == TempRegAfter) { MoveNode* regCopyAfter = regCopiesAfter_[&mn]; if (regCopyAfter != NULL && @@ -726,7 +726,7 @@ BypassingBUBasicBlockScheduler::scheduleMoveBU( scheduledMoves_.insert(&mn); std::cerr << "\t\tScheduled to cycle: " << rmCycle << std::endl; - // if regcpy is unscheudled it must be rescheduled + // if regcpy is unscheduled it must be rescheduled if (t == TempRegBefore) { MoveNode* regCopyBefore = regCopiesBefore_[&mn]; if (regCopyBefore != NULL && diff --git a/tce/src/applibs/Scheduler/Algorithms/RegisterCopyAdder.cc b/tce/src/applibs/Scheduler/Algorithms/RegisterCopyAdder.cc index 7c2f80c693..0c430c1b63 100644 --- a/tce/src/applibs/Scheduler/Algorithms/RegisterCopyAdder.cc +++ b/tce/src/applibs/Scheduler/Algorithms/RegisterCopyAdder.cc @@ -311,7 +311,7 @@ RegisterCopyAdder::addRegisterCopies( * * Returns 0 in case there is a connection already. * - * @param originalMove The move that might not be unschedulable due to missing + * @param originalMove The move that might be unschedulable due to missing * connectivity. Will be modified to read from the temporary reg instead in * case connectivity is missing. * @param sourcePort The source port. @@ -530,7 +530,7 @@ RegisterCopyAdder::addConnectionRegisterCopies( be in the ProgramOperation, i.e., an operation move. The original move should be either the last of the chain or the first, in case it's input or output move, respectively. In case of register move, - the original move is considered the fisrt of the chain */ + the original move is considered the first of the chain */ TTAProgram::Terminal& omDest = originalMove.move().destination(); @@ -2061,7 +2061,7 @@ void RegisterCopyAdder::resultsScheduled( /** - * Find the temporary registers usef for reg copies + * Find the temporary registers used for reg copies */ void RegisterCopyAdder::findTempRegisters( diff --git a/tce/src/applibs/Scheduler/Algorithms/RegisterRenamer.cc b/tce/src/applibs/Scheduler/Algorithms/RegisterRenamer.cc index 4d24ba0475..f157e44e84 100644 --- a/tce/src/applibs/Scheduler/Algorithms/RegisterRenamer.cc +++ b/tce/src/applibs/Scheduler/Algorithms/RegisterRenamer.cc @@ -55,7 +55,7 @@ /** * Constructor. * - * @param machine machine for which we are scheudling + * @param machine machine for which we are scheduling RegisterRenamer::RegisterRenamer(const TTAMachine::Machine& machine) : machine_(machine) { @@ -66,7 +66,7 @@ RegisterRenamer::RegisterRenamer(const TTAMachine::Machine& machine) : /** * Constructor. * - * @param machine machine for which we are scheudling + * @param machine machine for which we are scheduling */ RegisterRenamer::RegisterRenamer( const TTAMachine::Machine& machine, TTAProgram::BasicBlock& bb) : @@ -421,7 +421,7 @@ RegisterRenamer::renameDestinationRegister( node.move().destination().registerFile(); // don't allow using same reg multiple times if loop scheduling. - // unscheudling would cause problems, missing war edges. + // unscheduling would cause problems, missing war edges. if (loopScheduling) { earliestCycle = -1; } diff --git a/tce/src/applibs/Scheduler/ProgramRepresentations/DDG/DataDependenceGraph.cc b/tce/src/applibs/Scheduler/ProgramRepresentations/DDG/DataDependenceGraph.cc index a9833d6f57..1cc7f00f8c 100644 --- a/tce/src/applibs/Scheduler/ProgramRepresentations/DDG/DataDependenceGraph.cc +++ b/tce/src/applibs/Scheduler/ProgramRepresentations/DDG/DataDependenceGraph.cc @@ -750,7 +750,7 @@ DataDependenceGraph::firstScheduledRegisterWrite( /** * Returns the highest cycle where accesses the given register. - * If unscheudled moves accessing the register, returns INT_MAX; + * If unscheduled moves accessing the register, returns INT_MAX; * If none found, returns -1. * * @param rf The register file. @@ -849,7 +849,7 @@ DataDependenceGraph::lastRegisterCycle( /** * Returns the lowest cycle where accesses the given register. - * If unscheudled moves accessing the register, returns -1. + * If unscheduled moves accessing the register, returns -1. * If none found, return INT_MAX * * @param rf The register file. diff --git a/tce/src/applibs/Scheduler/ResourceModel/ExecutionPipelineResource.cc b/tce/src/applibs/Scheduler/ResourceModel/ExecutionPipelineResource.cc index f85fc9558b..9f2fb3d010 100644 --- a/tce/src/applibs/Scheduler/ResourceModel/ExecutionPipelineResource.cc +++ b/tce/src/applibs/Scheduler/ResourceModel/ExecutionPipelineResource.cc @@ -1526,13 +1526,13 @@ int ExecutionPipelineResource::resultReadyCycle( * * This checks that the guards are exclusive, and that the moves are * to be scheduled in same cycle (one already scheduled, on is going to - * be scheudled to given cycle, which has to be the same. + * be scheduled to given cycle, which has to be the same. * the same cycle requirements makes sure the value of the guard cannot be * changed between the moves. * * @param mn1 movenode which has already been scheduled * @param mn2 move which we are going to schedule - * @param cycle cycle where we are going to scheudle mn2. + * @param cycle cycle where we are going to schedule mn2. */ bool ExecutionPipelineResource::exclusiveMoves( const MoveNode* mn1, const MoveNode* mn2, int cycle) const { @@ -1817,7 +1817,7 @@ ExecutionPipelineResource::resultNotOverWritten( // no loop scheduling. ii = INT_MAX; } else { - // loop scheudling. + // loop scheduling. // make sure opeation does not cause conflict // with itself on next loop iteration. if (resultReadyCycle + (int)ii <= resultReadCycle) { @@ -1958,7 +1958,7 @@ bool ExecutionPipelineResource::checkOperandAllowed( // fail if the other operand happens eaelier than this (it has later usage). - // loop scheudling, op overlaps + // loop scheduling, op overlaps // need to also check that is not written before the use. if (operandUseModCycle < instructionIndex(mn.cycle())) { diff --git a/tce/tools/scripts/install_llvm_3.5.sh b/tce/tools/scripts/install_llvm_3.5.sh index 936b3dc759..7d6a9bf0a0 100755 --- a/tce/tools/scripts/install_llvm_3.5.sh +++ b/tce/tools/scripts/install_llvm_3.5.sh @@ -55,10 +55,15 @@ fi cd ../../$llvm_co_dir -##### Add patches here. -# ... -##### +# apply patches +patch -Np0 < $patch_dir/clang-3.5-64bit-doubles-not-forced-to-single.patch ./configure $LLVM_BUILD_MODE --enable-shared --prefix=$TARGET_DIR || eexit "Configuring LLVM/Clang failed." make -j2 CXXFLAGS="-std=c++11" REQUIRES_RTTI=1 || eexit "Building LLVM/Clang failed." -make install || eexit "Installation of LLVM/Clang failed." +if -w $TARGET_DIR +then + make install || eexit "Installation of LLVM/Clang failed." +else + echo "Installation directory not writable, enter 'sudo' password or cancel and install manually." + sudo make install || eexit "Installation of LLVM/Clang failed." +fi diff --git a/tce/tools/scripts/install_llvm_3.7.sh b/tce/tools/scripts/install_llvm_3.7.sh index 491c26b22b..fb7b8bf54d 100755 --- a/tce/tools/scripts/install_llvm_3.7.sh +++ b/tce/tools/scripts/install_llvm_3.7.sh @@ -59,10 +59,16 @@ cd ../../$llvm_co_dir patch -Np0 < $patch_dir/llvm-3.7-custom-vector-extension.patch patch -Np0 < $patch_dir/llvm-3.7-tce.patch patch -Np0 < $patch_dir/llvm-3.7-tcele.patch +patch -Np0 < $patch_dir/clang-3.7-64bit-doubles-not-forced-to-single.patch ##### mkdir -p build cd build ../configure $LLVM_BUILD_MODE --enable-bindings=none --enable-shared --prefix=$TARGET_DIR || eexit "Configuring LLVM/Clang failed." -make -j2 CXXFLAGS="-std=c++11" REQUIRES_RTTI=1 || eexit "Building LLVM/Clang failed." -make install || eexit "Installation of LLVM/Clang failed." +make -j16 CXXFLAGS="-std=c++11" REQUIRES_RTTI=1 || eexit "Building LLVM/Clang failed." +if [ -w "$TARGET_DIR" ]; then + make install || eexit "Installation of LLVM/Clang failed." +else + echo "Installation directory not writable, enter 'sudo' password or cancel and install manually." + sudo make install || eexit "Installation of LLVM/Clang failed." +fi From a0186b97052a1445adae67ed12ab42f46fbf1149 Mon Sep 17 00:00:00 2001 From: "Ryan Hinton on tce-virtual.corral" Date: Tue, 15 Dec 2015 18:59:23 -0700 Subject: [PATCH 03/14] added patches for clang-3.7 and 3.5 (yes, I missed 3.6) to prevent clang trying to use integer/fp32 arithmetic for fp64 variables and operations --- ...5-64bit-doubles-not-forced-to-single.patch | 30 +++++++++++++++++ ...7-64bit-doubles-not-forced-to-single.patch | 32 +++++++++++++++++++ 2 files changed, 62 insertions(+) create mode 100644 tce/tools/patches/clang-3.5-64bit-doubles-not-forced-to-single.patch create mode 100644 tce/tools/patches/clang-3.7-64bit-doubles-not-forced-to-single.patch diff --git a/tce/tools/patches/clang-3.5-64bit-doubles-not-forced-to-single.patch b/tce/tools/patches/clang-3.5-64bit-doubles-not-forced-to-single.patch new file mode 100644 index 0000000000..3fb29b1a06 --- /dev/null +++ b/tce/tools/patches/clang-3.5-64bit-doubles-not-forced-to-single.patch @@ -0,0 +1,30 @@ +This patch allows the TCE target machine to use 64bit doubles instead of +forcing them to 32bit (single precision "float"). + +Index: lib/Basic/Targets.cpp +=================================================================== +--- tools/clang/lib/Basic/Targets.cpp (revision 222960) ++++ tools/clang/lib/Basic/Targets.cpp (working copy) +@@ -5219,15 +5219,15 @@ + PtrDiffType = SignedInt; + FloatWidth = 32; + FloatAlign = 32; +- DoubleWidth = 32; +- DoubleAlign = 32; +- LongDoubleWidth = 32; +- LongDoubleAlign = 32; ++ DoubleWidth = 64; ++ DoubleAlign = 64; ++ LongDoubleWidth = 64; ++ LongDoubleAlign = 64; + FloatFormat = &llvm::APFloat::IEEEsingle; +- DoubleFormat = &llvm::APFloat::IEEEsingle; +- LongDoubleFormat = &llvm::APFloat::IEEEsingle; ++ DoubleFormat = &llvm::APFloat::IEEEdouble; ++ LongDoubleFormat = &llvm::APFloat::IEEEdouble; + DescriptionString = "E-p:32:32-i8:8:32-i16:16:32-i64:32" +- "-f64:32-v64:32-v128:32-a:0:32-n32"; ++ "-f64:64:64-v64:32-v128:32-a:0:32-n32"; + AddrSpaceMap = &TCEOpenCLAddrSpaceMap; + UseAddrSpaceMapMangling = true; + } diff --git a/tce/tools/patches/clang-3.7-64bit-doubles-not-forced-to-single.patch b/tce/tools/patches/clang-3.7-64bit-doubles-not-forced-to-single.patch new file mode 100644 index 0000000000..70d9f3e92c --- /dev/null +++ b/tce/tools/patches/clang-3.7-64bit-doubles-not-forced-to-single.patch @@ -0,0 +1,32 @@ +This patch allows the TCE target machine to use 64bit doubles instead of +forcing them to 32bit (single precision "float"). + +Index: lib/Basic/Targets.cpp +=================================================================== +--- tools/clang/lib/Basic/Targets.cpp (revision 222960) ++++ tools/clang/lib/Basic/Targets.cpp (working copy) +@@ -5973,17 +5973,17 @@ + PtrDiffType = SignedInt; + FloatWidth = 32; + FloatAlign = 32; +- DoubleWidth = 32; +- DoubleAlign = 32; +- LongDoubleWidth = 32; +- LongDoubleAlign = 32; ++ DoubleWidth = 64; ++ DoubleAlign = 64; ++ LongDoubleWidth = 64; ++ LongDoubleAlign = 64; + FloatFormat = &llvm::APFloat::IEEEsingle; +- DoubleFormat = &llvm::APFloat::IEEEsingle; +- LongDoubleFormat = &llvm::APFloat::IEEEsingle; ++ DoubleFormat = &llvm::APFloat::IEEEdouble; ++ LongDoubleFormat = &llvm::APFloat::IEEEdouble; + DescriptionString = "E-p:32:32:32-i1:8:8-i8:8:32-" + "i16:16:32-i32:32:32-i64:32:32-" +- "f32:32:32-f64:32:32-v64:32:32-" ++ "f32:32:32-f64:64:64-v64:32:32-" + "v128:32:32-v256:32:32-v512:32:32-v1024:32:32-a0:0:32-n32"; + AddrSpaceMap = &TCEOpenCLAddrSpaceMap; + UseAddrSpaceMapMangling = true; + } From 29d3bac5df33468eab51005d2a9b151b3de7a091 Mon Sep 17 00:00:00 2001 From: "Ryan Hinton on tce-virtual.corral" Date: Wed, 16 Dec 2015 08:40:57 -0700 Subject: [PATCH 04/14] fixed leftover failed merge cruft --- tce/src/applibs/LLVMBackend/plugin/TCEISelLowering.cc | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tce/src/applibs/LLVMBackend/plugin/TCEISelLowering.cc b/tce/src/applibs/LLVMBackend/plugin/TCEISelLowering.cc index f5a116ce38..a0ab01359b 100644 --- a/tce/src/applibs/LLVMBackend/plugin/TCEISelLowering.cc +++ b/tce/src/applibs/LLVMBackend/plugin/TCEISelLowering.cc @@ -645,11 +645,7 @@ TCETargetLowering::TCETargetLowering( setOperationAction(ISD::BR_CC, MVT::i1, Expand); setOperationAction(ISD::BR_CC, MVT::i32, Expand); setOperationAction(ISD::BR_CC, MVT::f32, Expand); -<<<<<<< TREE -======= setOperationAction(ISD::BR_CC, MVT::f64, Expand); -#endif ->>>>>>> MERGE-SOURCE setOperationAction(ISD::MULHU, MVT::i32, Expand); setOperationAction(ISD::MULHS, MVT::i32, Expand); setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand); From 1e0a4a7d2d65f51ed3d5b8c35181ef586bbfb9de Mon Sep 17 00:00:00 2001 From: "Ryan Hinton on tce-virtual.corral" Date: Wed, 16 Dec 2015 18:41:12 -0700 Subject: [PATCH 05/14] tweaked gitignore after LLVM build, perhaps I have an old git version? (1.9.1) --- .gitignore | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 549df932ce..8594626cf3 100644 --- a/.gitignore +++ b/.gitignore @@ -8,8 +8,11 @@ vgcore* core.* core *.lo +*.so *.la +*.a **/runner +**/llvm-build-temp tce/Makefile Makefile.in tce/Makefile.in @@ -35,7 +38,7 @@ tce/*/Makefile tce/src/tools/tce_version_string.h tce/newlib-1.17.0/tce-llvm RE:.*/.deps -RE:.*/.libs +**/.libs tce/test/Makefile.in RE:tce/src/.*/Makefile RE:tce/test/.*/Makefile.in @@ -77,6 +80,7 @@ tce/src/procgen/ProDe/prode tce/src/procgen/ProGe/buildicdecoderplugin tce/src/procgen/ProGe/generateprocessor tce/src/procgen/ProGe/ttaunittester +tce/src/procgen/MachInfo/machinfo tce/a.out tce/aclocal.m4 tce/autom4te.cache/ From 49c40f62039244bdff4b2c84f26b30ff2cea321a Mon Sep 17 00:00:00 2001 From: "Ryan Hinton on tce-virtual.corral" Date: Wed, 16 Dec 2015 18:42:00 -0700 Subject: [PATCH 06/14] install llvm using sudo after build --- tce/tools/scripts/install_llvm_3.7.sh | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tce/tools/scripts/install_llvm_3.7.sh b/tce/tools/scripts/install_llvm_3.7.sh index 491c26b22b..6f34a12937 100755 --- a/tce/tools/scripts/install_llvm_3.7.sh +++ b/tce/tools/scripts/install_llvm_3.7.sh @@ -64,5 +64,10 @@ patch -Np0 < $patch_dir/llvm-3.7-tcele.patch mkdir -p build cd build ../configure $LLVM_BUILD_MODE --enable-bindings=none --enable-shared --prefix=$TARGET_DIR || eexit "Configuring LLVM/Clang failed." -make -j2 CXXFLAGS="-std=c++11" REQUIRES_RTTI=1 || eexit "Building LLVM/Clang failed." -make install || eexit "Installation of LLVM/Clang failed." +make -j4 CXXFLAGS="-std=c++11" REQUIRES_RTTI=1 || eexit "Building LLVM/Clang failed." +if [ -w "$TARGET_DIR" ]; then + make install || eexit "Installation of LLVM/Clang failed." +else + echo "Installation directory not writable, enter 'sudo' password or cancel and install manually." + sudo make install || eexit "Installation of LLVM/Clang failed." +fi From 405da15f704e96baea0ff5373b5f3288907d685b Mon Sep 17 00:00:00 2001 From: "Ryan Hinton on tce-virtual.corral" Date: Fri, 18 Dec 2015 08:05:37 -0700 Subject: [PATCH 07/14] added mkdir commands to create non-existent TARGET_DIR --- tce/tools/scripts/install_llvm_3.7.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tce/tools/scripts/install_llvm_3.7.sh b/tce/tools/scripts/install_llvm_3.7.sh index fb7b8bf54d..7e3f089aa3 100755 --- a/tce/tools/scripts/install_llvm_3.7.sh +++ b/tce/tools/scripts/install_llvm_3.7.sh @@ -66,9 +66,11 @@ mkdir -p build cd build ../configure $LLVM_BUILD_MODE --enable-bindings=none --enable-shared --prefix=$TARGET_DIR || eexit "Configuring LLVM/Clang failed." make -j16 CXXFLAGS="-std=c++11" REQUIRES_RTTI=1 || eexit "Building LLVM/Clang failed." +mkdir -p "$TARGET_DIR" if [ -w "$TARGET_DIR" ]; then make install || eexit "Installation of LLVM/Clang failed." else echo "Installation directory not writable, enter 'sudo' password or cancel and install manually." + sudo mkdir -p "$TARGET_DIR" sudo make install || eexit "Installation of LLVM/Clang failed." fi From 10006c6b05c5d5995d8ea598de46d67a705676f4 Mon Sep 17 00:00:00 2001 From: "Ryan Hinton on tce-virtual.corral" Date: Fri, 18 Dec 2015 08:32:11 -0700 Subject: [PATCH 08/14] added executable bit to test file --- testsuite/systemtest/bintools/Compiler/tcetest_double_support.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 testsuite/systemtest/bintools/Compiler/tcetest_double_support.sh diff --git a/testsuite/systemtest/bintools/Compiler/tcetest_double_support.sh b/testsuite/systemtest/bintools/Compiler/tcetest_double_support.sh old mode 100644 new mode 100755 From be5a08caddb11e113cdb8dafaf774552726b0798 Mon Sep 17 00:00:00 2001 From: "Ryan Hinton on tce-virtual.corral" Date: Fri, 18 Dec 2015 08:43:41 -0700 Subject: [PATCH 09/14] updating 64-bit value alignment to 64b to match machine definition (tce/src/applibs/LLVMBackend/TCETargetMachine.cc) --- tce/src/applibs/LLVMBackend/TDGen.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tce/src/applibs/LLVMBackend/TDGen.cc b/tce/src/applibs/LLVMBackend/TDGen.cc index 3fed0f0ce8..84f9dcb8ce 100644 --- a/tce/src/applibs/LLVMBackend/TDGen.cc +++ b/tce/src/applibs/LLVMBackend/TDGen.cc @@ -709,7 +709,7 @@ TDGen::write64bitRegisterInfo(std::ostream& o) { } o << std::endl - << "def R64Regs : RegisterClass<\"TCE\", [i64,f64], 32, (add " // DIRES + << "def R64Regs : RegisterClass<\"TCE\", [i64,f64], 64, (add " // DIRES << i64regs << ")> ;" << std::endl; @@ -731,7 +731,7 @@ TDGen::write64bitRegisterInfo(std::ostream& o) { f64regs += "DRES0"; } o << std::endl - << "def R64FPRegs : RegisterClass<\"TCE\", [f64], 32, (add " + << "def R64FPRegs : RegisterClass<\"TCE\", [f64], 64, (add " << f64regs << ")>;" << std::endl; } From 489889b437123285d019e147869a971ebb228d14 Mon Sep 17 00:00:00 2001 From: "Ryan Hinton on tce-virtual.corral" Date: Fri, 18 Dec 2015 10:57:39 -0700 Subject: [PATCH 10/14] fixed inline assembly register selection per excellent tracking down by pjaaskel, now I can compile _TCE_ABSD macro --- tce/src/applibs/LLVMBackend/plugin/TCEISelLowering.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tce/src/applibs/LLVMBackend/plugin/TCEISelLowering.cc b/tce/src/applibs/LLVMBackend/plugin/TCEISelLowering.cc index a0ab01359b..02d41684fa 100644 --- a/tce/src/applibs/LLVMBackend/plugin/TCEISelLowering.cc +++ b/tce/src/applibs/LLVMBackend/plugin/TCEISelLowering.cc @@ -945,12 +945,12 @@ TCETargetLowering::getRegForInlineAsmConstraint( if (Constraint.size() == 1) { switch (Constraint[0]) { case 'r': - return std::make_pair(0U, &TCE::R32IRegsRegClass); - case 'f': if (VT == MVT::f32) { return std::make_pair(0U, &TCE::R32FPRegsRegClass); } else if (VT == MVT::f64) { return std::make_pair(0U, &TCE::R64FPRegsRegClass); + } else { + return std::make_pair(0U, &TCE::R32IRegsRegClass); } } } From 2a4fe0e1c55ba208484b77f12ed016772a069acc Mon Sep 17 00:00:00 2001 From: "Ryan Hinton on tce-virtual.corral" Date: Fri, 18 Dec 2015 10:58:35 -0700 Subject: [PATCH 11/14] improved approximately equal checking to use hybrid relative/absolute checking, restored calls to fabs --- .../bintools/Compiler/data/doubles.cc | 47 ++++++++++--------- 1 file changed, 25 insertions(+), 22 deletions(-) diff --git a/testsuite/systemtest/bintools/Compiler/data/doubles.cc b/testsuite/systemtest/bintools/Compiler/data/doubles.cc index f3d8a65c84..bfb35eac56 100644 --- a/testsuite/systemtest/bintools/Compiler/data/doubles.cc +++ b/testsuite/systemtest/bintools/Compiler/data/doubles.cc @@ -5,10 +5,17 @@ //#include #include #include -#include "lwpr.h" +#include #include "tceops.h" +// this really belongs in tce/bclib/include/lwpr.h +void lwpr_print_str(const char* str) +{ + lwpr_print_str(const_cast(str)); +} + + // check storing double data to DATA section volatile double data64[16] = { 1.0, -2.0, 2.0/3.0, 10.0, 11.0, 12.0, 12.0+5.0, 0.9, //tanh(0.9), @@ -17,7 +24,7 @@ volatile double data64[16] = { 1.0, -2.0, 2.0/3.0, 10.0, // helper function until TUT comes back from Christmas... -#define my_fabs(val) (((val)>=0) ? (val) : (-(val))) +//#define my_fabs(val) (((val)>=0) ? (val) : (-(val))) //inline double my_fabs(double val) //{ // if (val >= 0.0) @@ -27,30 +34,26 @@ volatile double data64[16] = { 1.0, -2.0, 2.0/3.0, 10.0, //} // helper function, check for approximate equality, update success -void check_approx_eql(int &succ, double desired, double actual, const char *msg, double tol=1e-15) +void check_approx_eql(int &succ, double desired, double actual, const char *msg, double tol=5e-15) { - double norm = actual; - double val_abs; + // gentle variation from absolute tolerance at actual==0 to |actual|==1.0 + double abs_tol; + if (fabs(actual) < 1.0) { + // hybrid absolute/relative tolerance: absolute tolerance of tol/2 at + // actual==0, increases quadratically to tol*actual (with the right + // derivative) at |actual|==1.0 + abs_tol = 0.5*tol * (1.0 + actual * actual); + } else { + // typical relative error measure, i.e. the absolute tolerance is + // proportional to the actual value + abs_tol = fabs(tol * actual); + } - // handle normalization, don't want to divide by zero - //if (fabs(desired) < tol) { // newlib making a mess of this - //if (desired < tol) { // newlib making a mess of this - if (my_fabs(desired) < tol) { // newlib making a mess of this + // tolerance test + if (fabs(desired - actual) > abs_tol) { + lwpr_print_str(msg); succ = 0; } - //if (fabs(desired) < tol) { // newlib making a mess of this - //_TCE_ABSD(desired, val_abs); - //if (val_abs < tol) { - // norm = 1.0; - //} - -//compiler: // check for approximately equal within tolerance -//compiler: //if (fabs(actual - desired)/actual > tol) { -//compiler: _TCE_ABSD(actual - desired, val_abs); -//compiler: if (val_abs / actual > tol) { -//compiler: lwpr_print_str((char*)msg); -//compiler: succ = 0; -//compiler: } } From acb538b5d5df5e4229071d58c24170744eeeb3e1 Mon Sep 17 00:00:00 2001 From: "Ryan Hinton on tce-virtual.corral" Date: Fri, 18 Dec 2015 12:04:57 -0700 Subject: [PATCH 12/14] temporary changes to doubles.cc test source to hopefully narrow down the CompileError exception --- .../bintools/Compiler/data/doubles.cc | 65 ++++++++++--------- 1 file changed, 36 insertions(+), 29 deletions(-) diff --git a/testsuite/systemtest/bintools/Compiler/data/doubles.cc b/testsuite/systemtest/bintools/Compiler/data/doubles.cc index bfb35eac56..0918446339 100644 --- a/testsuite/systemtest/bintools/Compiler/data/doubles.cc +++ b/testsuite/systemtest/bintools/Compiler/data/doubles.cc @@ -23,7 +23,7 @@ volatile double data64[16] = { 1.0, -2.0, 2.0/3.0, 10.0, 2.3, 4.5, 1.1*4.9, 4.5};//pow(1.3, 4.5)}; -// helper function until TUT comes back from Christmas... +// helper function until we can get fabs sorted out //#define my_fabs(val) (((val)>=0) ? (val) : (-(val))) //inline double my_fabs(double val) //{ @@ -32,13 +32,20 @@ volatile double data64[16] = { 1.0, -2.0, 2.0/3.0, 10.0, // else // return -val; //} +double my_fabs(double val) +{ + //return std::abs(val); + double abs_val; + _TCE_ABSD(val, abs_val); + return abs_val; +} // helper function, check for approximate equality, update success void check_approx_eql(int &succ, double desired, double actual, const char *msg, double tol=5e-15) { // gentle variation from absolute tolerance at actual==0 to |actual|==1.0 double abs_tol; - if (fabs(actual) < 1.0) { + if (my_fabs(actual) < 1.0) { // hybrid absolute/relative tolerance: absolute tolerance of tol/2 at // actual==0, increases quadratically to tol*actual (with the right // derivative) at |actual|==1.0 @@ -46,11 +53,11 @@ void check_approx_eql(int &succ, double desired, double actual, const char *msg, } else { // typical relative error measure, i.e. the absolute tolerance is // proportional to the actual value - abs_tol = fabs(tol * actual); + abs_tol = my_fabs(tol * actual); } // tolerance test - if (fabs(desired - actual) > abs_tol) { + if (my_fabs(desired - actual) > abs_tol) { lwpr_print_str(msg); succ = 0; } @@ -64,31 +71,31 @@ int main() double a, b; // quick, simple tests - check_approx_eql(succ, 1.567, fabs(-1.567), "fabs (absd) failed"); - - // test that memory stores are correct - a = 2.0; - b = 3.0; - check_approx_eql(succ, a/b, data64[2], "stored divide result failed"); - a = 12.0; - b = 5.0; - check_approx_eql(succ, a+b, data64[6], "stored add result failed"); - a = -4.0; - b = 2.0; - check_approx_eql(succ, a-b, data64[10], "stored subtract result failed"); - a = 1.1; - b = 4.9; - check_approx_eql(succ, a*b, data64[14], "stored multiply result failed"); - - a = data64[14]; - b = data64[15]; - - a = 1.4; - - data64[2] = a + b; - data64[3] = a - b; - data64[4] = a * b; - data64[5] = a / b; + check_approx_eql(succ, 1.567, my_fabs(-1.567), "fabs (absd) failed"); + +//future: // test that memory stores are correct +//future: a = 2.0; +//future: b = 3.0; +//future: check_approx_eql(succ, a/b, data64[2], "stored divide result failed"); +//future: a = 12.0; +//future: b = 5.0; +//future: check_approx_eql(succ, a+b, data64[6], "stored add result failed"); +//future: a = -4.0; +//future: b = 2.0; +//future: check_approx_eql(succ, a-b, data64[10], "stored subtract result failed"); +//future: a = 1.1; +//future: b = 4.9; +//future: check_approx_eql(succ, a*b, data64[14], "stored multiply result failed"); +//future: +//future: a = data64[14]; +//future: b = data64[15]; +//future: +//future: a = 1.4; +//future: +//future: data64[2] = a + b; +//future: data64[3] = a - b; +//future: data64[4] = a * b; +//future: data64[5] = a / b; return !succ; From e5c91f38ad51ea309ea7c81ed665aac52bc8356b Mon Sep 17 00:00:00 2001 From: "Ryan Hinton on tce-virtual.corral" Date: Fri, 18 Dec 2015 12:15:41 -0700 Subject: [PATCH 13/14] added patterns to ignore test results --- .gitignore | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/.gitignore b/.gitignore index 8594626cf3..0d8700a2da 100644 --- a/.gitignore +++ b/.gitignore @@ -12,6 +12,7 @@ core *.la *.a **/runner +*-runner **/llvm-build-temp tce/Makefile Makefile.in @@ -24,6 +25,31 @@ compiletest.*.log testsuite/systemtest/bintools/Scheduler/**/program.bc testsuite/systemtest_long/bintools/Scheduler/**/program.bc testsuite/systemtest_long/bintools/Compiler/**/program.bc +tce/test/**/*.tpf +tce/test/**/*.tpef +tce/test/**/*.bin +tce/test/applibs/Assembler/AssemblerTest/data/temp.tceasm +tce/test/applibs/Explorer/DesignSpaceExplorerTest/data/test.dsdb +tce/test/applibs/Explorer/ExplorerPluginTest/data/test.dsdb +tce/test/applibs/TraceDB/ExecutionTraceTest/data/ +tce/test/applibs/bem/BEMGeneratorTest/data/complex_bem.bem +tce/test/applibs/bem/BEMGeneratorTest/data/par_bem.bem +tce/test/applibs/dsdb/DSDBManagerTest/data/1.adf +tce/test/applibs/dsdb/DSDBManagerTest/data/1.idf +tce/test/applibs/dsdb/DSDBManagerTest/dsdb1.ddb +tce/test/applibs/dsdb/DSDBManagerTest/dsdb2.ddb +tce/test/applibs/wxToolkit/GUIOptions/GUIOptionsSerializerTest/gui.config +tce/test/base/bem/BEMSerializerTest/data/ +tce/test/base/idf/IDFSerializerTest/data/new.idf +tce/test/base/mach/ADFSerializerTest/data/written.mdf +tce/test/base/osal/OperationSerializerTest/data/written.opp +tce/test/tools/RelationalDBTest/data/ +tce/test/tools/XMLSerializerTest/data/written.conf +compiletest.log +compiletest.error.log +systemtest +systemtest_long +systemtest_longlong RE:.*/Makefile.in tce/src/*/*/Makefile tce/src/*/*/*/Makefile From 5e202f60cbc9388e758c8e919088dd71434a0a4f Mon Sep 17 00:00:00 2001 From: "Ryan Hinton on tce-virtual.corral" Date: Fri, 18 Dec 2015 18:02:47 -0700 Subject: [PATCH 14/14] fixed lwpr include, wrapped in extern C --- testsuite/systemtest/bintools/Compiler/data/doubles.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/testsuite/systemtest/bintools/Compiler/data/doubles.cc b/testsuite/systemtest/bintools/Compiler/data/doubles.cc index 0918446339..8d7eebff0b 100644 --- a/testsuite/systemtest/bintools/Compiler/data/doubles.cc +++ b/testsuite/systemtest/bintools/Compiler/data/doubles.cc @@ -5,9 +5,10 @@ //#include #include #include -#include #include "tceops.h" - +extern "C" { +#include +} // this really belongs in tce/bclib/include/lwpr.h void lwpr_print_str(const char* str)