From ae1865edbfa6816ca872148948a95d8f55f71b6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A1s=20Vukics?= Date: Tue, 3 Jul 2018 12:41:45 +0200 Subject: [PATCH 01/18] Slicing operator now accepts any number of arguments (governed by BLITZ_ARRAY_LARGEST_RANK) --- blitz/array-impl.h | 156 +++++++++++++---------------------------- blitz/array/methods.cc | 21 ++++-- blitz/array/slice.h | 65 +++++++---------- blitz/array/slicing.cc | 26 +++---- blitz/blitz.h | 4 ++ 5 files changed, 101 insertions(+), 171 deletions(-) diff --git a/blitz/array-impl.h b/blitz/array-impl.h index 4bff205a..94999dce 100644 --- a/blitz/array-impl.h +++ b/blitz/array-impl.h @@ -40,6 +40,8 @@ * - apply */ +#if !BOOST_PP_IS_ITERATING + #ifndef BZ_ARRAY_H #define BZ_ARRAY_H @@ -63,6 +65,12 @@ #endif +#include +#include +#include +#include + + BZ_NAMESPACE(blitz) /* @@ -866,12 +874,11 @@ class Array : public MemoryBlockReference * a combination of integer and Range arguments. It's not intended * for end-user use. */ - template - Array(Array& array, R0 r0, R1 r1, R2 r2, - R3 r3, R4 r4, R5 r5, R6 r6, R7 r7, R8 r8, R9 r9, R10 r10) + + template + Array(Array& array, BOOST_PP_ENUM_BINARY_PARAMS(BLITZ_ARRAY_LARGEST_RANK,R,r)) { - constructSlice(array, r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10); + constructSlice(array, BOOST_PP_ENUM_PARAMS(BLITZ_ARRAY_LARGEST_RANK,r)); } ////////////////////////////////////////////// @@ -1026,12 +1033,12 @@ class Array : public MemoryBlockReference const TinyVector& ordering() const { return storage_.ordering(); } - void transposeSelf(int r0, int r1, int r2=0, - int r3=0, int r4=0, int r5=0, int r6=0, int r7=0, int r8=0, int - r9=0, int r10=0); - T_array transpose(int r0, int r1, int r2=0, - int r3=0, int r4=0, int r5=0, int r6=0, int r7=0, int r8=0, int - r9=0, int r10=0) const; +#define ARGUMENTS_here BOOST_PP_ENUM_PARAMS(BOOST_PP_SUB(BLITZ_ARRAY_LARGEST_RANK,2), int=0 BOOST_PP_INTERCEPT ) + + void transposeSelf(int r0, int r1, ARGUMENTS_here); + T_array transpose 
(int r0, int r1, ARGUMENTS_here) const; + +#undef ARGUMENTS_here static int rank() { return rank_; } @@ -1981,101 +1988,15 @@ class Array : public MemoryBlockReference #ifdef BZ_HAVE_PARTIAL_ORDERING - template - typename SliceInfo::T_slice - operator()(T1 r1, T2 r2) const - { - typedef typename SliceInfo::T_slice slice; - return slice(noConst(), r1, r2, nilArraySection(), nilArraySection(), nilArraySection(), - nilArraySection(), nilArraySection(), nilArraySection(), - nilArraySection(), nilArraySection(), nilArraySection()); - } +#define DEFAULT_print(z, n, data) nilArraySection() +#define BOOST_PP_ITERATION_LIMITS (2,BLITZ_ARRAY_LARGEST_RANK) +#define BOOST_PP_FILENAME_1 "blitz/array-impl.h" - template - typename SliceInfo::T_slice - operator()(T1 r1, T2 r2, T3 r3) const - { - typedef typename SliceInfo::T_slice slice; - return slice(noConst(), r1, r2, r3, nilArraySection(), nilArraySection(), nilArraySection(), - nilArraySection(), nilArraySection(), nilArraySection(), - nilArraySection(), nilArraySection()); - } +#include BOOST_PP_ITERATE() - template - typename SliceInfo::T_slice - operator()(T1 r1, T2 r2, T3 r3, T4 r4) const - { - typedef typename SliceInfo::T_slice slice; - return slice(noConst(), r1, r2, r3, r4, nilArraySection(), nilArraySection(), - nilArraySection(), nilArraySection(), nilArraySection(), - nilArraySection(), nilArraySection()); - } - - template - typename SliceInfo::T_slice - operator()(T1 r1, T2 r2, T3 r3, T4 r4, T5 r5) const - { - typedef typename SliceInfo::T_slice slice; - return slice(noConst(), r1, r2, r3, r4, r5, nilArraySection(), - nilArraySection(), nilArraySection(), nilArraySection(), - nilArraySection(), nilArraySection()); - } - - template - typename SliceInfo::T_slice - operator()(T1 r1, T2 r2, T3 r3, T4 r4, T5 r5, T6 r6) const - { - typedef typename SliceInfo::T_slice slice; - return slice(noConst(), r1, r2, r3, r4, r5, r6, nilArraySection(), nilArraySection(), nilArraySection(), - nilArraySection(), nilArraySection()); 
- } - - template - typename SliceInfo::T_slice - operator()(T1 r1, T2 r2, T3 r3, T4 r4, T5 r5, T6 r6, T7 r7) const - { - typedef typename SliceInfo::T_slice slice; - return slice(noConst(), r1, r2, r3, r4, r5, r6, r7, nilArraySection(), nilArraySection(), - nilArraySection(), nilArraySection()); - } - - template - typename SliceInfo::T_slice - operator()(T1 r1, T2 r2, T3 r3, T4 r4, T5 r5, T6 r6, T7 r7, T8 r8) const - { - typedef typename SliceInfo::T_slice slice; - return slice(noConst(), r1, r2, r3, r4, r5, r6, r7, r8, - nilArraySection(), nilArraySection(), nilArraySection()); - } - - template - typename SliceInfo::T_slice - operator()(T1 r1, T2 r2, T3 r3, T4 r4, T5 r5, T6 r6, T7 r7, T8 r8, T9 r9) const - { - typedef typename SliceInfo::T_slice slice; - return slice(noConst(), r1, r2, r3, r4, r5, r6, r7, r8, r9, nilArraySection(), nilArraySection()); - } - - template - typename SliceInfo::T_slice - operator()(T1 r1, T2 r2, T3 r3, T4 r4, T5 r5, T6 r6, T7 r7, T8 r8, T9 r9, T10 r10) const - { - typedef typename SliceInfo::T_slice slice; - return slice(noConst(), r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, nilArraySection()); - } - - template - typename SliceInfo::T_slice - operator()(T1 r1, T2 r2, T3 r3, T4 r4, T5 r5, T6 r6, T7 r7, T8 r8, T9 r9, T10 r10, T11 r11) const - { - typedef typename SliceInfo::T_slice slice; - return slice(noConst(), r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11); - } +#undef BOOST_PP_FILENAME_1 +#undef BOOST_PP_ITERATION_LIMITS +#undef DEFAULT_print #endif // BZ_HAVE_PARTIAL_ORDERING @@ -2442,10 +2363,9 @@ class Array : public MemoryBlockReference void calculateZeroOffset(); - template - void constructSlice(Array& array, R0 r0, R1 r1, R2 r2, - R3 r3, R4 r4, R5 r5, R6 r6, R7 r7, R8 r8, R9 r9, R10 r10); + + template + void constructSlice(Array& array, BOOST_PP_ENUM_PARAMS(BLITZ_ARRAY_LARGEST_RANK,R)); template void slice(int& setRank, Range r, Array& array, @@ -2561,3 +2481,23 @@ BZ_NAMESPACE_END #endif // BZ_ARRAY_H + + +#else // 
BOOST_PP_IS_ITERATING + + +#define N BOOST_PP_ITERATION() + +template +typename SliceInfo::T_slice +operator()(BOOST_PP_ENUM_BINARY_PARAMS(N,T,r)) const +{ + typedef typename SliceInfo::T_slice slice; + return slice(noConst(), BOOST_PP_ENUM_PARAMS(N,r) BOOST_PP_ENUM_TRAILING(BOOST_PP_SUB(BLITZ_ARRAY_LARGEST_RANK,N),DEFAULT_print,~) ); +} + + +#undef N + + +#endif // BOOST_PP_IS_ITERATING diff --git a/blitz/array/methods.cc b/blitz/array/methods.cc index 057e702f..75297578 100644 --- a/blitz/array/methods.cc +++ b/blitz/array/methods.cc @@ -354,27 +354,36 @@ void Array::makeUnique() } template -Array Array::transpose(int r0, int r1, - int r2, int r3, int r4, int r5, int r6, int r7, int r8, int r9, int r10) const +Array Array::transpose(BOOST_PP_ENUM_PARAMS(BLITZ_ARRAY_LARGEST_RANK,int r)) const { T_array B(*this); - B.transposeSelf(r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10); + B.transposeSelf(BOOST_PP_ENUM_PARAMS(BLITZ_ARRAY_LARGEST_RANK,r)); return B; } template -void Array::transposeSelf(int r0, int r1, int r2, int r3, - int r4, int r5, int r6, int r7, int r8, int r9, int r10) +void Array::transposeSelf(BOOST_PP_ENUM_PARAMS(BLITZ_ARRAY_LARGEST_RANK,int r)) { - BZPRECHECK(r0+r1+r2+r3+r4+r5+r6+r7+r8+r9+r10 == N_rank * (N_rank-1) / 2, +#define DEFAULT_print(z, n, data) +r##n + + BZPRECHECK(r0+BOOST_PP_REPEAT_FROM_TO(1,BLITZ_ARRAY_LARGEST_RANK,DEFAULT_print,~) == N_rank * (N_rank-1) / 2, "Invalid array transpose() arguments." 
<< endl << "Arguments must be a permutation of the numerals (0,...," << (N_rank - 1) << ")"); +#undef DEFAULT_print + // Create a temporary reference copy of this array Array x(*this); // Now reorder the dimensions using the supplied permutation +#define DEFAULT_print(z, n, data) doTranspose(n, r##n, x); + + BOOST_PP_REPEAT(BLITZ_ARRAY_LARGEST_RANK,DEFAULT_print,~) + +#undef DEFAULT_print + + doTranspose(0, r0, x); doTranspose(1, r1, x); doTranspose(2, r2, x); diff --git a/blitz/array/slice.h b/blitz/array/slice.h index 6837b214..0884cdb6 100644 --- a/blitz/array/slice.h +++ b/blitz/array/slice.h @@ -37,6 +37,13 @@ #include + +#include +#include +#include +#include + + BZ_NAMESPACE(blitz) // Forward declarations @@ -77,52 +84,30 @@ class ArraySectionInfo { static const int isPick = 0; }; -template + +// #define NUMBER_OF_PARAMS BOOST_PP_SUB(BLITZ_ARRAY_LARGEST_RANK,1) + +template class SliceInfo { public: - static const int - numValidTypes = ArraySectionInfo::isValidType - + ArraySectionInfo::isValidType - + ArraySectionInfo::isValidType - + ArraySectionInfo::isValidType - + ArraySectionInfo::isValidType - + ArraySectionInfo::isValidType - + ArraySectionInfo::isValidType - + ArraySectionInfo::isValidType - + ArraySectionInfo::isValidType - + ArraySectionInfo::isValidType - + ArraySectionInfo::isValidType; +#define DEFAULT_print(z, n, data) + ArraySectionInfo::isValidType + + static const int + numValidTypes = ArraySectionInfo::isValidType BOOST_PP_REPEAT_FROM_TO(1,BLITZ_ARRAY_LARGEST_RANK,DEFAULT_print,~); + +#undef DEFAULT_print +#define DEFAULT_print(z, n, data) + ArraySectionInfo::rank static const int - rank = ArraySectionInfo::rank - + ArraySectionInfo::rank - + ArraySectionInfo::rank - + ArraySectionInfo::rank - + ArraySectionInfo::rank - + ArraySectionInfo::rank - + ArraySectionInfo::rank - + ArraySectionInfo::rank - + ArraySectionInfo::rank - + ArraySectionInfo::rank - + ArraySectionInfo::rank; + rank = ArraySectionInfo::rank 
BOOST_PP_REPEAT_FROM_TO(1,BLITZ_ARRAY_LARGEST_RANK,DEFAULT_print,~); + +#undef DEFAULT_print +#define DEFAULT_print(z, n, data) + ArraySectionInfo::isPick static const int - isPick = ArraySectionInfo::isPick - + ArraySectionInfo::isPick - + ArraySectionInfo::isPick - + ArraySectionInfo::isPick - + ArraySectionInfo::isPick - + ArraySectionInfo::isPick - + ArraySectionInfo::isPick - + ArraySectionInfo::isPick - + ArraySectionInfo::isPick - + ArraySectionInfo::isPick - + ArraySectionInfo::isPick; + isPick = ArraySectionInfo::isPick BOOST_PP_REPEAT_FROM_TO(1,BLITZ_ARRAY_LARGEST_RANK,DEFAULT_print,~); + +#undef DEFAULT_print typedef Array T_array; typedef Array T_slice; diff --git a/blitz/array/slicing.cc b/blitz/array/slicing.cc index 5a16fd24..d7a86024 100644 --- a/blitz/array/slicing.cc +++ b/blitz/array/slicing.cc @@ -30,6 +30,8 @@ #ifndef BZ_ARRAYSLICING_CC #define BZ_ARRAYSLICING_CC +#include + #ifndef BZ_ARRAY_H #error must be included via #endif @@ -214,12 +216,8 @@ void Array::constructSubarray( * combination of int and Range parameters. There's room for up * to 11 parameters, but any unused parameters have no effect. 
*/ -template template -void Array::constructSlice(Array& array, - R0 r0, R1 r1, R2 r2, R3 r3, R4 r4, R5 r5, R6 r6, R7 r7, R8 r8, R9 r9, - R10 r10) +template template +void Array::constructSlice(Array& array, BOOST_PP_ENUM_BINARY_PARAMS(BLITZ_ARRAY_LARGEST_RANK,R,r)) { MemoryBlockReference::changeBlock(array); @@ -227,17 +225,11 @@ void Array::constructSlice(Array& array, TinyVector rankMap; - slice(setRank, r0, array, rankMap, 0); - slice(setRank, r1, array, rankMap, 1); - slice(setRank, r2, array, rankMap, 2); - slice(setRank, r3, array, rankMap, 3); - slice(setRank, r4, array, rankMap, 4); - slice(setRank, r5, array, rankMap, 5); - slice(setRank, r6, array, rankMap, 6); - slice(setRank, r7, array, rankMap, 7); - slice(setRank, r8, array, rankMap, 8); - slice(setRank, r9, array, rankMap, 9); - slice(setRank, r10, array, rankMap, 10); +#define DEFAULT_print(z, n, data) slice(setRank, r##n, array, rankMap, n); + + BOOST_PP_REPEAT(BLITZ_ARRAY_LARGEST_RANK,DEFAULT_print,~) + +#undef DEFAULT_print // Redo the ordering_ array to account for dimensions which // have been sliced away. 
diff --git a/blitz/blitz.h b/blitz/blitz.h index 4694713a..7470af36 100644 --- a/blitz/blitz.h +++ b/blitz/blitz.h @@ -54,6 +54,10 @@ #include // Performance tuning #include // Profiling +#ifndef BLITZ_ARRAY_LARGEST_RANK +#define BLITZ_ARRAY_LARGEST_RANK 11 +#endif // BLITZ_ARRAY_LARGEST_RANK + #ifdef BZ_HAVE_STL #include #endif From 001d4082a8a97a67f26498f9c4b83d006be53c9c Mon Sep 17 00:00:00 2001 From: Vukics Date: Fri, 24 Apr 2020 11:41:03 +0200 Subject: [PATCH 02/18] gitignore --- .gitignore | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.gitignore b/.gitignore index b8bd0267..6f773328 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,9 @@ +# Build dir +/build* + +# Kdev files +*.kdev4 + # Compiled Object files *.slo *.lo From 3d18c90195b457490a96e008573f6438066bcea3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dr=2E=20Andr=C3=A1s=20Vukics?= Date: Wed, 2 Sep 2020 00:56:16 +0200 Subject: [PATCH 03/18] Enable serialization is not an option now, it gets enabled if Boost.Serialization is found --- appveyor.yml | 2 +- blitz/CMakeLists.txt | 25 +++++++++++-------------- 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index 78f0c2e2..55ab0ae6 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -35,7 +35,7 @@ build_script: - cmake --build . --target install - - cmake .. -DCMAKE_GENERATOR_PLATFORM="%CMAKE_GEN_PLAT%" -DENABLE_SERIALISATION=1 + - cmake .. -DCMAKE_GENERATOR_PLATFORM="%CMAKE_GEN_PLAT%" - cmake --build . --config Debug - cmake --build . --config Release - cmake --build . 
--target install diff --git a/blitz/CMakeLists.txt b/blitz/CMakeLists.txt index a3839b16..1524030e 100644 --- a/blitz/CMakeLists.txt +++ b/blitz/CMakeLists.txt @@ -25,20 +25,17 @@ else() endif() set(EXTRA_LIBS) -option(ENABLE_SERIALISATION "Enable serialization support using Boost::Serialization" OFF) -if (ENABLE_SERIALISATION) - find_package(Boost COMPONENTS serialization) - if (Boost_FOUND) - set(BZ_HAVE_BOOST TRUE) - include_directories(${Boost_INCLUDE_DIRS}) - if (Boost_SERIALIZATION_FOUND) - set(DEP_PKGS ${DEP_PKGS} Boost) - set(BOOST_command "Boost COMPONENTS serialization" PARENT_SCOPE) - set(BZ_HAVE_BOOST_SERIALIZATION TRUE) - set(EXTRA_LIBS ${EXTRA_LIBS} ${BOOST_LIBRARIES}) - else() - message(WARNING "Boost serialization library not found !") - endif() +find_package(Boost COMPONENTS serialization) +if (Boost_FOUND) + set(BZ_HAVE_BOOST TRUE) + include_directories(${Boost_INCLUDE_DIRS}) + if (Boost_SERIALIZATION_FOUND) + set(DEP_PKGS ${DEP_PKGS} Boost) + set(BOOST_command "Boost COMPONENTS serialization" PARENT_SCOPE) + set(BZ_HAVE_BOOST_SERIALIZATION TRUE) + set(EXTRA_LIBS ${EXTRA_LIBS} ${BOOST_LIBRARIES}) + else() + message(WARNING "Boost serialization library not found !") endif() endif() From a39b77e33a92cce48495306817dc1573f62f755e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dr=2E=20Andr=C3=A1s=20Vukics?= Date: Mon, 7 Sep 2020 14:19:52 +0200 Subject: [PATCH 04/18] =?UTF-8?q?Greatly=20simplified=20the=20content=20of?= =?UTF-8?q?=20the=20package,=20making=20configuration/build=20much=20quick?= =?UTF-8?q?er.=20NOTE:=20testsuite=20kept=20intact,=20but=20probably=20doe?= =?UTF-8?q?sn=E2=80=99t=20run=20due=20to=20the=20absence=20of=20the=20`ran?= =?UTF-8?q?dom`=20component?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CMakeLists.txt | 9 +- benchmarks/CMakeLists.txt | 33 - benchmarks/acou3d.cpp | 211 - benchmarks/acou3db1.cpp | 30 - benchmarks/acou3db2.cpp | 30 - benchmarks/acou3db3.cpp | 26 - 
benchmarks/acou3db4.cpp | 28 - benchmarks/acou3df.f | 134 - benchmarks/acou3df2.f | 160 - benchmarks/acou3df90.f90 | 95 - benchmarks/acou3df902.f90 | 119 - benchmarks/acoustic.cpp | 369 - benchmarks/acousticf.f | 121 - benchmarks/acousticf2.f | 163 - benchmarks/acousticf90.f90 | 101 - benchmarks/acousticf902.f90 | 116 - benchmarks/arrdaxpy.cpp | 150 - benchmarks/arrdaxpyf.f | 26 - benchmarks/arrexpr1.cpp | 85 - benchmarks/arrexpr1.m | 28 - benchmarks/cfd.cpp | 129 - benchmarks/cfdf.f | 103 - benchmarks/cfortran.h | 2090 --- benchmarks/chunky.cpp | 156 - benchmarks/compiletime.cpp | 0 benchmarks/ctime-results | 94 - benchmarks/ctime1.cpp | 8 - benchmarks/ctime1v.cpp | 8 - benchmarks/ctime2.cpp | 15 - benchmarks/ctime2v.cpp | 15 - benchmarks/ctime3.cpp | 19 - benchmarks/ctime3v.cpp | 32 - benchmarks/ctime4.cpp | 24 - benchmarks/ctime4v.cpp | 39 - benchmarks/ctime5.cpp | 29 - benchmarks/ctime5c.cpp | 57 - benchmarks/ctime5v.cpp | 42 - benchmarks/daxpy.cpp | 346 - benchmarks/daxpy2.cpp | 312 - benchmarks/daxpyf90-2.f90 | 15 - benchmarks/daxpyf90.f90 | 12 - benchmarks/dot.cpp | 137 - benchmarks/dot2.cpp | 89 - benchmarks/echof2-back.f | 133 - benchmarks/echotune.cpp | 49 - benchmarks/echotune.m | 63 - benchmarks/echotunef.f | 128 - benchmarks/fdaxpy.f | 48 - benchmarks/fidaxpy.f | 27 - benchmarks/floop1.cpp | 438 - benchmarks/floop10.cpp | 483 - benchmarks/floop10f.f | 17 - benchmarks/floop10f90.f90 | 16 - benchmarks/floop11.cpp | 504 - benchmarks/floop11f.f | 17 - benchmarks/floop11f90.f90 | 16 - benchmarks/floop12.cpp | 527 - benchmarks/floop12f.f | 17 - benchmarks/floop12f90.f90 | 16 - benchmarks/floop13.cpp | 527 - benchmarks/floop13f.f | 17 - benchmarks/floop13f90.f90 | 16 - benchmarks/floop14.cpp | 482 - benchmarks/floop14f.f | 17 - benchmarks/floop14f90.f90 | 16 - benchmarks/floop15.cpp | 482 - benchmarks/floop15f.f | 17 - benchmarks/floop15f90.f90 | 16 - benchmarks/floop16.cpp | 505 - benchmarks/floop16f.f | 17 - benchmarks/floop16f90.f90 | 16 - 
benchmarks/floop17.cpp | 504 - benchmarks/floop17f.f | 17 - benchmarks/floop17f90.f90 | 16 - benchmarks/floop18.cpp | 462 - benchmarks/floop18f.f | 17 - benchmarks/floop18f90.f90 | 16 - benchmarks/floop19.cpp | 484 - benchmarks/floop19f.f | 17 - benchmarks/floop19f90.f90 | 16 - benchmarks/floop1f.f | 17 - benchmarks/floop1f90.f90 | 16 - benchmarks/floop2.cpp | 439 - benchmarks/floop21.cpp | 504 - benchmarks/floop21f.f | 17 - benchmarks/floop21f90.f90 | 16 - benchmarks/floop22.cpp | 504 - benchmarks/floop22f.f | 17 - benchmarks/floop22f90.f90 | 16 - benchmarks/floop23.cpp | 526 - benchmarks/floop23f.f | 17 - benchmarks/floop23f90.f90 | 16 - benchmarks/floop24.cpp | 526 - benchmarks/floop24f.f | 17 - benchmarks/floop24f90.f90 | 16 - benchmarks/floop25.cpp | 507 - benchmarks/floop25f.f | 17 - benchmarks/floop25f90.f90 | 16 - benchmarks/floop2f.f | 17 - benchmarks/floop2f90.f90 | 16 - benchmarks/floop3.cpp | 439 - benchmarks/floop36.cpp | 438 - benchmarks/floop36f.f | 17 - benchmarks/floop36f90.f90 | 16 - benchmarks/floop3f.f | 17 - benchmarks/floop3f90.f90 | 16 - benchmarks/floop5.cpp | 460 - benchmarks/floop5f.f | 17 - benchmarks/floop5f90.f90 | 16 - benchmarks/floop6.cpp | 460 - benchmarks/floop6f.f | 17 - benchmarks/floop6f90.f90 | 16 - benchmarks/floop8.cpp | 439 - benchmarks/floop8f.f | 17 - benchmarks/floop8f90.f90 | 16 - benchmarks/floop9.cpp | 438 - benchmarks/floop9f.f | 17 - benchmarks/floop9f90.f90 | 16 - benchmarks/frek.m | 7 - benchmarks/haney.cpp | 207 - benchmarks/haneyf.f | 87 - benchmarks/hao-he-mark.cpp | 56 - benchmarks/hao-he.cpp | 150 - benchmarks/iter.cpp | 33 - benchmarks/kepler.cpp | 243 - benchmarks/loop1-bug.cpp | 22 - benchmarks/loop1.cpp | 438 - benchmarks/loop10.cpp | 483 - benchmarks/loop100.cpp | 508 - benchmarks/loop100f.f | 18 - benchmarks/loop100f90.f90 | 16 - benchmarks/loop10f.f | 17 - benchmarks/loop10f90.f90 | 16 - benchmarks/loop11.cpp | 504 - benchmarks/loop11f.f | 17 - benchmarks/loop11f90.f90 | 16 - benchmarks/loop12.cpp | 527 
- benchmarks/loop12f.f | 17 - benchmarks/loop12f90.f90 | 16 - benchmarks/loop13.cpp | 527 - benchmarks/loop13f.f | 17 - benchmarks/loop13f90.f90 | 16 - benchmarks/loop14.cpp | 482 - benchmarks/loop14f.f | 17 - benchmarks/loop14f90.f90 | 16 - benchmarks/loop15.cpp | 482 - benchmarks/loop15f.f | 17 - benchmarks/loop15f90.f90 | 16 - benchmarks/loop16.cpp | 505 - benchmarks/loop16f.f | 17 - benchmarks/loop16f90.f90 | 16 - benchmarks/loop17.cpp | 504 - benchmarks/loop17f.f | 17 - benchmarks/loop17f90.f90 | 16 - benchmarks/loop18.cpp | 462 - benchmarks/loop18f.f | 17 - benchmarks/loop18f90.f90 | 16 - benchmarks/loop19.cpp | 484 - benchmarks/loop19f.f | 17 - benchmarks/loop19f90.f90 | 16 - benchmarks/loop1f.f | 17 - benchmarks/loop1f90.f90 | 16 - benchmarks/loop2.cpp | 439 - benchmarks/loop21.cpp | 504 - benchmarks/loop21f.f | 17 - benchmarks/loop21f90.f90 | 16 - benchmarks/loop22.cpp | 504 - benchmarks/loop22f.f | 17 - benchmarks/loop22f90.f90 | 16 - benchmarks/loop23.cpp | 526 - benchmarks/loop23f.f | 17 - benchmarks/loop23f90.f90 | 16 - benchmarks/loop24.cpp | 526 - benchmarks/loop24f.f | 17 - benchmarks/loop24f90.f90 | 16 - benchmarks/loop25.cpp | 507 - benchmarks/loop25f.f | 17 - benchmarks/loop25f90.f90 | 16 - benchmarks/loop2f.f | 17 - benchmarks/loop2f90.f90 | 16 - benchmarks/loop3.cpp | 439 - benchmarks/loop36.cpp | 438 - benchmarks/loop36f.f | 17 - benchmarks/loop36f90.f90 | 16 - benchmarks/loop3f.f | 17 - benchmarks/loop3f90.f90 | 16 - benchmarks/loop4.cpp | 240 - benchmarks/loop4f.f | 12 - benchmarks/loop4f90.f90 | 10 - benchmarks/loop5.cpp | 460 - benchmarks/loop5f.f | 17 - benchmarks/loop5f90.f90 | 16 - benchmarks/loop6.cpp | 460 - benchmarks/loop6f.f | 17 - benchmarks/loop6f90.f90 | 16 - benchmarks/loop8.cpp | 439 - benchmarks/loop8f.f | 17 - benchmarks/loop8f90.f90 | 16 - benchmarks/loop9.cpp | 438 - benchmarks/loop9f.f | 17 - benchmarks/loop9f90.f90 | 16 - benchmarks/loops.data | 147 - benchmarks/loopstruct.cpp | 172 - benchmarks/looptest.cpp | 707 - 
benchmarks/makelogo.cpp | 114 - benchmarks/makeloops.cpp | 791 - benchmarks/plot_benchmarks.m.in | 57 - benchmarks/qcd.cpp | 244 - benchmarks/qcd.txt | 19 - benchmarks/qcdf.f | 77 - benchmarks/quinlan.cpp | 17 - benchmarks/stencil.cpp | 453 - benchmarks/stencilf.f | 32 - benchmarks/stencilf2.f | 48 - benchmarks/stencilf90.f90 | 19 - benchmarks/stenciln.cpp | 61 - benchmarks/tiny3.cpp | 128 - benchmarks/tinydaxpy.cpp | 124 - blitz/CMakeLists.txt | 24 +- blitz/config.cmake.h.in | 168 +- cmake/CheckCXXFeatures.cmake | 116 - compiler/LEGAL | 8 - compiler/README | 22 - compiler/bool.cpp | 24 - compiler/bzconfig | 377 - compiler/climits.cpp | 10 - compiler/cmthscop.cpp | 20 - compiler/complex.cpp | 15 - compiler/compmath.cpp | 38 - compiler/constcst.cpp | 16 - compiler/cstd.cpp | 7 - compiler/default.cpp | 16 - compiler/dynamic.cpp | 31 - compiler/elabbase.cpp | 45 - compiler/elabret.cpp | 39 - compiler/enumcmp2.cpp | 27 - compiler/enumcomp.cpp | 27 - compiler/except.cpp | 27 - compiler/explicit.cpp | 16 - compiler/fullspec.cpp | 22 - compiler/getruse.cpp | 8 - compiler/ieeemath.cpp | 52 - compiler/instant.cpp | 16 - compiler/mathscop.cpp | 19 - compiler/membcnst.cpp | 14 - compiler/membtmp2.cpp | 27 - compiler/membtmpl.cpp | 26 - compiler/mutable.cpp | 35 - compiler/namespac.cpp | 53 - compiler/nontype.cpp | 20 - compiler/numlimit.cpp | 14 - compiler/numtrait.cpp | 37 - compiler/oldfor.cpp | 16 - compiler/partial.cpp | 30 - compiler/porder.cpp | 38 - compiler/promote.cpp | 30 - compiler/reinterp.cpp | 38 - compiler/restric2.cpp | 24 - compiler/restrict.cpp | 24 - compiler/rtti.cpp | 31 - compiler/statcast.cpp | 34 - compiler/std.cpp | 13 - compiler/stl.cpp | 27 - compiler/sysvmath.cpp | 38 - compiler/tempkey.cpp | 20 - compiler/template.cpp | 21 - compiler/tempqmt.cpp | 29 - compiler/tempqual.cpp | 23 - compiler/temptemp.cpp | 24 - compiler/typename.cpp | 12 - compiler/vac.icc | 8 - compiler/valarray.cpp | 14 - doc/CMakeLists.txt | 91 - doc/about.texi | 15 - 
doc/arrays-ctors.texi | 302 - doc/arrays-debug.texi | 34 - doc/arrays-expr.texi | 1506 -- doc/arrays-globals.texi | 166 - doc/arrays-indirect.texi | 258 - doc/arrays-intro.texi | 166 - doc/arrays-io.texi | 79 - doc/arrays-members.texi | 555 - doc/arrays-multi.texi | 193 - doc/arrays-slicing.texi | 300 - doc/arrays-stencils.texi | 569 - doc/arrays-storage.texi | 345 - doc/arrays-types.texi | 22 - doc/arrays-usertype.texi | 42 - doc/blitz.gif | Bin 3875 -> 0 bytes doc/blitz.texi | 295 - doc/blitztiny.jpg | Bin 6006 -> 0 bytes doc/compiling.texi | 75 - doc/constants.texi | 0 doc/copyright.texi | 14 - doc/download.texi | 12 - doc/doxygen/CMakeLists.txt | 38 - doc/doxygen/Doxyfile.in | 2513 --- doc/examples/CMakeLists.txt | 59 - doc/examples/cast.cpp | 26 - doc/examples/debug.cpp | 15 - doc/examples/dump.cpp | 11 - doc/examples/fixed-class.cpp | 58 - doc/examples/fixed-point.h | 42 - doc/examples/fixed.cpp | 20 - doc/examples/io.cpp | 55 - doc/examples/io.data | 22 - doc/examples/makefile.example | 32 - doc/examples/outer.cpp | 22 - doc/examples/output.cpp | 21 - doc/examples/range.cpp | 19 - doc/examples/simple.cpp | 25 - doc/examples/slicing.cpp | 31 - doc/examples/storage.cpp | 63 - doc/examples/strideslice.cpp | 16 - doc/examples/xor.cpp | 13 - doc/faq.texi | 253 - doc/help.texi | 53 - doc/indirect.eps | 283 - doc/indirect.fig | 165 - doc/indirect.gif | Bin 8792 -> 0 bytes doc/indirect.pdf | Bin 4440 -> 0 bytes doc/indirect.txt | 1 - doc/install.texi | 143 - doc/legal.texi | 5 - doc/makedatestring | 2 - doc/numinquire.texi | 237 - doc/parallel.texi | 41 - doc/platforms.texi | 180 - doc/random.texi | 340 - doc/sinsoid.eps | 28062 ---------------------------- doc/sinsoid.gif | Bin 8594 -> 0 bytes doc/sinsoid.pdf | Bin 108493 -> 0 bytes doc/sinsoid.txt | 1 - doc/slice.eps | 277 - doc/slice.fig | 137 - doc/slice.gif | Bin 5178 -> 0 bytes doc/slice.pdf | Bin 2782 -> 0 bytes doc/slice.txt | 1 - doc/stamp-vti | 4 - doc/stencils/CMakeLists.txt | 44 - 
doc/stencils/ParseArguments.cmake | 30 - doc/stencils/dump-stencil.cpp | 187 - doc/strideslice.eps | 290 - doc/strideslice.fig | 126 - doc/strideslice.gif | Bin 7240 -> 0 bytes doc/strideslice.pdf | Bin 2406 -> 0 bytes doc/strideslice.txt | 1 - doc/tau.texi | 0 doc/tensor1.eps | 273 - doc/tensor1.fig | 171 - doc/tensor1.gif | Bin 1940 -> 0 bytes doc/tensor1.pdf | Bin 1836 -> 0 bytes doc/tensor1.txt | 1 - doc/tinymatrix.texi | 0 doc/tinyvector.texi | 181 - doc/tuning.texi | 0 doc/version.texi | 4 - examples/CMakeLists.txt | 23 - examples/array.cpp | 53 - examples/cartesian.cpp | 29 - examples/cast.cpp | 29 - examples/cfd.cpp | 465 - examples/complex-test.cpp | 49 - examples/convolve.cpp | 21 - examples/curldiv.cpp | 38 - examples/deriv.cpp | 75 - examples/diff.cpp | 86 - examples/erf.cpp | 94 - examples/fixed.cpp | 86 - examples/indirect.cpp | 157 - examples/io.cpp | 55 - examples/iter.cpp | 28 - examples/matmult.cpp | 130 - examples/nested.cpp | 28 - examples/numinquire.cpp | 55 - examples/outer.cpp | 27 - examples/pauli.cpp | 34 - examples/pick.cpp | 30 - examples/polymorph.cpp | 26 - examples/prettyprint.cpp | 32 - examples/profile.cpp | 38 - examples/qcd.cpp | 253 - examples/rand2.cpp | 66 - examples/random.cpp | 54 - examples/rangexpr.cpp | 21 - examples/reduce.cpp | 104 - examples/simple.cpp | 19 - examples/slicing.cpp | 42 - examples/stencil.cpp | 75 - examples/stencil2.cpp | 50 - examples/stencil3.cpp | 64 - examples/stencil4.f | 24 - examples/stencilet.cpp | 21 - examples/storage.cpp | 66 - examples/tiny.cpp | 71 - examples/tiny2.cpp | 29 - examples/tiny3.cpp | 21 - examples/transform.cpp | 44 - examples/useret.cpp | 41 - examples/where.cpp | 37 - examples/whitt.cpp | 35 - random/CMakeLists.txt | 5 - random/F.h | 92 - random/beta.h | 315 - random/chisquare.h | 65 - random/default.h | 125 - random/discrete-uniform.h | 70 - random/exponential.h | 78 - random/gamma.h | 316 - random/mt.h | 340 - random/mtparam.cc | 48 - random/normal.h | 120 - random/uniform.h 
| 431 - src/globals.cpp | 2 +- 414 files changed, 99 insertions(+), 81022 deletions(-) delete mode 100644 benchmarks/CMakeLists.txt delete mode 100644 benchmarks/acou3d.cpp delete mode 100644 benchmarks/acou3db1.cpp delete mode 100644 benchmarks/acou3db2.cpp delete mode 100644 benchmarks/acou3db3.cpp delete mode 100644 benchmarks/acou3db4.cpp delete mode 100644 benchmarks/acou3df.f delete mode 100644 benchmarks/acou3df2.f delete mode 100644 benchmarks/acou3df90.f90 delete mode 100644 benchmarks/acou3df902.f90 delete mode 100644 benchmarks/acoustic.cpp delete mode 100644 benchmarks/acousticf.f delete mode 100644 benchmarks/acousticf2.f delete mode 100644 benchmarks/acousticf90.f90 delete mode 100644 benchmarks/acousticf902.f90 delete mode 100644 benchmarks/arrdaxpy.cpp delete mode 100644 benchmarks/arrdaxpyf.f delete mode 100644 benchmarks/arrexpr1.cpp delete mode 100644 benchmarks/arrexpr1.m delete mode 100644 benchmarks/cfd.cpp delete mode 100644 benchmarks/cfdf.f delete mode 100644 benchmarks/cfortran.h delete mode 100644 benchmarks/chunky.cpp delete mode 100644 benchmarks/compiletime.cpp delete mode 100644 benchmarks/ctime-results delete mode 100644 benchmarks/ctime1.cpp delete mode 100644 benchmarks/ctime1v.cpp delete mode 100644 benchmarks/ctime2.cpp delete mode 100644 benchmarks/ctime2v.cpp delete mode 100644 benchmarks/ctime3.cpp delete mode 100644 benchmarks/ctime3v.cpp delete mode 100644 benchmarks/ctime4.cpp delete mode 100644 benchmarks/ctime4v.cpp delete mode 100644 benchmarks/ctime5.cpp delete mode 100644 benchmarks/ctime5c.cpp delete mode 100644 benchmarks/ctime5v.cpp delete mode 100644 benchmarks/daxpy.cpp delete mode 100644 benchmarks/daxpy2.cpp delete mode 100644 benchmarks/daxpyf90-2.f90 delete mode 100644 benchmarks/daxpyf90.f90 delete mode 100644 benchmarks/dot.cpp delete mode 100644 benchmarks/dot2.cpp delete mode 100644 benchmarks/echof2-back.f delete mode 100644 benchmarks/echotune.cpp delete mode 100644 benchmarks/echotune.m delete mode 
100644 benchmarks/echotunef.f delete mode 100644 benchmarks/fdaxpy.f delete mode 100644 benchmarks/fidaxpy.f delete mode 100644 benchmarks/floop1.cpp delete mode 100644 benchmarks/floop10.cpp delete mode 100644 benchmarks/floop10f.f delete mode 100644 benchmarks/floop10f90.f90 delete mode 100644 benchmarks/floop11.cpp delete mode 100644 benchmarks/floop11f.f delete mode 100644 benchmarks/floop11f90.f90 delete mode 100644 benchmarks/floop12.cpp delete mode 100644 benchmarks/floop12f.f delete mode 100644 benchmarks/floop12f90.f90 delete mode 100644 benchmarks/floop13.cpp delete mode 100644 benchmarks/floop13f.f delete mode 100644 benchmarks/floop13f90.f90 delete mode 100644 benchmarks/floop14.cpp delete mode 100644 benchmarks/floop14f.f delete mode 100644 benchmarks/floop14f90.f90 delete mode 100644 benchmarks/floop15.cpp delete mode 100644 benchmarks/floop15f.f delete mode 100644 benchmarks/floop15f90.f90 delete mode 100644 benchmarks/floop16.cpp delete mode 100644 benchmarks/floop16f.f delete mode 100644 benchmarks/floop16f90.f90 delete mode 100644 benchmarks/floop17.cpp delete mode 100644 benchmarks/floop17f.f delete mode 100644 benchmarks/floop17f90.f90 delete mode 100644 benchmarks/floop18.cpp delete mode 100644 benchmarks/floop18f.f delete mode 100644 benchmarks/floop18f90.f90 delete mode 100644 benchmarks/floop19.cpp delete mode 100644 benchmarks/floop19f.f delete mode 100644 benchmarks/floop19f90.f90 delete mode 100644 benchmarks/floop1f.f delete mode 100644 benchmarks/floop1f90.f90 delete mode 100644 benchmarks/floop2.cpp delete mode 100644 benchmarks/floop21.cpp delete mode 100644 benchmarks/floop21f.f delete mode 100644 benchmarks/floop21f90.f90 delete mode 100644 benchmarks/floop22.cpp delete mode 100644 benchmarks/floop22f.f delete mode 100644 benchmarks/floop22f90.f90 delete mode 100644 benchmarks/floop23.cpp delete mode 100644 benchmarks/floop23f.f delete mode 100644 benchmarks/floop23f90.f90 delete mode 100644 benchmarks/floop24.cpp delete mode 100644 
benchmarks/floop24f.f delete mode 100644 benchmarks/floop24f90.f90 delete mode 100644 benchmarks/floop25.cpp delete mode 100644 benchmarks/floop25f.f delete mode 100644 benchmarks/floop25f90.f90 delete mode 100644 benchmarks/floop2f.f delete mode 100644 benchmarks/floop2f90.f90 delete mode 100644 benchmarks/floop3.cpp delete mode 100644 benchmarks/floop36.cpp delete mode 100644 benchmarks/floop36f.f delete mode 100644 benchmarks/floop36f90.f90 delete mode 100644 benchmarks/floop3f.f delete mode 100644 benchmarks/floop3f90.f90 delete mode 100644 benchmarks/floop5.cpp delete mode 100644 benchmarks/floop5f.f delete mode 100644 benchmarks/floop5f90.f90 delete mode 100644 benchmarks/floop6.cpp delete mode 100644 benchmarks/floop6f.f delete mode 100644 benchmarks/floop6f90.f90 delete mode 100644 benchmarks/floop8.cpp delete mode 100644 benchmarks/floop8f.f delete mode 100644 benchmarks/floop8f90.f90 delete mode 100644 benchmarks/floop9.cpp delete mode 100644 benchmarks/floop9f.f delete mode 100644 benchmarks/floop9f90.f90 delete mode 100644 benchmarks/frek.m delete mode 100644 benchmarks/haney.cpp delete mode 100644 benchmarks/haneyf.f delete mode 100644 benchmarks/hao-he-mark.cpp delete mode 100644 benchmarks/hao-he.cpp delete mode 100644 benchmarks/iter.cpp delete mode 100644 benchmarks/kepler.cpp delete mode 100644 benchmarks/loop1-bug.cpp delete mode 100644 benchmarks/loop1.cpp delete mode 100644 benchmarks/loop10.cpp delete mode 100644 benchmarks/loop100.cpp delete mode 100644 benchmarks/loop100f.f delete mode 100644 benchmarks/loop100f90.f90 delete mode 100644 benchmarks/loop10f.f delete mode 100644 benchmarks/loop10f90.f90 delete mode 100644 benchmarks/loop11.cpp delete mode 100644 benchmarks/loop11f.f delete mode 100644 benchmarks/loop11f90.f90 delete mode 100644 benchmarks/loop12.cpp delete mode 100644 benchmarks/loop12f.f delete mode 100644 benchmarks/loop12f90.f90 delete mode 100644 benchmarks/loop13.cpp delete mode 100644 benchmarks/loop13f.f delete mode 
100644 benchmarks/loop13f90.f90 delete mode 100644 benchmarks/loop14.cpp delete mode 100644 benchmarks/loop14f.f delete mode 100644 benchmarks/loop14f90.f90 delete mode 100644 benchmarks/loop15.cpp delete mode 100644 benchmarks/loop15f.f delete mode 100644 benchmarks/loop15f90.f90 delete mode 100644 benchmarks/loop16.cpp delete mode 100644 benchmarks/loop16f.f delete mode 100644 benchmarks/loop16f90.f90 delete mode 100644 benchmarks/loop17.cpp delete mode 100644 benchmarks/loop17f.f delete mode 100644 benchmarks/loop17f90.f90 delete mode 100644 benchmarks/loop18.cpp delete mode 100644 benchmarks/loop18f.f delete mode 100644 benchmarks/loop18f90.f90 delete mode 100644 benchmarks/loop19.cpp delete mode 100644 benchmarks/loop19f.f delete mode 100644 benchmarks/loop19f90.f90 delete mode 100644 benchmarks/loop1f.f delete mode 100644 benchmarks/loop1f90.f90 delete mode 100644 benchmarks/loop2.cpp delete mode 100644 benchmarks/loop21.cpp delete mode 100644 benchmarks/loop21f.f delete mode 100644 benchmarks/loop21f90.f90 delete mode 100644 benchmarks/loop22.cpp delete mode 100644 benchmarks/loop22f.f delete mode 100644 benchmarks/loop22f90.f90 delete mode 100644 benchmarks/loop23.cpp delete mode 100644 benchmarks/loop23f.f delete mode 100644 benchmarks/loop23f90.f90 delete mode 100644 benchmarks/loop24.cpp delete mode 100644 benchmarks/loop24f.f delete mode 100644 benchmarks/loop24f90.f90 delete mode 100644 benchmarks/loop25.cpp delete mode 100644 benchmarks/loop25f.f delete mode 100644 benchmarks/loop25f90.f90 delete mode 100644 benchmarks/loop2f.f delete mode 100644 benchmarks/loop2f90.f90 delete mode 100644 benchmarks/loop3.cpp delete mode 100644 benchmarks/loop36.cpp delete mode 100644 benchmarks/loop36f.f delete mode 100644 benchmarks/loop36f90.f90 delete mode 100644 benchmarks/loop3f.f delete mode 100644 benchmarks/loop3f90.f90 delete mode 100644 benchmarks/loop4.cpp delete mode 100644 benchmarks/loop4f.f delete mode 100644 benchmarks/loop4f90.f90 delete mode 100644 
benchmarks/loop5.cpp delete mode 100644 benchmarks/loop5f.f delete mode 100644 benchmarks/loop5f90.f90 delete mode 100644 benchmarks/loop6.cpp delete mode 100644 benchmarks/loop6f.f delete mode 100644 benchmarks/loop6f90.f90 delete mode 100644 benchmarks/loop8.cpp delete mode 100644 benchmarks/loop8f.f delete mode 100644 benchmarks/loop8f90.f90 delete mode 100644 benchmarks/loop9.cpp delete mode 100644 benchmarks/loop9f.f delete mode 100644 benchmarks/loop9f90.f90 delete mode 100644 benchmarks/loops.data delete mode 100644 benchmarks/loopstruct.cpp delete mode 100644 benchmarks/looptest.cpp delete mode 100644 benchmarks/makelogo.cpp delete mode 100644 benchmarks/makeloops.cpp delete mode 100644 benchmarks/plot_benchmarks.m.in delete mode 100644 benchmarks/qcd.cpp delete mode 100644 benchmarks/qcd.txt delete mode 100644 benchmarks/qcdf.f delete mode 100644 benchmarks/quinlan.cpp delete mode 100644 benchmarks/stencil.cpp delete mode 100644 benchmarks/stencilf.f delete mode 100644 benchmarks/stencilf2.f delete mode 100644 benchmarks/stencilf90.f90 delete mode 100644 benchmarks/stenciln.cpp delete mode 100644 benchmarks/tiny3.cpp delete mode 100644 benchmarks/tinydaxpy.cpp delete mode 100644 cmake/CheckCXXFeatures.cmake delete mode 100644 compiler/LEGAL delete mode 100644 compiler/README delete mode 100644 compiler/bool.cpp delete mode 100755 compiler/bzconfig delete mode 100644 compiler/climits.cpp delete mode 100644 compiler/cmthscop.cpp delete mode 100644 compiler/complex.cpp delete mode 100644 compiler/compmath.cpp delete mode 100644 compiler/constcst.cpp delete mode 100644 compiler/cstd.cpp delete mode 100644 compiler/default.cpp delete mode 100644 compiler/dynamic.cpp delete mode 100644 compiler/elabbase.cpp delete mode 100644 compiler/elabret.cpp delete mode 100644 compiler/enumcmp2.cpp delete mode 100644 compiler/enumcomp.cpp delete mode 100644 compiler/except.cpp delete mode 100644 compiler/explicit.cpp delete mode 100644 compiler/fullspec.cpp delete mode 
100644 compiler/getruse.cpp delete mode 100644 compiler/ieeemath.cpp delete mode 100644 compiler/instant.cpp delete mode 100644 compiler/mathscop.cpp delete mode 100644 compiler/membcnst.cpp delete mode 100644 compiler/membtmp2.cpp delete mode 100644 compiler/membtmpl.cpp delete mode 100644 compiler/mutable.cpp delete mode 100644 compiler/namespac.cpp delete mode 100644 compiler/nontype.cpp delete mode 100644 compiler/numlimit.cpp delete mode 100644 compiler/numtrait.cpp delete mode 100644 compiler/oldfor.cpp delete mode 100644 compiler/partial.cpp delete mode 100644 compiler/porder.cpp delete mode 100644 compiler/promote.cpp delete mode 100644 compiler/reinterp.cpp delete mode 100644 compiler/restric2.cpp delete mode 100644 compiler/restrict.cpp delete mode 100644 compiler/rtti.cpp delete mode 100644 compiler/statcast.cpp delete mode 100644 compiler/std.cpp delete mode 100644 compiler/stl.cpp delete mode 100644 compiler/sysvmath.cpp delete mode 100644 compiler/tempkey.cpp delete mode 100644 compiler/template.cpp delete mode 100644 compiler/tempqmt.cpp delete mode 100644 compiler/tempqual.cpp delete mode 100644 compiler/temptemp.cpp delete mode 100644 compiler/typename.cpp delete mode 100644 compiler/vac.icc delete mode 100644 compiler/valarray.cpp delete mode 100644 doc/CMakeLists.txt delete mode 100644 doc/about.texi delete mode 100644 doc/arrays-ctors.texi delete mode 100644 doc/arrays-debug.texi delete mode 100644 doc/arrays-expr.texi delete mode 100644 doc/arrays-globals.texi delete mode 100644 doc/arrays-indirect.texi delete mode 100644 doc/arrays-intro.texi delete mode 100644 doc/arrays-io.texi delete mode 100644 doc/arrays-members.texi delete mode 100644 doc/arrays-multi.texi delete mode 100644 doc/arrays-slicing.texi delete mode 100644 doc/arrays-stencils.texi delete mode 100644 doc/arrays-storage.texi delete mode 100644 doc/arrays-types.texi delete mode 100644 doc/arrays-usertype.texi delete mode 100644 doc/blitz.gif delete mode 100644 doc/blitz.texi 
delete mode 100644 doc/blitztiny.jpg delete mode 100644 doc/compiling.texi delete mode 100644 doc/constants.texi delete mode 100644 doc/copyright.texi delete mode 100644 doc/download.texi delete mode 100644 doc/doxygen/CMakeLists.txt delete mode 100644 doc/doxygen/Doxyfile.in delete mode 100644 doc/examples/CMakeLists.txt delete mode 100644 doc/examples/cast.cpp delete mode 100644 doc/examples/debug.cpp delete mode 100644 doc/examples/dump.cpp delete mode 100644 doc/examples/fixed-class.cpp delete mode 100644 doc/examples/fixed-point.h delete mode 100644 doc/examples/fixed.cpp delete mode 100644 doc/examples/io.cpp delete mode 100644 doc/examples/io.data delete mode 100644 doc/examples/makefile.example delete mode 100644 doc/examples/outer.cpp delete mode 100644 doc/examples/output.cpp delete mode 100644 doc/examples/range.cpp delete mode 100644 doc/examples/simple.cpp delete mode 100644 doc/examples/slicing.cpp delete mode 100644 doc/examples/storage.cpp delete mode 100644 doc/examples/strideslice.cpp delete mode 100644 doc/examples/xor.cpp delete mode 100644 doc/faq.texi delete mode 100644 doc/help.texi delete mode 100644 doc/indirect.eps delete mode 100644 doc/indirect.fig delete mode 100644 doc/indirect.gif delete mode 100644 doc/indirect.pdf delete mode 100644 doc/indirect.txt delete mode 100644 doc/install.texi delete mode 100644 doc/legal.texi delete mode 100755 doc/makedatestring delete mode 100644 doc/numinquire.texi delete mode 100644 doc/parallel.texi delete mode 100644 doc/platforms.texi delete mode 100644 doc/random.texi delete mode 100644 doc/sinsoid.eps delete mode 100644 doc/sinsoid.gif delete mode 100644 doc/sinsoid.pdf delete mode 100644 doc/sinsoid.txt delete mode 100644 doc/slice.eps delete mode 100644 doc/slice.fig delete mode 100644 doc/slice.gif delete mode 100644 doc/slice.pdf delete mode 100644 doc/slice.txt delete mode 100644 doc/stamp-vti delete mode 100644 doc/stencils/CMakeLists.txt delete mode 100644 doc/stencils/ParseArguments.cmake 
delete mode 100644 doc/stencils/dump-stencil.cpp delete mode 100644 doc/strideslice.eps delete mode 100644 doc/strideslice.fig delete mode 100644 doc/strideslice.gif delete mode 100644 doc/strideslice.pdf delete mode 100644 doc/strideslice.txt delete mode 100644 doc/tau.texi delete mode 100644 doc/tensor1.eps delete mode 100644 doc/tensor1.fig delete mode 100644 doc/tensor1.gif delete mode 100644 doc/tensor1.pdf delete mode 100644 doc/tensor1.txt delete mode 100644 doc/tinymatrix.texi delete mode 100644 doc/tinyvector.texi delete mode 100644 doc/tuning.texi delete mode 100644 doc/version.texi delete mode 100644 examples/CMakeLists.txt delete mode 100644 examples/array.cpp delete mode 100644 examples/cartesian.cpp delete mode 100644 examples/cast.cpp delete mode 100644 examples/cfd.cpp delete mode 100644 examples/complex-test.cpp delete mode 100644 examples/convolve.cpp delete mode 100644 examples/curldiv.cpp delete mode 100644 examples/deriv.cpp delete mode 100644 examples/diff.cpp delete mode 100644 examples/erf.cpp delete mode 100644 examples/fixed.cpp delete mode 100644 examples/indirect.cpp delete mode 100644 examples/io.cpp delete mode 100644 examples/iter.cpp delete mode 100644 examples/matmult.cpp delete mode 100644 examples/nested.cpp delete mode 100644 examples/numinquire.cpp delete mode 100644 examples/outer.cpp delete mode 100644 examples/pauli.cpp delete mode 100644 examples/pick.cpp delete mode 100644 examples/polymorph.cpp delete mode 100644 examples/prettyprint.cpp delete mode 100644 examples/profile.cpp delete mode 100644 examples/qcd.cpp delete mode 100644 examples/rand2.cpp delete mode 100644 examples/random.cpp delete mode 100644 examples/rangexpr.cpp delete mode 100644 examples/reduce.cpp delete mode 100644 examples/simple.cpp delete mode 100644 examples/slicing.cpp delete mode 100644 examples/stencil.cpp delete mode 100644 examples/stencil2.cpp delete mode 100644 examples/stencil3.cpp delete mode 100644 examples/stencil4.f delete mode 100644 
examples/stencilet.cpp delete mode 100644 examples/storage.cpp delete mode 100644 examples/tiny.cpp delete mode 100644 examples/tiny2.cpp delete mode 100644 examples/tiny3.cpp delete mode 100644 examples/transform.cpp delete mode 100644 examples/useret.cpp delete mode 100644 examples/where.cpp delete mode 100644 examples/whitt.cpp delete mode 100644 random/CMakeLists.txt delete mode 100644 random/F.h delete mode 100644 random/beta.h delete mode 100644 random/chisquare.h delete mode 100644 random/default.h delete mode 100644 random/discrete-uniform.h delete mode 100644 random/exponential.h delete mode 100644 random/gamma.h delete mode 100644 random/mt.h delete mode 100644 random/mtparam.cc delete mode 100644 random/normal.h delete mode 100644 random/uniform.h diff --git a/CMakeLists.txt b/CMakeLists.txt index d4ac2629..d041b58d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,8 +15,7 @@ set(BZ_PACKAGE_STRING "${CMAKE_PROJECT_NAME} ${blitz_MAJOR}.${blitz_MINOR}") project(blitz VERSION ${blitz_VERSION} LANGUAGES CXX) -option(BUILD_DOC "Build documentation" OFF) -option(BUILD_TESTING "Build tests, examples and benchmarks" OFF) +option(BUILD_TESTING "Build tests" OFF) # Packaging @@ -43,16 +42,10 @@ add_custom_target(generated-headers ALL) set(PKGCONFIG_LIBS -lblitz) add_subdirectory(blitz) -add_subdirectory(random) add_subdirectory(src) -if (BUILD_DOC) - add_subdirectory(doc) -endif() if (BUILD_TESTING) add_subdirectory(testsuite) - add_subdirectory(examples) - add_subdirectory(benchmarks) endif() # Add a custom target to mimic autotools "make lib" diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt deleted file mode 100644 index 329b378c..00000000 --- a/benchmarks/CMakeLists.txt +++ /dev/null @@ -1,33 +0,0 @@ -#set(OTHER_BENCH tinydaxpy hao-he iter) -set(OTHER_BENCH tinydaxpy iter) - -option(FORTRAN_BENCHMARKS "Enable Fortran benchmarks" OFF) -if (FORTRAN_BENCHMARKS) - enable_language(Fortran OPTIONAL) - set(daxpy_SOURCES daxpy.cpp fdaxpy.f 
fidaxpy.f) - set(stencil_SOURCES stencil.cpp stencilf.f stencilf2.f) - set(acoustic_SOURCES acoustic.cpp acousticf.f acousticf2.f) - set(acou3d_SOURCES acou3db1.cpp acou3db2.cpp acou3db3.cpp acou3db4.cpp \ acou3d.cpp acou3df.f acou3df2.f ) - set(LOOP_KERNELS loop1 loop2 loop3 loop5 loop6 loop8 loop9 loop10 loop11 - loop12 loop13 loop14 loop15 loop16 loop17 loop18 loop19 loop21 loop22 - loop23 loop24 loop25 loop36 loop100 floop1 floop2 floop3 floop5 floop6 floop8 floop9 floop10 floop11 - floop12 floop13 floop14 floop15 floop16 floop17 floop18 floop19 floop21 floop22 - floop23 floop24 floop25 floop36) - set(OTHER_BENCH ${OTHER_BENCH} arrdaxpy haney qcd stencil cfd) - if (CMAKE_Fortran_COMPILER_SUPPORTS_F90) - set(daxpy_SOURCES ${daxpy_SOURCES} daxpyf90.f90) - set(stencil_SOURCES ${stencil_SOURCES} stencilf90.f90) - set(acoustic_SOURCES ${acoustic_SOURCES} acousticf90.f90 acousticf902.f90) - set(acou3d_SOURCES ${acou3d_SOURCES} acou3df90.f90 acou3df902.f90) - endif() -endif() - -add_custom_target(benchmark) - -TESTS(benchmark ${LOOP_KERNELS} ${OTHER_BENCH}) - -add_custom_target(check-benchmarks - DEPENDS blitz benchmark - COMMAND ${CMAKE_BUILD_TOOL} test) - -set(COMPILE_TIME_BENCHMARKS ctime1 ctime2 ctime3 ctime4 ctime5 ctime1v ctime2v ctime3v ctime4v ctime5v) diff --git a/benchmarks/acou3d.cpp b/benchmarks/acou3d.cpp deleted file mode 100644 index 2cb251b8..00000000 --- a/benchmarks/acou3d.cpp +++ /dev/null @@ -1,211 +0,0 @@ -#define BZ_DISABLE_RESTRICT - -#include -#include -#include - -#ifdef BZ_HAVE_STD - #include -#else - #include -#endif - -using namespace blitz; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define acoustic3d_f90 acoustic3d_f90_ - #define acoustic3d_f77 acoustic3d_f77_ - #define acoustic3d_f90tuned acoustic3d_f90tuned_ - #define acoustic3d_f77tuned acoustic3d_f77tuned_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define acoustic3d_f90 acoustic3d_f90__ - #define acoustic3d_f77 acoustic3d_f77__ - #define 
acoustic3d_f90tuned acoustic3d_f90tuned__ - #define acoustic3d_f77tuned acoustic3d_f77tuned__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define acoustic3d_f90 ACOUSTIC3D_F90 - #define acoustic3d_f77 ACOUSTIC3D_F77 - #define acoustic3d_f90tuned ACOUSTIC3D_F90TUNED - #define acoustic3d_f77tuned ACOUSTIC3D_F77TUNED -#endif - -extern "C" { -void acoustic3d_f90(int& N, int& niters, float& check); -void acoustic3d_f77(int& N, int& niters, float& check); -void acoustic3d_f90tuned(int& N, int& niters, float& check); -void acoustic3d_f77tuned(int& N, int& niters, float& check); -} - -float acoustic3D_BlitzRaw(int N, int niters); -float acoustic3D_BlitzInterlacedCycled(int N, int niters); -float acoustic3D_BlitzCycled(int N, int niters); -float acoustic3D_BlitzStencil(int N, int niters); - - -void output_data(const char* type, const Timer& t, float check, double Gflops) -{ - cout << type << ": " << t.elapsed() - << t.indep_var() << " check = " - << check << " Gflop/" << t.indep_var() << " = " - << (Gflops/t.elapsed()) - << endl << endl; -} - -int main() -{ - Timer timer; - int N = 112; - int niters = 210; // Must be divisible by 3 for tuned Fortran versions - float check; - - cout << "Acoustic 3D Benchmark" << endl << endl; - - double Gflops = (N-2)*(N-2)*(N-2) * 11.0 * niters / 1.0e+9; - - generateFastTraversalOrder(TinyVector(N-2,N-2)); - - timer.start(); - check = acoustic3D_BlitzRaw(N, niters); - timer.stop(); - output_data("Blitz++ (raw)", timer, check, Gflops); - - timer.start(); - check = acoustic3D_BlitzStencil(N, niters); - timer.stop(); - output_data("Blitz++ (stencil)", timer, check, Gflops); - -#if 0 - timer.start(); - check = acoustic3D_BlitzInterlaced(N, niters, c); - timer.stop(); - output_data("Blitz++ (interlaced)", timer, check, Gflops); -#endif - - timer.start(); - check = acoustic3D_BlitzCycled(N, niters); - timer.stop(); - output_data("Blitz++ (cycled)", timer, check, Gflops); - - timer.start(); - check = acoustic3D_BlitzInterlacedCycled(N, niters); - 
timer.stop(); - output_data("Blitz++ (interlaced & cycled)", timer, check, Gflops); - -#ifdef FORTRAN_90 - timer.start(); - acoustic3d_f90(N, niters, check); - timer.stop(); - output_data("Fortran 90", timer, check, Gflops); - - timer.start(); - acoustic3d_f90tuned(N, niters, check); - timer.stop(); - output_data("Fortran 90 (tuned)", timer, check, Gflops); -#endif - - timer.start(); - acoustic3d_f77(N, niters, check); - timer.stop(); - output_data("Fortran 77", timer, check, Gflops); - - timer.start(); - acoustic3d_f77tuned(N, niters, check); - timer.stop(); - output_data("Fortran 77 (tuned)", timer, check, Gflops); - - return 0; -} - -void setupInitialConditions(Array& P1, Array& P2, - Array& P3, Array& c, int N); - -void snapshot(const Array& P, const Array& c); - -void checkArray(const Array& A, int N); - -void setupInitialConditions(Array& P1, Array& P2, - Array& P3, Array& c, int N) -{ - // Set the velocity field - c(Range(0,N/2-1), Range::all(), Range::all()) = 0.05; - c(Range(N/2,N-1), Range::all(), Range::all()) = 0.3; - - double Nfp = static_cast(N); - int cavityLeft = static_cast(3*Nfp/7-1); - int cavityRight = static_cast(4*Nfp/7-1); - int cavityFront = static_cast(3*Nfp/7-1); - int cavityBack = static_cast(4*Nfp/7-1); - int cavityTop = static_cast(5*Nfp/7-1); - int cavityBottom = static_cast(6*Nfp/7-1); - - c(Range(cavityTop,cavityBottom),Range(cavityLeft,cavityRight), - Range(cavityFront,cavityBack)) = 0.02; - - int cavityTop2 = static_cast(1*Nfp/7-1); - int cavityBottom2 = static_cast(2*Nfp/7-1); - c(Range(cavityTop2,cavityBottom2),Range(cavityLeft,cavityRight), - Range(cavityFront,cavityBack)) = 0.001; - - // Initial pressure distribution - using namespace blitz::tensor; - - float NN = N; - float ci = N/2-1; - float cj = N/2-1; - float ck = N/2-1; - // pow2 is an ET-only function, it's not defined for POD types - float s2 = 64.0 * 9.0 / pow(NN/2.0, 2); - P1 = 0.0; - P2 = exp(-(pow2(i-ci)+pow2(j-cj)+pow2(k-ck)) * s2); - P3 = 0.0; - - checkArray(P2, 
N); - checkArray(c, N); -} - -void checkArray(const Array& A, int N) -{ - double check = 0.0; - - for (int i=0; i < N; ++i) - for (int j=0; j < N; ++j) - for (int k=0; k < N; ++k) - check += A(i,j,k) * ((i+1)+N*(j+1)+N*N*(k+1)); - - cout << "Array check: " << check << endl; -} - -void snapshot(const Array& P, const Array& c) -{ - static int count = 0, snapshotNum = 0; - if (++count < 5) - return; - - count = 0; - ++snapshotNum; - char filename[128]; - sprintf(filename, "snapshot%03d.m", snapshotNum); - - ofstream ofs(filename); - int N = P.length(firstDim); - - int k = N/2; - float Pmin = -0.2; - float PScale = 1.0/0.4; - float VScale = 0.5; - - ofs << "P" << snapshotNum << " = [ "; - for (int i=0; i < N; ++i) - { - for (int j=0; j < N; ++j) - { - float value = (P(i,j,k)-Pmin)*PScale + c(i,j,k)*VScale; - int r = static_cast(value * 4096); - ofs << r << " "; - } - if (i < N-1) - ofs << ";" << endl; - } - ofs << "];" << endl; -} - diff --git a/benchmarks/acou3db1.cpp b/benchmarks/acou3db1.cpp deleted file mode 100644 index 28850b12..00000000 --- a/benchmarks/acou3db1.cpp +++ /dev/null @@ -1,30 +0,0 @@ -#include - -using namespace blitz; - -void setupInitialConditions(Array& P1, Array& P2, - Array& P3, Array& c, int N); - -float acoustic3D_BlitzRaw(int N, int niters) -{ - // Allocate the arrays as a group. Blitz++ will interlace them in - // memory, improving data locality. 
- - Array P1(N,N,N), P2(N,N,N), P3(N,N,N), c(N,N,N); - Range I(1,N-2), J(1,N-2), K(1,N-2); - - setupInitialConditions(P1, P2, P3, c, N); - - for (int iter=0; iter < niters; ++iter) - { - P3(I,J,K) = (2-6*c(I,J,K)) * P2(I,J,K) - + c(I,J,K)*(P2(I-1,J,K) + P2(I+1,J,K) + P2(I,J-1,K) + P2(I,J+1,K) - + P2(I,J,K-1) + P2(I,J,K+1)) - P1(I,J,K); - - P1 = P2; - P2 = P3; - } - - return P1(N/2,N/2,N/2); -} - diff --git a/benchmarks/acou3db2.cpp b/benchmarks/acou3db2.cpp deleted file mode 100644 index a95c2142..00000000 --- a/benchmarks/acou3db2.cpp +++ /dev/null @@ -1,30 +0,0 @@ -#include - -using namespace blitz; - -void setupInitialConditions(Array& P1, Array& P2, - Array& P3, Array& c, int N); - -float acoustic3D_BlitzInterlacedCycled(int N, int niters) -{ - // Allocate the arrays as a group. Blitz++ will interlace them in - // memory, improving data locality. - - Array P1, P2, P3, c; - allocateArrays(shape(N,N,N), P1, P2, P3, c); - Range I(1,N-2), J(1,N-2), K(1,N-2); - - setupInitialConditions(P1, P2, P3, c, N); - - for (int iter=0; iter < niters; ++iter) - { - P3(I,J,K) = (2-6*c(I,J,K)) * P2(I,J,K) - + c(I,J,K)*(P2(I-1,J,K) + P2(I+1,J,K) + P2(I,J-1,K) + P2(I,J+1,K) - + P2(I,J,K-1) + P2(I,J,K+1)) - P1(I,J,K); - - cycleArrays(P1, P2, P3); - } - - return P1(N/2,N/2,N/2); -} - diff --git a/benchmarks/acou3db3.cpp b/benchmarks/acou3db3.cpp deleted file mode 100644 index b7da30b0..00000000 --- a/benchmarks/acou3db3.cpp +++ /dev/null @@ -1,26 +0,0 @@ -#include - -using namespace blitz; - -void setupInitialConditions(Array& P1, Array& P2, - Array& P3, Array& c, int N); - -float acoustic3D_BlitzCycled(int N, int niters) -{ - Array P1(N,N,N), P2(N,N,N), P3(N,N,N), c(N,N,N); - Range I(1,N-2), J(1,N-2), K(1,N-2); - - setupInitialConditions(P1, P2, P3, c, N); - - for (int iter=0; iter < niters; ++iter) - { - P3(I,J,K) = (2-6*c(I,J,K)) * P2(I,J,K) - + c(I,J,K)*(P2(I-1,J,K) + P2(I+1,J,K) + P2(I,J-1,K) + P2(I,J+1,K) - + P2(I,J,K-1) + P2(I,J,K+1)) - P1(I,J,K); - - cycleArrays(P1, P2, P3); 
- } - - return P1(N/2,N/2,N/2); -} - diff --git a/benchmarks/acou3db4.cpp b/benchmarks/acou3db4.cpp deleted file mode 100644 index dcbd46ae..00000000 --- a/benchmarks/acou3db4.cpp +++ /dev/null @@ -1,28 +0,0 @@ -#include -#include - -using namespace blitz; - -void setupInitialConditions(Array& P1, Array& P2, - Array& P3, Array& c, int N); - -BZ_DECLARE_STENCIL4(acoustic3D, P1, P2, P3, c) - P3 = 2 * P2 + c * Laplacian3D_stencilop(P2) - P1; -BZ_END_STENCIL - -float acoustic3D_BlitzStencil(int N, int niters) -{ - Array P1, P2, P3, c; - allocateArrays(shape(N,N,N), P1, P2, P3, c); - - setupInitialConditions(P1, P2, P3, c, N); - - for (int iter=0; iter < niters; ++iter) - { - applyStencil(acoustic3D(), P1, P2, P3, c); - cycleArrays(P1, P2, P3); - } - - return P1(N/2,N/2,N/2); -} - diff --git a/benchmarks/acou3df.f b/benchmarks/acou3df.f deleted file mode 100644 index f586ef91..00000000 --- a/benchmarks/acou3df.f +++ /dev/null @@ -1,134 +0,0 @@ -! INTEGER N, iters -! REAL check - -! N = 112 -! iters = 210 -! CALL acoustic3d_f77(N,iters,check) -! PRINT *, check -! END - - SUBROUTINE acoustic3d_f77(N, niters, check) - INTEGER N, niters, iter - REAL check - REAL P1(N,N,N), P2(N,N,N), P3(N,N,N), C(N,N,N) - INTEGER i, j, k - - CALL acoust3d_f77_setup(P1, P2, P3, C, N) - - DO iter=1, niters - DO k=2,N-1 - DO j=2,N-1 - DO i=2,N-1 - P3(i,j,k) = (2-6*C(i,j,k))*P2(i,j,k) + C(i,j,k) - . * (P2(i,j-1,k) + P2(i,j+1,k) + P2(i-1,j,k) - . + P2(i+1,j,k) + P2(i,j,k-1) + P2(i,j,k+1)) - . - P1(i,j,k) - END DO - END DO - END DO - - DO k=1,N - DO j=1,N - DO i=1,N - P1(i,j,k) = P2(i,j,k) - P2(i,j,k) = P3(i,j,k) - END DO - END DO - END DO - END DO - - check = P1(N/2,N/2,N/2) - - RETURN - END - - - - SUBROUTINE acoust3d_f77_setup(P1, P2, P3, c, N) - INTEGER N - REAL P1(N,N,N), P2(N,N,N), P3(N,N,N), c(N,N,N) - INTEGER i, j, k - INTEGER cavityLeft, cavityRight, cavityFront, cavityBack, - . cavityTop, cavityBottom, cavityTop2, cavityBottom2 - REAL ci, cj, ck, s2 - -! 
Set up velocity field - - DO k=1,N - DO j=1,N - DO i=1,N/2 - c(i,j,k) = 0.05 - END DO - DO i=N/2+1,N - c(i,j,k) = 0.3 - END DO - END DO - END DO - -! Cavities - - cavityLeft = 3 * N / 7.0 - cavityRight = 4 * N / 7.0 - cavityFront = 3 * N / 7.0 - cavityBack = 4 * N / 7.0 - cavityTop = 5 * N / 7.0 - cavityBottom = 6 * N / 7.0 - cavityTop2 = 1 * N / 7.0 - cavityBottom2 = 2 * N / 7.0 - - DO k=cavityFront, cavityBack - DO j=cavityLeft, cavityRight - DO i=cavityTop, cavityBottom - c(i,j,k) = 0.02 - END DO - DO i=cavityTop2, cavityBottom2 - c(i,j,k) = 0.001 - END DO - END DO - END DO - -! Initial pressure distribution - - ci = N/2.0 - cj = N/2.0 - ck = N/2.0 - s2 = 64.0 * 9.0 / ((N/2.0) ** 2) - - DO k=1,N - DO j=1,N - DO i=1,N - P1(i,j,k) = 0.0 - P2(i,j,k) = exp(- ((i-ci)**2+(j-cj)**2+(k-ck)**2) * s2) - P3(i,j,k) = 0.0 - END DO - END DO - END DO - - CALL acoust3d_f77_setup_check(P2, N) - CALL acoust3d_f77_setup_check(c, N) - - RETURN - END - - - - SUBROUTINE acoust3d_f77_setup_check(A, N) - INTEGER N - REAL A(N,N,N) - INTEGER i, j, k - REAL check - - check = 0.0 - DO k=1,N - DO j=1,N - DO i=1,N - check = check + A(i,j,k) * (i+N*j+N*N*k) - END DO - END DO - END DO - - PRINT *, 'Array check: ', check - - RETURN - END - diff --git a/benchmarks/acou3df2.f b/benchmarks/acou3df2.f deleted file mode 100644 index 935c169f..00000000 --- a/benchmarks/acou3df2.f +++ /dev/null @@ -1,160 +0,0 @@ -! -! Tuned Fortran 77 version -! Optimizations: -! - Rather than four separate arrays, one 4D array is allocated. This -! allows P1, P2, P3, and C to be interlaced in memory, improving data -! locality -! - The stencil is tiled to improve cache usage -! - Instead of copying, the indices into the 4D array are shuffled. - -! INTEGER N, iters -! REAL check - -! N = 112 -! iters = 210 -! CALL acoustic3d_f77Tuned(N,iters,check) -! PRINT *, check -! 
END - - SUBROUTINE acoustic3d_f77Tuned(N, niters, check) - INTEGER N, niters, iter - REAL check - REAL P1(N,N,N), P2(N,N,N), P3(N,N,N), C(N,N,N) - - CALL acoust3d_f77Tuned_setup(P1,P2,P3,C,N) - - DO iter = 1, niters, 3 - CALL acoustic3d_f77Tuned_stencil(P1,P2,P3,C,N) - CALL acoustic3d_f77Tuned_stencil(P2,P3,P1,C,N) - CALL acoustic3d_f77Tuned_stencil(P3,P1,P2,C,N) - END DO - - check = P1(N/2,N/2,N/2) - - RETURN - END - - - - SUBROUTINE acoustic3d_f77Tuned_stencil(P1,P2,P3,C,N) - INTEGER N - REAL P1(N,N,N), P2(N,N,N), P3(N,N,N), C(N,N,N) - INTEGER i,j,k - INTEGER bi,bj,bk,ni,nj,nk,blockSize - - blockSize = 8 - - DO bk=2,N-1,blockSize - nk = min(bk+blockSize-1,N-1) - DO bj=2,N-1,blockSize - nj = min(bj+blockSize-1,N-1) - DO bi=2,N-1,blockSize - ni = min(bi+blockSize-1,N-1) - DO k=bk,nk - DO j=bj,nj - DO i=bi,ni - P3(i,j,k) = (2-6*C(i,j,k))*P2(i,j,k) - . + C(i,j,k) * (P2(i,j-1,k) + P2(i,j+1,k) - . + P2(i-1,j,k) + P2(i+1,j,k) + P2(i,j,k-1) - . + P2(i,j,k+1)) - P1(i,j,k) - END DO - END DO - END DO - END DO - END DO - END DO - - RETURN - END - - - - - SUBROUTINE acoust3d_f77Tuned_setup(P1, P2, P3, c, N) - INTEGER N - REAL P1(N,N,N), P2(N,N,N), P3(N,N,N), c(N,N,N) - INTEGER i, j, k - INTEGER cavityLeft, cavityRight, cavityFront, cavityBack, - . cavityTop, cavityBottom, cavityTop2, cavityBottom2 - REAL ci, cj, ck, s2 - -! Set up velocity field - - DO k=1,N - DO j=1,N - DO i=1,N/2 - c(i,j,k) = 0.05 - END DO - DO i=N/2+1,N - c(i,j,k) = 0.3 - END DO - END DO - END DO - -! Cavities - - cavityLeft = 3 * N / 7.0 - cavityRight = 4 * N / 7.0 - cavityFront = 3 * N / 7.0 - cavityBack = 4 * N / 7.0 - cavityTop = 5 * N / 7.0 - cavityBottom = 6 * N / 7.0 - cavityTop2 = 1 * N / 7.0 - cavityBottom2 = 2 * N / 7.0 - - DO k=cavityFront, cavityBack - DO j=cavityLeft, cavityRight - DO i=cavityTop, cavityBottom - c(i,j,k) = 0.02 - END DO - DO i=cavityTop2, cavityBottom2 - c(i,j,k) = 0.001 - END DO - END DO - END DO - -! 
Initial pressure distribution - - ci = N/2.0 - cj = N/2.0 - ck = N/2.0 - s2 = 64.0 * 9.0 / ((N/2.0) ** 2) - - DO k=1,N - DO j=1,N - DO i=1,N - P1(i,j,k) = 0.0 - P2(i,j,k) = exp(- ((i-ci)**2+(j-cj)**2+(k-ck)**2) * s2) - P3(i,j,k) = 0.0 - END DO - END DO - END DO - - CALL acoust3d_f77Tuned_setup_check(P2, N) - CALL acoust3d_f77Tuned_setup_check(c, N) - - RETURN - END - - - - SUBROUTINE acoust3d_f77Tuned_setup_check(P, N) - INTEGER N - REAL P(N,N,N) - INTEGER i, j, k - REAL check - - check = 0.0 - DO k=1,N - DO j=1,N - DO i=1,N - check = check + P(i,j,k) * (i+N*j+N*N*k) - END DO - END DO - END DO - - PRINT *, 'Array check: ', check - - RETURN - END - diff --git a/benchmarks/acou3df90.f90 b/benchmarks/acou3df90.f90 deleted file mode 100644 index 47bcba76..00000000 --- a/benchmarks/acou3df90.f90 +++ /dev/null @@ -1,95 +0,0 @@ - -SUBROUTINE acoustic3d_f90(N, niters, check) - INTEGER, INTENT( IN ) :: N, niters - REAL, INTENT( OUT ) :: check - REAL, DIMENSION (N,N,N) :: P1, P2, P3, c - INTEGER iter - - CALL acoustic3d_f90_setup(P1, P2, P3, c, N) - - DO iter=1, niters - P3(2:N-1,2:N-1,2:N-1) = (2-6*c(2:N-1,2:N-1,2:N-1)) & - * P2(2:N-1,2:N-1,2:N-1) & - + c(2:N-1,2:N-1,2:N-1)*(P2(1:N-2,2:N-1,2:N-1) + P2(3:N,2:N-1,2:N-1) & - + P2(2:N-1,1:N-2,2:N-1)+P2(2:N-1,3:N,2:N-1) & - + P2(2:N-1,2:N-1,1:N-2)+P2(2:N-1,2:N-1,3:N)) - P1(2:N-1,2:N-1,2:N-1) - P1 = P2 - P2 = P3 - END DO - - check = P1(N/2,N/2,N/2) - - RETURN -END - -SUBROUTINE acoustic3d_f90_setup(P1, P2, P3, c, N) - INTEGER, INTENT( IN ) :: N - REAL, DIMENSION (N,N,N), INTENT( INOUT ) :: P1, P2, P3, c - - INTEGER cavityLeft, cavityRight, cavityFront, cavityBack, cavityTop, & - cavityBottom, cavityTop2, cavityBottom2 - REAL ci, cj, ck, s2 - -! 
Set the velocity field - - c(1:N/2,:,:) = 0.05 - c(N/2+1:N,:,:) = 0.3; - - cavityLeft = 3 * N / 7.0 - cavityRight = 4 * N / 7.0 - cavityFront = 3 * N / 7.0 - cavityBack = 4 * N / 7.0 - cavityTop = 5 * N / 7.0 - cavityBottom = 6 * N / 7.0 - cavityTop2 = 1 * N / 7.0 - cavityBottom2 = 2 * N / 7.0 - - c(cavityTop:cavityBottom,cavityLeft:cavityRight, & - cavityFront:cavityBack) = 0.02; - c(cavityTop2:cavityBottom2,cavityLeft:cavityRight, & - cavityFront:cavityBack) = 0.001; - -! Initial pressure distribution - P1 = 0.0 - P3 = 0.0 - - ci = N/2.0 - cj = N/2.0 - ck = N/2.0 - s2 = 64.0 * 9.0 / ((N/2.0) ** 2) - - DO k=1,N - DO j=1,N - DO i=1,N - P2(i,j,k) = exp(-((i-ci)**2+(j-cj)**2+(k-ck)**2) * s2) - END DO - END DO - END DO - - CALL acoustic3d_f90_setup_check(P2, N) - CALL acoustic3d_f90_setup_check(c, N) - - RETURN -END - - -SUBROUTINE acoustic3d_f90_setup_check(A, N) - INTEGER, INTENT( IN ) :: N - REAL, INTENT( IN ), DIMENSION(N,N,N) :: A - - INTEGER i, j, k - REAL check - - check = 0.0 - DO k=1,N - DO j=1,N - DO i=1,N - check = check + A(i,j,k) * (i+N*j+N*N*k) - END DO - END DO - END DO - - PRINT *, 'Array check: ', check - RETURN -END - diff --git a/benchmarks/acou3df902.f90 b/benchmarks/acou3df902.f90 deleted file mode 100644 index 84f30295..00000000 --- a/benchmarks/acou3df902.f90 +++ /dev/null @@ -1,119 +0,0 @@ -! -! F90 Tuned version -! Optimizations: -! - One 4D array allocated, other arrays are slices of this array. This -! arrangement interlaces the arrays in memory, improving data locality -! - Rather than copying arrays, the indices into the 4D array are copied -! - Rely on compiler for tiling. 
- -SUBROUTINE acoustic3d_f90Tuned(N, niters, check) - INTEGER, INTENT( IN ) :: N, niters - REAL, INTENT( OUT ) :: check - REAL, DIMENSION (N,N,N) :: P1, P2, P3, C - INTEGER iter - - CALL acoustic3d_f90Tuned_setup(P1, P2, P3, c, N) - - IF (MODULO(niters,3) > 0) THEN - PRINT *, 'Warning: In acoustic3d_f90Tuned: niters mod 3 != 0' - PRINT *, 'Will do fewer iterations, benchmark result will be off' - ENDIF - - DO iter=1, niters, 3 - CALL acoustic3d_f90Tuned_stencil(P1, P2, P3, C, N) - CALL acoustic3d_f90Tuned_stencil(P2, P3, P1, C, N) - CALL acoustic3d_f90Tuned_stencil(P3, P1, P2, C, N) - END DO - - check = P1(N/2,N/2,N/2) - - RETURN -END - -SUBROUTINE acoustic3d_f90Tuned_stencil(P1, P2, P3, C, N) - INTEGER, INTENT( IN ) :: N - REAL, DIMENSION (N,N,N), INTENT( IN ) :: P1, P2, C - REAL, DIMENSION (N,N,N), INTENT( OUT ) :: P3 - - P3(2:N-1,2:N-1,2:N-1) = (2-6*C(2:N-1,2:N-1,2:N-1)) & - * P2(2:N-1,2:N-1,2:N-1) + C(2:N-1,2:N-1,2:N-1) & - * (P2(1:N-2,2:N-1,2:N-1) + P2(3:N,2:N-1,2:N-1) & - + P2(2:N-1,1:N-2,2:N-1) + P2(2:N-1,3:N,2:N-1) & - + P2(2:N-1,2:N-1,1:N-2)+P2(2:N-1,2:N-1,3:N)) & - - P1(2:N-1,2:N-1,2:N-1) - RETURN -END - - -SUBROUTINE acoustic3d_f90Tuned_setup(P1, P2, P3, c, N) - INTEGER, INTENT( IN ) :: N - REAL, DIMENSION (N,N,N), INTENT( INOUT ) :: P1, P2, P3, c - - INTEGER cavityLeft, cavityRight, cavityFront, cavityBack, cavityTop, & - cavityBottom, cavityTop2, cavityBottom2 - REAL ci, cj, ck, s2 - -! Set the velocity field - - c(1:N/2,:,:) = 0.05 - c(N/2+1:N,:,:) = 0.3; - - cavityLeft = 3 * N / 7.0 - cavityRight = 4 * N / 7.0 - cavityFront = 3 * N / 7.0 - cavityBack = 4 * N / 7.0 - cavityTop = 5 * N / 7.0 - cavityBottom = 6 * N / 7.0 - cavityTop2 = 1 * N / 7.0 - cavityBottom2 = 2 * N / 7.0 - - c(cavityTop:cavityBottom,cavityLeft:cavityRight, & - cavityFront:cavityBack) = 0.02; - c(cavityTop2:cavityBottom2,cavityLeft:cavityRight, & - cavityFront:cavityBack) = 0.001; - -! 
Initial pressure distribution - P1 = 0.0 - P3 = 0.0 - - ci = N/2.0 - cj = N/2.0 - ck = N/2.0 - s2 = 64.0 * 9.0 / ((N/2.0) ** 2) - - DO k=1,N - DO j=1,N - DO i=1,N - P2(i,j,k) = exp(-((i-ci)**2+(j-cj)**2+(k-ck)**2) * s2) - END DO - END DO - END DO - - CALL acoustic3d_f90Tuned_setup_check(P2, N) - CALL acoustic3d_f90Tuned_setup_check(c, N) - - RETURN -END - - -SUBROUTINE acoustic3d_f90Tuned_setup_check(P, N) - INTEGER, INTENT( IN ) :: N - REAL, INTENT( IN ), DIMENSION(N,N,N) :: P - - INTEGER i, j, k - REAL check - - check = 0.0 - DO k=1,N - DO j=1,N - DO i=1,N - check = check + P(i,j,k) * (i+N*j+N*N*k) - END DO - END DO - END DO - - PRINT *, 'Array check: ', check - - RETURN -END - diff --git a/benchmarks/acoustic.cpp b/benchmarks/acoustic.cpp deleted file mode 100644 index a3a245d5..00000000 --- a/benchmarks/acoustic.cpp +++ /dev/null @@ -1,369 +0,0 @@ -//#define BZ_DISABLE_RESTRICT -#define BZ_ARRAY_2D_NEW_STENCIL_TILING - -#include -#include -#include -#include - -#ifdef BZ_HAVE_STD - #include -#else - #include -#endif - -using namespace blitz; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define echo_f90 echo_f90_ - #define echo_f77 echo_f77_ - #define echo_f90_tuned echo_f90_tuned_ - #define echo_f77tuned echo_f77tuned_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define echo_f90 echo_f90__ - #define echo_f77 echo_f77__ - #define echo_f90_tuned echo_f90_tuned__ - #define echo_f77tuned echo_f77tuned__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define echo_f90 ECHO_F90 - #define echo_f77 ECHO_F77 - #define echo_f90_tuned ECHO_F90_TUNED - #define echo_f77tuned ECHO_F77TUNED -#endif - -extern "C" { -void echo_f90(int& N, int& niters, float& check); -void echo_f77(int& N, int& niters, float& check); -void echo_f90_tuned(int& N, int& niters, float& check); -void echo_f77tuned(int& N, int& niters, float& check); -} - -void f77(BenchmarkExt&); -void f90(BenchmarkExt&); -void f77_tuned(BenchmarkExt&); -void 
f90_tuned(BenchmarkExt&); - -void echo_BlitzInterlacedCycled(BenchmarkExt&); -void echo_BlitzCycled(BenchmarkExt&); -void echo_BlitzRaw(BenchmarkExt&); -void echo_BlitzStencil(BenchmarkExt&); - -int main() -{ - Timer timer; - float check; - int numBenchmarks = 6; -#ifdef FORTRAN_90 - numBenchmarks+=2; -#endif - - BenchmarkExt bench("Acoustic 2D Benchmark", numBenchmarks); - const int numSizes=7; - bench.setNumParameters(numSizes); - Vector parameters(numSizes); - parameters=10*pow(2.0,tensor::i); - Vector flops(numSizes); - flops=(parameters-2)*(parameters-2) * 9.0; - Vector iters(numSizes); - // iters must be divisible by 3 for tuned fortran versions - iters=cast(100000000/flops)*3; - - bench.setParameterVector(parameters); - bench.setParameterDescription("Matrix size"); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - echo_BlitzRaw(bench); - echo_BlitzStencil(bench); - -#if 0 - echo_BlitzInterlaced(bench, c); -#endif - - echo_BlitzCycled(bench); - echo_BlitzInterlacedCycled(bench); - -#ifdef FORTRAN_90 - f90(bench); - f90_tuned(bench); -#endif - - f77(bench); - f77_tuned(bench); - - bench.endBenchmarking(); - bench.saveMatlabGraph("acoustic.m"); - - return 0; -} - -void checkArray(Array& A, int N) -{ - float check = 0.0; - for (int i=0; i < N; ++i) - for (int j=0; j < N; ++j) - check += ((i+1)*N + j + 1) * A(i,j); - - cout << "Array check: " << check << endl; -} - -void setInitialConditions(Array& c, Array& P1, - Array& P2, Array& P3, int N); - - -void echo_BlitzRaw(BenchmarkExt&bench) -{ - bench.beginImplementation("Blitz++ (raw)"); - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - int niters = bench.getIterations(); - - Array P1(N,N), P2(N,N), P3(N,N), c(N,N); - Range I(1,N-2), J(1,N-2); - - setInitialConditions(c, P1, P2, P3, N); - checkArray(P2, N); - checkArray(c, N); - - bench.start(); - for (int iter=0; iter < niters; ++iter) - { - 
P3(I,J) = (2-4*c(I,J)) * P2(I,J) - + c(I,J)*(P2(I-1,J) + P2(I+1,J) + P2(I,J-1) + P2(I,J+1)) - - P1(I,J); - - P1 = P2; - P2 = P3; - } - bench.stop(); - - cout << P1(N/2-1,(7*N)/8-1) << endl; - } - - bench.endImplementation(); - - -#if 0 -ofstream ofs("testecho.m"); -ofs << "A = ["; -for (int i=0; i < N; ++i) -{ - for (int j=0; j < N; ++j) - { - ofs << int(8192*P2(i,j)+1024*c(i,j)) << " "; - } - if (i < N-1) - ofs << ";" << endl; -} -ofs << "];" << endl; -#endif - -} - -void echo_BlitzCycled(BenchmarkExt&bench) -{ - bench.beginImplementation("Blitz++ (cycled)"); - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - int niters = bench.getIterations(); - cout << bench.currentImplementation() << " N=" << N << endl; - - Array P1(N,N), P2(N,N), P3(N,N), c(N,N); - Range I(1,N-2), J(1,N-2); - - setInitialConditions(c, P1, P2, P3, N); - checkArray(P2, N); - checkArray(c, N); - - bench.start(); - for (int iter=0; iter < niters; ++iter) - { - P3(I,J) = (2-4*c(I,J)) * P2(I,J) - + c(I,J)*(P2(I-1,J) + P2(I+1,J) + P2(I,J-1) + P2(I,J+1)) - - P1(I,J); - - cycleArrays(P1,P2,P3); - } - bench.stop(); - - cout << P1(N/2-1,(7*N)/8-1) << endl; - } - - bench.endImplementation(); -} - -void echo_BlitzInterlacedCycled(BenchmarkExt&bench) -{ - bench.beginImplementation("Blitz++ (interlaced & cycled)"); - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - int niters = bench.getIterations(); - cout << bench.currentImplementation() << " N=" << N << endl; - - Array P1, P2, P3, c; - allocateArrays(shape(N,N), P1, P2, P3, c); - Range I(1,N-2), J(1,N-2); - - setInitialConditions(c, P1, P2, P3, N); - checkArray(P2, N); - checkArray(c, N); - - bench.start(); - for (int iter=0; iter < niters; ++iter) - { - P3(I,J) = (2-4*c(I,J)) * P2(I,J) - + c(I,J)*(P2(I-1,J) + P2(I+1,J) + P2(I,J-1) + P2(I,J+1)) - - P1(I,J); - - cycleArrays(P1,P2,P3); - } - bench.stop(); - - cout << P1(N/2-1,(7*N)/8-1) << endl; - } - - bench.endImplementation(); -} - 
-BZ_DECLARE_STENCIL4(acoustic2D,P1,P2,P3,c) - P3 = 2 * P2 + c * Laplacian2D_stencilop(P2) - P1; -BZ_STENCIL_END - -void echo_BlitzStencil(BenchmarkExt&bench) -{ - bench.beginImplementation("Blitz++ (stencil)"); - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - int niters = bench.getIterations(); - cout << bench.currentImplementation() << " N=" << N << endl; - - Array P1, P2, P3, c; - allocateArrays(shape(N,N), P1, P2, P3, c); - - setInitialConditions(c, P1, P2, P3, N); - checkArray(P2, N); - checkArray(c, N); - - bench.start(); - for (int iter=0; iter < niters; ++iter) - { - applyStencil(acoustic2D(), P1, P2, P3, c); - cycleArrays(P1,P2,P3); - } - bench.stop(); - - cout << P1(N/2-1,(7*N)/8-1) << endl; - } - - bench.endImplementation(); -} - -void setInitialConditions(Array& c, Array& P1, - Array& P2, Array& P3, int N) -{ - // Set the velocity field - c = 0.2; - - // Solid block with which the pulse collides - int blockLeft = 0; - int blockRight = int(2*N/5.0-1); - int blockTop = int(N/3-1); - int blockBottom = int(2*N/3.0-1); - c(Range(blockTop,blockBottom),Range(blockLeft,blockRight)) = 0.5; - - // Channel directing the pulse leftwards - int channelLeft = int(4*N/5.0-1); - int channelRight = N-1; - int channel1Height = int(3*N/8.0-1); - int channel2Height = int(5*N/8.0-1); - c(channel1Height,Range(channelLeft,channelRight)) = 0.0; - c(channel2Height,Range(channelLeft,channelRight)) = 0.0; - - // Initial pressure distribution: gaussian pulse inside the channel - using namespace blitz::tensor; - int cr = int(N/2-1); - int cc = int(7.0*N/8.0-1); - // pow2 is not defined for pod types. 
- float s2 = 64.0 * 9.0 / pow(N/2.0,2); - cout << "cr = " << cr << " cc = " << cc << " s2 = " << s2 << endl; - P1 = 0.0; - P2 = exp(-(pow2(i-cr)+pow2(j-cc)) * s2); - P3 = 0.0; -} - - -void f77(BenchmarkExt&bench) -{ - bench.beginImplementation("Fortran77"); - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - int niters = bench.getIterations(); - cout << bench.currentImplementation() << " N=" << N << endl; - float check; - bench.start(); - echo_f77(N, niters, check); - bench.stop(); - cout << check << endl; - } - bench.endImplementation(); -}; - -void f77_tuned(BenchmarkExt&bench) -{ - bench.beginImplementation("Fortran77 (tuned)"); - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - int niters = bench.getIterations(); - cout << bench.currentImplementation() << " N=" << N << endl; - float check; - bench.start(); - echo_f77tuned(N, niters, check); - bench.stop(); - cout << check << endl; - } - - bench.endImplementation(); -}; - -void f90(BenchmarkExt&bench) -{ - bench.beginImplementation("Fortran90"); - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - int niters = bench.getIterations(); - cout << bench.currentImplementation() << " N=" << N << endl; - float check; - bench.start(); - echo_f90(N, niters, check); - bench.stop(); - cout << check << endl; - } - - bench.endImplementation(); -}; -void f90_tuned(BenchmarkExt&bench) -{ - bench.beginImplementation("Fortran90 (tuned)"); - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - int niters = bench.getIterations(); - cout << bench.currentImplementation() << " N=" << N << endl; - float check; - bench.start(); - echo_f90_tuned(N, niters, check); - bench.stop(); - cout << check << endl; - } - - bench.endImplementation(); -}; diff --git a/benchmarks/acousticf.f b/benchmarks/acousticf.f deleted file mode 100644 index 868f582b..00000000 --- a/benchmarks/acousticf.f +++ /dev/null @@ -1,121 +0,0 @@ 
- -! INTEGER N, iters -! REAL check - -! N = 128 -! iters = N*3 -! CALL echo_f77(N,iters,check) -! PRINT *, check -! END - - - - SUBROUTINE echo_f77(N, niters, check) - INTEGER N, niters, iter - REAL check - REAL P1(N,N), P2(N,N), P3(N,N), C(N,N) - INTEGER i, j - - CALL echo_f77_set(c, P1, P2, P3, N) - CALL checkArray(P2, N) - CALL checkArray(c, N) - - DO iter=1, niters - DO j=2,N-1 - DO i=2,N-1 - P3(i,j) = (2-4*c(i,j))*P2(i,j) + c(i,j)*(P2(i,j-1) - . + P2(i,j+1) + P2(i-1,j) + P2(i+1,j)) - P1(i,j) - END DO - END DO - - DO j=1,N - DO i=1,N - P1(i,j) = P2(i,j) - P2(i,j) = P3(i,j) - END DO - END DO - END DO - - check = P1(N/2,7*N/8) - - RETURN - END - - - SUBROUTINE echo_f77_set(c, P1, P2, P3, N) - INTEGER N - REAL c(N,N), P1(N,N), P2(N,N), P3(N,N) - - INTEGER i, j, blockLeft, blockRight, blockTop, blockBottom - INTEGER channelLeft, channelRight, channel1Height, channel2Height - INTEGER cr, cc - REAL s2 - -! Default velocity in the air - - DO j=1,N - DO i=1,N - c(i,j) = 0.2 - END DO - END DO - -! Solid block with which the pulse collids - - blockLeft = 1 - blockRight = 2 * N / 5.0 - blockTop = N / 3.0 - blockBottom = 2 * N / 3.0 - - DO j=blockLeft,blockRight - DO i=blockTop,blockBottom - c(i,j) = 0.5 - END DO - END DO - -! Channel directing the pulse leftwards - - channelLeft = 4 * N / 5.0 - channelRight = N - channel1Height = 3 * N / 8.0 - channel2Height = 5 * N / 8.0 - - DO j = channelLeft,channelRight - c(channel1Height,j) = 0.0 - c(channel2Height,j) = 0.0 - END DO - -! 
Initial pressure distribution: a gaussian pulse inside the channel - - cr = N / 2 - cc = 7 * N / 8.0 - s2 = 64.0 * 9.0 / ((N/2.0) ** 2) - - print *, 'cr = ', cr, ' cc = ', cc, ' s2 = ', s2 - - DO j=1,N - DO i=1,N - P1(i,j) = 0.0 - P2(i,j) = exp(-((i-cr)**2 + (j-cc)**2) * s2) - P3(i,j) = 0.0 - END DO - END DO - - RETURN - END - - SUBROUTINE checkArray(A, N) - INTEGER N - REAL A(N,N) - - INTEGER i,j - REAL check - check = 0.0 - DO j=1,N - DO i=1,N - check = check + (i*n+j)*A(i,j) - END DO - END DO - - PRINT *, 'Array check: ', check - RETURN - END diff --git a/benchmarks/acousticf2.f b/benchmarks/acousticf2.f deleted file mode 100644 index e117bffc..00000000 --- a/benchmarks/acousticf2.f +++ /dev/null @@ -1,163 +0,0 @@ -! INTEGER N, iters -! REAL check - -! N = 128 -! iters = N*3 -! CALL echo_f77Tuned(N,iters,check) -! PRINT *, check -! END - - - - SUBROUTINE echo_f77Tuned(N, niters, check) - INTEGER N, niters, iter - REAL check - REAL P1(N,N), P2(N,N), P3(N,N), C(N,N) - INTEGER i, j - INTEGER nitersd3, remainder - - CALL echo_f77_set2(c, P1, P2, P3, N) - CALL checkArray2(P2, N) - CALL checkArray2(c, N) - - nitersd3 = niters / 3 - remainder = niters - 3 * nitersd3 - IF (remainder .NE. 0) THEN - PRINT *, 'niters should be divisible by 3, results will be off' - ENDIF - - DO iter=1, niters, 3 - CALL stencil5(c, P1, P2, P3, N) - CALL stencil5(c, P2, P3, P1, N) - CALL stencil5(c, P3, P1, P2, N) - END DO - - check = P1(N/2,7*N/8) - - RETURN - END - - - - SUBROUTINE echo_f77_set2(c, P1, P2, P3, N) - INTEGER N - REAL c(N,N), P1(N,N), P2(N,N), P3(N,N) - - INTEGER i, j, blockLeft, blockRight, blockTop, blockBottom - INTEGER channelLeft, channelRight, channel1Height, channel2Height - INTEGER cr, cc - REAL s2 - -! Default velocity in the air - - DO j=1,N - DO i=1,N - c(i,j) = 0.2 - END DO - END DO - -! 
Solid block with which the pulse collids - - blockLeft = 1 - blockRight = 2 * N / 5.0 - blockTop = N / 3.0 - blockBottom = 2 * N / 3.0 - - DO j=blockLeft,blockRight - DO i=blockTop,blockBottom - c(i,j) = 0.5 - END DO - END DO - -! Channel directing the pulse leftwards - - channelLeft = 4 * N / 5.0 - channelRight = N - channel1Height = 3 * N / 8.0 - channel2Height = 5 * N / 8.0 - - DO j = channelLeft,channelRight - c(channel1Height,j) = 0.0 - c(channel2Height,j) = 0.0 - END DO - -! Initial pressure distribution: a gaussian pulse inside the channel - - cr = N / 2 - cc = 7 * N / 8.0 - s2 = 64.0 * 9.0 / ((N/2.0) ** 2) - - print *, 'cr = ', cr, ' cc = ', cc, ' s2 = ', s2 - - DO j=1,N - DO i=1,N - P1(i,j) = 0.0 - P2(i,j) = exp(-((i-cr)**2 + (j-cc)**2) * s2) - P3(i,j) = 0.0 - END DO - END DO - - RETURN - END - - - SUBROUTINE stencil5(c, P1, P2, P3, N) - INTEGER N - REAL c(N,N), P1(N,N), P2(N,N), P3(N,N) - REAL tmp1, tmp2, tmp3 - INTEGER TileWidth, TileHeight, bj, nj, bi, ni, i - - TileWidth = 16 - TileHeight = 3 - - DO bj=2, N-1, TileWidth - nj = MIN(bj+TileWidth-1, N-1) - - DO bi=2, N-1, TileHeight - IF (bi+TileHeight .LT. N) THEN - i = bi - DO j=bj,nj - tmp1 = (2-4*c(i,j))*P2(i,j) + c(i,j)*(P2(i,j-1) - . + P2(i,j+1) + P2(i-1,j) + P2(i+1,j)) - P1(i,j) - tmp2 = (2-4*c(i+1,j))*P2(i+1,j) + c(i+1,j) - . *(P2(i+1,j-1) + P2(i+1,j+1) + P2(i,j) + P2(i+2,j)) - . - P1(i+1,j) - tmp3 = (2-4*c(i+2,j))*P2(i+2,j) + c(i+2,j) - . *(P2(i+2,j-1) + P2(i+2,j+1) + P2(i+1,j) + P2(i+3,j)) - . - P1(i+2,j) - P3(i,j) = tmp1 - P3(i+1,j) = tmp2 - P3(i+2,j) = tmp3 - END DO - ELSE - DO i=bi, N-1 - DO j=bj,nj - P3(i,j) = (2-4*c(i,j))*P2(i,j) + c(i,j)*(P2(i,j-1) - . 
+ P2(i,j+1) + P2(i-1,j) + P2(i+1,j)) - P1(i,j) - END DO - END DO - END IF - END DO - END DO - - RETURN - END - - - SUBROUTINE checkArray2(A, N) - INTEGER N - REAL A(N,N) - - INTEGER i,j - REAL check - check = 0.0 - DO j=1,N - DO i=1,N - check = check + (i*n+j)*A(i,j) - END DO - END DO - - PRINT *, 'Array check: ', check - RETURN - END - diff --git a/benchmarks/acousticf90.f90 b/benchmarks/acousticf90.f90 deleted file mode 100644 index 7c47069a..00000000 --- a/benchmarks/acousticf90.f90 +++ /dev/null @@ -1,101 +0,0 @@ -!INTEGER N, niters -!REAL check -!N = 128 -!niters = 128*3 -!CALL echo_f90(N, niters, check) -!PRINT *, check -!END - -SUBROUTINE echo_f90(N, niters, check) - INTEGER, INTENT( IN ) :: N, niters - REAL, INTENT( OUT ) :: check - - REAL, DIMENSION (N,N) :: P1, P2, P3, c - INTEGER iter - - CALL echo_f90_setupInitialConditions(c, P1, P2, P3, N) - CALL checkArray_f90(P2, N) - CALL checkArray_f90(c, N) - - DO iter=1, niters - P3(2:N-1,2:N-1) = (2-4*c(2:N-1,2:N-1)) * P2(2:N-1,2:N-1) & - + c(2:N-1,2:N-1)*(P2(1:N-2,2:N-1) + P2(3:N,2:N-1) & - + P2(2:N-1,1:N-2) + P2(2:N-1,3:N)) - P1(2:N-1,2:N-1) - P1 = P2 - P2 = P3 - END DO - - check = P1(N/2,7*N/8) - - RETURN -END - - - - - -SUBROUTINE echo_f90_setupInitialConditions(c, P1, P2, P3, N) - INTEGER, INTENT( IN ) :: N - REAL, DIMENSION (N,N) :: P1(N,N), P2(N,N), P3(N,N), c(N,N) - - INTEGER blockLeft, blockRight, blockTop, blockBottom - INTEGER channelLeft, channelRight, channel1Height, channel2Height - REAL cr, cc - INTEGER i, j - REAL s2 - - ! Set the velocity field - c = 0.2 - - ! Solid block with which the pulse collides - blockLeft = 1 - blockRight = 2 * N / 5.0 - blockTop = N / 3.0 - blockBottom = 2 * N / 3.0 - c(blockTop:blockBottom, blockLeft:blockRight) = 0.5 - - ! 
Channel directing the pulse leftwards - channelLeft = 4 * N / 5.0 - channelRight = N - channel1Height = 3 * N / 8.0 - channel2Height = 5 * N / 8.0 - - c(channel1Height,channelLeft:channelRight) = 0.0; - c(channel2Height,channelLeft:channelRight) = 0.0; - - ! Initial pressure distribution: a gaussian pulse inside the channel - cr = N / 2.0 - cc = 7.0 * N / 8.0 - s2 = 64.0 * 9.0 / ((N / 2.0) ** 2) - - DO j=1,N - DO i=1,N - P2(i,j) = exp(-((i-cr)**2 + (j-cc)**2) * s2) - END DO - END DO - - P1 = 0.0 - P3 = 0.0 -END - - - - - - SUBROUTINE checkArray_f90(A, N) - INTEGER N - REAL, DIMENSION(N,N) :: A - - INTEGER i,j - REAL check - check = 0.0 - DO j=1,N - DO i=1,N - check = check + (i*n+j)*A(i,j) - END DO - END DO - - PRINT *, 'Array check: ', check - RETURN - END - diff --git a/benchmarks/acousticf902.f90 b/benchmarks/acousticf902.f90 deleted file mode 100644 index db2ee345..00000000 --- a/benchmarks/acousticf902.f90 +++ /dev/null @@ -1,116 +0,0 @@ -!INTEGER N, niters -!REAL check -!N = 128 -!niters = 128*3 -!CALL echo_f90(N, niters, check) -!PRINT *, check -!END - -SUBROUTINE echo_f90_tuned(N, niters, check) - INTEGER, INTENT( IN ) :: N, niters - REAL, INTENT( OUT ) :: check - - REAL, DIMENSION (N,N) :: P1, P2, P3, c - INTEGER iter - - CALL echo_f90_tuned_setup(c, P1, P2, P3, N) - CALL checkArray_f90_tuned(P2, N) - CALL checkArray_f90_tuned(c, N) - - IF (MODULO(niters, 3) > 0) THEN - PRINT *, 'niters should be divisible by 3, results will be off' - ENDIF - - DO iter=1, niters, 3 - CALL stencil_f90(c, P1, P2, P3, N) - CALL stencil_f90(c, P2, P3, P1, N) - CALL stencil_f90(c, P3, P1, P2, N) - END DO - - check = P1(N/2,7*N/8) - - RETURN -END - - - -SUBROUTINE stencil_f90(c, P1, P2, P3, N) - INTEGER, INTENT( IN ) :: N - REAL, DIMENSION(N,N), INTENT( INOUT ) :: c, P1, P2, P3 - - P3(2:N-1,2:N-1) = (2-4*c(2:N-1,2:N-1)) * P2(2:N-1,2:N-1) & - + c(2:N-1,2:N-1)*(P2(1:N-2,2:N-1) + P2(3:N,2:N-1) & - + P2(2:N-1,1:N-2) + P2(2:N-1,3:N)) - P1(2:N-1,2:N-1) - - RETURN -END - - - - - 
-SUBROUTINE echo_f90_tuned_setup(c, P1, P2, P3, N) - INTEGER, INTENT( IN ) :: N - REAL, DIMENSION (N,N) :: P1(N,N), P2(N,N), P3(N,N), c(N,N) - - INTEGER blockLeft, blockRight, blockTop, blockBottom - INTEGER channelLeft, channelRight, channel1Height, channel2Height - REAL cr, cc - INTEGER i, j - REAL s2 - - ! Set the velocity field - c = 0.2 - - ! Solid block with which the pulse collides - blockLeft = 1 - blockRight = 2 * N / 5.0 - blockTop = N / 3.0 - blockBottom = 2 * N / 3.0 - c(blockTop:blockBottom, blockLeft:blockRight) = 0.5 - - ! Channel directing the pulse leftwards - channelLeft = 4 * N / 5.0 - channelRight = N - channel1Height = 3 * N / 8.0 - channel2Height = 5 * N / 8.0 - - c(channel1Height,channelLeft:channelRight) = 0.0; - c(channel2Height,channelLeft:channelRight) = 0.0; - - ! Initial pressure distribution: a gaussian pulse inside the channel - cr = N / 2.0 - cc = 7.0 * N / 8.0 - s2 = 64.0 * 9.0 / ((N / 2.0) ** 2) - - DO j=1,N - DO i=1,N - P2(i,j) = exp(-((i-cr)**2 + (j-cc)**2) * s2) - END DO - END DO - - P1 = 0.0 - P3 = 0.0 -END - - - - - -SUBROUTINE checkArray_f90_tuned(A, N) -INTEGER N -REAL, DIMENSION(N,N) :: A - -INTEGER i,j -REAL check -check = 0.0 -DO j=1,N - DO i=1,N - check = check + (i*n+j)*A(i,j) - END DO -END DO - -PRINT *, 'Array check: ', check -RETURN -END - diff --git a/benchmarks/arrdaxpy.cpp b/benchmarks/arrdaxpy.cpp deleted file mode 100644 index 0714a2a5..00000000 --- a/benchmarks/arrdaxpy.cpp +++ /dev/null @@ -1,150 +0,0 @@ -// Array DAXPY benchmark - -#include -#include -#include -#include - -namespace blitz { -extern void sink(); -} - -using namespace blitz; - -#ifdef BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES - #define arrdaxpyf arrdaxpyf_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define arrdaxpyf arrdaxpyf__ -#endif - -extern "C" { - void arrdaxpyf(double* A, double* B, int& N, double& a); -} - -void arrdaxpyFortran77Version(BenchmarkExt& bench); -void arrdaxpyBlitzVersion(BenchmarkExt& bench); 
- -int main() -{ - BenchmarkExt bench("Array DAXPY", 2); - - const int numSizes = 8; - - bench.setNumParameters(numSizes); - bench.setDependentVariable("flops"); - - Vector parameters(numSizes); - Vector iters(numSizes); - Vector flops(numSizes); - - parameters = pow(2.,tensor::i); - cout << parameters; - iters = 100*16*32*8*8*8/pow3(parameters); - cout << iters; - flops = pow3(parameters) * 2 * 2; - cout << flops; - - bench.setParameterVector(parameters); - bench.setParameterDescription("3D Array size"); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - - bench.beginBenchmarking(); - arrdaxpyBlitzVersion(bench); - arrdaxpyFortran77Version(bench); - bench.endBenchmarking(); - - bench.saveMatlabGraph("arrdaxpy.m"); - - return 0; -} - -void initializeRandomDouble(double* data, int numElements) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[i] = rnd.random(); -} - -void arrdaxpyBlitzVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Blitz++"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - - cout << "Blitz++: N = " << N << endl; - cout.flush(); - - long iters = bench.getIterations(); - - Array A(N,N,N), B(N,N,N); - initializeRandomDouble(A.data(), N*N*N); - initializeRandomDouble(B.data(), N*N*N); - TinyVector size = N-2; - double a = 0.34928313; - double b = - a; - - bench.start(); - for (long i=0; i < iters; ++i) - { - A += a * B; - A += b * B; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -void arrdaxpyFortran77Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - - cout << "Fortran 77: N = " << N << endl; - cout.flush(); - - int iters = (int)bench.getIterations(); - - size_t arraySize = size_t(N) * size_t(N) * N; - - double* A = new 
double[arraySize]; - double* B = new double[arraySize]; - - initializeRandomDouble(A, arraySize); - initializeRandomDouble(B, arraySize); - - double a = 0.34928313; - - for (long i=0; i < iters; ++i) - { - arrdaxpyf(A,B,N,a); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - - delete [] A; - delete [] B; - } - - bench.endImplementation(); -} diff --git a/benchmarks/arrdaxpyf.f b/benchmarks/arrdaxpyf.f deleted file mode 100644 index 3df64ba0..00000000 --- a/benchmarks/arrdaxpyf.f +++ /dev/null @@ -1,26 +0,0 @@ - subroutine arrdaxpyf(A, B, N, c1) - integer N, iters - double precision A(N,N,N), B(N,N,N) - double precision c1 - double precision c2 - c2 = - c1 - - do k=2,N-1 - do j=2,N-1 - do i=2,N-1 - A(i,j,k) = A(i,j,k) + c1 * B(i,j,k); - enddo - enddo - enddo - - do k=2,N-1 - do j=2,N-1 - do i=2,N-1 - A(i,j,k) = A(i,j,k) + c2 * B(i,j,k); - enddo - enddo - enddo - - return - end - diff --git a/benchmarks/arrexpr1.cpp b/benchmarks/arrexpr1.cpp deleted file mode 100644 index 8010ac16..00000000 --- a/benchmarks/arrexpr1.cpp +++ /dev/null @@ -1,85 +0,0 @@ -// Array expression benchmark - -#include -#include - -using namespace blitz; - -void blitzVersion(BenchmarkExt& bench); -void CVersion(BenchmarkExt& bench); - -int main() -{ - BenchmarkExt bench("Array expression", 2); - - bench.beginBenchmarking(); - blitzVersion(bench); - CVersion(bench); - bench.endBenchmarking(); - - bench.saveMatlabGraph("arrexpr1.m"); - - return 0; -} - -void blitzVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Blitz++"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - - cout << "Blitz++: N = " << N << endl; - - long iters = bench.getIterations(); - - Array x(N); - - // Tickle - x = 0.; - firstIndex i; - - bench.start(); - for (long it=0; it < iters; ++it) - { - x = i * i; - } - bench.stop(); - } - - bench.endImplementation(); -} - -void CVersion(BenchmarkExt& 
bench) -{ - bench.beginImplementation("C"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - - cout << "C: N = " << N << endl; - - long iters = bench.getIterations(); - - double* x = new double[N]; - - // Tickle - for (int i=0; i < N; ++i) - x[i] = 0; - - bench.start(); - for (long it=0; it < iters; ++it) - { - for (int i=0; i < N; ++i) - x[i] = i * i; - } - bench.stop(); - - delete [] x; - } - - bench.endImplementation(); -} - diff --git a/benchmarks/arrexpr1.m b/benchmarks/arrexpr1.m deleted file mode 100644 index a63dee2a..00000000 --- a/benchmarks/arrexpr1.m +++ /dev/null @@ -1,28 +0,0 @@ -% This matlab file generated automatically by class Benchmark -% of the Blitz++ class library. - -parm = [ 1.000000000000e+00 3.000000000000e+00 5.000000000000e+00 1.000000000000e+01 1.700000000000e+01 3.100000000000e+01 5.600000000000e+01 1.000000000000e+02 1.770000000000e+02 3.160000000000e+02 5.620000000000e+02 1.000000000000e+03 1.778000000000e+03 3.162000000000e+03 5.623000000000e+03 1.000000000000e+04 1.778200000000e+04 3.162200000000e+04 5.623400000000e+04 ]; - -Mf = [ 2.500000000000e+01 5.555555555556e+00 ; -8.333300000000e+00 6.249975000000e+00 ; -8.333333333333e+00 6.250000000000e+00 ; -6.250000000000e+00 6.250000000000e+00 ; -6.249837500000e+00 6.249837500000e+00 ; -6.249987500000e+00 5.555544444444e+00 ; -6.249600000000e+00 6.249600000000e+00 ; -6.250000000000e+00 6.250000000000e+00 ; -6.248100000000e+00 5.553866666667e+00 ; -6.248900000000e+00 6.248900000000e+00 ; -5.551311111111e+00 6.245225000000e+00 ; -6.250000000000e+00 6.250000000000e+00 ; -6.245225000000e+00 5.551311111111e+00 ; -5.551066666667e+00 5.551066666667e+00 ; -5.498044444444e+00 5.498044444444e+00 ; -5.555555555556e+00 5.000000000000e+00 ; -4.978960000000e+00 4.978960000000e+00 ; -4.743300000000e+00 5.929125000000e+00 ; -4.998577777778e+00 4.498720000000e+00 ] ; - -semilogx(parm,Mf), title('Array expression'), - xlabel('Vector length'), ylabel('Mflops/s') 
-legend('Blitz++', 'C') diff --git a/benchmarks/cfd.cpp b/benchmarks/cfd.cpp deleted file mode 100644 index 43c5176c..00000000 --- a/benchmarks/cfd.cpp +++ /dev/null @@ -1,129 +0,0 @@ -#include -#include - -using namespace blitz; - -/* - * The current implementation of stencil objects forces these variables - * to be placed in global scope. Ugh. This restriction will be removed - * eventually. - */ -double rho; // Density of fluid -double recip_rho; // 1/rho -double eta; // Kinematic viscosity -double time_now; // Elapsed seconds -double delta_t; // Time step -double volume; // Volume of a cell -double airPressure; // Air pressure (Pa) -double spatialStep; // Grid element size -double gravity; // Acceleration due to gravity -double gravityPressureGradient; // Pressure gradient due to gravity -/* - * The "geometry" object specifies how an array is mapped into real-world - * space. In this case, "UniformCubicGeometry" is used, which means that - * the real-world grid is orthogonal, regularly spaced, with the same spatial - * step in each dimension. - */ - -UniformCubicGeometry<3> geom; // Geometry -/* - * Some typedefs to make life easier. - */ - -typedef TinyVector vector3d; -typedef Array vectorField; -typedef Array scalarField; - -/*********** Timestep the velocity field ************ - * This is a 63-point stencil. For example, Laplacian3DVec4 turns into - * a 45-point stencil: each 2nd derivative is a 5-point stencil, and - * there are 9 of these derivatives to take the Laplacian of a 3D vector - * field. 
- */ - -BZ_DECLARE_STENCIL5(timestep, V, nextV, P, advect, force) - - nextV = *V + delta_t * ( recip_rho * ( - eta * Laplacian3DVec4(V,geom) - grad3D4(P, geom) + *force) - *advect); - -BZ_END_STENCIL -/* - * Allocate arrays and set their initial state - */ -void setup(const int N, vectorField& V, vectorField& nextV, scalarField& P, - scalarField& P_rhs, vectorField& advect, vectorField& force) -{ - // A 1m x 1m x 1m domain - spatialStep = 1.0 / (N - 1); - geom = UniformCubicGeometry<3>(spatialStep); - - // Allocate arrays - allocateArrays(shape(N,N,N), advect, V, nextV, force); // vector fields - allocateArrays(shape(N,N,N), P, P_rhs); // scalar fields - - // Since incompressibility is assumed, pressure only shows up as - // derivative terms in the equations. We choose airPressure = 0 - // as an arbitrary datum. - - airPressure = 0; // Pa - rho = 1000; // density of fluid, kg/m^3 - recip_rho = 1.0 / rho; // inverse of density - eta = 1.0e-6; // kinematic viscosity of fluid, m^2/s - gravity = 9.81; // m/s^2 - delta_t = 0.001; // initial time step, in seconds - volume = pow3(spatialStep); // cubic volume associated with grid point - - // Kludge: Set eta high, so that the flow will spread faster. - // This means the cube is filled with molasses, rather than water. 
- eta *= 1000; - - // Initial conditions: quiescent - V = 0.0; - P_rhs = 0.0; - advect = 0.0; - nextV = 0.0; - P = 0.0; - force = 0.0; -} - -// Calculate a simple check on a vector field -void record(vectorField& V) -{ - // Calculate the magnitude of a field - const int x=0, y=1, z=2; - double magx = sum(pow2(V[x])) / V.numElements(); - double magy = sum(pow2(V[y])) / V.numElements(); - double magz = sum(pow2(V[z])) / V.numElements(); - - cout << "norm = [" << magx - << " " << magy << " " << magz << " ]" << endl; -} - -void iterate(vectorField& V, vectorField& nextV, scalarField& P, - scalarField& P_rhs, vectorField& advect, vectorField& force) -{ - // Time step - applyStencil(timestep(), V, nextV, P, advect, force); -} - -int main() -{ - vectorField V, nextV; // Velocity fields - scalarField P, P_rhs; // Pressure fields - vectorField advect; // Advection field - vectorField force; // Forcing function - - const int N = 50; // Arrays are NxNxN - - setup(N, V, nextV, P, P_rhs, advect, force); - - const int nIters = 10; - - for (int i=0; i < nIters; ++i) - { - iterate(V, nextV, P, P_rhs, advect, force); - } - - return 0; -} - diff --git a/benchmarks/cfdf.f b/benchmarks/cfdf.f deleted file mode 100644 index d713aab6..00000000 --- a/benchmarks/cfdf.f +++ /dev/null @@ -1,103 +0,0 @@ - - PROGRAM CFDF - - PARAMETER ( N = 50, niters = 10 ) - - REAL VX(N,N,N), VY(N,N,N), VZ(N,N,N) - REAL VX2(N,N,N), VY2(N,N,N), VZ2(N,N,N) - REAL P(N,N,N) - REAL FX(N,N,N), FY(N,N,N), FZ(N,N,N) - REAL AX(N,N,N), AY(N,N,N), AZ(N,N,N) - - INTEGER iter - -C Initialize arrays - - CALL initialize(N, VX, VY, VZ, VX2, VY2, VZ2, P, FX, FY, FZ, - . AX, AY, AZ) - -C Apply the stencil a few times - - DO iter=1,niters - CALL cfdStencil(N, VX, VY, VZ, VX2, VY2, VZ2, P, - . FX, FY, FZ, AX, AY, AZ) - END DO - - STOP - END - - SUBROUTINE cfdStencil(N, VX, VY, VZ, VX2, VY2, VZ2, P, - . 
FX, FY, FZ, AX, AY, AZ) - - INTEGER N - REAL VX(N,N,N), VY(N,N,N), VZ(N,N,N) - REAL VX2(N,N,N), VY2(N,N,N), VZ2(N,N,N) - REAL P(N,N,N) - REAL FX(N,N,N), FY(N,N,N), FZ(N,N,N) - REAL AX(N,N,N), AY(N,N,N), AZ(N,N,N) - - PARAMETER ( delta_t = 0.001, recip_rho = 1.0e-3, - . eta = 1.0e-6, c1 = 0.1, c2 = 0.1 ) - - DO i=3,N-2 - DO j=3,N-2 - DO k=3,N-2 - VX2(i,j,k) = VX(i,j,k)+delta_t*(recip_rho*(eta* - . c1 * (-90*VX(i,j,k)-VX(i-2,j,k)+16*VX(i-1,j,k) - . +16*VX(i+1,j,k)-VX(i+2,j,k)-VX(i,j-2,k)+16*VX(i,j-1,k) - . +16*VX(i,j+1,k)-VX(i,j+2,k)-VX(i,j,k-2)+16*VX(i,j,k-1) - . +16*VX(i,j,k+1)-VX(i,j,k+2))+c2*(P(i-2,j,k) - . -8*P(i-1,j,k)+8*P(i+1,j,k)+P(i+2,j,k))+FX(i,j,k)) - . -AX(i,j,k)) - VY2(i,j,k) = VY(i,j,k)+delta_t*(recip_rho*(eta* - . c1 * (-90*VY(i,j,k)-VY(i-2,j,k)+16*VY(i-1,j,k) - . +16*VY(i+1,j,k)-VY(i+2,j,k)-VY(i,j-2,k)+16*VY(i,j-1,k) - . +16*VY(i,j+1,k)-VY(i,j+2,k)-VY(i,j,k-2)+16*VY(i,j,k-1) - . +16*VY(i,j,k+1)-VY(i,j,k+2))+c2*(P(i,j-2,k) - . -8*P(i,j-1,k)+8*P(i,j+1,k)+P(i,j+2,k))+FY(i,j,k)) - . -AY(i,j,k)) - VZ2(i,j,k) = VZ(i,j,k)+delta_t*(recip_rho*(eta* - . c1 * (-90*VZ(i,j,k)-VZ(i-2,j,k)+16*VZ(i-1,j,k) - . +16*VZ(i+1,j,k)-VZ(i+2,j,k)-VZ(i,j-2,k)+16*VZ(i,j-1,k) - . +16*VZ(i,j+1,k)-VZ(i,j+2,k)-VZ(i,j,k-2)+16*VZ(i,j,k-1) - . +16*VZ(i,j,k+1)-VZ(i,j,k+2))+c2*(P(i,j,k-2) - . -8*P(i,j,k-1)+8*P(i,j,k+1)+P(i,j,k+2))+FZ(i,j,k)) - . -AZ(i,j,k)) - END DO - END DO - END DO - - RETURN - END - - - - SUBROUTINE initialize(N, VX, VY, VZ, VX2, VY2, VZ2, P, - . 
FX, FY, FZ, AX, AY, AZ) - - INTEGER N - REAL VX(N,N,N), VY(N,N,N), VZ(N,N,N) - REAL VX2(N,N,N), VY2(N,N,N), VZ2(N,N,N) - REAL P(N,N,N) - REAL FX(N,N,N), FY(N,N,N), FZ(N,N,N) - REAL AX(N,N,N), AY(N,N,N), AZ(N,N,N) - - DO i=3,N-2 - DO j=3,N-2 - DO k=3,N-2 - VX(i,j,k) = 0 - VY(i,j,k) = 0 - VZ(i,j,k) = 0 - P(i,j,k) = 0 - FX(i,j,k) = 0 - FY(i,j,k) = 0 - FZ(i,j,k) = 0 - AX(i,j,k) = 0 - AY(i,j,k) = 0 - AZ(i,j,k) = 0 - END DO - END DO - END DO - - END - diff --git a/benchmarks/cfortran.h b/benchmarks/cfortran.h deleted file mode 100644 index ea0532eb..00000000 --- a/benchmarks/cfortran.h +++ /dev/null @@ -1,2090 +0,0 @@ -/* cfortran.h 3.5 */ /* anonymous ftp@zebra.desy.de */ -/* Burkhard Burow burow@desy.de 1990 - 1996. */ - -#ifndef __CFORTRAN_LOADED -#define __CFORTRAN_LOADED - -/* - THIS FILE IS PROPERTY OF BURKHARD BUROW. IF YOU ARE USING THIS FILE YOU - SHOULD ALSO HAVE ACCESS TO CFORTRAN.DOC WHICH PROVIDES TERMS FOR USING, - MODIFYING, COPYING AND DISTRIBUTING THE CFORTRAN.H PACKAGE. -*/ - -/* - Avoid symbols already used by compilers and system *.h: - __ - OSF1 zukal06 V3.0 347 alpha, cc -c -std1 cfortest.c - - */ - - -/* First prepare for the C compiler. */ - -#ifndef ANSI_C_preprocessor /* i.e. user can override. */ -#ifdef __CF__KnR -#define ANSI_C_preprocessor 0 -#else -#ifdef __STDC__ -#define ANSI_C_preprocessor 1 -#else -#define _cfleft 1 -#define _cfright -#define _cfleft_cfright 0 -#define ANSI_C_preprocessor _cfleft/**/_cfright -#endif -#endif -#endif - -#if ANSI_C_preprocessor -#define _0(A,B) A##B -#define _(A,B) _0(A,B) /* see cat,xcat of K&R ANSI C p. 231 */ -#define _2(A,B) A##B /* K&R ANSI C p.230: .. identifier is not replaced */ -#define _3(A,B,C) _(A,_(B,C)) -#else /* if it turns up again during rescanning. 
*/ -#define _(A,B) A/**/B -#define _2(A,B) A/**/B -#define _3(A,B,C) A/**/B/**/C -#endif - -#if (defined(vax)&&defined(unix)) || (defined(__vax__)&&defined(__unix__)) -#define VAXUltrix -#endif - -#include /* NULL [in all machines stdio.h] */ -#include /* strlen, memset, memcpy, memchr. */ -#if !( defined(VAXUltrix) || defined(sun) || (defined(apollo)&&!defined(__STDCPP__)) ) -#include /* malloc,free */ -#else -#include /* Had to be removed for DomainOS h105 10.4 sys5.3 425t*/ -#ifdef apollo -#define __CF__APOLLO67 /* __STDCPP__ is in Apollo 6.8 (i.e. ANSI) and onwards */ -#endif -#endif - -#if !defined(__GNUC__) && !defined(__sun) && (defined(sun)||defined(VAXUltrix)||defined(lynx)) -#define __CF__KnR /* Sun, LynxOS and VAX Ultrix cc only supports K&R. */ - /* Manually define __CF__KnR for HP if desired/required.*/ -#endif /* i.e. We will generate Kernighan and Ritchie C. */ -/* Note that you may define __CF__KnR before #include cfortran.h, in order to -generate K&R C instead of the default ANSI C. The differences are mainly in the -function prototypes and declarations. All machines, except the Apollo, work -with either style. The Apollo's argument promotion rules require ANSI or use of -the obsolete std_$call which we have not implemented here. Hence on the Apollo, -only C calling FORTRAN subroutines will work using K&R style.*/ - - -/* Remainder of cfortran.h depends on the Fortran compiler. */ - -#ifdef CLIPPERFortran -#define f2cFortran -#endif - -/* VAX/VMS does not let us \-split long #if lines. */ -/* Split #if into 2 because some HP-UX can't handle long #if */ -#if !(defined(NAGf90Fortran)||defined(f2cFortran)||defined(hpuxFortran)||defined(apolloFortran)||defined(sunFortran)||defined(IBMR2Fortran)||defined(CRAYFortran)) -#if !(defined(mipsFortran)||defined(DECFortran)||defined(vmsFortran)||defined(CONVEXFortran)||defined(PowerStationFortran)||defined(AbsoftUNIXFortran)) -/* If no Fortran compiler is given, we choose one for the machines we know. 
*/ -#if defined(lynx) || defined(VAXUltrix) -#define f2cFortran /* Lynx: Only support f2c at the moment. - VAXUltrix: f77 behaves like f2c. - Support f2c or f77 with gcc, vcc with f2c. - f77 with vcc works, missing link magic for f77 I/O.*/ -#endif -#if defined(__hpux) /* 921107: Use __hpux instead of __hp9000s300 */ -#define hpuxFortran /* Should also allow hp9000s7/800 use.*/ -#endif -#if defined(apollo) -#define apolloFortran /* __CF__APOLLO67 defines some behavior. */ -#endif -#if defined(sun) || defined(__sun) -#define sunFortran -#endif -#if defined(_IBMR2) -#define IBMR2Fortran -#endif -#if defined(_CRAY) -#define CRAYFortran /* _CRAY2 defines some behavior. */ -#endif -#if defined(mips) || defined(__mips) -#define mipsFortran -#endif -#if defined(vms) || defined(__vms) -#define vmsFortran -#endif -#if defined(__alpha) && defined(__unix__) -#define DECFortran -#endif -#if defined(__convex__) -#define CONVEXFortran -#endif -#if defined(VISUAL_CPLUSPLUS) -#define PowerStationFortran -#endif -#endif /* ...Fortran */ -#endif /* ...Fortran */ - -/* Split #if into 2 because some HP-UX can't handle long #if */ -#if !(defined(NAGf90Fortran)||defined(f2cFortran)||defined(hpuxFortran)||defined(apolloFortran)||defined(sunFortran)||defined(IBMR2Fortran)||defined(CRAYFortran)) -#if !(defined(mipsFortran)||defined(DECFortran)||defined(vmsFortran)||defined(CONVEXFortran)||defined(PowerStationFortran)||defined(AbsoftUNIXFortran)) -/* Apologies for the trigraph, but some compilers barf on #error. */ -??=error "cfortran.h: Can't find your environment among:\ - - MIPS cc and f77 2.0. (e.g. Silicon Graphics, DECstations, ...) \ - - IBM AIX XL C and FORTRAN Compiler/6000 Version 01.01.0000.0000 \ - - VAX VMS CC 3.1 and FORTRAN 5.4. \ - - Alpha VMS DEC C 1.3 and DEC FORTRAN 6.0. \ - - Alpha OSF DEC C and DEC Fortran for OSF/1 AXP Version 1.2 \ - - Apollo DomainOS 10.2 (sys5.3) with f77 10.7 and cc 6.7. 
\ - - CRAY \ - - CONVEX \ - - Sun \ - - PowerStation Fortran with Visual C++ \ - - HP9000s300/s700/s800 Latest test with: HP-UX A.08.07 A 9000/730 \ - - LynxOS: cc or gcc with f2c. \ - - VAXUltrix: vcc,cc or gcc with f2c. gcc or cc with f77. \ - - f77 with vcc works; but missing link magic for f77 I/O. \ - - NO fort. None of gcc, cc or vcc generate required names.\ - - f2c : Use #define f2cFortran, or cc -Df2cFortran \ - - NAG f90: Use #define NAGf90Fortran, or cc -DNAGf90Fortran \ - - Absoft UNIX F77: Use #define AbsoftUNIXFortran or cc -DAbsoftUNIXFortran" -/* Compiler must throw us out at this point! */ -#endif -#endif - - -#if defined(VAXC) && !defined(__VAXC) -#define OLD_VAXC -#pragma nostandard /* Prevent %CC-I-PARAMNOTUSED. */ -#endif - -/* Throughout cfortran.h we use: UN = Uppercase Name. LN = Lowercase Name. */ - -#if defined(f2cFortran) || defined(NAGf90Fortran) || defined(DECFortran) || defined(mipsFortran) || defined(apolloFortran) || defined(sunFortran) || defined(CONVEXFortran) || defined(extname) -#define CFC_(UN,LN) _(LN,_) /* Lowercase FORTRAN symbols. */ -#define orig_fcallsc(UN,LN) CFC_(UN,LN) -#else -#if defined(CRAYFortran) || defined(PowerStationFortran) -#define CFC_(UN,LN) UN /* Uppercase FORTRAN symbols. */ -#define orig_fcallsc(UN,LN) CFC_(UN,LN) /* CRAY insists on arg.'s here. */ -#else /* For following machines one may wish to change the fcallsc default. */ -#define CF_SAME_NAMESPACE -#ifdef vmsFortran -#define CFC_(UN,LN) LN /* Either case FORTRAN symbols. */ - /* BUT we usually use UN for C macro to FORTRAN routines, so use LN here,*/ - /* because VAX/VMS doesn't do recursive macros. */ -#define orig_fcallsc(UN,LN) UN -#else /* HP-UX without +ppu or IBMR2 without -qextname. NOT reccomended. */ -#define CFC_(UN,LN) LN /* Lowercase FORTRAN symbols. 
*/ -#define orig_fcallsc(UN,LN) CFC_(UN,LN) -#endif /* vmsFortran */ -#endif /* CRAYFortran */ -#endif /* ....Fortran */ - -#define fcallsc(UN,LN) orig_fcallsc(UN,LN) -#define preface_fcallsc(P,p,UN,LN) CFC_(_(P,UN),_(p,LN)) -#define append_fcallsc(P,p,UN,LN) CFC_(_(UN,P),_(LN,p)) - -#define C_FUNCTION(UN,LN) fcallsc(UN,LN) -#define FORTRAN_FUNCTION(UN,LN) CFC_(UN,LN) - -#ifndef COMMON_BLOCK -#ifndef CONVEXFortran -#ifndef CLIPPERFortran -#define COMMON_BLOCK(UN,LN) CFC_(UN,LN) -#else -#define COMMON_BLOCK(UN,LN) _(LN,__) -#endif -#else -#define COMMON_BLOCK(UN,LN) _3(_,LN,_) -#endif -#endif - -#ifdef CRAYFortran -#ifdef _CRAY -#include -#else -#include "fortran.h" /* i.e. if crosscompiling assume user has file. */ -#endif -#ifndef DOUBLE_PRECISION -#define DOUBLE_PRECISION long double -#endif -#define FLOATVVVVVVV_cfPP (float *) /* Used for C calls FORTRAN. */ -/* CRAY's double==float but CRAY says pointers to doubles and floats are diff.*/ -#define VOIDP (void *) /* When FORTRAN calls C, we don't know if C routine - arg.'s have been declared float *, or double *. */ -#else -#ifndef DOUBLE_PRECISION -#define DOUBLE_PRECISION double -#endif -#define FLOATVVVVVVV_cfPP -#define VOIDP -#endif - -#ifdef vmsFortran -#if defined(vms) || defined(__vms) -#include -#else -#include "descrip.h" /* i.e. if crosscompiling assume user has file. */ -#endif -#endif - -#ifdef sunFortran -#if defined(sun) || defined(__sun) -#include /* Sun's FLOATFUNCTIONTYPE, ASSIGNFLOAT, RETURNFLOAT. */ -#else -#include "math.h" /* i.e. if crosscompiling assume user has file. */ -#endif -/* At least starting with the default C compiler SC3.0.1 of SunOS 5.3, - * FLOATFUNCTIONTYPE, ASSIGNFLOAT, RETURNFLOAT are not required and not in - * , since sun C no longer promotes C float return values to doubles. - * Therefore, only use them if defined. - * Even if gcc is being used, assume that it exhibits the Sun C compiler - * behavior in order to be able to use *.o from the Sun C compiler. - * i.e. 
If FLOATFUNCTIONTYPE, etc. are in math.h, they required by gcc. - */ -#endif - -#ifndef apolloFortran -#define COMMON_BLOCK_DEF(DEFINITION, NAME) extern DEFINITION NAME -#define CF_NULL_PROTO -#else /* HP doesn't understand #elif. */ -/* Without ANSI prototyping, Apollo promotes float functions to double. */ -/* Note that VAX/VMS, IBM, Mips choke on 'type function(...);' prototypes. */ -#define CF_NULL_PROTO ... -#ifndef __CF__APOLLO67 -#define COMMON_BLOCK_DEF(DEFINITION, NAME) \ - DEFINITION NAME __attribute((__section(NAME))) -#else -#define COMMON_BLOCK_DEF(DEFINITION, NAME) \ - DEFINITION NAME #attribute[section(NAME)] -#endif -#endif - -#ifdef __cplusplus -#undef CF_NULL_PROTO -#define CF_NULL_PROTO ... -#endif - -#ifdef mipsFortran -#define CF_DECLARE_GETARG int f77argc; char **f77argv -#define CF_SET_GETARG(ARGC,ARGV) f77argc = ARGC; f77argv = ARGV -#else -#define CF_DECLARE_GETARG -#define CF_SET_GETARG(ARGC,ARGV) -#endif - -#ifdef OLD_VAXC /* Allow %CC-I-PARAMNOTUSED. */ -#pragma standard -#endif - -#define ACOMMA , -#define ACOLON ; - -/*-------------------------------------------------------------------------*/ - -/* UTILITIES USED WITHIN CFORTRAN.H */ - -#define _cfMIN(A,B) (As) { /* Need this to handle NULL string.*/ - while (e>s && *--e==t); /* Don't follow t's past beginning. */ - e[*e==t?0:1] = '\0'; /* Handle s[0]=t correctly. */ -} return s; } - -/* kill_trailingn(s,t,e) will kill the trailing t's in string s. e normally -points to the terminating '\0' of s, but may actually point to anywhere in s. -s's new '\0' will be placed at e or earlier in order to remove any trailing t's. -If es) { /* Watch out for neg. length string.*/ - while (e>s && *--e==t); /* Don't follow t's past beginning. */ - e[*e==t?0:1] = '\0'; /* Handle s[0]=t correctly. */ -} return s; } - -/* Note the following assumes that any element which has t's to be chopped off, -does indeed fill the entire element. 
*/ -#ifndef __CF__KnR -static char *vkill_trailing(char* cstr, int elem_len, int sizeofcstr, char t) -#else -static char *vkill_trailing( cstr, elem_len, sizeofcstr, t) - char* cstr; int elem_len; int sizeofcstr; char t; -#endif -{ int i; -for (i=0; i= 4.3 gives message: - zow35> cc -c -DDECFortran cfortest.c - cfe: Fatal: Out of memory: cfortest.c - zow35> - Old __hpux had the problem, but new 'HP-UX A.09.03 A 9000/735' is fine - if using -Aa, otherwise we have a problem. - */ -#ifndef MAX_PREPRO_ARGS -#if !defined(__GNUC__) && (defined(VAXUltrix) || defined(__CF__APOLLO67) || (defined(sun)&&!defined(__sun)) || defined(_CRAY) || defined(__ultrix__) || (defined(__hpux)&&defined(__CF__KnR))) -#define MAX_PREPRO_ARGS 31 -#else -#define MAX_PREPRO_ARGS 99 -#endif -#endif - -#if defined(AbsoftUNIXFortran) -/* In addition to explicit Absoft stuff, only Absoft requires: - - DEFAULT coming from _cfSTR. - DEFAULT could have been called e.g. INT, but keep it for clarity. - - M term in CFARGT14 and CFARGT14FS. 
- */ -#define ABSOFT_cf1(T0) _(T0,_cfSTR)(0,ABSOFT1,0,0,0,0,0) -#define ABSOFT_cf2(T0) _(T0,_cfSTR)(0,ABSOFT2,0,0,0,0,0) -#define ABSOFT_cf3(T0) _(T0,_cfSTR)(0,ABSOFT3,0,0,0,0,0) -#define DEFAULT_cfABSOFT1 -#define LOGICAL_cfABSOFT1 -#define STRING_cfABSOFT1 ,MAX_LEN_FORTRAN_FUNCTION_STRING -#define DEFAULT_cfABSOFT2 -#define LOGICAL_cfABSOFT2 -#define STRING_cfABSOFT2 ,unsigned D0 -#define DEFAULT_cfABSOFT3 -#define LOGICAL_cfABSOFT3 -#define STRING_cfABSOFT3 ,D0 -#else -#define ABSOFT_cf1(T0) -#define ABSOFT_cf2(T0) -#define ABSOFT_cf3(T0) -#endif - -#define CFARGT14S(S,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) \ - S(T1,1) S(T2,2) S(T3,3) S(T4,4) S(T5,5) S(T6,6) S(T7,7) \ - S(T8,8) S(T9,9) S(TA,A) S(TB,B) S(TC,C) S(TD,D) S(TE,E) -#define CFARGT14FS(F,S,M,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) \ - F(T1,1,0) F(T2,2,1) F(T3,3,1) F(T4,4,1) F(T5,5,1) F(T6,6,1) F(T7,7,1) \ - F(T8,8,1) F(T9,9,1) F(TA,A,1) F(TB,B,1) F(TC,C,1) F(TD,D,1) F(TE,E,1) \ - M CFARGT14S(S,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) - -#if !(defined(PowerStationFortran)||defined(hpuxFortran800)) -/* Old CFARGT14 -> CFARGT14FS as seen below, for Absoft cross-compile yields: - SunOS> cc -c -Xa -DAbsoftUNIXFortran c.c - "c.c", line 406: warning: argument mismatch - Haven't checked if this is ANSI C or a SunOS bug. SunOS -Xs works ok. - Behavior is most clearly seen in example: - #define A 1 , 2 - #define C(X,Y,Z) x=X. y=Y. z=Z. - #define D(X,Y,Z) C(X,Y,Z) - D(x,A,z) - Output from preprocessor is: x = x . y = 1 . z = 2 . 
- #define CFARGT14(F,S,M,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) \ - CFARGT14FS(F,S,M,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) -*/ -#define CFARGT14(F,S,M,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) \ - F(T1,1,0) F(T2,2,1) F(T3,3,1) F(T4,4,1) F(T5,5,1) F(T6,6,1) F(T7,7,1) \ - F(T8,8,1) F(T9,9,1) F(TA,A,1) F(TB,B,1) F(TC,C,1) F(TD,D,1) F(TE,E,1) \ - M CFARGT14S(S,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) -#define CFARGT20(F,S,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,TH,TI,TJ,TK) \ - F(T1,1,0) F(T2,2,1) F(T3,3,1) F(T4,4,1) F(T5,5,1) F(T6,6,1) F(T7,7,1) \ - F(T8,8,1) F(T9,9,1) F(TA,A,1) F(TB,B,1) F(TC,C,1) F(TD,D,1) F(TE,E,1) \ - F(TF,F,1) F(TG,G,1) F(TH,H,1) F(TI,I,1) F(TJ,J,1) F(TK,K,1) \ - S(T1,1) S(T2,2) S(T3,3) S(T4,4) S(T5,5) S(T6,6) S(T7,7) \ - S(T8,8) S(T9,9) S(TA,A) S(TB,B) S(TC,C) S(TD,D) S(TE,E) \ - S(TF,F) S(TG,G) S(TH,H) S(TI,I) S(TJ,J) S(TK,K) -#define CFARGTA14(F,S,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,AD,AE) \ - F(T1,A1,1,0) F(T2,A2,2,1) F(T3,A3,3,1) F(T4,A4,4,1) F(T5,A5,5,1) F(T6,A6,6,1) \ - F(T7,A7,7,1) F(T8,A8,8,1) F(T9,A9,9,1) F(TA,AA,A,1) F(TB,AB,B,1) F(TC,AC,C,1) \ - F(TD,AD,D,1) F(TE,AE,E,1) S(T1,1) S(T2,2) S(T3,3) S(T4,4) \ - S(T5,5) S(T6,6) S(T7,7) S(T8,8) S(T9,9) S(TA,A) \ - S(TB,B) S(TC,C) S(TD,D) S(TE,E) -#if MAX_PREPRO_ARGS>31 -#define CFARGTA20(F,S,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,TH,TI,TJ,TK,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,AD,AE,AF,AG,AH,AI,AJ,AK) \ - F(T1,A1,1,0) F(T2,A2,2,1) F(T3,A3,3,1) F(T4,A4,4,1) F(T5,A5,5,1) F(T6,A6,6,1) \ - F(T7,A7,7,1) F(T8,A8,8,1) F(T9,A9,9,1) F(TA,AA,A,1) F(TB,AB,B,1) F(TC,AC,C,1) \ - F(TD,AD,D,1) F(TE,AE,E,1) F(TF,AF,F,1) F(TG,AG,G,1) F(TH,AH,H,1) F(TI,AI,I,1) \ - F(TJ,AJ,J,1) F(TK,AK,K,1) S(T1,1) S(T2,2) S(T3,3) S(T4,4) \ - S(T5,5) S(T6,6) S(T7,7) S(T8,8) S(T9,9) S(TA,A) \ - S(TB,B) S(TC,C) S(TD,D) S(TE,E) S(TF,F) S(TG,G) \ - S(TH,H) S(TI,I) S(TJ,J) S(TK,K) -#endif -#else -#define CFARGT14(F,S,M,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) 
\ - F(T1,1,0) S(T1,1) F(T2,2,1) S(T2,2) F(T3,3,1) S(T3,3) F(T4,4,1) S(T4,4) \ - F(T5,5,1) S(T5,5) F(T6,6,1) S(T6,6) F(T7,7,1) S(T7,7) F(T8,8,1) S(T8,8) \ - F(T9,9,1) S(T9,9) F(TA,A,1) S(TA,A) F(TB,B,1) S(TB,B) F(TC,C,1) S(TC,C) \ - F(TD,D,1) S(TD,D) F(TE,E,1) S(TE,E) -#define CFARGT20(F,S,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,TH,TI,TJ,TK) \ - F(T1,1,0) S(T1,1) F(T2,2,1) S(T2,2) F(T3,3,1) S(T3,3) F(T4,4,1) S(T4,4) \ - F(T5,5,1) S(T5,5) F(T6,6,1) S(T6,6) F(T7,7,1) S(T7,7) F(T8,8,1) S(T8,8) \ - F(T9,9,1) S(T9,9) F(TA,A,1) S(TA,A) F(TB,B,1) S(TB,B) F(TC,C,1) S(TC,C) \ - F(TD,D,1) S(TD,D) F(TE,E,1) S(TE,E) F(TF,F,1) S(TF,F) F(TG,G,1) S(TG,G) \ - F(TH,H,1) S(TH,H) F(TI,I,1) S(TI,I) F(TJ,J,1) S(TJ,J) F(TK,K,1) S(TK,K) -#define CFARGTA14(F,S,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,AD,AE) \ - F(T1,A1,1,0) S(T1,1) F(T2,A2,2,1) S(T2,2) F(T3,A3,3,1) S(T3,3) \ - F(T4,A4,4,1) S(T4,4) F(T5,A5,5,1) S(T5,5) F(T6,A6,6,1) S(T6,6) \ - F(T7,A7,7,1) S(T7,7) F(T8,A8,8,1) S(T8,8) F(T9,A9,9,1) S(T9,9) \ - F(TA,AA,A,1) S(TA,A) F(TB,AB,B,1) S(TB,B) F(TC,AC,C,1) S(TC,C) \ - F(TD,AD,D,1) S(TD,D) F(TE,AE,E,1) S(TE,E) -#if MAX_PREPRO_ARGS>31 -#define CFARGTA20(F,S,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,TH,TI,TJ,TK,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,AD,AE,AF,AG,AH,AI,AJ,AK) \ - F(T1,A1,1,0) S(T1,1) F(T2,A2,2,1) S(T2,2) F(T3,A3,3,1) S(T3,3) \ - F(T4,A4,4,1) S(T4,4) F(T5,A5,5,1) S(T5,5) F(T6,A6,6,1) S(T6,6) \ - F(T7,A7,7,1) S(T7,7) F(T8,A8,8,1) S(T8,8) F(T9,A9,9,1) S(T9,9) \ - F(TA,AA,A,1) S(TA,A) F(TB,AB,B,1) S(TB,B) F(TC,AC,C,1) S(TC,C) \ - F(TD,AD,D,1) S(TD,D) F(TE,AE,E,1) S(TE,E) F(TF,AF,F,1) S(TF,F) \ - F(TG,AG,G,1) S(TG,G) F(TH,AH,H,1) S(TH,H) F(TI,AI,I,1) S(TI,I) \ - F(TJ,AJ,J,1) S(TJ,J) F(TK,AK,K,1) S(TK,K) -#endif -#endif - - -#define PROTOCCALLSFSUB1( UN,LN,T1) \ - PROTOCCALLSFSUB14(UN,LN,T1,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0) -#define PROTOCCALLSFSUB2( UN,LN,T1,T2) \ - 
PROTOCCALLSFSUB14(UN,LN,T1,T2,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0) -#define PROTOCCALLSFSUB3( UN,LN,T1,T2,T3) \ - PROTOCCALLSFSUB14(UN,LN,T1,T2,T3,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0) -#define PROTOCCALLSFSUB4( UN,LN,T1,T2,T3,T4) \ - PROTOCCALLSFSUB14(UN,LN,T1,T2,T3,T4,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0) -#define PROTOCCALLSFSUB5( UN,LN,T1,T2,T3,T4,T5) \ - PROTOCCALLSFSUB14(UN,LN,T1,T2,T3,T4,T5,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0) -#define PROTOCCALLSFSUB6( UN,LN,T1,T2,T3,T4,T5,T6) \ - PROTOCCALLSFSUB14(UN,LN,T1,T2,T3,T4,T5,T6,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0) -#define PROTOCCALLSFSUB7( UN,LN,T1,T2,T3,T4,T5,T6,T7) \ - PROTOCCALLSFSUB14(UN,LN,T1,T2,T3,T4,T5,T6,T7,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0) -#define PROTOCCALLSFSUB8( UN,LN,T1,T2,T3,T4,T5,T6,T7,T8) \ - PROTOCCALLSFSUB14(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0) -#define PROTOCCALLSFSUB9( UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9) \ - PROTOCCALLSFSUB14(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,CF_0,CF_0,CF_0,CF_0,CF_0) -#define PROTOCCALLSFSUB10(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA) \ - PROTOCCALLSFSUB14(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,CF_0,CF_0,CF_0,CF_0) -#define PROTOCCALLSFSUB11(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB) \ - PROTOCCALLSFSUB14(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,CF_0,CF_0,CF_0) -#define PROTOCCALLSFSUB12(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC) \ - PROTOCCALLSFSUB14(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,CF_0,CF_0) -#define PROTOCCALLSFSUB13(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD) \ - PROTOCCALLSFSUB14(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,CF_0) - - -#define PROTOCCALLSFSUB15(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF) \ - PROTOCCALLSFSUB20(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,CF_0,CF_0,CF_0,CF_0,CF_0) -#define PROTOCCALLSFSUB16(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG) \ - 
PROTOCCALLSFSUB20(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,CF_0,CF_0,CF_0,CF_0) -#define PROTOCCALLSFSUB17(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,TH) \ - PROTOCCALLSFSUB20(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,TH,CF_0,CF_0,CF_0) -#define PROTOCCALLSFSUB18(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,TH,TI) \ - PROTOCCALLSFSUB20(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,TH,TI,CF_0,CF_0) -#define PROTOCCALLSFSUB19(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,TH,TI,TJ) \ - PROTOCCALLSFSUB20(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,TH,TI,TJ,CF_0) - - -#ifndef FCALLSC_QUALIFIER -#ifdef VISUAL_CPLUSPLUS -#define FCALLSC_QUALIFIER __stdcall -#else -#define FCALLSC_QUALIFIER -#endif -#endif - -#ifdef __cplusplus -#define CFextern extern "C" -#else -#define CFextern extern -#endif - - -#ifdef CFSUBASFUN -#define PROTOCCALLSFSUB0(UN,LN) \ - PROTOCCALLSFFUN0( VOID,UN,LN) -#define PROTOCCALLSFSUB14(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) \ - PROTOCCALLSFFUN14(VOID,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) -#define PROTOCCALLSFSUB20(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,TH,TI,TJ,TK)\ - PROTOCCALLSFFUN20(VOID,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,TH,TI,TJ,TK) -#else -/* Note: Prevent compiler warnings, null #define PROTOCCALLSFSUB14/20 after - #include-ing cfortran.h if calling the FORTRAN wrapper within the same - source code where the wrapper is created. 
*/ -#define PROTOCCALLSFSUB0(UN,LN) CFextern void FCALLSC_QUALIFIER CFC_(UN,LN)(); -#ifndef __CF__KnR -#define PROTOCCALLSFSUB14(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) \ - CFextern void FCALLSC_QUALIFIER CFC_(UN,LN)( CFARGT14(NCF,KCF,,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) ); -#define PROTOCCALLSFSUB20(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,TH,TI,TJ,TK)\ - CFextern void FCALLSC_QUALIFIER CFC_(UN,LN)( CFARGT20(NCF,KCF,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,TH,TI,TJ,TK) ); -#else -#define PROTOCCALLSFSUB14(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) \ - PROTOCCALLSFSUB0(UN,LN) -#define PROTOCCALLSFSUB20(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,TH,TI,TJ,TK) \ - PROTOCCALLSFSUB0(UN,LN) -#endif -#endif - - -#ifdef OLD_VAXC /* Allow %CC-I-PARAMNOTUSED. */ -#pragma standard -#endif - - -#define CCALLSFSUB1( UN,LN,T1, A1) \ - CCALLSFSUB5 (UN,LN,T1,CF_0,CF_0,CF_0,CF_0,A1,0,0,0,0) -#define CCALLSFSUB2( UN,LN,T1,T2, A1,A2) \ - CCALLSFSUB5 (UN,LN,T1,T2,CF_0,CF_0,CF_0,A1,A2,0,0,0) -#define CCALLSFSUB3( UN,LN,T1,T2,T3, A1,A2,A3) \ - CCALLSFSUB5 (UN,LN,T1,T2,T3,CF_0,CF_0,A1,A2,A3,0,0) -#define CCALLSFSUB4( UN,LN,T1,T2,T3,T4, A1,A2,A3,A4)\ - CCALLSFSUB5 (UN,LN,T1,T2,T3,T4,CF_0,A1,A2,A3,A4,0) -#define CCALLSFSUB5( UN,LN,T1,T2,T3,T4,T5, A1,A2,A3,A4,A5) \ - CCALLSFSUB10(UN,LN,T1,T2,T3,T4,T5,CF_0,CF_0,CF_0,CF_0,CF_0,A1,A2,A3,A4,A5,0,0,0,0,0) -#define CCALLSFSUB6( UN,LN,T1,T2,T3,T4,T5,T6, A1,A2,A3,A4,A5,A6) \ - CCALLSFSUB10(UN,LN,T1,T2,T3,T4,T5,T6,CF_0,CF_0,CF_0,CF_0,A1,A2,A3,A4,A5,A6,0,0,0,0) -#define CCALLSFSUB7( UN,LN,T1,T2,T3,T4,T5,T6,T7, A1,A2,A3,A4,A5,A6,A7) \ - CCALLSFSUB10(UN,LN,T1,T2,T3,T4,T5,T6,T7,CF_0,CF_0,CF_0,A1,A2,A3,A4,A5,A6,A7,0,0,0) -#define CCALLSFSUB8( UN,LN,T1,T2,T3,T4,T5,T6,T7,T8, A1,A2,A3,A4,A5,A6,A7,A8) \ - CCALLSFSUB10(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,CF_0,CF_0,A1,A2,A3,A4,A5,A6,A7,A8,0,0) -#define CCALLSFSUB9( UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,A1,A2,A3,A4,A5,A6,A7,A8,A9)\ - 
CCALLSFSUB10(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,CF_0,A1,A2,A3,A4,A5,A6,A7,A8,A9,0) -#define CCALLSFSUB10(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA)\ - CCALLSFSUB14(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,CF_0,CF_0,CF_0,CF_0,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,0,0,0,0) -#define CCALLSFSUB11(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB)\ - CCALLSFSUB14(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,CF_0,CF_0,CF_0,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,0,0,0) -#define CCALLSFSUB12(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC)\ - CCALLSFSUB14(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,CF_0,CF_0,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,0,0) -#define CCALLSFSUB13(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,AD)\ - CCALLSFSUB14(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,CF_0,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,AD,0) - -#ifdef __cplusplus -#define CPPPROTOCLSFSUB0( UN,LN) -#define CPPPROTOCLSFSUB14(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) -#define CPPPROTOCLSFSUB20(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,TH,TI,TJ,TK) -#else -#define CPPPROTOCLSFSUB0(UN,LN) \ - PROTOCCALLSFSUB0(UN,LN) -#define CPPPROTOCLSFSUB14(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) \ - PROTOCCALLSFSUB14(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) -#define CPPPROTOCLSFSUB20(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,TH,TI,TJ,TK) \ - PROTOCCALLSFSUB20(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,TH,TI,TJ,TK) -#endif - -#ifdef CFSUBASFUN -#define CCALLSFSUB0(UN,LN) CCALLSFFUN0(UN,LN) -#define CCALLSFSUB14(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,AD,AE)\ - CCALLSFFUN14(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,AD,AE) -#else -/* do{...}while(FALSE) allows if(a==b) FORT(); else BORT(); */ -#define CCALLSFSUB0( UN,LN) do{CPPPROTOCLSFSUB0(UN,LN) CFC_(UN,LN)();}while(FALSE) -#define 
CCALLSFSUB14(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,AD,AE)\ -do{VVCF(T1,A1,B1) VVCF(T2,A2,B2) VVCF(T3,A3,B3) VVCF(T4,A4,B4) VVCF(T5,A5,B5) \ - VVCF(T6,A6,B6) VVCF(T7,A7,B7) VVCF(T8,A8,B8) VVCF(T9,A9,B9) VVCF(TA,AA,BA) \ - VVCF(TB,AB,BB) VVCF(TC,AC,BC) VVCF(TD,AD,BD) VVCF(TE,AE,BE) \ - CPPPROTOCLSFSUB14(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) \ - ACF(LN,T1,A1,1) ACF(LN,T2,A2,2) ACF(LN,T3,A3,3) \ - ACF(LN,T4,A4,4) ACF(LN,T5,A5,5) ACF(LN,T6,A6,6) ACF(LN,T7,A7,7) \ - ACF(LN,T8,A8,8) ACF(LN,T9,A9,9) ACF(LN,TA,AA,A) ACF(LN,TB,AB,B) \ - ACF(LN,TC,AC,C) ACF(LN,TD,AD,D) ACF(LN,TE,AE,E) \ - CFC_(UN,LN)( CFARGTA14(AACF,JCF,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,AD,AE) );\ - WCF(T1,A1,1) WCF(T2,A2,2) WCF(T3,A3,3) WCF(T4,A4,4) WCF(T5,A5,5) \ - WCF(T6,A6,6) WCF(T7,A7,7) WCF(T8,A8,8) WCF(T9,A9,9) WCF(TA,AA,A) \ - WCF(TB,AB,B) WCF(TC,AC,C) WCF(TD,AD,D) WCF(TE,AE,E) }while(FALSE) -#endif - - -#if MAX_PREPRO_ARGS>31 -#define CCALLSFSUB15(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,AD,AE,AF)\ - CCALLSFSUB20(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,CF_0,CF_0,CF_0,CF_0,CF_0,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,AD,AE,AF,0,0,0,0,0) -#define CCALLSFSUB16(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,AD,AE,AF,AG)\ - CCALLSFSUB20(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,CF_0,CF_0,CF_0,CF_0,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,AD,AE,AF,AG,0,0,0,0) -#define CCALLSFSUB17(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,TH,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,AD,AE,AF,AG,AH)\ - CCALLSFSUB20(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,TH,CF_0,CF_0,CF_0,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,AD,AE,AF,AG,AH,0,0,0) -#define CCALLSFSUB18(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,TH,TI,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,AD,AE,AF,AG,AH,AI)\ - 
CCALLSFSUB20(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,TH,TI,CF_0,CF_0,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,AD,AE,AF,AG,AH,AI,0,0) -#define CCALLSFSUB19(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,TH,TI,TJ,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,AD,AE,AF,AG,AH,AI,AJ)\ - CCALLSFSUB20(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,TH,TI,TJ,CF_0,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,AD,AE,AF,AG,AH,AI,AJ,0) - -#ifdef CFSUBASFUN -#define CCALLSFSUB20(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,TH, \ - TI,TJ,TK, A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,AD,AE,AF,AG,AH,AI,AJ,AK) \ - CCALLSFFUN20(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,TH, \ - TI,TJ,TK, A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,AD,AE,AF,AG,AH,AI,AJ,AK) -#else -#define CCALLSFSUB20(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,TH, \ - TI,TJ,TK, A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,AD,AE,AF,AG,AH,AI,AJ,AK) \ -do{VVCF(T1,A1,B1) VVCF(T2,A2,B2) VVCF(T3,A3,B3) VVCF(T4,A4,B4) VVCF(T5,A5,B5) \ - VVCF(T6,A6,B6) VVCF(T7,A7,B7) VVCF(T8,A8,B8) VVCF(T9,A9,B9) VVCF(TA,AA,BA) \ - VVCF(TB,AB,BB) VVCF(TC,AC,BC) VVCF(TD,AD,BD) VVCF(TE,AE,BE) VVCF(TF,AF,BF) \ - VVCF(TG,AG,BG) VVCF(TH,AH,BH) VVCF(TI,AI,BI) VVCF(TJ,AJ,BJ) VVCF(TK,AK,BK) \ - CPPPROTOCLSFSUB20(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,TH,TI,TJ,TK) \ - ACF(LN,T1,A1,1) ACF(LN,T2,A2,2) ACF(LN,T3,A3,3) ACF(LN,T4,A4,4) \ - ACF(LN,T5,A5,5) ACF(LN,T6,A6,6) ACF(LN,T7,A7,7) ACF(LN,T8,A8,8) \ - ACF(LN,T9,A9,9) ACF(LN,TA,AA,A) ACF(LN,TB,AB,B) ACF(LN,TC,AC,C) \ - ACF(LN,TD,AD,D) ACF(LN,TE,AE,E) ACF(LN,TF,AF,F) ACF(LN,TG,AG,G) \ - ACF(LN,TH,AH,H) ACF(LN,TI,AI,I) ACF(LN,TJ,AJ,J) ACF(LN,TK,AK,K) \ - CFC_(UN,LN)( CFARGTA20(AACF,JCF,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,TH,TI,TJ,TK,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,AD,AE,AF,AG,AH,AI,AJ,AK) ); \ - WCF(T1,A1,1) WCF(T2,A2,2) WCF(T3,A3,3) WCF(T4,A4,4) WCF(T5,A5,5) WCF(T6,A6,6) \ - WCF(T7,A7,7) WCF(T8,A8,8) WCF(T9,A9,9) WCF(TA,AA,A) WCF(TB,AB,B) WCF(TC,AC,C) \ - 
WCF(TD,AD,D) WCF(TE,AE,E) WCF(TF,AF,F) WCF(TG,AG,G) WCF(TH,AH,H) WCF(TI,AI,I) \ - WCF(TJ,AJ,J) WCF(TK,AK,K) }while(FALSE) -#endif -#endif /* MAX_PREPRO_ARGS */ - -/*-------------------------------------------------------------------------*/ - -/* UTILITIES FOR C TO CALL FORTRAN FUNCTIONS */ - -/*N.B. PROTOCCALLSFFUNn(..) generates code, whether or not the FORTRAN - function is called. Therefore, especially for creator's of C header files - for large FORTRAN libraries which include many functions, to reduce - compile time and object code size, it may be desirable to create - preprocessor directives to allow users to create code for only those - functions which they use. */ - -/* The following defines the maximum length string that a function can return. - Of course it may be undefine-d and re-define-d before individual - PROTOCCALLSFFUNn(..) as required. It would also be nice to have this derived - from the individual machines' limits. */ -#define MAX_LEN_FORTRAN_FUNCTION_STRING 0x4FE - -/* The following defines a character used by CFORTRAN.H to flag the end of a - string coming out of a FORTRAN routine. */ -#define CFORTRAN_NON_CHAR 0x7F - -#ifdef OLD_VAXC /* Prevent %CC-I-PARAMNOTUSED. 
*/ -#pragma nostandard -#endif - -#define _SEP_(TN,C,COMMA) _(__SEP_,C)(TN,COMMA) -#define __SEP_0(TN,COMMA) -#define __SEP_1(TN,COMMA) _Icf(2,SEP,TN,COMMA,0) -#define INT_cfSEP(T,B) _(A,B) -#define INTV_cfSEP(T,B) INT_cfSEP(T,B) -#define INTVV_cfSEP(T,B) INT_cfSEP(T,B) -#define INTVVV_cfSEP(T,B) INT_cfSEP(T,B) -#define INTVVVV_cfSEP(T,B) INT_cfSEP(T,B) -#define INTVVVVV_cfSEP(T,B) INT_cfSEP(T,B) -#define INTVVVVVV_cfSEP(T,B) INT_cfSEP(T,B) -#define INTVVVVVVV_cfSEP(T,B) INT_cfSEP(T,B) -#define PINT_cfSEP(T,B) INT_cfSEP(T,B) -#define PVOID_cfSEP(T,B) INT_cfSEP(T,B) -#define ROUTINE_cfSEP(T,B) INT_cfSEP(T,B) -#define SIMPLE_cfSEP(T,B) INT_cfSEP(T,B) -#define VOID_cfSEP(T,B) INT_cfSEP(T,B) /* For FORTRAN calls C subr.s.*/ -#define STRING_cfSEP(T,B) INT_cfSEP(T,B) -#define STRINGV_cfSEP(T,B) INT_cfSEP(T,B) -#define PSTRING_cfSEP(T,B) INT_cfSEP(T,B) -#define PSTRINGV_cfSEP(T,B) INT_cfSEP(T,B) -#define PNSTRING_cfSEP(T,B) INT_cfSEP(T,B) -#define PPSTRING_cfSEP(T,B) INT_cfSEP(T,B) -#define ZTRINGV_cfSEP(T,B) INT_cfSEP(T,B) -#define PZTRINGV_cfSEP(T,B) INT_cfSEP(T,B) - -#if defined(SIGNED_BYTE) || !defined(UNSIGNED_BYTE) -#ifdef OLD_VAXC -#define INTEGER_BYTE char /* Old VAXC barfs on 'signed char' */ -#else -#define INTEGER_BYTE signed char /* default */ -#endif -#else -#define INTEGER_BYTE unsigned char -#endif -#define BYTEVVVVVVV_cfTYPE INTEGER_BYTE -#define DOUBLEVVVVVVV_cfTYPE DOUBLE_PRECISION -#define FLOATVVVVVVV_cfTYPE float -#define INTVVVVVVV_cfTYPE int -#define LOGICALVVVVVVV_cfTYPE int -#define LONGVVVVVVV_cfTYPE long -#define SHORTVVVVVVV_cfTYPE short -#define PBYTE_cfTYPE INTEGER_BYTE -#define PDOUBLE_cfTYPE DOUBLE_PRECISION -#define PFLOAT_cfTYPE float -#define PINT_cfTYPE int -#define PLOGICAL_cfTYPE int -#define PLONG_cfTYPE long -#define PSHORT_cfTYPE short - -#define CFARGS0(A,T,V,W,X,Y,Z) _3(T,_cf,A) -#define CFARGS1(A,T,V,W,X,Y,Z) _3(T,_cf,A)(V) -#define CFARGS2(A,T,V,W,X,Y,Z) _3(T,_cf,A)(V,W) -#define CFARGS3(A,T,V,W,X,Y,Z) _3(T,_cf,A)(V,W,X) 
-#define CFARGS4(A,T,V,W,X,Y,Z) _3(T,_cf,A)(V,W,X,Y) -#define CFARGS5(A,T,V,W,X,Y,Z) _3(T,_cf,A)(V,W,X,Y,Z) - -#define _Icf(N,T,I,X,Y) _(I,_cfINT)(N,T,I,X,Y,0) -#define _Icf4(N,T,I,X,Y,Z) _(I,_cfINT)(N,T,I,X,Y,Z) -#define BYTE_cfINT(N,A,B,X,Y,Z) DOUBLE_cfINT(N,A,B,X,Y,Z) -#define DOUBLE_cfINT(N,A,B,X,Y,Z) _(CFARGS,N)(A,INT,B,X,Y,Z,0) -#define FLOAT_cfINT(N,A,B,X,Y,Z) DOUBLE_cfINT(N,A,B,X,Y,Z) -#define INT_cfINT(N,A,B,X,Y,Z) DOUBLE_cfINT(N,A,B,X,Y,Z) -#define LOGICAL_cfINT(N,A,B,X,Y,Z) DOUBLE_cfINT(N,A,B,X,Y,Z) -#define LONG_cfINT(N,A,B,X,Y,Z) DOUBLE_cfINT(N,A,B,X,Y,Z) -#define SHORT_cfINT(N,A,B,X,Y,Z) DOUBLE_cfINT(N,A,B,X,Y,Z) -#define PBYTE_cfINT(N,A,B,X,Y,Z) PDOUBLE_cfINT(N,A,B,X,Y,Z) -#define PDOUBLE_cfINT(N,A,B,X,Y,Z) _(CFARGS,N)(A,PINT,B,X,Y,Z,0) -#define PFLOAT_cfINT(N,A,B,X,Y,Z) PDOUBLE_cfINT(N,A,B,X,Y,Z) -#define PINT_cfINT(N,A,B,X,Y,Z) PDOUBLE_cfINT(N,A,B,X,Y,Z) -#define PLOGICAL_cfINT(N,A,B,X,Y,Z) PDOUBLE_cfINT(N,A,B,X,Y,Z) -#define PLONG_cfINT(N,A,B,X,Y,Z) PDOUBLE_cfINT(N,A,B,X,Y,Z) -#define PSHORT_cfINT(N,A,B,X,Y,Z) PDOUBLE_cfINT(N,A,B,X,Y,Z) -#define BYTEV_cfINT(N,A,B,X,Y,Z) DOUBLEV_cfINT(N,A,B,X,Y,Z) -#define BYTEVV_cfINT(N,A,B,X,Y,Z) DOUBLEVV_cfINT(N,A,B,X,Y,Z) -#define BYTEVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVV_cfINT(N,A,B,X,Y,Z) -#define BYTEVVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVVV_cfINT(N,A,B,X,Y,Z) -#define BYTEVVVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVVVV_cfINT(N,A,B,X,Y,Z) -#define BYTEVVVVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVVVVV_cfINT(N,A,B,X,Y,Z) -#define BYTEVVVVVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVVVVVV_cfINT(N,A,B,X,Y,Z) -#define DOUBLEV_cfINT(N,A,B,X,Y,Z) _(CFARGS,N)(A,INTV,B,X,Y,Z,0) -#define DOUBLEVV_cfINT(N,A,B,X,Y,Z) _(CFARGS,N)(A,INTVV,B,X,Y,Z,0) -#define DOUBLEVVV_cfINT(N,A,B,X,Y,Z) _(CFARGS,N)(A,INTVVV,B,X,Y,Z,0) -#define DOUBLEVVVV_cfINT(N,A,B,X,Y,Z) _(CFARGS,N)(A,INTVVVV,B,X,Y,Z,0) -#define DOUBLEVVVVV_cfINT(N,A,B,X,Y,Z) _(CFARGS,N)(A,INTVVVVV,B,X,Y,Z,0) -#define DOUBLEVVVVVV_cfINT(N,A,B,X,Y,Z) _(CFARGS,N)(A,INTVVVVVV,B,X,Y,Z,0) -#define 
DOUBLEVVVVVVV_cfINT(N,A,B,X,Y,Z) _(CFARGS,N)(A,INTVVVVVVV,B,X,Y,Z,0) -#define FLOATV_cfINT(N,A,B,X,Y,Z) DOUBLEV_cfINT(N,A,B,X,Y,Z) -#define FLOATVV_cfINT(N,A,B,X,Y,Z) DOUBLEVV_cfINT(N,A,B,X,Y,Z) -#define FLOATVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVV_cfINT(N,A,B,X,Y,Z) -#define FLOATVVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVVV_cfINT(N,A,B,X,Y,Z) -#define FLOATVVVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVVVV_cfINT(N,A,B,X,Y,Z) -#define FLOATVVVVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVVVVV_cfINT(N,A,B,X,Y,Z) -#define FLOATVVVVVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVVVVVV_cfINT(N,A,B,X,Y,Z) -#define INTV_cfINT(N,A,B,X,Y,Z) DOUBLEV_cfINT(N,A,B,X,Y,Z) -#define INTVV_cfINT(N,A,B,X,Y,Z) DOUBLEVV_cfINT(N,A,B,X,Y,Z) -#define INTVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVV_cfINT(N,A,B,X,Y,Z) -#define INTVVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVVV_cfINT(N,A,B,X,Y,Z) -#define INTVVVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVVVV_cfINT(N,A,B,X,Y,Z) -#define INTVVVVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVVVVV_cfINT(N,A,B,X,Y,Z) -#define INTVVVVVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVVVVVV_cfINT(N,A,B,X,Y,Z) -#define LOGICALV_cfINT(N,A,B,X,Y,Z) DOUBLEV_cfINT(N,A,B,X,Y,Z) -#define LOGICALVV_cfINT(N,A,B,X,Y,Z) DOUBLEVV_cfINT(N,A,B,X,Y,Z) -#define LOGICALVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVV_cfINT(N,A,B,X,Y,Z) -#define LOGICALVVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVVV_cfINT(N,A,B,X,Y,Z) -#define LOGICALVVVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVVVV_cfINT(N,A,B,X,Y,Z) -#define LOGICALVVVVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVVVVV_cfINT(N,A,B,X,Y,Z) -#define LOGICALVVVVVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVVVVVV_cfINT(N,A,B,X,Y,Z) -#define LONGV_cfINT(N,A,B,X,Y,Z) DOUBLEV_cfINT(N,A,B,X,Y,Z) -#define LONGVV_cfINT(N,A,B,X,Y,Z) DOUBLEVV_cfINT(N,A,B,X,Y,Z) -#define LONGVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVV_cfINT(N,A,B,X,Y,Z) -#define LONGVVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVVV_cfINT(N,A,B,X,Y,Z) -#define LONGVVVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVVVV_cfINT(N,A,B,X,Y,Z) -#define LONGVVVVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVVVVV_cfINT(N,A,B,X,Y,Z) -#define LONGVVVVVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVVVVVV_cfINT(N,A,B,X,Y,Z) -#define 
SHORTV_cfINT(N,A,B,X,Y,Z) DOUBLEV_cfINT(N,A,B,X,Y,Z) -#define SHORTVV_cfINT(N,A,B,X,Y,Z) DOUBLEVV_cfINT(N,A,B,X,Y,Z) -#define SHORTVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVV_cfINT(N,A,B,X,Y,Z) -#define SHORTVVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVVV_cfINT(N,A,B,X,Y,Z) -#define SHORTVVVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVVVV_cfINT(N,A,B,X,Y,Z) -#define SHORTVVVVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVVVVV_cfINT(N,A,B,X,Y,Z) -#define SHORTVVVVVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVVVVVV_cfINT(N,A,B,X,Y,Z) -#define PVOID_cfINT(N,A,B,X,Y,Z) _(CFARGS,N)(A,B,B,X,Y,Z,0) -#define ROUTINE_cfINT(N,A,B,X,Y,Z) PVOID_cfINT(N,A,B,X,Y,Z) -/*CRAY coughs on the first, - i.e. the usual trouble of not being able to - define macros to macros with arguments. - New ultrix is worse, it coughs on all such uses. - */ -/*#define SIMPLE_cfINT PVOID_cfINT*/ -#define SIMPLE_cfINT(N,A,B,X,Y,Z) PVOID_cfINT(N,A,B,X,Y,Z) -#define VOID_cfINT(N,A,B,X,Y,Z) PVOID_cfINT(N,A,B,X,Y,Z) -#define STRING_cfINT(N,A,B,X,Y,Z) PVOID_cfINT(N,A,B,X,Y,Z) -#define STRINGV_cfINT(N,A,B,X,Y,Z) PVOID_cfINT(N,A,B,X,Y,Z) -#define PSTRING_cfINT(N,A,B,X,Y,Z) PVOID_cfINT(N,A,B,X,Y,Z) -#define PSTRINGV_cfINT(N,A,B,X,Y,Z) PVOID_cfINT(N,A,B,X,Y,Z) -#define PNSTRING_cfINT(N,A,B,X,Y,Z) PVOID_cfINT(N,A,B,X,Y,Z) -#define PPSTRING_cfINT(N,A,B,X,Y,Z) PVOID_cfINT(N,A,B,X,Y,Z) -#define ZTRINGV_cfINT(N,A,B,X,Y,Z) PVOID_cfINT(N,A,B,X,Y,Z) -#define PZTRINGV_cfINT(N,A,B,X,Y,Z) PVOID_cfINT(N,A,B,X,Y,Z) -#define CF_0_cfINT(N,A,B,X,Y,Z) - - -#define UCF(TN,I,C) _SEP_(TN,C,COMMA) _Icf(2,U,TN,_(A,I),0) -#define UUCF(TN,I,C) _SEP_(TN,C,COMMA) _SEP_(TN,1,I) -#define UUUCF(TN,I,C) _SEP_(TN,C,COLON) _Icf(2,U,TN,_(A,I),0) -#define INT_cfU(T,A) _(T,VVVVVVV_cfTYPE) A -#define INTV_cfU(T,A) _(T,VVVVVV_cfTYPE) * A -#define INTVV_cfU(T,A) _(T,VVVVV_cfTYPE) * A -#define INTVVV_cfU(T,A) _(T,VVVV_cfTYPE) * A -#define INTVVVV_cfU(T,A) _(T,VVV_cfTYPE) * A -#define INTVVVVV_cfU(T,A) _(T,VV_cfTYPE) * A -#define INTVVVVVV_cfU(T,A) _(T,V_cfTYPE) * A -#define INTVVVVVVV_cfU(T,A) _(T,_cfTYPE) * A -#define 
PINT_cfU(T,A) _(T,_cfTYPE) * A -#define PVOID_cfU(T,A) void *A -#define ROUTINE_cfU(T,A) void (*A)() -#define VOID_cfU(T,A) void A /* Needed for C calls FORTRAN sub.s. */ -#define STRING_cfU(T,A) char *A /* via VOID and wrapper. */ -#define STRINGV_cfU(T,A) char *A -#define PSTRING_cfU(T,A) char *A -#define PSTRINGV_cfU(T,A) char *A -#define ZTRINGV_cfU(T,A) char *A -#define PZTRINGV_cfU(T,A) char *A - -/* VOID breaks U into U and UU. */ -#define INT_cfUU(T,A) _(T,VVVVVVV_cfTYPE) A -#define VOID_cfUU(T,A) /* Needed for FORTRAN calls C sub.s. */ -#define STRING_cfUU(T,A) char *A - - -#define BYTE_cfPU(A) CFextern INTEGER_BYTE FCALLSC_QUALIFIER A -#define DOUBLE_cfPU(A) CFextern DOUBLE_PRECISION FCALLSC_QUALIFIER A -#if ! (defined(FLOATFUNCTIONTYPE)&&defined(ASSIGNFLOAT)&&defined(RETURNFLOAT)) -#define FLOAT_cfPU(A) CFextern float FCALLSC_QUALIFIER A -#else -#define FLOAT_cfPU(A) CFextern FLOATFUNCTIONTYPE FCALLSC_QUALIFIER A -#endif -#define INT_cfPU(A) CFextern int FCALLSC_QUALIFIER A -#define LOGICAL_cfPU(A) CFextern int FCALLSC_QUALIFIER A -#define LONG_cfPU(A) CFextern long FCALLSC_QUALIFIER A -#define SHORT_cfPU(A) CFextern short FCALLSC_QUALIFIER A -#define STRING_cfPU(A) CFextern void FCALLSC_QUALIFIER A -#define VOID_cfPU(A) CFextern void FCALLSC_QUALIFIER A - -#define BYTE_cfE INTEGER_BYTE A0; -#define DOUBLE_cfE DOUBLE_PRECISION A0; -#if ! 
(defined(FLOATFUNCTIONTYPE)&&defined(ASSIGNFLOAT)&&defined(RETURNFLOAT)) -#define FLOAT_cfE float A0; -#else -#define FLOAT_cfE float AA0; FLOATFUNCTIONTYPE A0; -#endif -#define INT_cfE int A0; -#define LOGICAL_cfE int A0; -#define LONG_cfE long A0; -#define SHORT_cfE short A0; -#define VOID_cfE -#ifdef vmsFortran -#define STRING_cfE static char AA0[1+MAX_LEN_FORTRAN_FUNCTION_STRING]; \ - static fstring A0 = \ - {MAX_LEN_FORTRAN_FUNCTION_STRING,DSC$K_DTYPE_T,DSC$K_CLASS_S,AA0};\ - memset(AA0, CFORTRAN_NON_CHAR, MAX_LEN_FORTRAN_FUNCTION_STRING);\ - *(AA0+MAX_LEN_FORTRAN_FUNCTION_STRING)='\0'; -#else -#ifdef CRAYFortran -#define STRING_cfE static char AA0[1+MAX_LEN_FORTRAN_FUNCTION_STRING]; \ - static _fcd A0; *(AA0+MAX_LEN_FORTRAN_FUNCTION_STRING)='\0';\ - memset(AA0,CFORTRAN_NON_CHAR, MAX_LEN_FORTRAN_FUNCTION_STRING);\ - A0 = _cptofcd(AA0,MAX_LEN_FORTRAN_FUNCTION_STRING); -#else -/* 'cc: SC3.0.1 13 Jul 1994' barfs on char A0[0x4FE+1]; - * char A0[0x4FE +1]; char A0[1+0x4FE]; are both OK. */ -#define STRING_cfE static char A0[1+MAX_LEN_FORTRAN_FUNCTION_STRING]; \ - memset(A0, CFORTRAN_NON_CHAR, \ - MAX_LEN_FORTRAN_FUNCTION_STRING); \ - *(A0+MAX_LEN_FORTRAN_FUNCTION_STRING)='\0'; -#endif -#endif -/* ESTRING must use static char. array which is guaranteed to exist after - function returns. */ - -/* N.B.i) The diff. for 0 (Zero) and >=1 arguments. - ii)That the following create an unmatched bracket, i.e. '(', which - must of course be matched in the call. 
- iii)Commas must be handled very carefully */ -#define INT_cfGZ(T,UN,LN) A0=CFC_(UN,LN)( -#define VOID_cfGZ(T,UN,LN) CFC_(UN,LN)( -#ifdef vmsFortran -#define STRING_cfGZ(T,UN,LN) CFC_(UN,LN)(&A0 -#else -#if defined(CRAYFortran) || defined(AbsoftUNIXFortran) -#define STRING_cfGZ(T,UN,LN) CFC_(UN,LN)( A0 -#else -#define STRING_cfGZ(T,UN,LN) CFC_(UN,LN)( A0,MAX_LEN_FORTRAN_FUNCTION_STRING -#endif -#endif - -#define INT_cfG(T,UN,LN) INT_cfGZ(T,UN,LN) -#define VOID_cfG(T,UN,LN) VOID_cfGZ(T,UN,LN) -#define STRING_cfG(T,UN,LN) STRING_cfGZ(T,UN,LN), /*, is only diff. from _cfG*/ - -#define BYTEVVVVVVV_cfPP -#define INTVVVVVVV_cfPP /* These complement FLOATVVVVVVV_cfPP. */ -#define DOUBLEVVVVVVV_cfPP -#define LOGICALVVVVVVV_cfPP -#define LONGVVVVVVV_cfPP -#define SHORTVVVVVVV_cfPP -#define PBYTE_cfPP -#define PINT_cfPP -#define PDOUBLE_cfPP -#define PLOGICAL_cfPP -#define PLONG_cfPP -#define PSHORT_cfPP -#define PFLOAT_cfPP FLOATVVVVVVV_cfPP - -#define BCF(TN,AN,C) _SEP_(TN,C,COMMA) _Icf(2,B,TN,AN,0) -#define INT_cfB(T,A) (_(T,VVVVVVV_cfTYPE)) A -#define INTV_cfB(T,A) A -#define INTVV_cfB(T,A) (A)[0] -#define INTVVV_cfB(T,A) (A)[0][0] -#define INTVVVV_cfB(T,A) (A)[0][0][0] -#define INTVVVVV_cfB(T,A) (A)[0][0][0][0] -#define INTVVVVVV_cfB(T,A) (A)[0][0][0][0][0] -#define INTVVVVVVV_cfB(T,A) (A)[0][0][0][0][0][0] -#define PINT_cfB(T,A) _(T,_cfPP)&A -#define STRING_cfB(T,A) (char *) A -#define STRINGV_cfB(T,A) (char *) A -#define PSTRING_cfB(T,A) (char *) A -#define PSTRINGV_cfB(T,A) (char *) A -#define PVOID_cfB(T,A) (void *) A -#define ROUTINE_cfB(T,A) (void(*)())A -#define ZTRINGV_cfB(T,A) (char *) A -#define PZTRINGV_cfB(T,A) (char *) A - -#define SCF(TN,NAME,I,A) _(TN,_cfSTR)(3,S,NAME,I,A,0,0) -#define DEFAULT_cfS(M,I,A) -#define LOGICAL_cfS(M,I,A) -#define PLOGICAL_cfS(M,I,A) -#define STRING_cfS(M,I,A) ,sizeof(A) -#define STRINGV_cfS(M,I,A) ,( (unsigned)0xFFFF*firstindexlength(A) \ - +secondindexlength(A)) -#define PSTRING_cfS(M,I,A) ,sizeof(A) -#define 
PSTRINGV_cfS(M,I,A) STRINGV_cfS(M,I,A) -#define ZTRINGV_cfS(M,I,A) -#define PZTRINGV_cfS(M,I,A) - -#define HCF(TN,I) _(TN,_cfSTR)(3,H,COMMA, H,_(C,I),0,0) -#define HHCF(TN,I) _(TN,_cfSTR)(3,H,COMMA,HH,_(C,I),0,0) -#define HHHCF(TN,I) _(TN,_cfSTR)(3,H,COLON, H,_(C,I),0,0) -#define H_CF_SPECIAL unsigned -#define HH_CF_SPECIAL -#define DEFAULT_cfH(M,I,A) -#define LOGICAL_cfH(S,U,B) -#define PLOGICAL_cfH(S,U,B) -#define STRING_cfH(S,U,B) _(A,S) _(U,_CF_SPECIAL) B -#define STRINGV_cfH(S,U,B) STRING_cfH(S,U,B) -#define PSTRING_cfH(S,U,B) STRING_cfH(S,U,B) -#define PSTRINGV_cfH(S,U,B) STRING_cfH(S,U,B) -#define PNSTRING_cfH(S,U,B) STRING_cfH(S,U,B) -#define PPSTRING_cfH(S,U,B) STRING_cfH(S,U,B) -#define ZTRINGV_cfH(S,U,B) -#define PZTRINGV_cfH(S,U,B) - -/* Need VOID_cfSTR because Absoft forced function types go through _cfSTR. */ -/* No spaces inside expansion. They screws up macro catenation kludge. */ -#define VOID_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define BYTE_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define DOUBLE_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define FLOAT_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define INT_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define LOGICAL_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,LOGICAL,A,B,C,D,E) -#define LONG_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define SHORT_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define BYTEV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define BYTEVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define BYTEVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define BYTEVVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define BYTEVVVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define BYTEVVVVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define BYTEVVVVVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define 
DOUBLEV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define DOUBLEVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define DOUBLEVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define DOUBLEVVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define DOUBLEVVVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,LOGICAL,A,B,C,D,E) -#define DOUBLEVVVVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define DOUBLEVVVVVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define FLOATV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define FLOATVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define FLOATVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define FLOATVVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define FLOATVVVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,LOGICAL,A,B,C,D,E) -#define FLOATVVVVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define FLOATVVVVVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define INTV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define INTVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define INTVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define INTVVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define INTVVVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,LOGICAL,A,B,C,D,E) -#define INTVVVVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define INTVVVVVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define LOGICALV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define LOGICALVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define LOGICALVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define LOGICALVVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define LOGICALVVVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,LOGICAL,A,B,C,D,E) -#define LOGICALVVVVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define 
LOGICALVVVVVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define LONGV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define LONGVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define LONGVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define LONGVVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define LONGVVVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,LOGICAL,A,B,C,D,E) -#define LONGVVVVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define LONGVVVVVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define SHORTV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define SHORTVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define SHORTVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define SHORTVVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define SHORTVVVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,LOGICAL,A,B,C,D,E) -#define SHORTVVVVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define SHORTVVVVVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define PBYTE_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define PDOUBLE_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define PFLOAT_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define PINT_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define PLOGICAL_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,PLOGICAL,A,B,C,D,E) -#define PLONG_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define PSHORT_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define STRING_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,STRING,A,B,C,D,E) -#define PSTRING_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,PSTRING,A,B,C,D,E) -#define STRINGV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,STRINGV,A,B,C,D,E) -#define PSTRINGV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,PSTRINGV,A,B,C,D,E) -#define PNSTRING_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,PNSTRING,A,B,C,D,E) -#define PPSTRING_cfSTR(N,T,A,B,C,D,E) 
_(CFARGS,N)(T,PPSTRING,A,B,C,D,E) -#define PVOID_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define ROUTINE_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define SIMPLE_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define ZTRINGV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,ZTRINGV,A,B,C,D,E) -#define PZTRINGV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,PZTRINGV,A,B,C,D,E) -#define CF_0_cfSTR(N,T,A,B,C,D,E) - -/* See ACF table comments, which explain why CCF was split into two. */ -#define CCF(NAME,TN,I) _(TN,_cfSTR)(5,C,NAME,I,_(A,I),_(B,I),_(C,I)) -#define DEFAULT_cfC(M,I,A,B,C) -#define LOGICAL_cfC(M,I,A,B,C) A=C2FLOGICAL( A); -#define PLOGICAL_cfC(M,I,A,B,C) *A=C2FLOGICAL(*A); -#ifdef vmsFortran -#define STRING_cfC(M,I,A,B,C) (B.clen=strlen(A),B.f.dsc$a_pointer=A, \ - C==sizeof(char*)||C==(unsigned)(B.clen+1)?B.f.dsc$w_length=B.clen: \ - (memset((A)+B.clen,' ',C-B.clen-1),A[B.f.dsc$w_length=C-1]='\0')); - /* PSTRING_cfC to beware of array A which does not contain any \0. */ -#define PSTRING_cfC(M,I,A,B,C) (B.dsc$a_pointer=A, C==sizeof(char*) ? \ - B.dsc$w_length=strlen(A): (A[C-1]='\0',B.dsc$w_length=strlen(A), \ - memset((A)+B.dsc$w_length,' ',C-B.dsc$w_length-1), B.dsc$w_length=C-1)); -#else -#define STRING_cfC(M,I,A,B,C) (B.clen=strlen(A), \ - C==sizeof(char*)||C==(unsigned)(B.clen+1)?B.flen=B.clen: \ - (memset((A)+B.clen,' ',C-B.clen-1),A[B.flen=C-1]='\0')); -#define PSTRING_cfC(M,I,A,B,C) (C==sizeof(char*)? B=strlen(A): \ - (A[C-1]='\0',B=strlen(A),memset((A)+B,' ',C-B-1),B=C-1)); -#endif - /* For CRAYFortran for (P)STRINGV_cfC, B.fs is set, but irrelevant. 
*/ -#define STRINGV_cfC(M,I,A,B,C) \ - AATRINGV_cfA( A,B,(C/0xFFFF)*(C%0xFFFF),C/0xFFFF,C%0xFFFF) -#define PSTRINGV_cfC(M,I,A,B,C) \ - APATRINGV_cfA( A,B,(C/0xFFFF)*(C%0xFFFF),C/0xFFFF,C%0xFFFF) -#define ZTRINGV_cfC(M,I,A,B,C) \ - AATRINGV_cfA( A,B, (_3(M,_ELEMS_,I))*((_3(M,_ELEMLEN_,I))+1), \ - (_3(M,_ELEMS_,I)), (_3(M,_ELEMLEN_,I))+1 ) -#define PZTRINGV_cfC(M,I,A,B,C) \ - APATRINGV_cfA( A,B, (_3(M,_ELEMS_,I))*((_3(M,_ELEMLEN_,I))+1), \ - (_3(M,_ELEMS_,I)), (_3(M,_ELEMLEN_,I))+1 ) - -#define BYTE_cfCCC(A,B) &A -#define DOUBLE_cfCCC(A,B) &A -#if !defined(__CF__KnR) -#define FLOAT_cfCCC(A,B) &A - /* Although the VAX doesn't, at least the */ -#else /* HP and K&R mips promote float arg.'s of */ -#define FLOAT_cfCCC(A,B) &B /* unprototyped functions to double. Cannot */ -#endif /* use A here to pass the argument to FORTRAN. */ -#define INT_cfCCC(A,B) &A -#define LOGICAL_cfCCC(A,B) &A -#define LONG_cfCCC(A,B) &A -#define SHORT_cfCCC(A,B) &A -#define PBYTE_cfCCC(A,B) A -#define PDOUBLE_cfCCC(A,B) A -#define PFLOAT_cfCCC(A,B) A -#define PINT_cfCCC(A,B) A -#define PLOGICAL_cfCCC(A,B) B=A /* B used to keep a common W table. 
*/ -#define PLONG_cfCCC(A,B) A -#define PSHORT_cfCCC(A,B) A - -#define CCCF(TN,I,M) _SEP_(TN,M,COMMA) _Icf(3,CC,TN,_(A,I),_(B,I)) -#define INT_cfCC(T,A,B) _(T,_cfCCC)(A,B) -#define INTV_cfCC(T,A,B) A -#define INTVV_cfCC(T,A,B) A -#define INTVVV_cfCC(T,A,B) A -#define INTVVVV_cfCC(T,A,B) A -#define INTVVVVV_cfCC(T,A,B) A -#define INTVVVVVV_cfCC(T,A,B) A -#define INTVVVVVVV_cfCC(T,A,B) A -#define PINT_cfCC(T,A,B) _(T,_cfCCC)(A,B) -#define PVOID_cfCC(T,A,B) A -#if defined(apolloFortran) || defined(hpuxFortran800) || defined(AbsoftUNIXFortran) -#define ROUTINE_cfCC(T,A,B) &A -#else -#define ROUTINE_cfCC(T,A,B) A -#endif -#define SIMPLE_cfCC(T,A,B) A -#ifdef vmsFortran -#define STRING_cfCC(T,A,B) &B.f -#define STRINGV_cfCC(T,A,B) &B -#define PSTRING_cfCC(T,A,B) &B -#define PSTRINGV_cfCC(T,A,B) &B -#else -#ifdef CRAYFortran -#define STRING_cfCC(T,A,B) _cptofcd(A,B.flen) -#define STRINGV_cfCC(T,A,B) _cptofcd(B.s,B.flen) -#define PSTRING_cfCC(T,A,B) _cptofcd(A,B) -#define PSTRINGV_cfCC(T,A,B) _cptofcd(A,B.flen) -#else -#define STRING_cfCC(T,A,B) A -#define STRINGV_cfCC(T,A,B) B.fs -#define PSTRING_cfCC(T,A,B) A -#define PSTRINGV_cfCC(T,A,B) B.fs -#endif -#endif -#define ZTRINGV_cfCC(T,A,B) STRINGV_cfCC(T,A,B) -#define PZTRINGV_cfCC(T,A,B) PSTRINGV_cfCC(T,A,B) - -#define BYTE_cfX return A0; -#define DOUBLE_cfX return A0; -#if ! 
(defined(FLOATFUNCTIONTYPE)&&defined(ASSIGNFLOAT)&&defined(RETURNFLOAT)) -#define FLOAT_cfX return A0; -#else -#define FLOAT_cfX ASSIGNFLOAT(AA0,A0); return AA0; -#endif -#define INT_cfX return A0; -#define LOGICAL_cfX return F2CLOGICAL(A0); -#define LONG_cfX return A0; -#define SHORT_cfX return A0; -#define VOID_cfX return ; -#if defined(vmsFortran) || defined(CRAYFortran) -#define STRING_cfX return kill_trailing( \ - kill_trailing(AA0,CFORTRAN_NON_CHAR),' '); -#else -#define STRING_cfX return kill_trailing( \ - kill_trailing( A0,CFORTRAN_NON_CHAR),' '); -#endif - -#define CFFUN(NAME) _(__cf__,NAME) - -/* Note that we don't use LN here, but we keep it for consistency. */ -#define CCALLSFFUN0(UN,LN) CFFUN(UN)() - -#ifdef OLD_VAXC /* Allow %CC-I-PARAMNOTUSED. */ -#pragma standard -#endif - -#define CCALLSFFUN1( UN,LN,T1, A1) \ - CCALLSFFUN5 (UN,LN,T1,CF_0,CF_0,CF_0,CF_0,A1,0,0,0,0) -#define CCALLSFFUN2( UN,LN,T1,T2, A1,A2) \ - CCALLSFFUN5 (UN,LN,T1,T2,CF_0,CF_0,CF_0,A1,A2,0,0,0) -#define CCALLSFFUN3( UN,LN,T1,T2,T3, A1,A2,A3) \ - CCALLSFFUN5 (UN,LN,T1,T2,T3,CF_0,CF_0,A1,A2,A3,0,0) -#define CCALLSFFUN4( UN,LN,T1,T2,T3,T4, A1,A2,A3,A4)\ - CCALLSFFUN5 (UN,LN,T1,T2,T3,T4,CF_0,A1,A2,A3,A4,0) -#define CCALLSFFUN5( UN,LN,T1,T2,T3,T4,T5, A1,A2,A3,A4,A5) \ - CCALLSFFUN10(UN,LN,T1,T2,T3,T4,T5,CF_0,CF_0,CF_0,CF_0,CF_0,A1,A2,A3,A4,A5,0,0,0,0,0) -#define CCALLSFFUN6( UN,LN,T1,T2,T3,T4,T5,T6, A1,A2,A3,A4,A5,A6) \ - CCALLSFFUN10(UN,LN,T1,T2,T3,T4,T5,T6,CF_0,CF_0,CF_0,CF_0,A1,A2,A3,A4,A5,A6,0,0,0,0) -#define CCALLSFFUN7( UN,LN,T1,T2,T3,T4,T5,T6,T7, A1,A2,A3,A4,A5,A6,A7) \ - CCALLSFFUN10(UN,LN,T1,T2,T3,T4,T5,T6,T7,CF_0,CF_0,CF_0,A1,A2,A3,A4,A5,A6,A7,0,0,0) -#define CCALLSFFUN8( UN,LN,T1,T2,T3,T4,T5,T6,T7,T8, A1,A2,A3,A4,A5,A6,A7,A8) \ - CCALLSFFUN10(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,CF_0,CF_0,A1,A2,A3,A4,A5,A6,A7,A8,0,0) -#define CCALLSFFUN9( UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,A1,A2,A3,A4,A5,A6,A7,A8,A9)\ - CCALLSFFUN10(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,CF_0,A1,A2,A3,A4,A5,A6,A7,A8,A9,0) 
-#define CCALLSFFUN10(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA)\ - CCALLSFFUN14(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,CF_0,CF_0,CF_0,CF_0,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,0,0,0,0) -#define CCALLSFFUN11(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB)\ - CCALLSFFUN14(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,CF_0,CF_0,CF_0,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,0,0,0) -#define CCALLSFFUN12(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC)\ - CCALLSFFUN14(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,CF_0,CF_0,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,0,0) -#define CCALLSFFUN13(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,AD)\ - CCALLSFFUN14(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,CF_0,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,AD,0) - -#define CCALLSFFUN14(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,AD,AE)\ -((CFFUN(UN)( BCF(T1,A1,0) BCF(T2,A2,1) BCF(T3,A3,1) BCF(T4,A4,1) BCF(T5,A5,1) \ - BCF(T6,A6,1) BCF(T7,A7,1) BCF(T8,A8,1) BCF(T9,A9,1) BCF(TA,AA,1) \ - BCF(TB,AB,1) BCF(TC,AC,1) BCF(TD,AD,1) BCF(TE,AE,1) \ - SCF(T1,LN,1,A1) SCF(T2,LN,2,A2) SCF(T3,LN,3,A3) SCF(T4,LN,4,A4) \ - SCF(T5,LN,5,A5) SCF(T6,LN,6,A6) SCF(T7,LN,7,A7) SCF(T8,LN,8,A8) \ - SCF(T9,LN,9,A9) SCF(TA,LN,A,AA) SCF(TB,LN,B,AB) SCF(TC,LN,C,AC) \ - SCF(TD,LN,D,AD)))) - -/* N.B. Create a separate function instead of using (call function, function -value here) because in order to create the variables needed for the input -arg.'s which may be const.'s one has to do the creation within {}, but these -can never be placed within ()'s. Therefore one must create wrapper functions. -gcc, on the other hand may be able to avoid the wrapper functions. */ - -/* Prototypes are needed to correctly handle the value returned correctly. N.B. -Can only have prototype arg.'s with difficulty, a la G... table since FORTRAN -functions returning strings have extra arg.'s. 
Don't bother, since this only -causes a compiler warning to come up when one uses FCALLSCFUNn and CCALLSFFUNn -for the same function in the same source code. Something done by the experts in -debugging only.*/ - -#define PROTOCCALLSFFUN0(F,UN,LN) \ -_(F,_cfPU)( CFC_(UN,LN))(CF_NULL_PROTO); \ -static _Icf(2,U,F,CFFUN(UN),0)() {_(F,_cfE) _Icf(3,GZ,F,UN,LN) ABSOFT_cf1(F));_(F,_cfX)} - -#define PROTOCCALLSFFUN1( T0,UN,LN,T1) \ - PROTOCCALLSFFUN5 (T0,UN,LN,T1,CF_0,CF_0,CF_0,CF_0) -#define PROTOCCALLSFFUN2( T0,UN,LN,T1,T2) \ - PROTOCCALLSFFUN5 (T0,UN,LN,T1,T2,CF_0,CF_0,CF_0) -#define PROTOCCALLSFFUN3( T0,UN,LN,T1,T2,T3) \ - PROTOCCALLSFFUN5 (T0,UN,LN,T1,T2,T3,CF_0,CF_0) -#define PROTOCCALLSFFUN4( T0,UN,LN,T1,T2,T3,T4) \ - PROTOCCALLSFFUN5 (T0,UN,LN,T1,T2,T3,T4,CF_0) -#define PROTOCCALLSFFUN5( T0,UN,LN,T1,T2,T3,T4,T5) \ - PROTOCCALLSFFUN10(T0,UN,LN,T1,T2,T3,T4,T5,CF_0,CF_0,CF_0,CF_0,CF_0) -#define PROTOCCALLSFFUN6( T0,UN,LN,T1,T2,T3,T4,T5,T6) \ - PROTOCCALLSFFUN10(T0,UN,LN,T1,T2,T3,T4,T5,T6,CF_0,CF_0,CF_0,CF_0) -#define PROTOCCALLSFFUN7( T0,UN,LN,T1,T2,T3,T4,T5,T6,T7) \ - PROTOCCALLSFFUN10(T0,UN,LN,T1,T2,T3,T4,T5,T6,T7,CF_0,CF_0,CF_0) -#define PROTOCCALLSFFUN8( T0,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8) \ - PROTOCCALLSFFUN10(T0,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,CF_0,CF_0) -#define PROTOCCALLSFFUN9( T0,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9) \ - PROTOCCALLSFFUN10(T0,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,CF_0) -#define PROTOCCALLSFFUN10(T0,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA) \ - PROTOCCALLSFFUN14(T0,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,CF_0,CF_0,CF_0,CF_0) -#define PROTOCCALLSFFUN11(T0,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB) \ - PROTOCCALLSFFUN14(T0,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,CF_0,CF_0,CF_0) -#define PROTOCCALLSFFUN12(T0,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC) \ - PROTOCCALLSFFUN14(T0,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,CF_0,CF_0) -#define PROTOCCALLSFFUN13(T0,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD) \ - 
PROTOCCALLSFFUN14(T0,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,CF_0) - -/* HP/UX 9.01 cc requires the blank between '_Icf(3,G,T0,UN,LN) CCCF(T1,1,0)' */ - -#ifndef __CF__KnR -#define PROTOCCALLSFFUN14(T0,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) \ - _(T0,_cfPU)(CFC_(UN,LN))(CF_NULL_PROTO); static _Icf(2,U,T0,CFFUN(UN),0)( \ - CFARGT14FS(UCF,HCF,,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) ) \ -{ CFARGT14S(VCF,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) _(T0,_cfE) \ - CCF(LN,T1,1) CCF(LN,T2,2) CCF(LN,T3,3) CCF(LN,T4,4) CCF(LN,T5,5) \ - CCF(LN,T6,6) CCF(LN,T7,7) CCF(LN,T8,8) CCF(LN,T9,9) CCF(LN,TA,A) \ - CCF(LN,TB,B) CCF(LN,TC,C) CCF(LN,TD,D) CCF(LN,TE,E) _Icf(3,G,T0,UN,LN) \ - CFARGT14(CCCF,JCF,ABSOFT_cf1(T0),T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE)); \ - WCF(T1,A1,1) WCF(T2,A2,2) WCF(T3,A3,3) WCF(T4,A4,4) WCF(T5,A5,5) \ - WCF(T6,A6,6) WCF(T7,A7,7) WCF(T8,A8,8) WCF(T9,A9,9) WCF(TA,AA,A) \ - WCF(TB,AB,B) WCF(TC,AC,C) WCF(TD,AD,D) WCF(TE,AE,E) _(T0,_cfX)} -#else -#define PROTOCCALLSFFUN14(T0,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) \ - _(T0,_cfPU)(CFC_(UN,LN))(CF_NULL_PROTO); static _Icf(2,U,T0,CFFUN(UN),0)( \ - CFARGT14FS(UUCF,HHCF,,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) ) \ - CFARGT14FS(UUUCF,HHHCF,,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) ; \ -{ CFARGT14S(VCF,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) _(T0,_cfE) \ - CCF(LN,T1,1) CCF(LN,T2,2) CCF(LN,T3,3) CCF(LN,T4,4) CCF(LN,T5,5) \ - CCF(LN,T6,6) CCF(LN,T7,7) CCF(LN,T8,8) CCF(LN,T9,9) CCF(LN,TA,A) \ - CCF(LN,TB,B) CCF(LN,TC,C) CCF(LN,TD,D) CCF(LN,TE,E) _Icf(3,G,T0,UN,LN) \ - CFARGT14(CCCF,JCF,ABSOFT_cf1(T0),T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE)); \ - WCF(T1,A1,1) WCF(T2,A2,2) WCF(T3,A3,3) WCF(T4,A4,4) WCF(T5,A5,5) \ - WCF(T6,A6,6) WCF(T7,A7,7) WCF(T8,A8,8) WCF(T9,A9,9) WCF(TA,AA,A) \ - WCF(TB,AB,B) WCF(TC,AC,C) WCF(TD,AD,D) WCF(TE,AE,E) _(T0,_cfX)} -#endif - -/*-------------------------------------------------------------------------*/ - -/* UTILITIES FOR FORTRAN TO CALL C ROUTINES */ - 
-#ifdef OLD_VAXC /* Prevent %CC-I-PARAMNOTUSED. */ -#pragma nostandard -#endif - -#if defined(vmsFortran) || defined(CRAYFortran) -#define DCF(TN,I) -#define DDCF(TN,I) -#define DDDCF(TN,I) -#else -#define DCF(TN,I) HCF(TN,I) -#define DDCF(TN,I) HHCF(TN,I) -#define DDDCF(TN,I) HHHCF(TN,I) -#endif - -#define QCF(TN,I) _(TN,_cfSTR)(1,Q,_(B,I), 0,0,0,0) -#define DEFAULT_cfQ(B) -#define LOGICAL_cfQ(B) -#define PLOGICAL_cfQ(B) -#define STRINGV_cfQ(B) char *B; unsigned int _(B,N); -#define STRING_cfQ(B) char *B=NULL; -#define PSTRING_cfQ(B) char *B=NULL; -#define PSTRINGV_cfQ(B) STRINGV_cfQ(B) -#define PNSTRING_cfQ(B) char *B=NULL; -#define PPSTRING_cfQ(B) - -#if defined(apolloFortran) || defined(hpuxFortran800) || defined(AbsoftUNIXFortran) -#define ROUTINE_orig (void *)* /* Else, function value has to match. */ -#else /* !apolloFortran */ -#ifdef __sgi /* Else SGI gives warning 182 contrary to its C LRM A.17.7 */ -#define ROUTINE_orig *(void**)& -#else /* !__sgi */ -#define ROUTINE_orig (void *) -#endif /* __sgi */ -#endif /* apolloFortran */ - -#define ROUTINE_1 ROUTINE_orig -#define ROUTINE_2 ROUTINE_orig -#define ROUTINE_3 ROUTINE_orig -#define ROUTINE_4 ROUTINE_orig -#define ROUTINE_5 ROUTINE_orig -#define ROUTINE_6 ROUTINE_orig -#define ROUTINE_7 ROUTINE_orig -#define ROUTINE_8 ROUTINE_orig -#define ROUTINE_9 ROUTINE_orig -#define ROUTINE_10 ROUTINE_orig - -#define TCF(NAME,TN,I,M) _SEP_(TN,M,COMMA) _(TN,_cfT)(NAME,I,_(A,I),_(B,I),_(C,I)) -#define BYTE_cfT(M,I,A,B,D) *A -#define DOUBLE_cfT(M,I,A,B,D) *A -#define FLOAT_cfT(M,I,A,B,D) *A -#define INT_cfT(M,I,A,B,D) *A -#define LOGICAL_cfT(M,I,A,B,D) F2CLOGICAL(*A) -#define LONG_cfT(M,I,A,B,D) *A -#define SHORT_cfT(M,I,A,B,D) *A -#define BYTEV_cfT(M,I,A,B,D) A -#define DOUBLEV_cfT(M,I,A,B,D) A -#define FLOATV_cfT(M,I,A,B,D) VOIDP A -#define INTV_cfT(M,I,A,B,D) A -#define LOGICALV_cfT(M,I,A,B,D) A -#define LONGV_cfT(M,I,A,B,D) A -#define SHORTV_cfT(M,I,A,B,D) A -#define BYTEVV_cfT(M,I,A,B,D) (void *)A /* We have to 
cast to void *,*/ -#define BYTEVVV_cfT(M,I,A,B,D) (void *)A /* since we don't know the */ -#define BYTEVVVV_cfT(M,I,A,B,D) (void *)A /* dimensions of the array. */ -#define BYTEVVVVV_cfT(M,I,A,B,D) (void *)A /* i.e. Unfortunately, can't */ -#define BYTEVVVVVV_cfT(M,I,A,B,D) (void *)A /* check that the type */ -#define BYTEVVVVVVV_cfT(M,I,A,B,D) (void *)A /* matches the prototype. */ -#define DOUBLEVV_cfT(M,I,A,B,D) (void *)A -#define DOUBLEVVV_cfT(M,I,A,B,D) (void *)A -#define DOUBLEVVVV_cfT(M,I,A,B,D) (void *)A -#define DOUBLEVVVVV_cfT(M,I,A,B,D) (void *)A -#define DOUBLEVVVVVV_cfT(M,I,A,B,D) (void *)A -#define DOUBLEVVVVVVV_cfT(M,I,A,B,D) (void *)A -#define FLOATVV_cfT(M,I,A,B,D) (void *)A -#define FLOATVVV_cfT(M,I,A,B,D) (void *)A -#define FLOATVVVV_cfT(M,I,A,B,D) (void *)A -#define FLOATVVVVV_cfT(M,I,A,B,D) (void *)A -#define FLOATVVVVVV_cfT(M,I,A,B,D) (void *)A -#define FLOATVVVVVVV_cfT(M,I,A,B,D) (void *)A -#define INTVV_cfT(M,I,A,B,D) (void *)A -#define INTVVV_cfT(M,I,A,B,D) (void *)A -#define INTVVVV_cfT(M,I,A,B,D) (void *)A -#define INTVVVVV_cfT(M,I,A,B,D) (void *)A -#define INTVVVVVV_cfT(M,I,A,B,D) (void *)A -#define INTVVVVVVV_cfT(M,I,A,B,D) (void *)A -#define LOGICALVV_cfT(M,I,A,B,D) (void *)A -#define LOGICALVVV_cfT(M,I,A,B,D) (void *)A -#define LOGICALVVVV_cfT(M,I,A,B,D) (void *)A -#define LOGICALVVVVV_cfT(M,I,A,B,D) (void *)A -#define LOGICALVVVVVV_cfT(M,I,A,B,D) (void *)A -#define LOGICALVVVVVVV_cfT(M,I,A,B,D) (void *)A -#define LONGVV_cfT(M,I,A,B,D) (void *)A -#define LONGVVV_cfT(M,I,A,B,D) (void *)A -#define LONGVVVV_cfT(M,I,A,B,D) (void *)A -#define LONGVVVVV_cfT(M,I,A,B,D) (void *)A -#define LONGVVVVVV_cfT(M,I,A,B,D) (void *)A -#define LONGVVVVVVV_cfT(M,I,A,B,D) (void *)A -#define SHORTVV_cfT(M,I,A,B,D) (void *)A -#define SHORTVVV_cfT(M,I,A,B,D) (void *)A -#define SHORTVVVV_cfT(M,I,A,B,D) (void *)A -#define SHORTVVVVV_cfT(M,I,A,B,D) (void *)A -#define SHORTVVVVVV_cfT(M,I,A,B,D) (void *)A -#define SHORTVVVVVVV_cfT(M,I,A,B,D) (void *)A -#define 
PBYTE_cfT(M,I,A,B,D) A -#define PDOUBLE_cfT(M,I,A,B,D) A -#define PFLOAT_cfT(M,I,A,B,D) VOIDP A -#define PINT_cfT(M,I,A,B,D) A -#define PLOGICAL_cfT(M,I,A,B,D) ((*A=F2CLOGICAL(*A)),A) -#define PLONG_cfT(M,I,A,B,D) A -#define PSHORT_cfT(M,I,A,B,D) A -#define PVOID_cfT(M,I,A,B,D) A -#define ROUTINE_cfT(M,I,A,B,D) _(ROUTINE_,I) A -/* A == pointer to the characters - D == length of the string, or of an element in an array of strings - E == number of elements in an array of strings */ -#define TTSTR( A,B,D) \ - ((B=(char*)malloc(D+1))[D]='\0', memcpy(B,A,D), kill_trailing(B,' ')) -#define TTTTSTR( A,B,D) (!(D<4||A[0]||A[1]||A[2]||A[3]))?NULL: \ - memchr(A,'\0',D) ?A : TTSTR(A,B,D) -#define TTTTSTRV( A,B,D,E) (_(B,N)=E,B=(char*)malloc(_(B,N)*(D+1)), (void *) \ - vkill_trailing(f2cstrv(A,B,D+1, _(B,N)*(D+1)), D+1,_(B,N)*(D+1),' ')) -#ifdef vmsFortran -#define STRING_cfT(M,I,A,B,D) TTTTSTR( A->dsc$a_pointer,B,A->dsc$w_length) -#define STRINGV_cfT(M,I,A,B,D) TTTTSTRV(A->dsc$a_pointer, B, \ - A->dsc$w_length , A->dsc$l_m[0]) -#define PSTRING_cfT(M,I,A,B,D) TTSTR( A->dsc$a_pointer,B,A->dsc$w_length) -#define PPSTRING_cfT(M,I,A,B,D) A->dsc$a_pointer -#else -#ifdef CRAYFortran -#define STRING_cfT(M,I,A,B,D) TTTTSTR( _fcdtocp(A),B,_fcdlen(A)) -#define STRINGV_cfT(M,I,A,B,D) TTTTSTRV(_fcdtocp(A),B,_fcdlen(A), \ - num_elem(_fcdtocp(A),_fcdlen(A),_3(M,_STRV_A,I))) -#define PSTRING_cfT(M,I,A,B,D) TTSTR( _fcdtocp(A),B,_fcdlen(A)) -#define PPSTRING_cfT(M,I,A,B,D) _fcdtocp(A) -#else -#define STRING_cfT(M,I,A,B,D) TTTTSTR( A,B,D) -#define STRINGV_cfT(M,I,A,B,D) TTTTSTRV(A,B,D, num_elem(A,D,_3(M,_STRV_A,I))) -#define PSTRING_cfT(M,I,A,B,D) TTSTR( A,B,D) -#define PPSTRING_cfT(M,I,A,B,D) A -#endif -#endif -#define PNSTRING_cfT(M,I,A,B,D) STRING_cfT(M,I,A,B,D) -#define PSTRINGV_cfT(M,I,A,B,D) STRINGV_cfT(M,I,A,B,D) -#define CF_0_cfT(M,I,A,B,D) - -#define RCF(TN,I) _(TN,_cfSTR)(3,R,_(A,I),_(B,I),_(C,I),0,0) -#define DEFAULT_cfR(A,B,D) -#define LOGICAL_cfR(A,B,D) -#define PLOGICAL_cfR(A,B,D) 
*A=C2FLOGICAL(*A); -#define STRING_cfR(A,B,D) if (B) free(B); -#define STRINGV_cfR(A,B,D) free(B); -/* A and D as defined above for TSTRING(V) */ -#define RRRRPSTR( A,B,D) if (B) memcpy(A,B, _cfMIN(strlen(B),D)), \ - (D>strlen(B)?memset(A+strlen(B),' ', D-strlen(B)):0), free(B); -#define RRRRPSTRV(A,B,D) c2fstrv(B,A,D+1,(D+1)*_(B,N)), free(B); -#ifdef vmsFortran -#define PSTRING_cfR(A,B,D) RRRRPSTR( A->dsc$a_pointer,B,A->dsc$w_length) -#define PSTRINGV_cfR(A,B,D) RRRRPSTRV(A->dsc$a_pointer,B,A->dsc$w_length) -#else -#ifdef CRAYFortran -#define PSTRING_cfR(A,B,D) RRRRPSTR( _fcdtocp(A),B,_fcdlen(A)) -#define PSTRINGV_cfR(A,B,D) RRRRPSTRV(_fcdtocp(A),B,_fcdlen(A)) -#else -#define PSTRING_cfR(A,B,D) RRRRPSTR( A,B,D) -#define PSTRINGV_cfR(A,B,D) RRRRPSTRV(A,B,D) -#endif -#endif -#define PNSTRING_cfR(A,B,D) PSTRING_cfR(A,B,D) -#define PPSTRING_cfR(A,B,D) - -#define BYTE_cfFZ(UN,LN) INTEGER_BYTE FCALLSC_QUALIFIER fcallsc(UN,LN)( -#define DOUBLE_cfFZ(UN,LN) DOUBLE_PRECISION FCALLSC_QUALIFIER fcallsc(UN,LN)( -#define INT_cfFZ(UN,LN) int FCALLSC_QUALIFIER fcallsc(UN,LN)( -#define LOGICAL_cfFZ(UN,LN) int FCALLSC_QUALIFIER fcallsc(UN,LN)( -#define LONG_cfFZ(UN,LN) long FCALLSC_QUALIFIER fcallsc(UN,LN)( -#define SHORT_cfFZ(UN,LN) short FCALLSC_QUALIFIER fcallsc(UN,LN)( -#define VOID_cfFZ(UN,LN) void FCALLSC_QUALIFIER fcallsc(UN,LN)( -#ifndef __CF__KnR -/* The void is req'd by the Apollo, to make this an ANSI function declaration. - The Apollo promotes K&R float functions to double. 
*/ -#define FLOAT_cfFZ(UN,LN) float FCALLSC_QUALIFIER fcallsc(UN,LN)(void -#ifdef vmsFortran -#define STRING_cfFZ(UN,LN) void FCALLSC_QUALIFIER fcallsc(UN,LN)(fstring *AS -#else -#ifdef CRAYFortran -#define STRING_cfFZ(UN,LN) void FCALLSC_QUALIFIER fcallsc(UN,LN)(_fcd AS -#else -#if defined(AbsoftUNIXFortran) -#define STRING_cfFZ(UN,LN) void FCALLSC_QUALIFIER fcallsc(UN,LN)(char *AS -#else -#define STRING_cfFZ(UN,LN) void FCALLSC_QUALIFIER fcallsc(UN,LN)(char *AS, unsigned D0 -#endif -#endif -#endif -#else -#if ! (defined(FLOATFUNCTIONTYPE)&&defined(ASSIGNFLOAT)&&defined(RETURNFLOAT)) -#define FLOAT_cfFZ(UN,LN) float FCALLSC_QUALIFIER fcallsc(UN,LN)( -#else -#define FLOAT_cfFZ(UN,LN) FLOATFUNCTIONTYPE FCALLSC_QUALIFIER fcallsc(UN,LN)( -#endif -#if defined(vmsFortran) || defined(CRAYFortran) || defined(AbsoftUNIXFortran) -#define STRING_cfFZ(UN,LN) void FCALLSC_QUALIFIER fcallsc(UN,LN)(AS -#else -#define STRING_cfFZ(UN,LN) void FCALLSC_QUALIFIER fcallsc(UN,LN)(AS, D0 -#endif -#endif - -#define BYTE_cfF(UN,LN) BYTE_cfFZ(UN,LN) -#define DOUBLE_cfF(UN,LN) DOUBLE_cfFZ(UN,LN) -#ifndef __CF_KnR -#define FLOAT_cfF(UN,LN) float FCALLSC_QUALIFIER fcallsc(UN,LN)( -#else -#define FLOAT_cfF(UN,LN) FLOAT_cfFZ(UN,LN) -#endif -#define INT_cfF(UN,LN) INT_cfFZ(UN,LN) -#define LOGICAL_cfF(UN,LN) LOGICAL_cfFZ(UN,LN) -#define LONG_cfF(UN,LN) LONG_cfFZ(UN,LN) -#define SHORT_cfF(UN,LN) SHORT_cfFZ(UN,LN) -#define VOID_cfF(UN,LN) VOID_cfFZ(UN,LN) -#define STRING_cfF(UN,LN) STRING_cfFZ(UN,LN), - -#define INT_cfFF -#define VOID_cfFF -#ifdef vmsFortran -#define STRING_cfFF fstring *AS; -#else -#ifdef CRAYFortran -#define STRING_cfFF _fcd AS; -#else -#define STRING_cfFF char *AS; unsigned D0; -#endif -#endif - -#define INT_cfL A0= -#define STRING_cfL A0= -#define VOID_cfL - -#define INT_cfK -#define VOID_cfK -/* KSTRING copies the string into the position provided by the caller. 
*/ -#ifdef vmsFortran -#define STRING_cfK \ - memcpy(AS->dsc$a_pointer,A0,_cfMIN(AS->dsc$w_length,(A0==NULL?0:strlen(A0))));\ - AS->dsc$w_length>(A0==NULL?0:strlen(A0))? \ - memset(AS->dsc$a_pointer+(A0==NULL?0:strlen(A0)),' ', \ - AS->dsc$w_length-(A0==NULL?0:strlen(A0))):0; -#else -#ifdef CRAYFortran -#define STRING_cfK \ - memcpy(_fcdtocp(AS),A0, _cfMIN(_fcdlen(AS),(A0==NULL?0:strlen(A0))) ); \ - _fcdlen(AS)>(A0==NULL?0:strlen(A0))? \ - memset(_fcdtocp(AS)+(A0==NULL?0:strlen(A0)),' ', \ - _fcdlen(AS)-(A0==NULL?0:strlen(A0))):0; -#else -#define STRING_cfK memcpy(AS,A0, _cfMIN(D0,(A0==NULL?0:strlen(A0))) ); \ - D0>(A0==NULL?0:strlen(A0))?memset(AS+(A0==NULL?0:strlen(A0)), \ - ' ', D0-(A0==NULL?0:strlen(A0))):0; -#endif -#endif - -/* Note that K.. and I.. can't be combined since K.. has to access data before -R.., in order for functions returning strings which are also passed in as -arguments to work correctly. Note that R.. frees and hence may corrupt the -string. */ -#define BYTE_cfI return A0; -#define DOUBLE_cfI return A0; -#if ! (defined(FLOATFUNCTIONTYPE)&&defined(ASSIGNFLOAT)&&defined(RETURNFLOAT)) -#define FLOAT_cfI return A0; -#else -#define FLOAT_cfI RETURNFLOAT(A0); -#endif -#define INT_cfI return A0; -#ifdef hpuxFortran800 -/* Incredibly, functions must return true as 1, elsewhere .true.==0x01000000. */ -#define LOGICAL_cfI return ((A0)?1:0); -#else -#define LOGICAL_cfI return C2FLOGICAL(A0); -#endif -#define LONG_cfI return A0; -#define SHORT_cfI return A0; -#define STRING_cfI return ; -#define VOID_cfI return ; - -#ifdef OLD_VAXC /* Allow %CC-I-PARAMNOTUSED. 
*/ -#pragma standard -#endif - -#define FCALLSCSUB0( CN,UN,LN) FCALLSCFUN0(VOID,CN,UN,LN) -#define FCALLSCSUB1( CN,UN,LN,T1) FCALLSCFUN1(VOID,CN,UN,LN,T1) -#define FCALLSCSUB2( CN,UN,LN,T1,T2) FCALLSCFUN2(VOID,CN,UN,LN,T1,T2) -#define FCALLSCSUB3( CN,UN,LN,T1,T2,T3) FCALLSCFUN3(VOID,CN,UN,LN,T1,T2,T3) -#define FCALLSCSUB4( CN,UN,LN,T1,T2,T3,T4) \ - FCALLSCFUN4(VOID,CN,UN,LN,T1,T2,T3,T4) -#define FCALLSCSUB5( CN,UN,LN,T1,T2,T3,T4,T5) \ - FCALLSCFUN5(VOID,CN,UN,LN,T1,T2,T3,T4,T5) -#define FCALLSCSUB6( CN,UN,LN,T1,T2,T3,T4,T5,T6) \ - FCALLSCFUN6(VOID,CN,UN,LN,T1,T2,T3,T4,T5,T6) -#define FCALLSCSUB7( CN,UN,LN,T1,T2,T3,T4,T5,T6,T7) \ - FCALLSCFUN7(VOID,CN,UN,LN,T1,T2,T3,T4,T5,T6,T7) -#define FCALLSCSUB8( CN,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8) \ - FCALLSCFUN8(VOID,CN,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8) -#define FCALLSCSUB9( CN,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9) \ - FCALLSCFUN9(VOID,CN,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9) -#define FCALLSCSUB10(CN,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA) \ - FCALLSCFUN10(VOID,CN,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA) -#define FCALLSCSUB11(CN,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB) \ - FCALLSCFUN11(VOID,CN,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB) -#define FCALLSCSUB12(CN,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC) \ - FCALLSCFUN12(VOID,CN,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC) -#define FCALLSCSUB13(CN,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD) \ - FCALLSCFUN13(VOID,CN,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD) -#define FCALLSCSUB14(CN,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) \ - FCALLSCFUN14(VOID,CN,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) - -#define FCALLSCFUN1( T0,CN,UN,LN,T1) \ - FCALLSCFUN5 (T0,CN,UN,LN,T1,CF_0,CF_0,CF_0,CF_0) -#define FCALLSCFUN2( T0,CN,UN,LN,T1,T2) \ - FCALLSCFUN5 (T0,CN,UN,LN,T1,T2,CF_0,CF_0,CF_0) -#define FCALLSCFUN3( T0,CN,UN,LN,T1,T2,T3) \ - FCALLSCFUN5 (T0,CN,UN,LN,T1,T2,T3,CF_0,CF_0) -#define FCALLSCFUN4( T0,CN,UN,LN,T1,T2,T3,T4) \ - FCALLSCFUN5 (T0,CN,UN,LN,T1,T2,T3,T4,CF_0) -#define FCALLSCFUN5( 
T0,CN,UN,LN,T1,T2,T3,T4,T5) \ - FCALLSCFUN10(T0,CN,UN,LN,T1,T2,T3,T4,T5,CF_0,CF_0,CF_0,CF_0,CF_0) -#define FCALLSCFUN6( T0,CN,UN,LN,T1,T2,T3,T4,T5,T6) \ - FCALLSCFUN10(T0,CN,UN,LN,T1,T2,T3,T4,T5,T6,CF_0,CF_0,CF_0,CF_0) -#define FCALLSCFUN7( T0,CN,UN,LN,T1,T2,T3,T4,T5,T6,T7) \ - FCALLSCFUN10(T0,CN,UN,LN,T1,T2,T3,T4,T5,T6,T7,CF_0,CF_0,CF_0) -#define FCALLSCFUN8( T0,CN,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8) \ - FCALLSCFUN10(T0,CN,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,CF_0,CF_0) -#define FCALLSCFUN9( T0,CN,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9) \ - FCALLSCFUN10(T0,CN,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,CF_0) -#define FCALLSCFUN10(T0,CN,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA) \ - FCALLSCFUN14(T0,CN,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,CF_0,CF_0,CF_0,CF_0) -#define FCALLSCFUN11(T0,CN,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB) \ - FCALLSCFUN14(T0,CN,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,CF_0,CF_0,CF_0) -#define FCALLSCFUN12(T0,CN,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC) \ - FCALLSCFUN14(T0,CN,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,CF_0,CF_0) -#define FCALLSCFUN13(T0,CN,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD) \ - FCALLSCFUN14(T0,CN,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,CF_0) - -#ifndef __CF__KnR -#define FCALLSCFUN0(T0,CN,UN,LN) CFextern _(T0,_cfFZ)(UN,LN) ABSOFT_cf2(T0)) \ - {_Icf(2,UU,T0,A0,0); _Icf(0,L,T0,0,0) CN(); _Icf(0,K,T0,0,0) _(T0,_cfI)} - -#define FCALLSCFUN14(T0,CN,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) \ - CFextern _(T0,_cfF)(UN,LN) \ - CFARGT14(NCF,DCF,ABSOFT_cf2(T0),T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE)) \ - { CFARGT14S(QCF,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) \ - _Icf(2,UU,T0,A0,0); _Icf(0,L,T0,0,0) CN( TCF(LN,T1,1,0) TCF(LN,T2,2,1) \ - TCF(LN,T3,3,1) TCF(LN,T4,4,1) TCF(LN,T5,5,1) TCF(LN,T6,6,1) TCF(LN,T7,7,1) \ - TCF(LN,T8,8,1) TCF(LN,T9,9,1) TCF(LN,TA,A,1) TCF(LN,TB,B,1) TCF(LN,TC,C,1) \ - TCF(LN,TD,D,1) TCF(LN,TE,E,1) ); _Icf(0,K,T0,0,0) \ - CFARGT14S(RCF,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) _(T0,_cfI) } -#else -#define 
FCALLSCFUN0(T0,CN,UN,LN) CFextern _(T0,_cfFZ)(UN,LN) ABSOFT_cf3(T0)) _Icf(0,FF,T0,0,0)\ - {_Icf(2,UU,T0,A0,0); _Icf(0,L,T0,0,0) CN(); _Icf(0,K,T0,0,0) _(T0,_cfI)} - -#define FCALLSCFUN14(T0,CN,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) \ - CFextern _(T0,_cfF)(UN,LN) \ - CFARGT14(NNCF,DDCF,ABSOFT_cf3(T0),T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE)) _Icf(0,FF,T0,0,0) \ - CFARGT14FS(NNNCF,DDDCF,,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE); \ - { CFARGT14S(QCF,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) \ - _Icf(2,UU,T0,A0,0); _Icf(0,L,T0,0,0) CN( TCF(LN,T1,1,0) TCF(LN,T2,2,1) \ - TCF(LN,T3,3,1) TCF(LN,T4,4,1) TCF(LN,T5,5,1) TCF(LN,T6,6,1) TCF(LN,T7,7,1) \ - TCF(LN,T8,8,1) TCF(LN,T9,9,1) TCF(LN,TA,A,1) TCF(LN,TB,B,1) TCF(LN,TC,C,1) \ - TCF(LN,TD,D,1) TCF(LN,TE,E,1) ); _Icf(0,K,T0,0,0) \ - CFARGT14S(RCF,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) _(T0,_cfI)} -#endif - - -#endif /* __CFORTRAN_LOADED */ diff --git a/benchmarks/chunky.cpp b/benchmarks/chunky.cpp deleted file mode 100644 index bb704400..00000000 --- a/benchmarks/chunky.cpp +++ /dev/null @@ -1,156 +0,0 @@ -#include - -using namespace blitz; - -void report(const char* name, Timer& timer, int N, - long int iterations); -void unfused(int N); -void fused(int N); -void chunky(int N); - -float* _bz_restrict a; -float* _bz_restrict b; -float* _bz_restrict c; -float* _bz_restrict d; -float* _bz_restrict e; -float* _bz_restrict f; - -/* - * Code to support the "fuse" macro - */ - -int _chunk; -bool _done_chunks; -int _chunk_size = 512; - -#define fuse _chunk = 0; _done_chunks = false; \ - for (; !_done_chunks; ++_chunk) - -int main(int argc, char** argv) -{ - if (argc == 2) - _chunk_size = atoi(argv[1]); - - cout << "Using chunk size " << _chunk_size << endl; - - const int N = 100000; - - a = new float[N]; - b = new float[N]; - c = new float[N]; - d = new float[N]; - e = new float[N]; - f = new float[N]; - - for (int i=0; i < N; ++i) - { - a[i] = i; - b[i] = i; - c[i] = i; - d[i] = i; - } - - Timer timer; - long 
int iterations = 100; - - timer.start(); - for (long i=0; i < iterations; ++i) - unfused(N); - timer.stop(); - - report("Unfused", timer, N, iterations); - - timer.start(); - for (long i=0; i < iterations; ++i) - fused(N); - timer.stop(); - - report("Fused", timer, N, iterations); - - timer.start(); - for (long i=0; i < iterations; ++i) - chunky(N); - timer.stop(); - - report("Chunky", timer, N, iterations); - - return 0; -} - -void report(const char* name, Timer& timer, int N, - long int iterations) -{ - float flops = float(N) * iterations * 2; - float Mflops = flops / timer.elapsedSeconds() / 1e+6; - cout << setw(20) << name << " " << Mflops << " Mflops/s" << endl; -} - -void __sink() { } - -void unfused(int N) -{ - for (int i=0; i < N; ++i) - e[i] = a[i] * b[i] + c[i] * d[i]; - - __sink(); - - for (int i=0; i < N; ++i) - f[i] = c[i] * b[i] + a[i] * d[i]; -} - -void fused(int N) -{ - for (int i=0; i < N; ++i) - { - e[i] = a[i] * b[i] + c[i] * d[i]; - f[i] = c[i] * b[i] + a[i] * d[i]; - } -} - - -// This "chunky" routine is a simulated implementation of -// expression templates with tiling across multiple statements -// (the "chunky fusion" approach). 
This code would be -// generated by: -// -// fuse { -// E = A*B + C*D; -// F = C*B + A*D; -// } - -void chunky(int N) -{ - fuse { - - { // Code generated by E = A*B + C*D; - int lbound = _chunk * _chunk_size; - int uboundp1 = lbound + _chunk_size; - - if (uboundp1 > N) - { - _done_chunks = true; - uboundp1 = N; - } - - for (int i=lbound; i < uboundp1; ++i) - e[i] = a[i] * b[i] + c[i] * d[i]; - } - - __sink(); - - { // Code generated by F = C*B + A*D; - int lbound = _chunk * _chunk_size; - int uboundp1 = lbound + _chunk_size; - - if (uboundp1 > N) - { - _done_chunks = true; - uboundp1 = N; - } - - for (int i=lbound; i < uboundp1; ++i) - f[i] = c[i] * b[i] + a[i] * d[i]; - } - } -} - diff --git a/benchmarks/compiletime.cpp b/benchmarks/compiletime.cpp deleted file mode 100644 index e69de29b..00000000 diff --git a/benchmarks/ctime-results b/benchmarks/ctime-results deleted file mode 100644 index f79285c2..00000000 --- a/benchmarks/ctime-results +++ /dev/null @@ -1,94 +0,0 @@ -On olympus.extreme.indiana.edu (sparc-sun-solaris2.6): -egcs 1.1b - -Initial version, with -O2 -ftemplate-depth-30 -O2 -funroll-loops - -fstrict-aliasing - -ctime1 17.7 0.9 -ctime2 25.7 1.2 -ctime3 52.0 2.1 -ctime4 sleep - -With -fno-gcse: -ctime1 17.3 1.0 -ctime2 26.3 1.3 -ctime3 1:02.0 2.1 -ctime4 sleep - -With -O: -ctime1 17.3 0.8 -ctime2 24.4 1.2 -ctime3 51.5 2.1 -ctime4 sleep - -With -O -fno-inline: -ctime1 16.9 0.8 -ctime2 20.0 1.0 -ctime3 24.7 1.2 -ctime4 31.2 1.6 - -Woohoo. Okay, obviously inlining is the key. 
- -Now try new expression templates: - -With -O -funroll-loops -DBZ_NEW_EXPRESSION_TEMPLATES -ctime1 14.1 0.9 -ctime2 22.3 1.2 -ctime3 58.8 2.2 - -With -O -funroll-loops -DBZ_NEW_EXPRESSION_TEMPLATES -DBZ_NO_INLINE_ET -ctime1 14.1 0.9 -ctime2 21.1 1.0 -ctime3 45.4 1.9 - -With -O -funroll-loops -DBZ_NEW_EXPRESSION_TEMPLATES -DBZ_NO_INLINE_ET -DBZ_ETPARMS_CONSTREF -ctime1 14.6 0.8 -ctime2 20.7 1.1 -ctime3 41.6 2.1 -ctime4 1:27.7 3.0 - -Things to try: --fno-inline - -Just -O (this will turn off -funroll-all-loops) --fno-expensive-optimizations --fno-unroll-all-loops --fno-strength-reduce --fno-rerun-cse-after-loop - - - - - - -On hgar1.cwru.edu (alpha), with KCC: - -With +K3 -O3 -DBZ_NEW_EXPRESSION_TEMPLATES -DBZ_NO_INLINE_ET -DBZ_ETPARMS_CONSTREF: -ctime1 13.1 0.8 -ctime2 20.9 1.0 -ctime3 27.3 1.0 -ctime4 36.2 1.1 -ctime5 48.7 1.2 - -With just +K3 -O3: -ctime1 15.8 0.9 -ctime2 25.3 1.0 -ctime3 46.2 1.2 -ctime4 79.9 1.5 - -So a speed up of about X 2 with KCC, not counting the overhead. - - -Here are the results for : -ctime1 0.9 0.2 -ctime2 2.1 0.2 -ctime3 9.4 0.3 -ctime4 33.2 0.4 -ctime5 1:13 0.6 - - -For C code: -ctime5 0.35 0.08 - -Pretty terrible. 
- diff --git a/benchmarks/ctime1.cpp b/benchmarks/ctime1.cpp deleted file mode 100644 index 368ffe1f..00000000 --- a/benchmarks/ctime1.cpp +++ /dev/null @@ -1,8 +0,0 @@ -#include - -using namespace blitz; - -int main() -{ -} - diff --git a/benchmarks/ctime1v.cpp b/benchmarks/ctime1v.cpp deleted file mode 100644 index 79739a27..00000000 --- a/benchmarks/ctime1v.cpp +++ /dev/null @@ -1,8 +0,0 @@ -#include - -using namespace std; - -int main() -{ -} - diff --git a/benchmarks/ctime2.cpp b/benchmarks/ctime2.cpp deleted file mode 100644 index b1060a01..00000000 --- a/benchmarks/ctime2.cpp +++ /dev/null @@ -1,15 +0,0 @@ -#include - -using namespace blitz; - -int main() -{ -} - -void foo() -{ - const int N = 10; - Array A(N), B(N), C(N), D(N), E(N); - A = B*C + D*E; -} - diff --git a/benchmarks/ctime2v.cpp b/benchmarks/ctime2v.cpp deleted file mode 100644 index 818829cb..00000000 --- a/benchmarks/ctime2v.cpp +++ /dev/null @@ -1,15 +0,0 @@ -#include - -using namespace std; - -int main() -{ -} - -void foo() -{ - const int N = 10; - valarray A(N), B(N), C(N), D(N), E(N); - A = B*C + D*E; -} - diff --git a/benchmarks/ctime3.cpp b/benchmarks/ctime3.cpp deleted file mode 100644 index 2fe8bbc2..00000000 --- a/benchmarks/ctime3.cpp +++ /dev/null @@ -1,19 +0,0 @@ -#include - -using namespace blitz; - -int main() -{ -} - -void foo() -{ - const int N = 10; - Array A(N), B(N), C(N), D(N), E(N); - A = B*C + D*E; - A = B+C + D*cos(E); - A = B*sin(C) + D*sin(E); - B = C + D; - A = A+B+C+D; -} - diff --git a/benchmarks/ctime3v.cpp b/benchmarks/ctime3v.cpp deleted file mode 100644 index c0463ff4..00000000 --- a/benchmarks/ctime3v.cpp +++ /dev/null @@ -1,32 +0,0 @@ - -#if defined(__GNUC__) && (__GNUC__ < 3) -#ifdef BZ_HAVE_STD -#include -#else -#include -#endif -inline float cos(float x) { return static_cast(cos(x)); } -inline float sin(float x) { return static_cast(sin(x)); } -inline float tan(float x) { return static_cast(tan(x)); } -inline float log(float x) { return static_cast(log(x)); 
} -inline float exp(float x) { return static_cast(exp(x)); } -#endif -#include - -using namespace std; - -int main() -{ -} - -void foo() -{ - const int N = 10; - valarray A(N), B(N), C(N), D(N), E(N); - A = B*C + D*E; - A = B+C + D*cos(E); - A = B*sin(C) + D*sin(E); - B = C + D; - A = A+B+C+D; -} - diff --git a/benchmarks/ctime4.cpp b/benchmarks/ctime4.cpp deleted file mode 100644 index 4b766733..00000000 --- a/benchmarks/ctime4.cpp +++ /dev/null @@ -1,24 +0,0 @@ -#include - -using namespace blitz; - -int main() -{ -} - -void foo() -{ - const int N = 10; - Array A(N), B(N), C(N), D(N), E(N); - A = B*C + D*E; - A = B+C + D*cos(E); - A = B*sin(C) + D*sin(E); - B = C + D; - A = A+B+C+D; - A = B*pow2(C) + D*E; - A = B-C + log(D)*cos(E); - A = B*sin(C) + tan(D)/E; - B = C - D; - A = A+B*C+D; -} - diff --git a/benchmarks/ctime4v.cpp b/benchmarks/ctime4v.cpp deleted file mode 100644 index 9fdb8ec1..00000000 --- a/benchmarks/ctime4v.cpp +++ /dev/null @@ -1,39 +0,0 @@ - -#if defined(__GNUC__) && (__GNUC__ < 3) -#ifdef BZ_HAVE_STD -#include -#else -#include -#endif -inline float cos(float x) { return static_cast(cos(x)); } -inline float sin(float x) { return static_cast(sin(x)); } -inline float tan(float x) { return static_cast(tan(x)); } -inline float log(float x) { return static_cast(log(x)); } -inline float exp(float x) { return static_cast(exp(x)); } -#endif -#include - - -using namespace std; - -int main() -{ -} - - -void foo() -{ - const int N = 10; - valarray A(N), B(N), C(N), D(N), E(N); - A = B*C + D*E; - A = B+C + D*cos(E); - A = B*sin(C) + D*sin(E); - B = C + D; - A = A+B+C+D; - A = B*C*C + D*E; - A = B-C + log(D)*cos(E); - A = B*sin(C) + tan(D)/E; - B = C - D; - A = A+B*C+D; -} - diff --git a/benchmarks/ctime5.cpp b/benchmarks/ctime5.cpp deleted file mode 100644 index 8ab97c90..00000000 --- a/benchmarks/ctime5.cpp +++ /dev/null @@ -1,29 +0,0 @@ -#include - -using namespace blitz; - -int main() -{ -} - -void foo() -{ - const int N = 10; - Array A(N), B(N), C(N), 
D(N), E(N); - A = B*C + D*E; - A = B+C + D*cos(E); - A = B*sin(C) + D*sin(E); - B = C + D; - A = A+B+C+D; - A = B*C*C + D*E; - A = B-C + log(D)*cos(E); - A = B*sin(C) + tan(D)/E; - B = C - D; - A = A+B*C+D; - C = A+B+C+D+E; - E = (A+B)*(C-D); - D = A/B*C/D; - B = (A*B) + (A/B); - D = sin(A) + sin(E); -} - diff --git a/benchmarks/ctime5c.cpp b/benchmarks/ctime5c.cpp deleted file mode 100644 index f6761316..00000000 --- a/benchmarks/ctime5c.cpp +++ /dev/null @@ -1,57 +0,0 @@ -#include - -int main() -{ -} - -int N; -float* A, * B, * C, * D, * E; - -void foo() -{ - for (int i=0; i < N; ++i) - A[i] = B[i]*C[i] + D[i]*E[i]; - - for (int i=0; i < N; ++i) - A[i] = B[i]+C[i] + D[i]*cos(E[i]); - - for (int i=0; i < N; ++i) - A[i] = B[i]*sin(C[i]) + D[i]*sin(E[i]); - - for (int i=0; i < N; ++i) - B[i] = C[i] + D[i]; - - for (int i=0; i < N; ++i) - A[i] = A[i] + B[i] + C[i] + D[i]; - - for (int i=0; i < N; ++i) - A[i] = B[i]*C[i]*C[i] + D[i]*E[i]; - - for (int i=0; i < N; ++i) - A[i] = B[i]-C[i] + log(D[i])*cos(E[i]); - - for (int i=0; i < N; ++i) - A[i] = B[i]*sin(C[i]) + tan(D[i])/E[i]; - - for (int i=0; i < N; ++i) - B[i] = C[i] - D[i]; - - for (int i=0; i < N; ++i) - A[i] = A[i]+B[i]*C[i]+D[i]; - - for (int i=0; i < N; ++i) - C[i] = A[i]+B[i]+C[i]+D[i]+E[i]; - - for (int i=0; i < N; ++i) - E[i] = (A[i]+B[i])*(C[i]-D[i]); - - for (int i=0; i < N; ++i) - D[i] = A[i]/B[i]*C[i]/D[i]; - - for (int i=0; i < N; ++i) - B[i] = (A[i]*B[i]) + (A[i]/B[i]); - - for (int i=0; i < N; ++i) - D[i] = sin(A[i]) + sin(E[i]); -} - diff --git a/benchmarks/ctime5v.cpp b/benchmarks/ctime5v.cpp deleted file mode 100644 index 58720218..00000000 --- a/benchmarks/ctime5v.cpp +++ /dev/null @@ -1,42 +0,0 @@ - -#if defined(__GNUC__) && (__GNUC__ < 3) -#ifdef BZ_HAVE_STD -#include -#else -#include -#endif -inline float cos(float x) { return static_cast(cos(x)); } -inline float sin(float x) { return static_cast(sin(x)); } -inline float tan(float x) { return static_cast(tan(x)); } -inline float log(float 
x) { return static_cast(log(x)); } -inline float exp(float x) { return static_cast(exp(x)); } -#endif -#include - -using namespace std; - -int main() -{ -} - -void foo() -{ - const int N = 10; - valarray A(N), B(N), C(N), D(N), E(N); - A = B*C + D*E; - A = B+C + D*cos(E); - A = B*sin(C) + D*sin(E); - B = C + D; - A = A+B+C+D; - A = B*C*C + D*E; - A = B-C + log(D)*cos(E); - A = B*sin(C) + tan(D)/E; - B = C - D; - A = A+B*C+D; - C = A+B+C+D+E; - E = (A+B)*(C-D); - D = A/B*C/D; - B = (A*B) + (A/B); - D = sin(A) + sin(E); -} - diff --git a/benchmarks/daxpy.cpp b/benchmarks/daxpy.cpp deleted file mode 100644 index 957d7914..00000000 --- a/benchmarks/daxpy.cpp +++ /dev/null @@ -1,346 +0,0 @@ -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define fdaxpy fdaxpy_ - #define daxpy daxpy_ - #define f90daxpy f90daxpy_ - #define fidaxpy fidaxpy_ - #define fidaxpyo fidaxpyo_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define fdaxpy fdaxpy__ - #define daxpy daxpy__ - #define f90daxpy f90daxpy__ - #define fidaxpy fidaxpy__ - #define fidaxpyo fidaxpyo__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define fdaxpy FDAXPY - #define daxpy DAXPY - #define f90daxpy F90DAXPY - #define fidaxpy FIDAXPY - #define fidaxpyo FIDAXPYO -#endif - -extern "C" { - void fdaxpy(const int& N, const double& da, double* x, - const int& xstride, const double* y, const int& ystride); - - void daxpy(const int& N, const double& da, double* x, - const int& xstride, const double* y, const int& ystride); - - void f90daxpy(const double& a, double* x, - const double* y, const int& length, const int& iters); - - void fidaxpy(const double& a, double* x, const double* y, - const int& length, const int& iters); - - void fidaxpyo(const double& a, double* x, 
const double* y, - const int& length, const int& iters); -} - -void daxpyVectorVersion(BenchmarkExt& bench, double a, double b); -void daxpyArrayVersion(BenchmarkExt& bench, double a); -void daxpyF77Version(BenchmarkExt& bench, double a); -void daxpyBLASVersion(BenchmarkExt& bench, double a); -#ifdef FORTRAN_90 -void daxpyF90Version(BenchmarkExt& bench, double a); -#endif -#ifdef BENCHMARK_VALARRAY -void daxpyValarrayVersion(BenchmarkExt& bench, double a); -#endif - -int main() -{ - int numBenchmarks = 6; -#ifndef BENCHMARK_VALARRAY - numBenchmarks--; // No valarray -#endif -#ifndef FORTRAN_90 - numBenchmarks--; // No fortran 90 -#endif - - BenchmarkExt bench("DAXPY Benchmark", numBenchmarks); - - const int numSizes = 19; - bench.setNumParameters(numSizes); - - Vector parameters(numSizes); - Vector iters(numSizes); - Vector flops(numSizes); - - for (int i=0; i < numSizes; ++i) - { - parameters(i) = static_cast(pow(10.0, 0.25*(i+1))); - iters(i) = 50000000L / parameters(i); - if (iters(i) < 2) - iters(i) = 2; - flops(i) = 2 * parameters(i) * 2; - } - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - - bench.beginBenchmarking(); - - float a = .398498293819823; - - daxpyVectorVersion(bench, a, -a); - daxpyArrayVersion(bench, a); - daxpyF77Version(bench, a); - daxpyBLASVersion(bench, a); -#ifdef FORTRAN_90 - daxpyF90Version(bench, a); -#endif -#ifdef BENCHMARK_VALARRAY - daxpyValarrayVersion(bench, a); -#endif - - bench.endBenchmarking(); - - bench.saveMatlabGraph("daxpy.m"); - - return 0; -} - - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void 
daxpyVectorVersion(BenchmarkExt& bench, double a, double b) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - - cout << "Vector: N = " << N << endl; - cout.flush(); - - long iters = bench.getIterations(); - - Vector x(N), y(N); - initializeRandomDouble(x.data(), N); - initializeRandomDouble(y.data(), N); - - bench.start(); - for (long i=0; i < iters; ++i) - { - y += a * x; - y += b * x; - } - bench.stop(); - } - - bench.endImplementation(); -} - - -void daxpyArrayVersion(BenchmarkExt& bench, double a) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - - cout << "Array: N = " << N << endl; - cout.flush(); - - long iters = bench.getIterations(); - - Array x(N), y(N); - initializeRandomDouble(x.data(), N); - initializeRandomDouble(y.data(), N); - - double b = - a; - - bench.start(); - for (long i=0; i < iters; ++i) - { - y = y + a * x; - y = y + b * x; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -void daxpyF77Version(BenchmarkExt& bench, double a) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - - cout << "Fortran 77: N = " << N << endl; - cout.flush(); - - int iters = bench.getIterations(); - - double* x = new double[N]; - double* y = new double[N]; - initializeRandomDouble(x, N); - initializeRandomDouble(y, N); - - bench.start(); - fidaxpy(a, x, y, N, iters); - bench.stop(); - - delete [] x; - delete [] y; - } - - bench.endImplementation(); -} - - -void daxpyBLASVersion(BenchmarkExt& bench, double a) -{ -#ifdef USE_LIBBLAS - bench.beginImplementation("Platform BLAS"); -#else - bench.beginImplementation("Fortran BLAS"); -#endif - - while (!bench.doneImplementationBenchmark()) - { - int N = 
bench.getParameter(); - - cout << "Fortran BLAS: N = " << N << endl; - cout.flush(); - - int iters = bench.getIterations(); - - double* x = new double[N]; - double* y = new double[N]; - initializeRandomDouble(x, N); - initializeRandomDouble(y, N); - - int xstride = 1, ystride = 1; - double b = - a; - - bench.start(); - for (long i=0; i < iters; ++i) - { -#ifdef USE_LIBBLAS - daxpy(N, a, x, xstride, y, ystride); - daxpy(N, b, x, xstride, y, ystride); -#else - fdaxpy(N, a, x, xstride, y, ystride); - fdaxpy(N, b, x, xstride, y, ystride); -#endif - } - bench.stop(); - - delete [] x; - delete [] y; - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void daxpyF90Version(BenchmarkExt& bench, double a) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - - cout << "Fortran 90: N = " << N << endl; - cout.flush(); - - int iters = bench.getIterations(); - - double* x = new double[N]; - double* y = new double[N]; - initializeRandomDouble(x, N); - initializeRandomDouble(y, N); - - bench.start(); - f90daxpy(a, x, y, N, iters); - bench.stop(); - - delete [] x; - delete [] y; - } - - bench.endImplementation(); -} -#endif - -#ifdef BENCHMARK_VALARRAY -void daxpyValarrayVersion(BenchmarkExt& bench, double a) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - - cout << "valarray: N = " << N << endl; - cout.flush(); - - long iters = bench.getIterations(); - - valarray x(N), y(N); - initializeRandomDouble(x, N); - initializeRandomDouble(y, N); - - double b = - a; - - bench.start(); - for (long i=0; i < iters; ++i) - { - y += a * x; - y += b * x; - } - bench.stop(); - } - - bench.endImplementation(); -} -#endif diff --git a/benchmarks/daxpy2.cpp b/benchmarks/daxpy2.cpp deleted file mode 100644 index 39ba7ccb..00000000 --- a/benchmarks/daxpy2.cpp +++ /dev/null @@ -1,312 +0,0 @@ -// In KAI C++ 3.2c, restrict causes 
problems for copy propagation. -// Temporary kludge is to disable use of the restrict keyword. - -#define BZ_DISABLE_RESTRICT - -#include -#include -#include -#include - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -using namespace blitz; - -#ifdef BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES - #define fdaxpy fdaxpy_ - #define f90daxpy f90daxpy_ - #define fidaxpy fidaxpy_ - #define fidaxpyo fidaxpyo_ -#endif - -#ifdef BZ_FORTRAN_SYMBOLS_CAPS - #define fdaxpy FDAXPY - #define f90daxpy F90DAXPY - #define fidaxpy FIDAXPY - #define fidaxpyo FIDAXPYO -#endif - -extern "C" { - void fdaxpy(const int& N, const double& da, double* x, - const int& xstride, const double* y, const int& ystride); - - void f90daxpy(const double& a, double* x, - const double* y, const int& length, const int& iters); - - void fidaxpy(const double& a, double* x, const double* y, - const int& length, const int& iters); - - void fidaxpyo(const double& a, double* x, const double* y, - const int& length, const int& iters); -} - -void daxpyVectorVersion(BenchmarkExt& bench, double a, double b); -void daxpyArrayVersion(BenchmarkExt& bench, double a); -void daxpyF77Version(BenchmarkExt& bench, double a); -void daxpyBLASVersion(BenchmarkExt& bench, double a); -void daxpyF90Version(BenchmarkExt& bench, double a); - -#ifdef BENCHMARK_VALARRAY -void daxpyValarrayVersion(BenchmarkExt& bench, double a); -#endif - -int main() -{ - -#ifdef BENCHMARK_VALARRAY - int numBenchmarks = 6; -#else - int numBenchmarks = 5; -#endif - - BenchmarkExt bench("DAXPY Benchmark", numBenchmarks); - - const int numSizes = 19; - bench.setNumParameters(numSizes); - bench.setRateDescription("Mflops/s"); - - Vector parameters(numSizes); - Vector iters(numSizes); - Vector flops(numSizes); - - for (int i=0; i < numSizes; ++i) - { - parameters[i] = pow(10.0, (i+1)/4.0); - iters[i] = 50000000L / parameters[i]; - if (iters[i] < 2) - iters[i] = 2; - flops[i] = 2 * parameters[i] * 2; - } - - bench.setParameterVector(parameters); - 
bench.setIterations(iters); - bench.setOpsPerIteration(flops); - - bench.beginBenchmarking(); - - float a = .398498293819823; - - daxpyVectorVersion(bench, a, -a); - daxpyArrayVersion(bench, a); - daxpyF77Version(bench, a); - daxpyBLASVersion(bench, a); - daxpyF90Version(bench, a); - -#ifdef BENCHMARK_VALARRAY - daxpyValarrayVersion(bench, a); -#endif - - bench.endBenchmarking(); - - bench.saveMatlabGraph("daxpy2.m"); - - return 0; -} - -void initializeRandomDouble(double* data, int numElements, int stride = 1) -{ - static Random rnd; - - for (int i=0; i < numElements; ++i) - data[i*stride] = rnd.random(); -} - -template -void initializeArray(T& array, int numElements) -{ - static Random rnd; - - for (size_t i=0; i < numElements; ++i) - array[i] = rnd.random(); -} - -void daxpyVectorVersion(BenchmarkExt& bench, double a, double b) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - - cout << "Vector: N = " << N << endl; - cout.flush(); - - long iters = bench.getIterations(); - - Vector x(N), y(N); - initializeRandomDouble(x.data(), N); - initializeRandomDouble(y.data(), N); - - bench.start(); - for (long i=0; i < iters; ++i) - { - y += a * x; - y += b * x; - } - bench.stop(); - } - - bench.endImplementation(); -} - - -void daxpyArrayVersion(BenchmarkExt& bench, double a) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - - cout << "Array: N = " << N << endl; - cout.flush(); - - long iters = bench.getIterations(); - - Array x(N), y(N); - initializeRandomDouble(x.data(), N); - initializeRandomDouble(y.data(), N); - - double b = - a; - - bench.start(); - for (long i=0; i < iters; ++i) - { - y += a * x; - y += b * x; - } - bench.stop(); - } - - bench.endImplementation(); -} - -void daxpyF77Version(BenchmarkExt& bench, double a) -{ - bench.beginImplementation("Fortran 77"); - - while 
(!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - - cout << "Fortran 77: N = " << N << endl; - cout.flush(); - - int iters = bench.getIterations(); - - double* x = new double[N]; - double* y = new double[N]; - initializeRandomDouble(x, N); - initializeRandomDouble(y, N); - - bench.start(); - fidaxpy(a, x, y, N, iters); - bench.stop(); - - delete [] x; - delete [] y; - } - - bench.endImplementation(); -} - - -void daxpyBLASVersion(BenchmarkExt& bench, double a) -{ - bench.beginImplementation("Fortran BLAS"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - - cout << "Fortran BLAS: N = " << N << endl; - cout.flush(); - - int iters = bench.getIterations(); - - double* x = new double[N]; - double* y = new double[N]; - initializeRandomDouble(x, N); - initializeRandomDouble(y, N); - - int xstride = 1, ystride = 1; - double b = - a; - - bench.start(); - for (long i=0; i < iters; ++i) - { - fdaxpy(N, a, x, xstride, y, ystride); - fdaxpy(N, b, x, xstride, y, ystride); - } - bench.stop(); - - delete [] x; - delete [] y; - } - - bench.endImplementation(); -} - -void daxpyF90Version(BenchmarkExt& bench, double a) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - - cout << "Fortran 90: N = " << N << endl; - cout.flush(); - - int iters = bench.getIterations(); - - double* x = new double[N]; - double* y = new double[N]; - initializeRandomDouble(x, N); - initializeRandomDouble(y, N); - - bench.start(); - f90daxpy(a, x, y, N, iters); - bench.stop(); - - delete [] x; - delete [] y; - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void daxpyValarrayVersion(BenchmarkExt& bench, double a) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - - cout << "valarray: N = " << N << endl; - cout.flush(); - - long iters = bench.getIterations(); - - 
valarray x(N), y(N); - initializeArray(x, N); - initializeArray(y, N); - - double b = - a; - - bench.start(); - for (long i=0; i < iters; ++i) - { - y += a * x; - y += b * x; - } - bench.stop(); - } - - bench.endImplementation(); -} -#endif diff --git a/benchmarks/daxpyf90-2.f90 b/benchmarks/daxpyf90-2.f90 deleted file mode 100644 index e0d2578d..00000000 --- a/benchmarks/daxpyf90-2.f90 +++ /dev/null @@ -1,15 +0,0 @@ -! Fortran 90 DAXPY using arrays -SUBROUTINE f90daxpy(a, x, y, n, iters) - IMPLICIT NONE - INTEGER, INTENT( IN ) :: n, iters - DOUBLE PRECISION, DIMENSION (n) :: x, y - DOUBLE PRECISION, INTENT( IN ) :: a - DOUBLE PRECISION :: b - - b = - a - - DO i=1,iters - y = y + a * y - y = y + b * y - END DO -END SUBROUTINE diff --git a/benchmarks/daxpyf90.f90 b/benchmarks/daxpyf90.f90 deleted file mode 100644 index 03e4344a..00000000 --- a/benchmarks/daxpyf90.f90 +++ /dev/null @@ -1,12 +0,0 @@ - SUBROUTINE f90daxpy(a, x, y, n, iters) - INTEGER n, iters - DOUBLE PRECISION, DIMENSION (n) :: x, y - DOUBLE PRECISION a, b - - b = - a - - DO i=1,iters - y = y + a * x - y = y + b * x - END DO - END SUBROUTINE diff --git a/benchmarks/dot.cpp b/benchmarks/dot.cpp deleted file mode 100644 index 78a9471f..00000000 --- a/benchmarks/dot.cpp +++ /dev/null @@ -1,137 +0,0 @@ -#include -#include - -using namespace blitz; - -double dot(const double* a, const double* b, int n) -{ - double result = 0.; - for (int i=0; i < n; ++i) - result += a[i] * b[i]; - - return result; -} - -template -void sink(T&) -{ -} - -void sink(double,double,double,double,double,double,double,double,double,double) -{ -} - -int main() -{ - Timer timer; - const int iterations = 1000000; - - double a1[3] = { 0.3242343, 0.1429833, 0.43988583 }; - double b1[3] = { 0.3989421, 0.9854983, 0.58439328 }; - double a2[3] = { 0.3242343, 0.1429833, 0.43988583 }; - double b2[3] = { 0.3989421, 0.9854983, 0.58439328 }; - double a3[3] = { 0.3242343, 0.1429833, 0.43988583 }; - double b3[3] = { 0.3989421, 0.9854983, 
0.58439328 }; - double a4[3] = { 0.3242343, 0.1429833, 0.43988583 }; - double b4[3] = { 0.3989421, 0.9854983, 0.58439328 }; - double a5[3] = { 0.3242343, 0.1429833, 0.43988583 }; - double b5[3] = { 0.3989421, 0.9854983, 0.58439328 }; - double a6[3] = { 0.3242343, 0.1429833, 0.43988583 }; - double b6[3] = { 0.3989421, 0.9854983, 0.58439328 }; - double a7[3] = { 0.3242343, 0.1429833, 0.43988583 }; - double b7[3] = { 0.3989421, 0.9854983, 0.58439328 }; - double a8[3] = { 0.3242343, 0.1429833, 0.43988583 }; - double b8[3] = { 0.3989421, 0.9854983, 0.58439328 }; - double a9[3] = { 0.3242343, 0.1429833, 0.43988583 }; - double b9[3] = { 0.3989421, 0.9854983, 0.58439328 }; - double a10[3] = { 0.3242343, 0.1429833, 0.43988583 }; - double b10[3] = { 0.3989421, 0.9854983, 0.58439328 }; - - timer.start(); - for (int i=0; i < iterations; ++i) - { - double result1 = dot(a1,b1,3); - double result2 = dot(a2,b2,3); - double result3 = dot(a3,b3,3); - double result4 = dot(a4,b4,3); - double result5 = dot(a5,b5,3); - double result6 = dot(a6,b6,3); - double result7 = dot(a7,b7,3); - double result8 = dot(a8,b8,3); - double result9 = dot(a9,b9,3); - double result10 = dot(a10,b10,3); - sink(result1,result2,result3,result4,result5,result6,result7,result8, - result9,result10); - } - timer.stop(); - - double Mflops = 10.0 * 5.0 * iterations / 1e+6; - cout << "Nonspecialized algorithm: " << (Mflops/timer.elapsedSeconds()) - << endl; - - TinyVector c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, - d1, d2, d3, d4, d5, d6, d7, d8, d9, d10; - c1 = 0.3242343, 0.1429833, 0.43988583; - d1 = 0.3989421, 0.9854983, 0.58439328; - sink(c1); - sink(d1); - c2 = 0.3242343, 0.1429833, 0.43988583; - d2 = 0.3989421, 0.9854983, 0.58439328; - sink(c2); - sink(d2); - c3 = 0.3242343, 0.1429833, 0.43988583; - d3 = 0.3989421, 0.9854983, 0.58439328; - sink(c3); - sink(d3); - c4 = 0.3242343, 0.1429833, 0.43988583; - d4 = 0.3989421, 0.9854983, 0.58439328; - sink(c4); - sink(d4); - c5 = 0.3242343, 0.1429833, 0.43988583; - d5 = 
0.3989421, 0.9854983, 0.58439328; - sink(c5); - sink(d5); - c6 = 0.3242343, 0.1429833, 0.43988583; - d6 = 0.3989421, 0.9854983, 0.58439328; - sink(c6); - sink(d6); - c7 = 0.3242343, 0.1429833, 0.43988583; - d7 = 0.3989421, 0.9854983, 0.58439328; - sink(c7); - sink(d7); - c8 = 0.3242343, 0.1429833, 0.43988583; - d8 = 0.3989421, 0.9854983, 0.58439328; - sink(c8); - sink(d8); - c9 = 0.3242343, 0.1429833, 0.43988583; - d9 = 0.3989421, 0.9854983, 0.58439328; - sink(c9); - sink(d9); - c10 = 0.3242343, 0.1429833, 0.43988583; - d10 = 0.3989421, 0.9854983, 0.58439328; - sink(c10); - sink(d10); - - timer.start(); - for (int i=0; i < iterations; ++i) - { - double result1 = dot(c1, d1); - double result2 = dot(c2, d2); - double result3 = dot(c3, d3); - double result4 = dot(c4, d4); - double result5 = dot(c5, d5); - double result6 = dot(c6, d6); - double result7 = dot(c7, d7); - double result8 = dot(c8, d8); - double result9 = dot(c9, d9); - double result10 = dot(c10, d10); - sink(result1, result2, result3, result4, result5, result6, result7, - result8, result9, result10); - } - timer.stop(); - cout << "Metaprogram: " << (Mflops/timer.elapsedSeconds()) - << endl; - - return 0; -} - diff --git a/benchmarks/dot2.cpp b/benchmarks/dot2.cpp deleted file mode 100644 index 97ee26fc..00000000 --- a/benchmarks/dot2.cpp +++ /dev/null @@ -1,89 +0,0 @@ -#include - -using namespace blitz; - -double dot(const double* a, const double* b, int n) -{ - double result = 0.; - for (int i=0; i < n; ++i) - result += a[i] * b[i]; - - return result; -} - -template -void sink(T&) -{ -} - -void sink(double,double,double,double,double,double,double,double,double,double) -{ -} - -void init(double* x, int n) -{ - // Completely arbitrary - for (int i=0; i < n; ++i) - x[i] = 3.4982938192839824982 * i; -} - -const int nmax = 40; - -int main() -{ - Timer timer; - const int iterations1 = 5000000; - - double a1[nmax],a2[nmax],a3[nmax],a4[nmax],a5[nmax],a6[nmax],a7[nmax],a8[nmax],a9[nmax], - 
a10[nmax],b1[nmax],b2[nmax],b3[nmax],b4[nmax],b5[nmax],b6[nmax],b7[nmax],b8[nmax], - b9[nmax],b10[nmax]; - init(a1,nmax); - init(a2,nmax); - init(a3,nmax); - init(a4,nmax); - init(a5,nmax); - init(a6,nmax); - init(a7,nmax); - init(a8,nmax); - init(a9,nmax); - init(a10,nmax); - init(b1,nmax); - init(b2,nmax); - init(b3,nmax); - init(b4,nmax); - init(b5,nmax); - init(b6,nmax); - init(b7,nmax); - init(b8,nmax); - init(b9,nmax); - init(b10,nmax); - - for (int n=1; n < nmax; ++n) - { - int iterations = iterations1 / n; - - timer.start(); - for (int i=0; i < iterations; ++i) - { - double result1 = dot(a1,b1,n); - double result2 = dot(a2,b2,n); - double result3 = dot(a3,b3,n); - double result4 = dot(a4,b4,n); - double result5 = dot(a5,b5,n); - double result6 = dot(a6,b6,n); - double result7 = dot(a7,b7,n); - double result8 = dot(a8,b8,n); - double result9 = dot(a9,b9,n); - double result10 = dot(a10,b10,n); - sink(result1,result2,result3,result4,result5,result6,result7,result8, - result9,result10); - } - timer.stop(); - - double Mflops = 10.0 * (n + (n-1)) * iterations / 1e+6; - cout << n << '\t' << (Mflops/timer.elapsedSeconds()) << endl; - } - - return 0; -} - diff --git a/benchmarks/echof2-back.f b/benchmarks/echof2-back.f deleted file mode 100644 index 02c7c32e..00000000 --- a/benchmarks/echof2-back.f +++ /dev/null @@ -1,133 +0,0 @@ -! Tuned Fortran 77 version -! Optimizations: -! - blocked stencil algorithm to improve cache use -! - arrays interlaced by making one big 3-dimensional array -! - copying of arrays avoided by cycling indices into the -! 3-d array - - SUBROUTINE echo_f77Tuned(N, niters, check) - INTEGER N, niters, iter - REAL check - REAL A(N,N,4) -! 
P1 = A(N,N,1), P2 = A(N,N,2), P3 = A(N,N,3), C = A(N,N,4) - INTEGER P1, P2, P3, C - INTEGER i, j - INTEGER bi,bj,ni,nj,blockSize - - P1 = 1 - P2 = 2 - P3 = 3 - C = 4 - - CALL echo_f77Tuned_setInitialConditions(A,C,P1,P2,P3,N) - - blockSize = 128 - - DO iter=1, niters - DO bj=2,N-1,blockSize - nj = min(bj+blockSize-1,N-1) - DO bi=2,N-1,blockSize - ni = min(bi+blockSize-1,N-1) - DO j=bj,nj - DO i=bi,ni - A(i,j,P3) = (2-4*A(i,j,C))*A(i,j,P2) + A(i,j,C) - . *(A(i,j-1,P2) + A(i,j+1,P2) + A(i-1,j,P2) - . + A(i+1,j,P2)) - A(i,j,P1) - END DO - END DO - END DO - END DO - P1 = P2 - P2 = P3 - END DO - - check = A(N/2,N/2,P1) - - RETURN - END - - - - - SUBROUTINE echo_f77Tuned_setInitialConditions(A, C, P1, P2, P3, N) - INTEGER N - REAL A(N,N,4) - INTEGER C, P1, P2, P3 - INTEGER i, j, blockLeft, blockRight, blockTop, blockBottom - INTEGER channelLeft, channelRight, channel1Height, channel2Height - INTEGER cr, cc - REAL s2 - -! Default velocity in the air - - DO j=1,N - DO i=1,N - A(i,j,C) = 0.2; - END DO - END DO - -! Solid block with which the pulse collids - - blockLeft = 1 - blockRight = 2 * N / 5.0 - blockTop = N / 3.0 - blockBottom = 2 * N / 3.0 - - DO j=blockLeft,blockRight - DO i=blockTop,blockBottom - A(i,j,C) = 0.5 - END DO - END DO - -! Channel directing the pulse leftwards - - channelLeft = 4 * N / 5.0 - channelRight = N - channel1Height = 3 * N / 8.0 - channel2Height = 5 * N / 8.0 - - DO j = channelLeft,channelRight - A(channel1Height,j,C) = 0.0; - A(channel2Height,j,C) = 0.0; - END DO - -! 
Initial pressure distribution: a gaussian pulse inside the channel - - cr = N / 2 - cc = 7 * N / 8.0 - s2 = 64.0 * 9.0 / ((N/2.0) ** 2) - - DO j=1,N - DO i=1,N - A(i,j,P1) = 0.0 - A(i,j,P2) = exp(-((i-cr)**2 + (j-cc)**2) * s2); - A(i,j,P3) = 0.0 - END DO - END DO - - CALL checkArray2(A,P2,N) - CALL checkArray2(A,C,N) - - RETURN - END - - - - - - SUBROUTINE checkArray2(A, P, N) - INTEGER N, P - REAL A(N,N,4) - - INTEGER i,j - REAL check - check = 0.0 - DO j=1,N - DO i=1,N - check = check + (i*n+j)*A(i,j,P) - END DO - END DO - - PRINT *, 'Array check: ', check - RETURN - END diff --git a/benchmarks/echotune.cpp b/benchmarks/echotune.cpp deleted file mode 100644 index 85b0a2ac..00000000 --- a/benchmarks/echotune.cpp +++ /dev/null @@ -1,49 +0,0 @@ -#include -#include -#include - -using namespace blitz; -using namespace std; - -extern "C" { - void echo_f77tuned(int& N, int& niters, float& check, int& blockSize); -} - -int main() -{ - int N = 1024; - int niters = 48; - float check; - double Mflops = niters * 9; - Timer timer; - - ofstream ofs("echotune.log"); - - cout << "This program decides on the best block size for a typical 2D " - << endl << "stencil operation. Pick the block size which has the " - << endl << "maximum Mflops/s." 
<< endl << endl; - - cout << "Block size\tMflops/s" << endl; - - int blockSize; - - for (blockSize=1; blockSize < 32; ++blockSize) - { - timer.start(); - echo_f77tuned(N, niters, check, blockSize); - timer.stop(); - cout << blockSize << "\t" << (Mflops/timer.elapsedSeconds()) << endl; - ofs << blockSize << "\t" << (Mflops/timer.elapsedSeconds()) << endl; - } - for (; blockSize < 1024; blockSize += 32) - { - timer.start(); - echo_f77tuned(N, niters, check, blockSize); - timer.stop(); - cout << blockSize << "\t" << (Mflops/timer.elapsedSeconds()) << endl; - ofs << blockSize << "\t" << (Mflops/timer.elapsedSeconds()) << endl; - } - - return 0; -} - diff --git a/benchmarks/echotune.m b/benchmarks/echotune.m deleted file mode 100644 index 6646ecfa..00000000 --- a/benchmarks/echotune.m +++ /dev/null @@ -1,63 +0,0 @@ -A = [ 2 11.2062 -4 14.2105 -6 15.6069 -8 16.5138 -10 17.0481 -12 17.2869 -14 17.6543 -16 17.7778 -18 18.0225 -20 18.0225 -22 18.2896 -24 18.3362 -26 18.2741 -28 18.4537 -30 18.5647 -32 18.6368 -34 18.799 -36 18.7013 -38 18.5886 -40 18.6127 -42 18.799 -44 18.8811 -46 18.9225 -48 18.8317 -50 18.9391 -52 18.9391 -54 18.9723 -56 19.0225 -58 19.0225 -60 19.0728 -62 19.056 -64 19.115 -66 19.1066 -68 19.056 -70 19.1235 -72 19.0644 -74 19.0644 -76 18.9308 -78 18.9723 -80 19.115 -82 19.1489 -84 19.0392 -86 19.2342 -88 19.132 -90 18.8153 -92 19.056 -94 19.1574 -96 18.8235 -98 19.0476 -100 19.0813 -102 19.1066 -104 18.989 -106 19.1829 -108 19.1066 -110 19.0728 -112 19.1744 -114 19.0813 -116 19.1066 -118 19.1659 -120 19.0141 -122 19.115 -124 19.1744 -126 19.132 ]; diff --git a/benchmarks/echotunef.f b/benchmarks/echotunef.f deleted file mode 100644 index d710892b..00000000 --- a/benchmarks/echotunef.f +++ /dev/null @@ -1,128 +0,0 @@ -! Tuned Fortran 77 version -! Optimizations: -! - blocked stencil algorithm to improve cache use -! - arrays interlaced by making one big 3-dimensional array -! - copying of arrays avoided by cycling indices into the -! 
3-d array - - SUBROUTINE echo_f77Tuned(N, niters, check, blockSize) - INTEGER N, niters, iter, blockSize - REAL check - REAL A(N,N,4) -! P1 = A(N,N,1), P2 = A(N,N,2), P3 = A(N,N,3), C = A(N,N,4) - INTEGER P1, P2, P3, C - INTEGER i, j - INTEGER bi,bj,ni,nj - - P1 = 1 - P2 = 2 - P3 = 3 - C = 4 - - CALL echo_f77Tuned_setInitialConditions(A,C,P1,P2,P3,N) - - DO iter=1, niters - DO bj=2,N-1,blockSize - nj = min(bj+blockSize-1,N-1) - DO bi=2,N-1,blockSize - ni = min(bi+blockSize-1,N-1) - DO j=bj,nj - DO i=bi,ni - A(i,j,P3) = (2-4*A(i,j,C))*A(i,j,P2) + A(i,j,C) - . *(A(i,j-1,P2) + A(i,j+1,P2) + A(i-1,j,P2) - . + A(i+1,j,P2)) - A(i,j,P1) - END DO - END DO - END DO - END DO - P1 = P2 - P2 = P3 - END DO - - check = A(N/2,N/2,P1) - - RETURN - END - - - - - SUBROUTINE echo_f77Tuned_setInitialConditions(A, C, P1, P2, P3, N) - INTEGER N - REAL A(N,N,4) - INTEGER C, P1, P2, P3 - INTEGER i, j, blockLeft, blockRight, blockTop, blockBottom - INTEGER channelLeft, channelRight, channel1Height, channel2Height - INTEGER cr, cc - REAL s2 - -! Default velocity in the air - - DO j=1,N - DO i=1,N - A(i,j,C) = 0.2; - END DO - END DO - -! Solid block with which the pulse collids - - blockLeft = 1 - blockRight = 2 * N / 5.0 - blockTop = N / 3.0 - blockBottom = 2 * N / 3.0 - - DO j=blockLeft,blockRight - DO i=blockTop,blockBottom - A(i,j,C) = 0.5 - END DO - END DO - -! Channel directing the pulse leftwards - - channelLeft = 4 * N / 5.0 - channelRight = N - channel1Height = 3 * N / 8.0 - channel2Height = 5 * N / 8.0 - - DO j = channelLeft,channelRight - A(channel1Height,j,C) = 0.0; - A(channel2Height,j,C) = 0.0; - END DO - -! 
Initial pressure distribution: a gaussian pulse inside the channel - - cr = N / 2 - cc = 7 * N / 8.0 - s2 = 64.0 * 9.0 / ((N/2.0) ** 2) - - DO j=1,N - DO i=1,N - A(i,j,P1) = 0.0 - A(i,j,P2) = exp(-((i-cr)**2 + (j-cc)**2) * s2); - A(i,j,P3) = 0.0 - END DO - END DO - - RETURN - END - - - - - - SUBROUTINE checkArray2(A, P, N) - INTEGER N, P - REAL A(N,N,4) - - INTEGER i,j - REAL check - check = 0.0 - DO j=1,N - DO i=1,N - check = check + (i*n+j)*A(i,j,P) - END DO - END DO - - PRINT *, 'Array check: ', check - RETURN - END diff --git a/benchmarks/fdaxpy.f b/benchmarks/fdaxpy.f deleted file mode 100644 index cfc3737a..00000000 --- a/benchmarks/fdaxpy.f +++ /dev/null @@ -1,48 +0,0 @@ - subroutine fdaxpy(n,da,dx,incx,dy,incy) -c -c constant times a vector plus a vector. -c uses unrolled loops for increments equal to one. -c jack dongarra, linpack, 3/11/78. -c modified 12/3/93, array(1) declarations changed to array(*) -c - double precision dx(*),dy(*),da - integer i,incx,incy,ix,iy,m,mp1,n -c - if(n.le.0)return - if (da .eq. 0.0d0) return - if(incx.eq.1.and.incy.eq.1)go to 20 -c -c code for unequal increments or equal increments -c not equal to 1 -c - ix = 1 - iy = 1 - if(incx.lt.0)ix = (-n+1)*incx + 1 - if(incy.lt.0)iy = (-n+1)*incy + 1 - do 10 i = 1,n - dy(iy) = dy(iy) + da*dx(ix) - ix = ix + incx - iy = iy + incy - 10 continue - return -c -c code for both increments equal to 1 -c -c -c clean-up loop -c - 20 m = mod(n,4) - if( m .eq. 0 ) go to 40 - do 30 i = 1,m - dy(i) = dy(i) + da*dx(i) - 30 continue - if( n .lt. 
4 ) return - 40 mp1 = m + 1 - do 50 i = mp1,n,4 - dy(i) = dy(i) + da*dx(i) - dy(i + 1) = dy(i + 1) + da*dx(i + 1) - dy(i + 2) = dy(i + 2) + da*dx(i + 2) - dy(i + 3) = dy(i + 3) + da*dx(i + 3) - 50 continue - return - end diff --git a/benchmarks/fidaxpy.f b/benchmarks/fidaxpy.f deleted file mode 100644 index d1fec4ac..00000000 --- a/benchmarks/fidaxpy.f +++ /dev/null @@ -1,27 +0,0 @@ - subroutine fidaxpy(a, x, y, n, iters) - - integer i - double precision x(n), y(n), a - double precision b - b = - a - - do 2000 j = 1, iters - do 1000 i = 1, n - y(i) = y(i) + a * x(i) - 1000 continue - - do 3000 i = 1, n - y(i) = y(i) + b * x(i) - 3000 continue - 2000 continue - - return - end - - subroutine fidaxpyo(a, x, y, n, iters) - - double precision x(n), y(n), a - - return - end - diff --git a/benchmarks/floop1.cpp b/benchmarks/floop1.cpp deleted file mode 100644 index 4dd6139b..00000000 --- a/benchmarks/floop1.cpp +++ /dev/null @@ -1,438 +0,0 @@ - -// floop1 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define floop1_f77 floop1_f77_ - #define floop1_f77overhead floop1_f77overhead_ - #define floop1_f90 floop1_f90_ - #define floop1_f90overhead floop1_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define floop1_f77 floop1_f77__ - #define floop1_f77overhead floop1_f77overhead__ - #define floop1_f90 floop1_f90__ - #define floop1_f90overhead floop1_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define floop1_f77 FLOOP1_F77 - #define floop1_f77overhead FLOOP1_F77OVERHEAD - #define floop1_f90 FLOOP1_F90 - #define floop1_f90overhead FLOOP1_F90OVERHEAD -#endif - -extern "C" { - void floop1_f77(const int& N, float* x, float* y); - 
void floop1_f77overhead(const int& N, float* x, float* y); - void floop1_f90(const int& N, float* x, float* y); - void floop1_f90overhead(const int& N, float* x, float* y); - -} - -void VectorVersion(BenchmarkExt& bench); -void ArrayVersion(BenchmarkExt& bench); -void ArrayVersion_unaligned(BenchmarkExt& bench); -void ArrayVersion_misaligned(BenchmarkExt& bench); -void ArrayVersion_index(BenchmarkExt& bench); -void doTinyVectorVersion(BenchmarkExt& bench); -void F77Version(BenchmarkExt& bench); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("floop1: $x = sqrt($y)", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 1 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - - - ArrayVersion(bench); - ArrayVersion_unaligned(bench); - ArrayVersion_misaligned(bench); - ArrayVersion_index(bench); - //doTinyVectorVersion(bench); - F77Version(bench); -#ifdef FORTRAN_90 - F90Version(bench); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench); -#endif - - if(runvector) - VectorVersion(bench); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("floop1.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < 
numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector y(N); - initializeRandomDouble(y.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = sqrt(y); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = sqrt(y); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - 
{ - x = sqrt(y(tensor::i));; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array yfill(N+1); - Array y(yfill(Range(1,N))); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = sqrt(y); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+2); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array yfill(N+2); - Array y(yfill(Range(1,N+1-1))); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = sqrt(y); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - 
initializeRandomDouble(x, N); - valarray y(N); - initializeRandomDouble(y, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = sqrt(y); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* y = new float[N]; - initializeRandomDouble(y, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop1_f77(N, x, y); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop1_f77overhead(N, x, y); - - bench.stopOverhead(); - - delete [] x; - delete [] y; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* y = new float[N]; - initializeRandomDouble(y, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop1_f90(N, x, y); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop1_f90overhead(N, x, y); - - bench.stopOverhead(); - delete [] x; - delete [] y; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/floop10.cpp b/benchmarks/floop10.cpp deleted file mode 100644 index 495dabb0..00000000 --- a/benchmarks/floop10.cpp +++ /dev/null @@ -1,483 +0,0 @@ - -// floop10 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include 
-#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define floop10_f77 floop10_f77_ - #define floop10_f77overhead floop10_f77overhead_ - #define floop10_f90 floop10_f90_ - #define floop10_f90overhead floop10_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define floop10_f77 floop10_f77__ - #define floop10_f77overhead floop10_f77overhead__ - #define floop10_f90 floop10_f90__ - #define floop10_f90overhead floop10_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define floop10_f77 FLOOP10_F77 - #define floop10_f77overhead FLOOP10_F77OVERHEAD - #define floop10_f90 FLOOP10_F90 - #define floop10_f90overhead FLOOP10_F90OVERHEAD -#endif - -extern "C" { - void floop10_f77(const int& N, float* x, float* a, float* b, float* c, const float& u); - void floop10_f77overhead(const int& N, float* x, float* a, float* b, float* c, const float& u); - void floop10_f90(const int& N, float* x, float* a, float* b, float* c, const float& u); - void floop10_f90overhead(const int& N, float* x, float* a, float* b, float* c, const float& u); - -} - -void VectorVersion(BenchmarkExt& bench, float u); -void ArrayVersion(BenchmarkExt& bench, float u); -void ArrayVersion_unaligned(BenchmarkExt& bench, float u); -void ArrayVersion_misaligned(BenchmarkExt& bench, float u); -void ArrayVersion_index(BenchmarkExt& bench, float u); -void doTinyVectorVersion(BenchmarkExt& bench, float u); -void F77Version(BenchmarkExt& bench, float u); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, float u); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, float u); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int 
numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("floop10: $x = u+$a+$b+$c", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 3 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - float u = 0.39123982498157938742; - - - ArrayVersion(bench, u); - ArrayVersion_unaligned(bench, u); - ArrayVersion_misaligned(bench, u); - ArrayVersion_index(bench, u); - //doTinyVectorVersion(bench, u); - F77Version(bench, u); -#ifdef FORTRAN_90 - F90Version(bench, u); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench, u); -#endif - - if(runvector) - VectorVersion(bench, u); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("floop10.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), 
N); - Vector c(N); - initializeRandomDouble(c.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u+a+b+c; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u+a+b+c; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u+a(tensor::i)+b(tensor::i)+c(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Array (unal.)"); - - 
while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+1); - Array c(cfill(Range(1,N))); - initializeRandomDouble(c.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u+a+b+c; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+4); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+4); - Array a(afill(Range(1,N+1-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+4); - Array b(bfill(Range(2,N+2-1))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+4); - Array c(cfill(Range(3,N+3-1))); - initializeRandomDouble(c.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u+a+b+c; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << 
bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - valarray c(N); - initializeRandomDouble(c, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u+a+b+c; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - float* c = new float[N]; - initializeRandomDouble(c, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop10_f77(N, x, a, b, c, u); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop10_f77overhead(N, x, a, b, c, u); - - bench.stopOverhead(); - - delete [] x; - delete [] a; - delete [] b; - delete [] c; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - float* c = new float[N]; - initializeRandomDouble(c, N); - - - bench.start(); - for (int iter=0; 
iter < iters; ++iter) - floop10_f90(N, x, a, b, c, u); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop10_f90overhead(N, x, a, b, c, u); - - bench.stopOverhead(); - delete [] x; - delete [] a; - delete [] b; - delete [] c; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/floop10f.f b/benchmarks/floop10f.f deleted file mode 100644 index 0943d67f..00000000 --- a/benchmarks/floop10f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE floop10_F77(N, x, a, b, c, u) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), u - - DO i=1,N - x(i) = u+a(i)+b(i)+c(i); - END DO - RETURN - END - - - SUBROUTINE floop10_F77Overhead(N, x, a, b, c, u) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), u - RETURN - END diff --git a/benchmarks/floop10f90.f90 b/benchmarks/floop10f90.f90 deleted file mode 100644 index bcccb4c5..00000000 --- a/benchmarks/floop10f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE floop10_F90(N, x, a, b, c, u) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), u - - x = u+a+b+c - RETURN - END - - - SUBROUTINE floop10_F90Overhead(N, x, a, b, c, u) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), u - - RETURN - END diff --git a/benchmarks/floop11.cpp b/benchmarks/floop11.cpp deleted file mode 100644 index 04ceca19..00000000 --- a/benchmarks/floop11.cpp +++ /dev/null @@ -1,504 +0,0 @@ - -// floop11 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define floop11_f77 floop11_f77_ - #define floop11_f77overhead floop11_f77overhead_ - #define floop11_f90 floop11_f90_ - #define floop11_f90overhead floop11_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define floop11_f77 
floop11_f77__ - #define floop11_f77overhead floop11_f77overhead__ - #define floop11_f90 floop11_f90__ - #define floop11_f90overhead floop11_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define floop11_f77 FLOOP11_F77 - #define floop11_f77overhead FLOOP11_F77OVERHEAD - #define floop11_f90 FLOOP11_F90 - #define floop11_f90overhead FLOOP11_F90OVERHEAD -#endif - -extern "C" { - void floop11_f77(const int& N, float* x, float* a, float* b, float* c, float* d); - void floop11_f77overhead(const int& N, float* x, float* a, float* b, float* c, float* d); - void floop11_f90(const int& N, float* x, float* a, float* b, float* c, float* d); - void floop11_f90overhead(const int& N, float* x, float* a, float* b, float* c, float* d); - -} - -void VectorVersion(BenchmarkExt& bench); -void ArrayVersion(BenchmarkExt& bench); -void ArrayVersion_unaligned(BenchmarkExt& bench); -void ArrayVersion_misaligned(BenchmarkExt& bench); -void ArrayVersion_index(BenchmarkExt& bench); -void doTinyVectorVersion(BenchmarkExt& bench); -void F77Version(BenchmarkExt& bench); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("floop11: $x = $a+$b+$c+$d", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 3 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - 
bench.beginBenchmarking(); - - - - ArrayVersion(bench); - ArrayVersion_unaligned(bench); - ArrayVersion_misaligned(bench); - ArrayVersion_index(bench); - //doTinyVectorVersion(bench); - F77Version(bench); -#ifdef FORTRAN_90 - F90Version(bench); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench); -#endif - - if(runvector) - VectorVersion(bench); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("floop11.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - Vector c(N); - initializeRandomDouble(c.data(), N); - Vector d(N); - initializeRandomDouble(d.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b+c+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - 
initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b+c+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a(tensor::i)+b(tensor::i)+c(tensor::i)+d(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+1); - Array c(cfill(Range(1,N))); - 
initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+1); - Array d(dfill(Range(1,N))); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b+c+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+5); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+5); - Array a(afill(Range(1,N+1-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+5); - Array b(bfill(Range(2,N+2-1))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+5); - Array c(cfill(Range(3,N+3-1))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+5); - Array d(dfill(Range(4,N+4-1))); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b+c+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - valarray c(N); - initializeRandomDouble(c, N); - valarray d(N); - initializeRandomDouble(d, N); - - - bench.start(); - 
for (long i=0; i < iters; ++i) - { - x = a+b+c+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - float* c = new float[N]; - initializeRandomDouble(c, N); - float* d = new float[N]; - initializeRandomDouble(d, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop11_f77(N, x, a, b, c, d); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop11_f77overhead(N, x, a, b, c, d); - - bench.stopOverhead(); - - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - float* c = new float[N]; - initializeRandomDouble(c, N); - float* d = new float[N]; - initializeRandomDouble(d, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop11_f90(N, x, a, b, c, d); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop11_f90overhead(N, x, a, b, c, d); - - bench.stopOverhead(); 
- delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/floop11f.f b/benchmarks/floop11f.f deleted file mode 100644 index b1e89d5a..00000000 --- a/benchmarks/floop11f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE floop11_F77(N, x, a, b, c, d) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), d(N) - - DO i=1,N - x(i) = a(i)+b(i)+c(i)+d(i); - END DO - RETURN - END - - - SUBROUTINE floop11_F77Overhead(N, x, a, b, c, d) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), d(N) - RETURN - END diff --git a/benchmarks/floop11f90.f90 b/benchmarks/floop11f90.f90 deleted file mode 100644 index 342effdd..00000000 --- a/benchmarks/floop11f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE floop11_F90(N, x, a, b, c, d) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), d(N) - - x = a+b+c+d - RETURN - END - - - SUBROUTINE floop11_F90Overhead(N, x, a, b, c, d) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), d(N) - - RETURN - END diff --git a/benchmarks/floop12.cpp b/benchmarks/floop12.cpp deleted file mode 100644 index 4d0fbfb6..00000000 --- a/benchmarks/floop12.cpp +++ /dev/null @@ -1,527 +0,0 @@ - -// floop12 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define floop12_f77 floop12_f77_ - #define floop12_f77overhead floop12_f77overhead_ - #define floop12_f90 floop12_f90_ - #define floop12_f90overhead floop12_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define floop12_f77 floop12_f77__ - #define floop12_f77overhead floop12_f77overhead__ - #define floop12_f90 floop12_f90__ - #define floop12_f90overhead floop12_f90overhead__ -#elif 
defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define floop12_f77 FLOOP12_F77 - #define floop12_f77overhead FLOOP12_F77OVERHEAD - #define floop12_f90 FLOOP12_F90 - #define floop12_f90overhead FLOOP12_F90OVERHEAD -#endif - -extern "C" { - void floop12_f77(const int& N, float* y, float* x, float* a, float* b, float* c, float* d, const float& u); - void floop12_f77overhead(const int& N, float* y, float* x, float* a, float* b, float* c, float* d, const float& u); - void floop12_f90(const int& N, float* y, float* x, float* a, float* b, float* c, float* d, const float& u); - void floop12_f90overhead(const int& N, float* y, float* x, float* a, float* b, float* c, float* d, const float& u); - -} - -void VectorVersion(BenchmarkExt& bench, float u); -void ArrayVersion(BenchmarkExt& bench, float u); -void ArrayVersion_unaligned(BenchmarkExt& bench, float u); -void ArrayVersion_misaligned(BenchmarkExt& bench, float u); -void ArrayVersion_index(BenchmarkExt& bench, float u); -void doTinyVectorVersion(BenchmarkExt& bench, float u); -void F77Version(BenchmarkExt& bench, float u); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, float u); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, float u); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("floop12: $y = u+$a; $x = $a+$b+$c+$d", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 4 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - 
bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - float u = 0.39123982498157938742; - - - ArrayVersion(bench, u); - ArrayVersion_unaligned(bench, u); - ArrayVersion_misaligned(bench, u); - ArrayVersion_index(bench, u); - //doTinyVectorVersion(bench, u); - F77Version(bench, u); -#ifdef FORTRAN_90 - F90Version(bench, u); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench, u); -#endif - - if(runvector) - VectorVersion(bench, u); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("floop12.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector y(N); - initializeRandomDouble(y.data(), N); - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - Vector c(N); - initializeRandomDouble(c.data(), N); - Vector d(N); - initializeRandomDouble(d.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - y = u+a; x = a+b+c+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = 
bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - y = u+a; x = a+b+c+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - y = u+a(tensor::i); x = a(tensor::i)+b(tensor::i)+c(tensor::i)+d(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array yfill(N+1); - Array 
y(yfill(Range(1,N))); - initializeRandomDouble(y.dataFirst(), N); - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+1); - Array c(cfill(Range(1,N))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+1); - Array d(dfill(Range(1,N))); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - y = u+a; x = a+b+c+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array yfill(N+6); - Array y(yfill(Range(0,N+0-1))); - initializeRandomDouble(y.dataFirst(), N); - - Array xfill(N+6); - Array x(xfill(Range(1,N+1-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+6); - Array a(afill(Range(2,N+2-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+6); - Array b(bfill(Range(3,N+3-1))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+6); - Array c(cfill(Range(4,N+4-1))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+6); - Array d(dfill(Range(5,N+5-1))); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - y = u+a; x = a+b+c+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void 
ValarrayVersion(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray y(N); - initializeRandomDouble(y, N); - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - valarray c(N); - initializeRandomDouble(c, N); - valarray d(N); - initializeRandomDouble(d, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - y = u+a; x = a+b+c+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* y = new float[N]; - initializeRandomDouble(y, N); - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - float* c = new float[N]; - initializeRandomDouble(c, N); - float* d = new float[N]; - initializeRandomDouble(d, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop12_f77(N, y, x, a, b, c, d, u); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop12_f77overhead(N, y, x, a, b, c, d, u); - - bench.stopOverhead(); - - delete [] y; - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Fortran 90"); - - while 
(!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* y = new float[N]; - initializeRandomDouble(y, N); - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - float* c = new float[N]; - initializeRandomDouble(c, N); - float* d = new float[N]; - initializeRandomDouble(d, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop12_f90(N, y, x, a, b, c, d, u); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop12_f90overhead(N, y, x, a, b, c, d, u); - - bench.stopOverhead(); - delete [] y; - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/floop12f.f b/benchmarks/floop12f.f deleted file mode 100644 index f7b8698d..00000000 --- a/benchmarks/floop12f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE floop12_F77(N, y, x, a, b, c, d, u) - INTEGER i, N - REAL*4 y(N), x(N), a(N), b(N), c(N), d(N), u - - DO i=1,N - y(i) = u+a(i); x(i) = a(i)+b(i)+c(i)+d(i); - END DO - RETURN - END - - - SUBROUTINE floop12_F77Overhead(N, y, x, a, b, c, d, u) - INTEGER i, N - REAL*4 y(N), x(N), a(N), b(N), c(N), d(N), u - RETURN - END diff --git a/benchmarks/floop12f90.f90 b/benchmarks/floop12f90.f90 deleted file mode 100644 index 6ba41087..00000000 --- a/benchmarks/floop12f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE floop12_F90(N, y, x, a, b, c, d, u) - INTEGER i, N - REAL*4 y(N), x(N), a(N), b(N), c(N), d(N), u - - y = u+a; x = a+b+c+d - RETURN - END - - - SUBROUTINE floop12_F90Overhead(N, y, x, a, b, c, d, u) - INTEGER i, N - REAL*4 y(N), x(N), a(N), b(N), c(N), d(N), u - - RETURN - END diff --git a/benchmarks/floop13.cpp b/benchmarks/floop13.cpp deleted file mode 100644 index 
368fc2ba..00000000 --- a/benchmarks/floop13.cpp +++ /dev/null @@ -1,527 +0,0 @@ - -// floop13 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define floop13_f77 floop13_f77_ - #define floop13_f77overhead floop13_f77overhead_ - #define floop13_f90 floop13_f90_ - #define floop13_f90overhead floop13_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define floop13_f77 floop13_f77__ - #define floop13_f77overhead floop13_f77overhead__ - #define floop13_f90 floop13_f90__ - #define floop13_f90overhead floop13_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define floop13_f77 FLOOP13_F77 - #define floop13_f77overhead FLOOP13_F77OVERHEAD - #define floop13_f90 FLOOP13_F90 - #define floop13_f90overhead FLOOP13_F90OVERHEAD -#endif - -extern "C" { - void floop13_f77(const int& N, float* y, float* x, float* a, float* b, float* c, float* d, const float& u); - void floop13_f77overhead(const int& N, float* y, float* x, float* a, float* b, float* c, float* d, const float& u); - void floop13_f90(const int& N, float* y, float* x, float* a, float* b, float* c, float* d, const float& u); - void floop13_f90overhead(const int& N, float* y, float* x, float* a, float* b, float* c, float* d, const float& u); - -} - -void VectorVersion(BenchmarkExt& bench, float u); -void ArrayVersion(BenchmarkExt& bench, float u); -void ArrayVersion_unaligned(BenchmarkExt& bench, float u); -void ArrayVersion_misaligned(BenchmarkExt& bench, float u); -void ArrayVersion_index(BenchmarkExt& bench, float u); -void doTinyVectorVersion(BenchmarkExt& bench, float u); -void F77Version(BenchmarkExt& bench, float u); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& 
bench, float u); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, float u); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("floop13: $x = $a+$b+$c+$d; $y = u+$d", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 4 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - float u = 0.39123982498157938742; - - - ArrayVersion(bench, u); - ArrayVersion_unaligned(bench, u); - ArrayVersion_misaligned(bench, u); - ArrayVersion_index(bench, u); - //doTinyVectorVersion(bench, u); - F77Version(bench, u); -#ifdef FORTRAN_90 - F90Version(bench, u); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench, u); -#endif - - if(runvector) - VectorVersion(bench, u); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("floop13.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = 
bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector y(N); - initializeRandomDouble(y.data(), N); - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - Vector c(N); - initializeRandomDouble(c.data(), N); - Vector d(N); - initializeRandomDouble(d.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b+c+d; y = u+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b+c+d; y = u+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - 
initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a(tensor::i)+b(tensor::i)+c(tensor::i)+d(tensor::i); y = u+d(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array yfill(N+1); - Array y(yfill(Range(1,N))); - initializeRandomDouble(y.dataFirst(), N); - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+1); - Array c(cfill(Range(1,N))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+1); - Array d(dfill(Range(1,N))); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b+c+d; y = u+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array yfill(N+6); - Array 
y(yfill(Range(0,N+0-1))); - initializeRandomDouble(y.dataFirst(), N); - - Array xfill(N+6); - Array x(xfill(Range(1,N+1-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+6); - Array a(afill(Range(2,N+2-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+6); - Array b(bfill(Range(3,N+3-1))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+6); - Array c(cfill(Range(4,N+4-1))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+6); - Array d(dfill(Range(5,N+5-1))); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b+c+d; y = u+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray y(N); - initializeRandomDouble(y, N); - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - valarray c(N); - initializeRandomDouble(c, N); - valarray d(N); - initializeRandomDouble(d, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b+c+d; y = u+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* y = new 
float[N]; - initializeRandomDouble(y, N); - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - float* c = new float[N]; - initializeRandomDouble(c, N); - float* d = new float[N]; - initializeRandomDouble(d, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop13_f77(N, y, x, a, b, c, d, u); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop13_f77overhead(N, y, x, a, b, c, d, u); - - bench.stopOverhead(); - - delete [] y; - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* y = new float[N]; - initializeRandomDouble(y, N); - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - float* c = new float[N]; - initializeRandomDouble(c, N); - float* d = new float[N]; - initializeRandomDouble(d, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop13_f90(N, y, x, a, b, c, d, u); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop13_f90overhead(N, y, x, a, b, c, d, u); - - bench.stopOverhead(); - delete [] y; - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/floop13f.f b/benchmarks/floop13f.f deleted file mode 100644 index 8f261d16..00000000 --- a/benchmarks/floop13f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE floop13_F77(N, y, x, a, b, c, 
d, u) - INTEGER i, N - REAL*4 y(N), x(N), a(N), b(N), c(N), d(N), u - - DO i=1,N - x(i) = a(i)+b(i)+c(i)+d(i); y(i) = u+d(i); - END DO - RETURN - END - - - SUBROUTINE floop13_F77Overhead(N, y, x, a, b, c, d, u) - INTEGER i, N - REAL*4 y(N), x(N), a(N), b(N), c(N), d(N), u - RETURN - END diff --git a/benchmarks/floop13f90.f90 b/benchmarks/floop13f90.f90 deleted file mode 100644 index 0a690c8b..00000000 --- a/benchmarks/floop13f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE floop13_F90(N, y, x, a, b, c, d, u) - INTEGER i, N - REAL*4 y(N), x(N), a(N), b(N), c(N), d(N), u - - x = a+b+c+d; y = u+d - RETURN - END - - - SUBROUTINE floop13_F90Overhead(N, y, x, a, b, c, d, u) - INTEGER i, N - REAL*4 y(N), x(N), a(N), b(N), c(N), d(N), u - - RETURN - END diff --git a/benchmarks/floop14.cpp b/benchmarks/floop14.cpp deleted file mode 100644 index b7b81b80..00000000 --- a/benchmarks/floop14.cpp +++ /dev/null @@ -1,482 +0,0 @@ - -// floop14 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define floop14_f77 floop14_f77_ - #define floop14_f77overhead floop14_f77overhead_ - #define floop14_f90 floop14_f90_ - #define floop14_f90overhead floop14_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define floop14_f77 floop14_f77__ - #define floop14_f77overhead floop14_f77overhead__ - #define floop14_f90 floop14_f90__ - #define floop14_f90overhead floop14_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define floop14_f77 FLOOP14_F77 - #define floop14_f77overhead FLOOP14_F77OVERHEAD - #define floop14_f90 FLOOP14_F90 - #define floop14_f90overhead FLOOP14_F90OVERHEAD -#endif - -extern "C" { - void floop14_f77(const int& N, float* y, 
float* x, float* a, float* b); - void floop14_f77overhead(const int& N, float* y, float* x, float* a, float* b); - void floop14_f90(const int& N, float* y, float* x, float* a, float* b); - void floop14_f90overhead(const int& N, float* y, float* x, float* a, float* b); - -} - -void VectorVersion(BenchmarkExt& bench); -void ArrayVersion(BenchmarkExt& bench); -void ArrayVersion_unaligned(BenchmarkExt& bench); -void ArrayVersion_misaligned(BenchmarkExt& bench); -void ArrayVersion_index(BenchmarkExt& bench); -void doTinyVectorVersion(BenchmarkExt& bench); -void F77Version(BenchmarkExt& bench); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("floop14: $x = $a+$b; $y = $a-$b", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 2 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - - - ArrayVersion(bench); - ArrayVersion_unaligned(bench); - ArrayVersion_misaligned(bench); - ArrayVersion_index(bench); - //doTinyVectorVersion(bench); - F77Version(bench); -#ifdef FORTRAN_90 - F90Version(bench); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench); -#endif - - if(runvector) - VectorVersion(bench); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("floop14.m"); - return 0; -} - -template -void 
initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector y(N); - initializeRandomDouble(y.data(), N); - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b; y = a-b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b; y = a-b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while 
(!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a(tensor::i)+b(tensor::i); y = a(tensor::i)-b(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array yfill(N+1); - Array y(yfill(Range(1,N))); - initializeRandomDouble(y.dataFirst(), N); - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b; y = a-b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array yfill(N+4); - Array y(yfill(Range(0,N+0-1))); - 
initializeRandomDouble(y.dataFirst(), N); - - Array xfill(N+4); - Array x(xfill(Range(1,N+1-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+4); - Array a(afill(Range(2,N+2-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+4); - Array b(bfill(Range(3,N+3-1))); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b; y = a-b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray y(N); - initializeRandomDouble(y, N); - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b; y = a-b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* y = new float[N]; - initializeRandomDouble(y, N); - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop14_f77(N, y, x, a, b); - bench.stop(); - - bench.startOverhead(); - 
for (int iter=0; iter < iters; ++iter) - floop14_f77overhead(N, y, x, a, b); - - bench.stopOverhead(); - - delete [] y; - delete [] x; - delete [] a; - delete [] b; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* y = new float[N]; - initializeRandomDouble(y, N); - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop14_f90(N, y, x, a, b); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop14_f90overhead(N, y, x, a, b); - - bench.stopOverhead(); - delete [] y; - delete [] x; - delete [] a; - delete [] b; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/floop14f.f b/benchmarks/floop14f.f deleted file mode 100644 index f68a1048..00000000 --- a/benchmarks/floop14f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE floop14_F77(N, y, x, a, b) - INTEGER i, N - REAL*4 y(N), x(N), a(N), b(N) - - DO i=1,N - x(i) = a(i)+b(i); y(i) = a(i)-b(i); - END DO - RETURN - END - - - SUBROUTINE floop14_F77Overhead(N, y, x, a, b) - INTEGER i, N - REAL*4 y(N), x(N), a(N), b(N) - RETURN - END diff --git a/benchmarks/floop14f90.f90 b/benchmarks/floop14f90.f90 deleted file mode 100644 index 88878168..00000000 --- a/benchmarks/floop14f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE floop14_F90(N, y, x, a, b) - INTEGER i, N - REAL*4 y(N), x(N), a(N), b(N) - - x = a+b; y = a-b - RETURN - END - - - SUBROUTINE floop14_F90Overhead(N, y, x, a, b) - INTEGER i, N - REAL*4 y(N), x(N), a(N), b(N) - - RETURN - END diff --git a/benchmarks/floop15.cpp 
b/benchmarks/floop15.cpp deleted file mode 100644 index fa1d1307..00000000 --- a/benchmarks/floop15.cpp +++ /dev/null @@ -1,482 +0,0 @@ - -// floop15 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define floop15_f77 floop15_f77_ - #define floop15_f77overhead floop15_f77overhead_ - #define floop15_f90 floop15_f90_ - #define floop15_f90overhead floop15_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define floop15_f77 floop15_f77__ - #define floop15_f77overhead floop15_f77overhead__ - #define floop15_f90 floop15_f90__ - #define floop15_f90overhead floop15_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define floop15_f77 FLOOP15_F77 - #define floop15_f77overhead FLOOP15_F77OVERHEAD - #define floop15_f90 FLOOP15_F90 - #define floop15_f90overhead FLOOP15_F90OVERHEAD -#endif - -extern "C" { - void floop15_f77(const int& N, float* x, float* a, float* b, float* c); - void floop15_f77overhead(const int& N, float* x, float* a, float* b, float* c); - void floop15_f90(const int& N, float* x, float* a, float* b, float* c); - void floop15_f90overhead(const int& N, float* x, float* a, float* b, float* c); - -} - -void VectorVersion(BenchmarkExt& bench); -void ArrayVersion(BenchmarkExt& bench); -void ArrayVersion_unaligned(BenchmarkExt& bench); -void ArrayVersion_misaligned(BenchmarkExt& bench); -void ArrayVersion_index(BenchmarkExt& bench); -void doTinyVectorVersion(BenchmarkExt& bench); -void F77Version(BenchmarkExt& bench); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench); -#endif - -const int numSizes = 80; -const bool runvector=false; // 
no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("floop15: $x = $c + $a*$b", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 3 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - - - ArrayVersion(bench); - ArrayVersion_unaligned(bench); - ArrayVersion_misaligned(bench); - ArrayVersion_index(bench); - //doTinyVectorVersion(bench); - F77Version(bench); -#ifdef FORTRAN_90 - F90Version(bench); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench); -#endif - - if(runvector) - VectorVersion(bench); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("floop15.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - 
Vector c(N); - initializeRandomDouble(c.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = c + a*b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = c + a*b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = c(tensor::i) + a(tensor::i)*b(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (unal.)"); - - while 
(!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+1); - Array c(cfill(Range(1,N))); - initializeRandomDouble(c.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = c + a*b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+4); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+4); - Array a(afill(Range(1,N+1-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+4); - Array b(bfill(Range(2,N+2-1))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+4); - Array c(cfill(Range(3,N+3-1))); - initializeRandomDouble(c.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = c + a*b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N 
= " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - valarray c(N); - initializeRandomDouble(c, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = c + a*b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - float* c = new float[N]; - initializeRandomDouble(c, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop15_f77(N, x, a, b, c); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop15_f77overhead(N, x, a, b, c); - - bench.stopOverhead(); - - delete [] x; - delete [] a; - delete [] b; - delete [] c; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - float* c = new float[N]; - initializeRandomDouble(c, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop15_f90(N, x, a, b, c); - 
bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop15_f90overhead(N, x, a, b, c); - - bench.stopOverhead(); - delete [] x; - delete [] a; - delete [] b; - delete [] c; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/floop15f.f b/benchmarks/floop15f.f deleted file mode 100644 index e01b6c10..00000000 --- a/benchmarks/floop15f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE floop15_F77(N, x, a, b, c) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N) - - DO i=1,N - x(i) = c(i) + a(i)*b(i); - END DO - RETURN - END - - - SUBROUTINE floop15_F77Overhead(N, x, a, b, c) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N) - RETURN - END diff --git a/benchmarks/floop15f90.f90 b/benchmarks/floop15f90.f90 deleted file mode 100644 index 91b3e201..00000000 --- a/benchmarks/floop15f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE floop15_F90(N, x, a, b, c) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N) - - x = c + a*b - RETURN - END - - - SUBROUTINE floop15_F90Overhead(N, x, a, b, c) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N) - - RETURN - END diff --git a/benchmarks/floop16.cpp b/benchmarks/floop16.cpp deleted file mode 100644 index d86927b0..00000000 --- a/benchmarks/floop16.cpp +++ /dev/null @@ -1,505 +0,0 @@ - -// floop16 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define floop16_f77 floop16_f77_ - #define floop16_f77overhead floop16_f77overhead_ - #define floop16_f90 floop16_f90_ - #define floop16_f90overhead floop16_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define floop16_f77 floop16_f77__ - #define floop16_f77overhead floop16_f77overhead__ - #define 
floop16_f90 floop16_f90__ - #define floop16_f90overhead floop16_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define floop16_f77 FLOOP16_F77 - #define floop16_f77overhead FLOOP16_F77OVERHEAD - #define floop16_f90 FLOOP16_F90 - #define floop16_f90overhead FLOOP16_F90OVERHEAD -#endif - -extern "C" { - void floop16_f77(const int& N, float* y, float* x, float* a, float* b, float* c, const float& u); - void floop16_f77overhead(const int& N, float* y, float* x, float* a, float* b, float* c, const float& u); - void floop16_f90(const int& N, float* y, float* x, float* a, float* b, float* c, const float& u); - void floop16_f90overhead(const int& N, float* y, float* x, float* a, float* b, float* c, const float& u); - -} - -void VectorVersion(BenchmarkExt& bench, float u); -void ArrayVersion(BenchmarkExt& bench, float u); -void ArrayVersion_unaligned(BenchmarkExt& bench, float u); -void ArrayVersion_misaligned(BenchmarkExt& bench, float u); -void ArrayVersion_index(BenchmarkExt& bench, float u); -void doTinyVectorVersion(BenchmarkExt& bench, float u); -void F77Version(BenchmarkExt& bench, float u); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, float u); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, float u); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("floop16: $x = $a+$b+$c; $y = $x+$c+u", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 4 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - 
bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - float u = 0.39123982498157938742; - - - ArrayVersion(bench, u); - ArrayVersion_unaligned(bench, u); - ArrayVersion_misaligned(bench, u); - ArrayVersion_index(bench, u); - //doTinyVectorVersion(bench, u); - F77Version(bench, u); -#ifdef FORTRAN_90 - F90Version(bench, u); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench, u); -#endif - - if(runvector) - VectorVersion(bench, u); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("floop16.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector y(N); - initializeRandomDouble(y.data(), N); - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - Vector c(N); - initializeRandomDouble(c.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b+c; y = x+c+u; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - 
- cout << bench.currentImplementation() << ": N = " << N << endl; - - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b+c; y = x+c+u; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a(tensor::i)+b(tensor::i)+c(tensor::i); y = x(tensor::i)+c(tensor::i)+u;; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array yfill(N+1); - Array y(yfill(Range(1,N))); - initializeRandomDouble(y.dataFirst(), N); - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - 
initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+1); - Array c(cfill(Range(1,N))); - initializeRandomDouble(c.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b+c; y = x+c+u; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array yfill(N+5); - Array y(yfill(Range(0,N+0-1))); - initializeRandomDouble(y.dataFirst(), N); - - Array xfill(N+5); - Array x(xfill(Range(1,N+1-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+5); - Array a(afill(Range(2,N+2-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+5); - Array b(bfill(Range(3,N+3-1))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+5); - Array c(cfill(Range(4,N+4-1))); - initializeRandomDouble(c.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b+c; y = x+c+u; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray y(N); - 
initializeRandomDouble(y, N); - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - valarray c(N); - initializeRandomDouble(c, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b+c; y = x+c+u; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* y = new float[N]; - initializeRandomDouble(y, N); - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - float* c = new float[N]; - initializeRandomDouble(c, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop16_f77(N, y, x, a, b, c, u); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop16_f77overhead(N, y, x, a, b, c, u); - - bench.stopOverhead(); - - delete [] y; - delete [] x; - delete [] a; - delete [] b; - delete [] c; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* y = new float[N]; - initializeRandomDouble(y, N); - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - float* c = new 
float[N]; - initializeRandomDouble(c, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop16_f90(N, y, x, a, b, c, u); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop16_f90overhead(N, y, x, a, b, c, u); - - bench.stopOverhead(); - delete [] y; - delete [] x; - delete [] a; - delete [] b; - delete [] c; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/floop16f.f b/benchmarks/floop16f.f deleted file mode 100644 index de0c0935..00000000 --- a/benchmarks/floop16f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE floop16_F77(N, y, x, a, b, c, u) - INTEGER i, N - REAL*4 y(N), x(N), a(N), b(N), c(N), u - - DO i=1,N - x(i) = a(i)+b(i)+c(i); y(i) = x(i)+c(i)+u; - END DO - RETURN - END - - - SUBROUTINE floop16_F77Overhead(N, y, x, a, b, c, u) - INTEGER i, N - REAL*4 y(N), x(N), a(N), b(N), c(N), u - RETURN - END diff --git a/benchmarks/floop16f90.f90 b/benchmarks/floop16f90.f90 deleted file mode 100644 index 5956a269..00000000 --- a/benchmarks/floop16f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE floop16_F90(N, y, x, a, b, c, u) - INTEGER i, N - REAL*4 y(N), x(N), a(N), b(N), c(N), u - - x = a+b+c; y = x+c+u - RETURN - END - - - SUBROUTINE floop16_F90Overhead(N, y, x, a, b, c, u) - INTEGER i, N - REAL*4 y(N), x(N), a(N), b(N), c(N), u - - RETURN - END diff --git a/benchmarks/floop17.cpp b/benchmarks/floop17.cpp deleted file mode 100644 index d70b156d..00000000 --- a/benchmarks/floop17.cpp +++ /dev/null @@ -1,504 +0,0 @@ - -// floop17 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define floop17_f77 floop17_f77_ - #define floop17_f77overhead floop17_f77overhead_ - #define 
floop17_f90 floop17_f90_ - #define floop17_f90overhead floop17_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define floop17_f77 floop17_f77__ - #define floop17_f77overhead floop17_f77overhead__ - #define floop17_f90 floop17_f90__ - #define floop17_f90overhead floop17_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define floop17_f77 FLOOP17_F77 - #define floop17_f77overhead FLOOP17_F77OVERHEAD - #define floop17_f90 FLOOP17_F90 - #define floop17_f90overhead FLOOP17_F90OVERHEAD -#endif - -extern "C" { - void floop17_f77(const int& N, float* x, float* a, float* b, float* c, float* d); - void floop17_f77overhead(const int& N, float* x, float* a, float* b, float* c, float* d); - void floop17_f90(const int& N, float* x, float* a, float* b, float* c, float* d); - void floop17_f90overhead(const int& N, float* x, float* a, float* b, float* c, float* d); - -} - -void VectorVersion(BenchmarkExt& bench); -void ArrayVersion(BenchmarkExt& bench); -void ArrayVersion_unaligned(BenchmarkExt& bench); -void ArrayVersion_misaligned(BenchmarkExt& bench); -void ArrayVersion_index(BenchmarkExt& bench); -void doTinyVectorVersion(BenchmarkExt& bench); -void F77Version(BenchmarkExt& bench); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("floop17: $x = ($a+$b)*($c+$d)", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 3 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << 
iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - - - ArrayVersion(bench); - ArrayVersion_unaligned(bench); - ArrayVersion_misaligned(bench); - ArrayVersion_index(bench); - //doTinyVectorVersion(bench); - F77Version(bench); -#ifdef FORTRAN_90 - F90Version(bench); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench); -#endif - - if(runvector) - VectorVersion(bench); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("floop17.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - Vector c(N); - initializeRandomDouble(c.data(), N); - Vector d(N); - initializeRandomDouble(d.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = (a+b)*(c+d); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); 
- - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = (a+b)*(c+d); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = (a(tensor::i)+b(tensor::i))*(c(tensor::i)+d(tensor::i));; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - 
Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+1); - Array c(cfill(Range(1,N))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+1); - Array d(dfill(Range(1,N))); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = (a+b)*(c+d); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+5); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+5); - Array a(afill(Range(1,N+1-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+5); - Array b(bfill(Range(2,N+2-1))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+5); - Array c(cfill(Range(3,N+3-1))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+5); - Array d(dfill(Range(4,N+4-1))); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = (a+b)*(c+d); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - 
initializeRandomDouble(b, N); - valarray c(N); - initializeRandomDouble(c, N); - valarray d(N); - initializeRandomDouble(d, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = (a+b)*(c+d); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - float* c = new float[N]; - initializeRandomDouble(c, N); - float* d = new float[N]; - initializeRandomDouble(d, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop17_f77(N, x, a, b, c, d); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop17_f77overhead(N, x, a, b, c, d); - - bench.stopOverhead(); - - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - float* c = new float[N]; - initializeRandomDouble(c, N); - float* d = new float[N]; - initializeRandomDouble(d, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop17_f90(N, x, a, b, c, 
d); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop17_f90overhead(N, x, a, b, c, d); - - bench.stopOverhead(); - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/floop17f.f b/benchmarks/floop17f.f deleted file mode 100644 index 6dbd154b..00000000 --- a/benchmarks/floop17f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE floop17_F77(N, x, a, b, c, d) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), d(N) - - DO i=1,N - x(i) = (a(i)+b(i))*(c(i)+d(i)); - END DO - RETURN - END - - - SUBROUTINE floop17_F77Overhead(N, x, a, b, c, d) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), d(N) - RETURN - END diff --git a/benchmarks/floop17f90.f90 b/benchmarks/floop17f90.f90 deleted file mode 100644 index 2a676fa4..00000000 --- a/benchmarks/floop17f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE floop17_F90(N, x, a, b, c, d) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), d(N) - - x = (a+b)*(c+d) - RETURN - END - - - SUBROUTINE floop17_F90Overhead(N, x, a, b, c, d) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), d(N) - - RETURN - END diff --git a/benchmarks/floop18.cpp b/benchmarks/floop18.cpp deleted file mode 100644 index 9e075c9d..00000000 --- a/benchmarks/floop18.cpp +++ /dev/null @@ -1,462 +0,0 @@ - -// floop18 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define floop18_f77 floop18_f77_ - #define floop18_f77overhead floop18_f77overhead_ - #define floop18_f90 floop18_f90_ - #define floop18_f90overhead floop18_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define floop18_f77 floop18_f77__ - 
#define floop18_f77overhead floop18_f77overhead__ - #define floop18_f90 floop18_f90__ - #define floop18_f90overhead floop18_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define floop18_f77 FLOOP18_F77 - #define floop18_f77overhead FLOOP18_F77OVERHEAD - #define floop18_f90 FLOOP18_F90 - #define floop18_f90overhead FLOOP18_F90OVERHEAD -#endif - -extern "C" { - void floop18_f77(const int& N, float* x, float* a, float* b, const float& u, const float& v); - void floop18_f77overhead(const int& N, float* x, float* a, float* b, const float& u, const float& v); - void floop18_f90(const int& N, float* x, float* a, float* b, const float& u, const float& v); - void floop18_f90overhead(const int& N, float* x, float* a, float* b, const float& u, const float& v); - -} - -void VectorVersion(BenchmarkExt& bench, float u, float v); -void ArrayVersion(BenchmarkExt& bench, float u, float v); -void ArrayVersion_unaligned(BenchmarkExt& bench, float u, float v); -void ArrayVersion_misaligned(BenchmarkExt& bench, float u, float v); -void ArrayVersion_index(BenchmarkExt& bench, float u, float v); -void doTinyVectorVersion(BenchmarkExt& bench, float u, float v); -void F77Version(BenchmarkExt& bench, float u, float v); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, float u, float v); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, float u, float v); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("floop18: $x = (u+$a)*(v+$b)", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 3 * parameters; - iters = 100000000L / flops; - iters = 
where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - float u = 0.39123982498157938742; - float v = 0.39123982498157938742; - - - ArrayVersion(bench, u, v); - ArrayVersion_unaligned(bench, u, v); - ArrayVersion_misaligned(bench, u, v); - ArrayVersion_index(bench, u, v); - //doTinyVectorVersion(bench, u, v); - F77Version(bench, u, v); -#ifdef FORTRAN_90 - F90Version(bench, u, v); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench, u, v); -#endif - - if(runvector) - VectorVersion(bench, u, v); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("floop18.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench, float u, float v) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = (u+a)*(v+b); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench, float u, float v) -{ - bench.beginImplementation("Array"); - - while 
(!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = (u+a)*(v+b); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench, float u, float v) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = (u+a(tensor::i))*(v+b(tensor::i));; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench, float u, float v) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long 
i=0; i < iters; ++i) - { - x = (u+a)*(v+b); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench, float u, float v) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+3); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+3); - Array a(afill(Range(1,N+1-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+3); - Array b(bfill(Range(2,N+2-1))); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = (u+a)*(v+b); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, float u, float v) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = (u+a)*(v+b); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench, float u, float v) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout 
<< bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop18_f77(N, x, a, b, u, v); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop18_f77overhead(N, x, a, b, u, v); - - bench.stopOverhead(); - - delete [] x; - delete [] a; - delete [] b; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, float u, float v) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop18_f90(N, x, a, b, u, v); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop18_f90overhead(N, x, a, b, u, v); - - bench.stopOverhead(); - delete [] x; - delete [] a; - delete [] b; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/floop18f.f b/benchmarks/floop18f.f deleted file mode 100644 index 7d24ebf0..00000000 --- a/benchmarks/floop18f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE floop18_F77(N, x, a, b, u, v) - INTEGER i, N - REAL*4 x(N), a(N), b(N), u, v - - DO i=1,N - x(i) = (u+a(i))*(v+b(i)); - END DO - RETURN - END - - - SUBROUTINE floop18_F77Overhead(N, x, a, b, u, v) - INTEGER i, N - REAL*4 x(N), a(N), b(N), u, v - RETURN - END diff --git a/benchmarks/floop18f90.f90 b/benchmarks/floop18f90.f90 deleted file mode 100644 index 
1e758c40..00000000 --- a/benchmarks/floop18f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE floop18_F90(N, x, a, b, u, v) - INTEGER i, N - REAL*4 x(N), a(N), b(N), u, v - - x = (u+a)*(v+b) - RETURN - END - - - SUBROUTINE floop18_F90Overhead(N, x, a, b, u, v) - INTEGER i, N - REAL*4 x(N), a(N), b(N), u, v - - RETURN - END diff --git a/benchmarks/floop19.cpp b/benchmarks/floop19.cpp deleted file mode 100644 index 8a2461ec..00000000 --- a/benchmarks/floop19.cpp +++ /dev/null @@ -1,484 +0,0 @@ - -// floop19 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define floop19_f77 floop19_f77_ - #define floop19_f77overhead floop19_f77overhead_ - #define floop19_f90 floop19_f90_ - #define floop19_f90overhead floop19_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define floop19_f77 floop19_f77__ - #define floop19_f77overhead floop19_f77overhead__ - #define floop19_f90 floop19_f90__ - #define floop19_f90overhead floop19_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define floop19_f77 FLOOP19_F77 - #define floop19_f77overhead FLOOP19_F77OVERHEAD - #define floop19_f90 FLOOP19_F90 - #define floop19_f90overhead FLOOP19_F90OVERHEAD -#endif - -extern "C" { - void floop19_f77(const int& N, float* y, float* x, float* a, float* b, const float& u, const float& v); - void floop19_f77overhead(const int& N, float* y, float* x, float* a, float* b, const float& u, const float& v); - void floop19_f90(const int& N, float* y, float* x, float* a, float* b, const float& u, const float& v); - void floop19_f90overhead(const int& N, float* y, float* x, float* a, float* b, const float& u, const float& v); - -} - -void 
VectorVersion(BenchmarkExt& bench, float u, float v); -void ArrayVersion(BenchmarkExt& bench, float u, float v); -void ArrayVersion_unaligned(BenchmarkExt& bench, float u, float v); -void ArrayVersion_misaligned(BenchmarkExt& bench, float u, float v); -void ArrayVersion_index(BenchmarkExt& bench, float u, float v); -void doTinyVectorVersion(BenchmarkExt& bench, float u, float v); -void F77Version(BenchmarkExt& bench, float u, float v); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, float u, float v); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, float u, float v); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("floop19: $x = u*$a; $y = v*$b", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 2 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - float u = 0.39123982498157938742; - float v = 0.39123982498157938742; - - - ArrayVersion(bench, u, v); - ArrayVersion_unaligned(bench, u, v); - ArrayVersion_misaligned(bench, u, v); - ArrayVersion_index(bench, u, v); - //doTinyVectorVersion(bench, u, v); - F77Version(bench, u, v); -#ifdef FORTRAN_90 - F90Version(bench, u, v); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench, u, v); -#endif - - if(runvector) - VectorVersion(bench, u, v); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("floop19.m"); - return 0; -} - -template -void 
initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench, float u, float v) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector y(N); - initializeRandomDouble(y.data(), N); - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u*a; y = v*b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench, float u, float v) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u*a; y = v*b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench, float u, float v) -{ - 
bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u*a(tensor::i); y = v*b(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench, float u, float v) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array yfill(N+1); - Array y(yfill(Range(1,N))); - initializeRandomDouble(y.dataFirst(), N); - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u*a; y = v*b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench, float u, float v) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << 
endl; - - - Array yfill(N+4); - Array y(yfill(Range(0,N+0-1))); - initializeRandomDouble(y.dataFirst(), N); - - Array xfill(N+4); - Array x(xfill(Range(1,N+1-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+4); - Array a(afill(Range(2,N+2-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+4); - Array b(bfill(Range(3,N+3-1))); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u*a; y = v*b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, float u, float v) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray y(N); - initializeRandomDouble(y, N); - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u*a; y = v*b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench, float u, float v) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* y = new float[N]; - initializeRandomDouble(y, N); - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - - - bench.start(); - for (int 
iter=0; iter < iters; ++iter) - floop19_f77(N, y, x, a, b, u, v); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop19_f77overhead(N, y, x, a, b, u, v); - - bench.stopOverhead(); - - delete [] y; - delete [] x; - delete [] a; - delete [] b; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, float u, float v) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* y = new float[N]; - initializeRandomDouble(y, N); - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop19_f90(N, y, x, a, b, u, v); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop19_f90overhead(N, y, x, a, b, u, v); - - bench.stopOverhead(); - delete [] y; - delete [] x; - delete [] a; - delete [] b; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/floop19f.f b/benchmarks/floop19f.f deleted file mode 100644 index 04207d2a..00000000 --- a/benchmarks/floop19f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE floop19_F77(N, y, x, a, b, u, v) - INTEGER i, N - REAL*4 y(N), x(N), a(N), b(N), u, v - - DO i=1,N - x(i) = u*a(i); y(i) = v*b(i); - END DO - RETURN - END - - - SUBROUTINE floop19_F77Overhead(N, y, x, a, b, u, v) - INTEGER i, N - REAL*4 y(N), x(N), a(N), b(N), u, v - RETURN - END diff --git a/benchmarks/floop19f90.f90 b/benchmarks/floop19f90.f90 deleted file mode 100644 index c7d26d3d..00000000 --- a/benchmarks/floop19f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE floop19_F90(N, y, x, a, b, u, v) - INTEGER i, N - REAL*4 y(N), x(N), a(N), b(N), u, v - - x = u*a; y = 
v*b - RETURN - END - - - SUBROUTINE floop19_F90Overhead(N, y, x, a, b, u, v) - INTEGER i, N - REAL*4 y(N), x(N), a(N), b(N), u, v - - RETURN - END diff --git a/benchmarks/floop1f.f b/benchmarks/floop1f.f deleted file mode 100644 index cece4298..00000000 --- a/benchmarks/floop1f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE floop1_F77(N, x, y) - INTEGER i, N - REAL*4 x(N), y(N) - - DO i=1,N - x(i) = sqrt(y(i)); - END DO - RETURN - END - - - SUBROUTINE floop1_F77Overhead(N, x, y) - INTEGER i, N - REAL*4 x(N), y(N) - RETURN - END diff --git a/benchmarks/floop1f90.f90 b/benchmarks/floop1f90.f90 deleted file mode 100644 index 8a37efbc..00000000 --- a/benchmarks/floop1f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE floop1_F90(N, x, y) - INTEGER i, N - REAL*4 x(N), y(N) - - x = sqrt(y) - RETURN - END - - - SUBROUTINE floop1_F90Overhead(N, x, y) - INTEGER i, N - REAL*4 x(N), y(N) - - RETURN - END diff --git a/benchmarks/floop2.cpp b/benchmarks/floop2.cpp deleted file mode 100644 index 36afc01d..00000000 --- a/benchmarks/floop2.cpp +++ /dev/null @@ -1,439 +0,0 @@ - -// floop2 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define floop2_f77 floop2_f77_ - #define floop2_f77overhead floop2_f77overhead_ - #define floop2_f90 floop2_f90_ - #define floop2_f90overhead floop2_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define floop2_f77 floop2_f77__ - #define floop2_f77overhead floop2_f77overhead__ - #define floop2_f90 floop2_f90__ - #define floop2_f90overhead floop2_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define floop2_f77 FLOOP2_F77 - #define floop2_f77overhead FLOOP2_F77OVERHEAD - #define floop2_f90 
FLOOP2_F90 - #define floop2_f90overhead FLOOP2_F90OVERHEAD -#endif - -extern "C" { - void floop2_f77(const int& N, float* x, float* y, const float& u); - void floop2_f77overhead(const int& N, float* x, float* y, const float& u); - void floop2_f90(const int& N, float* x, float* y, const float& u); - void floop2_f90overhead(const int& N, float* x, float* y, const float& u); - -} - -void VectorVersion(BenchmarkExt& bench, float u); -void ArrayVersion(BenchmarkExt& bench, float u); -void ArrayVersion_unaligned(BenchmarkExt& bench, float u); -void ArrayVersion_misaligned(BenchmarkExt& bench, float u); -void ArrayVersion_index(BenchmarkExt& bench, float u); -void doTinyVectorVersion(BenchmarkExt& bench, float u); -void F77Version(BenchmarkExt& bench, float u); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, float u); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, float u); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("floop2: $x = $y/u", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 1 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - float u = 0.39123982498157938742; - - - ArrayVersion(bench, u); - ArrayVersion_unaligned(bench, u); - ArrayVersion_misaligned(bench, u); - ArrayVersion_index(bench, u); - //doTinyVectorVersion(bench, u); - F77Version(bench, u); -#ifdef FORTRAN_90 - F90Version(bench, 
u); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench, u); -#endif - - if(runvector) - VectorVersion(bench, u); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("floop2.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector y(N); - initializeRandomDouble(y.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = y/u; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = y/u; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while 
(!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = y(tensor::i)/u;; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array yfill(N+1); - Array y(yfill(Range(1,N))); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = y/u; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+2); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array yfill(N+2); - Array y(yfill(Range(1,N+1-1))); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = y/u; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - 
bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray y(N); - initializeRandomDouble(y, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = y/u; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* y = new float[N]; - initializeRandomDouble(y, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop2_f77(N, x, y, u); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop2_f77overhead(N, x, y, u); - - bench.stopOverhead(); - - delete [] x; - delete [] y; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* y = new float[N]; - initializeRandomDouble(y, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop2_f90(N, x, y, u); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - 
floop2_f90overhead(N, x, y, u); - - bench.stopOverhead(); - delete [] x; - delete [] y; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/floop21.cpp b/benchmarks/floop21.cpp deleted file mode 100644 index d91b5bba..00000000 --- a/benchmarks/floop21.cpp +++ /dev/null @@ -1,504 +0,0 @@ - -// floop21 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define floop21_f77 floop21_f77_ - #define floop21_f77overhead floop21_f77overhead_ - #define floop21_f90 floop21_f90_ - #define floop21_f90overhead floop21_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define floop21_f77 floop21_f77__ - #define floop21_f77overhead floop21_f77overhead__ - #define floop21_f90 floop21_f90__ - #define floop21_f90overhead floop21_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define floop21_f77 FLOOP21_F77 - #define floop21_f77overhead FLOOP21_F77OVERHEAD - #define floop21_f90 FLOOP21_F90 - #define floop21_f90overhead FLOOP21_F90OVERHEAD -#endif - -extern "C" { - void floop21_f77(const int& N, float* x, float* a, float* b, float* c, float* d); - void floop21_f77overhead(const int& N, float* x, float* a, float* b, float* c, float* d); - void floop21_f90(const int& N, float* x, float* a, float* b, float* c, float* d); - void floop21_f90overhead(const int& N, float* x, float* a, float* b, float* c, float* d); - -} - -void VectorVersion(BenchmarkExt& bench); -void ArrayVersion(BenchmarkExt& bench); -void ArrayVersion_unaligned(BenchmarkExt& bench); -void ArrayVersion_misaligned(BenchmarkExt& bench); -void ArrayVersion_index(BenchmarkExt& bench); -void doTinyVectorVersion(BenchmarkExt& bench); -void 
F77Version(BenchmarkExt& bench); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("floop21: $x = $a*$b + $c*$d", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 3 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - - - ArrayVersion(bench); - ArrayVersion_unaligned(bench); - ArrayVersion_misaligned(bench); - ArrayVersion_index(bench); - //doTinyVectorVersion(bench); - F77Version(bench); -#ifdef FORTRAN_90 - F90Version(bench); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench); -#endif - - if(runvector) - VectorVersion(bench); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("floop21.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = 
bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - Vector c(N); - initializeRandomDouble(c.data(), N); - Vector d(N); - initializeRandomDouble(d.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b + c*d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b + c*d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - 
initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a(tensor::i)*b(tensor::i) + c(tensor::i)*d(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+1); - Array c(cfill(Range(1,N))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+1); - Array d(dfill(Range(1,N))); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b + c*d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+5); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+5); - Array a(afill(Range(1,N+1-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+5); - Array b(bfill(Range(2,N+2-1))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+5); - Array 
c(cfill(Range(3,N+3-1))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+5); - Array d(dfill(Range(4,N+4-1))); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b + c*d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - valarray c(N); - initializeRandomDouble(c, N); - valarray d(N); - initializeRandomDouble(d, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b + c*d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - float* c = new float[N]; - initializeRandomDouble(c, N); - float* d = new float[N]; - initializeRandomDouble(d, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop21_f77(N, x, a, b, c, d); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop21_f77overhead(N, x, 
a, b, c, d); - - bench.stopOverhead(); - - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - float* c = new float[N]; - initializeRandomDouble(c, N); - float* d = new float[N]; - initializeRandomDouble(d, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop21_f90(N, x, a, b, c, d); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop21_f90overhead(N, x, a, b, c, d); - - bench.stopOverhead(); - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/floop21f.f b/benchmarks/floop21f.f deleted file mode 100644 index 00c1164d..00000000 --- a/benchmarks/floop21f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE floop21_F77(N, x, a, b, c, d) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), d(N) - - DO i=1,N - x(i) = a(i)*b(i) + c(i)*d(i); - END DO - RETURN - END - - - SUBROUTINE floop21_F77Overhead(N, x, a, b, c, d) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), d(N) - RETURN - END diff --git a/benchmarks/floop21f90.f90 b/benchmarks/floop21f90.f90 deleted file mode 100644 index 78a37e70..00000000 --- a/benchmarks/floop21f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE floop21_F90(N, x, a, b, c, d) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), d(N) - - x = a*b + c*d - RETURN - END - - - SUBROUTINE floop21_F90Overhead(N, x, a, b, c, d) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), d(N) - - 
RETURN - END diff --git a/benchmarks/floop22.cpp b/benchmarks/floop22.cpp deleted file mode 100644 index e0b94b51..00000000 --- a/benchmarks/floop22.cpp +++ /dev/null @@ -1,504 +0,0 @@ - -// floop22 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define floop22_f77 floop22_f77_ - #define floop22_f77overhead floop22_f77overhead_ - #define floop22_f90 floop22_f90_ - #define floop22_f90overhead floop22_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define floop22_f77 floop22_f77__ - #define floop22_f77overhead floop22_f77overhead__ - #define floop22_f90 floop22_f90__ - #define floop22_f90overhead floop22_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define floop22_f77 FLOOP22_F77 - #define floop22_f77overhead FLOOP22_F77OVERHEAD - #define floop22_f90 FLOOP22_F90 - #define floop22_f90overhead FLOOP22_F90OVERHEAD -#endif - -extern "C" { - void floop22_f77(const int& N, float* x, float* a, float* b, float* c, float* d); - void floop22_f77overhead(const int& N, float* x, float* a, float* b, float* c, float* d); - void floop22_f90(const int& N, float* x, float* a, float* b, float* c, float* d); - void floop22_f90overhead(const int& N, float* x, float* a, float* b, float* c, float* d); - -} - -void VectorVersion(BenchmarkExt& bench); -void ArrayVersion(BenchmarkExt& bench); -void ArrayVersion_unaligned(BenchmarkExt& bench); -void ArrayVersion_misaligned(BenchmarkExt& bench); -void ArrayVersion_index(BenchmarkExt& bench); -void doTinyVectorVersion(BenchmarkExt& bench); -void F77Version(BenchmarkExt& bench); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench); -#endif -#ifdef BENCHMARK_VALARRAY -void 
ValarrayVersion(BenchmarkExt& bench); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("floop22: $x = $x + $a*$b + $c*$d", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 4 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - - - ArrayVersion(bench); - ArrayVersion_unaligned(bench); - ArrayVersion_misaligned(bench); - ArrayVersion_index(bench); - //doTinyVectorVersion(bench); - F77Version(bench); -#ifdef FORTRAN_90 - F90Version(bench); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench); -#endif - - if(runvector) - VectorVersion(bench); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("floop22.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); 
- Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - Vector c(N); - initializeRandomDouble(c.data(), N); - Vector d(N); - initializeRandomDouble(d.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = x + a*b + c*d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = x + a*b + c*d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = x(tensor::i) + a(tensor::i)*b(tensor::i) + 
c(tensor::i)*d(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+1); - Array c(cfill(Range(1,N))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+1); - Array d(dfill(Range(1,N))); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = x + a*b + c*d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+5); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+5); - Array a(afill(Range(1,N+1-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+5); - Array b(bfill(Range(2,N+2-1))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+5); - Array c(cfill(Range(3,N+3-1))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+5); - Array d(dfill(Range(4,N+4-1))); - 
initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = x + a*b + c*d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - valarray c(N); - initializeRandomDouble(c, N); - valarray d(N); - initializeRandomDouble(d, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = x + a*b + c*d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - float* c = new float[N]; - initializeRandomDouble(c, N); - float* d = new float[N]; - initializeRandomDouble(d, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop22_f77(N, x, a, b, c, d); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop22_f77overhead(N, x, a, b, c, d); - - bench.stopOverhead(); - - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - - 
} - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - float* c = new float[N]; - initializeRandomDouble(c, N); - float* d = new float[N]; - initializeRandomDouble(d, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop22_f90(N, x, a, b, c, d); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop22_f90overhead(N, x, a, b, c, d); - - bench.stopOverhead(); - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/floop22f.f b/benchmarks/floop22f.f deleted file mode 100644 index c1548f4c..00000000 --- a/benchmarks/floop22f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE floop22_F77(N, x, a, b, c, d) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), d(N) - - DO i=1,N - x(i) = x(i) + a(i)*b(i) + c(i)*d(i); - END DO - RETURN - END - - - SUBROUTINE floop22_F77Overhead(N, x, a, b, c, d) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), d(N) - RETURN - END diff --git a/benchmarks/floop22f90.f90 b/benchmarks/floop22f90.f90 deleted file mode 100644 index cd47a44b..00000000 --- a/benchmarks/floop22f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE floop22_F90(N, x, a, b, c, d) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), d(N) - - x = x + a*b + c*d - RETURN - END - - - SUBROUTINE floop22_F90Overhead(N, x, a, b, c, d) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), d(N) - - RETURN - END diff --git a/benchmarks/floop23.cpp b/benchmarks/floop23.cpp deleted file mode 100644 index 
7aab79df..00000000 --- a/benchmarks/floop23.cpp +++ /dev/null @@ -1,526 +0,0 @@ - -// floop23 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define floop23_f77 floop23_f77_ - #define floop23_f77overhead floop23_f77overhead_ - #define floop23_f90 floop23_f90_ - #define floop23_f90overhead floop23_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define floop23_f77 floop23_f77__ - #define floop23_f77overhead floop23_f77overhead__ - #define floop23_f90 floop23_f90__ - #define floop23_f90overhead floop23_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define floop23_f77 FLOOP23_F77 - #define floop23_f77overhead FLOOP23_F77OVERHEAD - #define floop23_f90 FLOOP23_F90 - #define floop23_f90overhead FLOOP23_F90OVERHEAD -#endif - -extern "C" { - void floop23_f77(const int& N, float* x, float* a, float* b, float* c, float* d, float* y); - void floop23_f77overhead(const int& N, float* x, float* a, float* b, float* c, float* d, float* y); - void floop23_f90(const int& N, float* x, float* a, float* b, float* c, float* d, float* y); - void floop23_f90overhead(const int& N, float* x, float* a, float* b, float* c, float* d, float* y); - -} - -void VectorVersion(BenchmarkExt& bench); -void ArrayVersion(BenchmarkExt& bench); -void ArrayVersion_unaligned(BenchmarkExt& bench); -void ArrayVersion_misaligned(BenchmarkExt& bench); -void ArrayVersion_index(BenchmarkExt& bench); -void doTinyVectorVersion(BenchmarkExt& bench); -void F77Version(BenchmarkExt& bench); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench); -#endif - -const int numSizes = 80; -const 
bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("floop23: $x = $a*$b + $c*$d; $y = $b+$d", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 4 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - - - ArrayVersion(bench); - ArrayVersion_unaligned(bench); - ArrayVersion_misaligned(bench); - ArrayVersion_index(bench); - //doTinyVectorVersion(bench); - F77Version(bench); -#ifdef FORTRAN_90 - F90Version(bench); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench); -#endif - - if(runvector) - VectorVersion(bench); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("floop23.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - 
initializeRandomDouble(b.data(), N); - Vector c(N); - initializeRandomDouble(c.data(), N); - Vector d(N); - initializeRandomDouble(d.data(), N); - Vector y(N); - initializeRandomDouble(y.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b + c*d; y = b+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b + c*d; y = b+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - - - 
bench.start(); - for (long i=0; i < iters; ++i) - { - x = a(tensor::i)*b(tensor::i) + c(tensor::i)*d(tensor::i); y = b(tensor::i)+d(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+1); - Array c(cfill(Range(1,N))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+1); - Array d(dfill(Range(1,N))); - initializeRandomDouble(d.dataFirst(), N); - - Array yfill(N+1); - Array y(yfill(Range(1,N))); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b + c*d; y = b+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+6); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+6); - Array a(afill(Range(1,N+1-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+6); - Array b(bfill(Range(2,N+2-1))); - 
initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+6); - Array c(cfill(Range(3,N+3-1))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+6); - Array d(dfill(Range(4,N+4-1))); - initializeRandomDouble(d.dataFirst(), N); - - Array yfill(N+6); - Array y(yfill(Range(5,N+5-1))); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b + c*d; y = b+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - valarray c(N); - initializeRandomDouble(c, N); - valarray d(N); - initializeRandomDouble(d, N); - valarray y(N); - initializeRandomDouble(y, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b + c*d; y = b+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - float* c = new float[N]; - initializeRandomDouble(c, N); - float* d = new float[N]; 
- initializeRandomDouble(d, N); - float* y = new float[N]; - initializeRandomDouble(y, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop23_f77(N, x, a, b, c, d, y); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop23_f77overhead(N, x, a, b, c, d, y); - - bench.stopOverhead(); - - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - delete [] y; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - float* c = new float[N]; - initializeRandomDouble(c, N); - float* d = new float[N]; - initializeRandomDouble(d, N); - float* y = new float[N]; - initializeRandomDouble(y, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop23_f90(N, x, a, b, c, d, y); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop23_f90overhead(N, x, a, b, c, d, y); - - bench.stopOverhead(); - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - delete [] y; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/floop23f.f b/benchmarks/floop23f.f deleted file mode 100644 index 3990cba0..00000000 --- a/benchmarks/floop23f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE floop23_F77(N, x, a, b, c, d, y) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), d(N), y(N) - - DO i=1,N - x(i) = a(i)*b(i) + c(i)*d(i); y(i) = b(i)+d(i); - END DO - RETURN - END - - - SUBROUTINE floop23_F77Overhead(N, x, a, b, c, d, y) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), d(N), y(N) - 
RETURN - END diff --git a/benchmarks/floop23f90.f90 b/benchmarks/floop23f90.f90 deleted file mode 100644 index af620d32..00000000 --- a/benchmarks/floop23f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE floop23_F90(N, x, a, b, c, d, y) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), d(N), y(N) - - x = a*b + c*d; y = b+d - RETURN - END - - - SUBROUTINE floop23_F90Overhead(N, x, a, b, c, d, y) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), d(N), y(N) - - RETURN - END diff --git a/benchmarks/floop24.cpp b/benchmarks/floop24.cpp deleted file mode 100644 index da16973c..00000000 --- a/benchmarks/floop24.cpp +++ /dev/null @@ -1,526 +0,0 @@ - -// floop24 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define floop24_f77 floop24_f77_ - #define floop24_f77overhead floop24_f77overhead_ - #define floop24_f90 floop24_f90_ - #define floop24_f90overhead floop24_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define floop24_f77 floop24_f77__ - #define floop24_f77overhead floop24_f77overhead__ - #define floop24_f90 floop24_f90__ - #define floop24_f90overhead floop24_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define floop24_f77 FLOOP24_F77 - #define floop24_f77overhead FLOOP24_F77OVERHEAD - #define floop24_f90 FLOOP24_F90 - #define floop24_f90overhead FLOOP24_F90OVERHEAD -#endif - -extern "C" { - void floop24_f77(const int& N, float* x, float* a, float* b, float* c, float* d, float* y); - void floop24_f77overhead(const int& N, float* x, float* a, float* b, float* c, float* d, float* y); - void floop24_f90(const int& N, float* x, float* a, float* b, float* c, float* d, float* y); - void floop24_f90overhead(const int& N, 
float* x, float* a, float* b, float* c, float* d, float* y); - -} - -void VectorVersion(BenchmarkExt& bench); -void ArrayVersion(BenchmarkExt& bench); -void ArrayVersion_unaligned(BenchmarkExt& bench); -void ArrayVersion_misaligned(BenchmarkExt& bench); -void ArrayVersion_index(BenchmarkExt& bench); -void doTinyVectorVersion(BenchmarkExt& bench); -void F77Version(BenchmarkExt& bench); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("floop24: $x = $a*$c - $b*$c; $y = $a*$d + $b+$c", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 6 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - - - ArrayVersion(bench); - ArrayVersion_unaligned(bench); - ArrayVersion_misaligned(bench); - ArrayVersion_index(bench); - //doTinyVectorVersion(bench); - F77Version(bench); -#ifdef FORTRAN_90 - F90Version(bench); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench); -#endif - - if(runvector) - VectorVersion(bench); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("floop24.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void 
initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - Vector c(N); - initializeRandomDouble(c.data(), N); - Vector d(N); - initializeRandomDouble(d.data(), N); - Vector y(N); - initializeRandomDouble(y.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*c - b*c; y = a*d + b+c; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*c - b*c; y = a*d + b+c; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench) -{ - 
bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a(tensor::i)*c(tensor::i) - b(tensor::i)*c(tensor::i); y = a(tensor::i)*d(tensor::i) + b(tensor::i)+c(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+1); - Array c(cfill(Range(1,N))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+1); - Array d(dfill(Range(1,N))); - initializeRandomDouble(d.dataFirst(), N); - - Array yfill(N+1); - Array y(yfill(Range(1,N))); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*c - b*c; y = a*d + b+c; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - 
sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+6); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+6); - Array a(afill(Range(1,N+1-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+6); - Array b(bfill(Range(2,N+2-1))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+6); - Array c(cfill(Range(3,N+3-1))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+6); - Array d(dfill(Range(4,N+4-1))); - initializeRandomDouble(d.dataFirst(), N); - - Array yfill(N+6); - Array y(yfill(Range(5,N+5-1))); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*c - b*c; y = a*d + b+c; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - valarray c(N); - initializeRandomDouble(c, N); - valarray d(N); - initializeRandomDouble(d, N); - valarray y(N); - initializeRandomDouble(y, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*c - b*c; y = a*d + b+c; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; 
i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - float* c = new float[N]; - initializeRandomDouble(c, N); - float* d = new float[N]; - initializeRandomDouble(d, N); - float* y = new float[N]; - initializeRandomDouble(y, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop24_f77(N, x, a, b, c, d, y); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop24_f77overhead(N, x, a, b, c, d, y); - - bench.stopOverhead(); - - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - delete [] y; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - float* c = new float[N]; - initializeRandomDouble(c, N); - float* d = new float[N]; - initializeRandomDouble(d, N); - float* y = new float[N]; - initializeRandomDouble(y, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop24_f90(N, x, a, b, c, d, y); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop24_f90overhead(N, x, a, b, c, d, y); - - 
bench.stopOverhead(); - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - delete [] y; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/floop24f.f b/benchmarks/floop24f.f deleted file mode 100644 index e0985ba3..00000000 --- a/benchmarks/floop24f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE floop24_F77(N, x, a, b, c, d, y) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), d(N), y(N) - - DO i=1,N - x(i) = a(i)*c(i) - b(i)*c(i); y(i) = a(i)*d(i) + b(i)+c(i); - END DO - RETURN - END - - - SUBROUTINE floop24_F77Overhead(N, x, a, b, c, d, y) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), d(N), y(N) - RETURN - END diff --git a/benchmarks/floop24f90.f90 b/benchmarks/floop24f90.f90 deleted file mode 100644 index 84e3451a..00000000 --- a/benchmarks/floop24f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE floop24_F90(N, x, a, b, c, d, y) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), d(N), y(N) - - x = a*c - b*c; y = a*d + b+c - RETURN - END - - - SUBROUTINE floop24_F90Overhead(N, x, a, b, c, d, y) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), d(N), y(N) - - RETURN - END diff --git a/benchmarks/floop25.cpp b/benchmarks/floop25.cpp deleted file mode 100644 index 84011208..00000000 --- a/benchmarks/floop25.cpp +++ /dev/null @@ -1,507 +0,0 @@ - -// floop25 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define floop25_f77 floop25_f77_ - #define floop25_f77overhead floop25_f77overhead_ - #define floop25_f90 floop25_f90_ - #define floop25_f90overhead floop25_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define floop25_f77 floop25_f77__ - #define floop25_f77overhead 
floop25_f77overhead__ - #define floop25_f90 floop25_f90__ - #define floop25_f90overhead floop25_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define floop25_f77 FLOOP25_F77 - #define floop25_f77overhead FLOOP25_F77OVERHEAD - #define floop25_f90 FLOOP25_F90 - #define floop25_f90overhead FLOOP25_F90OVERHEAD -#endif - -extern "C" { - void floop25_f77(const int& N, float* x, float* a, float* b, float* c, float* y, const float& u, const float& v, const float& w); - void floop25_f77overhead(const int& N, float* x, float* a, float* b, float* c, float* y, const float& u, const float& v, const float& w); - void floop25_f90(const int& N, float* x, float* a, float* b, float* c, float* y, const float& u, const float& v, const float& w); - void floop25_f90overhead(const int& N, float* x, float* a, float* b, float* c, float* y, const float& u, const float& v, const float& w); - -} - -void VectorVersion(BenchmarkExt& bench, float u, float v, float w); -void ArrayVersion(BenchmarkExt& bench, float u, float v, float w); -void ArrayVersion_unaligned(BenchmarkExt& bench, float u, float v, float w); -void ArrayVersion_misaligned(BenchmarkExt& bench, float u, float v, float w); -void ArrayVersion_index(BenchmarkExt& bench, float u, float v, float w); -void doTinyVectorVersion(BenchmarkExt& bench, float u, float v, float w); -void F77Version(BenchmarkExt& bench, float u, float v, float w); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, float u, float v, float w); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, float u, float v, float w); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("floop25: $x = u*$b; $y = v*$b + w*$a + u*$c", numBenchmarks); - - bench.setNumParameters(numSizes); 
- - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 6 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - float u = 0.39123982498157938742; - float v = 0.39123982498157938742; - float w = 0.39123982498157938742; - - - ArrayVersion(bench, u, v, w); - ArrayVersion_unaligned(bench, u, v, w); - ArrayVersion_misaligned(bench, u, v, w); - ArrayVersion_index(bench, u, v, w); - //doTinyVectorVersion(bench, u, v, w); - F77Version(bench, u, v, w); -#ifdef FORTRAN_90 - F90Version(bench, u, v, w); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench, u, v, w); -#endif - - if(runvector) - VectorVersion(bench, u, v, w); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("floop25.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench, float u, float v, float w) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - Vector c(N); - initializeRandomDouble(c.data(), N); - Vector y(N); - initializeRandomDouble(y.data(), N); - 
- - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u*b; y = v*b + w*a + u*c; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench, float u, float v, float w) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u*b; y = v*b + w*a + u*c; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench, float u, float v, float w) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u*b(tensor::i); y = v*b(tensor::i) + w*a(tensor::i) + u*c(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } 
- - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench, float u, float v, float w) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+1); - Array c(cfill(Range(1,N))); - initializeRandomDouble(c.dataFirst(), N); - - Array yfill(N+1); - Array y(yfill(Range(1,N))); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u*b; y = v*b + w*a + u*c; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench, float u, float v, float w) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+5); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+5); - Array a(afill(Range(1,N+1-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+5); - Array b(bfill(Range(2,N+2-1))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+5); - Array c(cfill(Range(3,N+3-1))); - initializeRandomDouble(c.dataFirst(), N); - - Array yfill(N+5); - Array y(yfill(Range(4,N+4-1))); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u*b; y = v*b + 
w*a + u*c; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, float u, float v, float w) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - valarray c(N); - initializeRandomDouble(c, N); - valarray y(N); - initializeRandomDouble(y, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u*b; y = v*b + w*a + u*c; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench, float u, float v, float w) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - float* c = new float[N]; - initializeRandomDouble(c, N); - float* y = new float[N]; - initializeRandomDouble(y, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop25_f77(N, x, a, b, c, y, u, v, w); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop25_f77overhead(N, x, a, b, c, y, u, v, w); - - bench.stopOverhead(); - - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] y; - - } - - 
bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, float u, float v, float w) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - float* c = new float[N]; - initializeRandomDouble(c, N); - float* y = new float[N]; - initializeRandomDouble(y, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop25_f90(N, x, a, b, c, y, u, v, w); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop25_f90overhead(N, x, a, b, c, y, u, v, w); - - bench.stopOverhead(); - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] y; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/floop25f.f b/benchmarks/floop25f.f deleted file mode 100644 index 9f007c6d..00000000 --- a/benchmarks/floop25f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE floop25_F77(N, x, a, b, c, y, u, v, w) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), y(N), u, v, w - - DO i=1,N - x(i) = u*b(i); y(i) = v*b(i) + w*a(i) + u*c(i); - END DO - RETURN - END - - - SUBROUTINE floop25_F77Overhead(N, x, a, b, c, y, u, v, w) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), y(N), u, v, w - RETURN - END diff --git a/benchmarks/floop25f90.f90 b/benchmarks/floop25f90.f90 deleted file mode 100644 index 33a45013..00000000 --- a/benchmarks/floop25f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE floop25_F90(N, x, a, b, c, y, u, v, w) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), y(N), u, v, w - - x = u*b; y = v*b + w*a + u*c - RETURN - END - - - SUBROUTINE floop25_F90Overhead(N, x, a, b, c, y, u, v, w) - INTEGER i, N - REAL*4 x(N), a(N), b(N), 
c(N), y(N), u, v, w - - RETURN - END diff --git a/benchmarks/floop2f.f b/benchmarks/floop2f.f deleted file mode 100644 index e993794d..00000000 --- a/benchmarks/floop2f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE floop2_F77(N, x, y, u) - INTEGER i, N - REAL*4 x(N), y(N), u - - DO i=1,N - x(i) = y(i)/u; - END DO - RETURN - END - - - SUBROUTINE floop2_F77Overhead(N, x, y, u) - INTEGER i, N - REAL*4 x(N), y(N), u - RETURN - END diff --git a/benchmarks/floop2f90.f90 b/benchmarks/floop2f90.f90 deleted file mode 100644 index 4bccc903..00000000 --- a/benchmarks/floop2f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE floop2_F90(N, x, y, u) - INTEGER i, N - REAL*4 x(N), y(N), u - - x = y/u - RETURN - END - - - SUBROUTINE floop2_F90Overhead(N, x, y, u) - INTEGER i, N - REAL*4 x(N), y(N), u - - RETURN - END diff --git a/benchmarks/floop3.cpp b/benchmarks/floop3.cpp deleted file mode 100644 index 95ec16c0..00000000 --- a/benchmarks/floop3.cpp +++ /dev/null @@ -1,439 +0,0 @@ - -// floop3 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define floop3_f77 floop3_f77_ - #define floop3_f77overhead floop3_f77overhead_ - #define floop3_f90 floop3_f90_ - #define floop3_f90overhead floop3_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define floop3_f77 floop3_f77__ - #define floop3_f77overhead floop3_f77overhead__ - #define floop3_f90 floop3_f90__ - #define floop3_f90overhead floop3_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define floop3_f77 FLOOP3_F77 - #define floop3_f77overhead FLOOP3_F77OVERHEAD - #define floop3_f90 FLOOP3_F90 - #define floop3_f90overhead FLOOP3_F90OVERHEAD -#endif - -extern "C" { - void 
floop3_f77(const int& N, float* x, float* y, const float& a); - void floop3_f77overhead(const int& N, float* x, float* y, const float& a); - void floop3_f90(const int& N, float* x, float* y, const float& a); - void floop3_f90overhead(const int& N, float* x, float* y, const float& a); - -} - -void VectorVersion(BenchmarkExt& bench, float a); -void ArrayVersion(BenchmarkExt& bench, float a); -void ArrayVersion_unaligned(BenchmarkExt& bench, float a); -void ArrayVersion_misaligned(BenchmarkExt& bench, float a); -void ArrayVersion_index(BenchmarkExt& bench, float a); -void doTinyVectorVersion(BenchmarkExt& bench, float a); -void F77Version(BenchmarkExt& bench, float a); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, float a); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, float a); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("floop3: $y = $y + a*$x", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 2 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - float a = 0.39123982498157938742; - - - ArrayVersion(bench, a); - ArrayVersion_unaligned(bench, a); - ArrayVersion_misaligned(bench, a); - ArrayVersion_index(bench, a); - //doTinyVectorVersion(bench, a); - F77Version(bench, a); -#ifdef FORTRAN_90 - F90Version(bench, a); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench, a); -#endif - - 
if(runvector) - VectorVersion(bench, a); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("floop3.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench, float a) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector y(N); - initializeRandomDouble(y.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - y = y + a*x; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench, float a) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - y = y + a*x; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench, float a) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = 
bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - y = y(tensor::i) + a*x(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench, float a) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array yfill(N+1); - Array y(yfill(Range(1,N))); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - y = y + a*x; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench, float a) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+2); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array yfill(N+2); - Array y(yfill(Range(1,N+1-1))); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - y = y + a*x; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} 
- -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, float a) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray y(N); - initializeRandomDouble(y, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - y = y + a*x; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench, float a) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* y = new float[N]; - initializeRandomDouble(y, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop3_f77(N, x, y, a); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop3_f77overhead(N, x, y, a); - - bench.stopOverhead(); - - delete [] x; - delete [] y; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, float a) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* y = new float[N]; - initializeRandomDouble(y, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop3_f90(N, x, y, a); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop3_f90overhead(N, x, y, 
a); - - bench.stopOverhead(); - delete [] x; - delete [] y; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/floop36.cpp b/benchmarks/floop36.cpp deleted file mode 100644 index 63e0b584..00000000 --- a/benchmarks/floop36.cpp +++ /dev/null @@ -1,438 +0,0 @@ - -// floop36 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define floop36_f77 floop36_f77_ - #define floop36_f77overhead floop36_f77overhead_ - #define floop36_f90 floop36_f90_ - #define floop36_f90overhead floop36_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define floop36_f77 floop36_f77__ - #define floop36_f77overhead floop36_f77overhead__ - #define floop36_f90 floop36_f90__ - #define floop36_f90overhead floop36_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define floop36_f77 FLOOP36_F77 - #define floop36_f77overhead FLOOP36_F77OVERHEAD - #define floop36_f90 FLOOP36_F90 - #define floop36_f90overhead FLOOP36_F90OVERHEAD -#endif - -extern "C" { - void floop36_f77(const int& N, float* x, float* e); - void floop36_f77overhead(const int& N, float* x, float* e); - void floop36_f90(const int& N, float* x, float* e); - void floop36_f90overhead(const int& N, float* x, float* e); - -} - -void VectorVersion(BenchmarkExt& bench); -void ArrayVersion(BenchmarkExt& bench); -void ArrayVersion_unaligned(BenchmarkExt& bench); -void ArrayVersion_misaligned(BenchmarkExt& bench); -void ArrayVersion_index(BenchmarkExt& bench); -void doTinyVectorVersion(BenchmarkExt& bench); -void F77Version(BenchmarkExt& bench); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench); 
-#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("floop36: $x = exp($e)", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 1 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - - - ArrayVersion(bench); - ArrayVersion_unaligned(bench); - ArrayVersion_misaligned(bench); - ArrayVersion_index(bench); - //doTinyVectorVersion(bench); - F77Version(bench); -#ifdef FORTRAN_90 - F90Version(bench); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench); -#endif - - if(runvector) - VectorVersion(bench); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("floop36.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector e(N); - 
initializeRandomDouble(e.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = exp(e); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array e(N); - initializeRandomDouble(e.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = exp(e); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array e(N); - initializeRandomDouble(e.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = exp(e(tensor::i));; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array efill(N+1); - 
Array e(efill(Range(1,N))); - initializeRandomDouble(e.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = exp(e); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+2); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array efill(N+2); - Array e(efill(Range(1,N+1-1))); - initializeRandomDouble(e.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = exp(e); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray e(N); - initializeRandomDouble(e, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = exp(e); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = 
new float[N]; - initializeRandomDouble(x, N); - float* e = new float[N]; - initializeRandomDouble(e, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop36_f77(N, x, e); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop36_f77overhead(N, x, e); - - bench.stopOverhead(); - - delete [] x; - delete [] e; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* e = new float[N]; - initializeRandomDouble(e, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop36_f90(N, x, e); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop36_f90overhead(N, x, e); - - bench.stopOverhead(); - delete [] x; - delete [] e; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/floop36f.f b/benchmarks/floop36f.f deleted file mode 100644 index 5850e759..00000000 --- a/benchmarks/floop36f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE floop36_F77(N, x, e) - INTEGER i, N - REAL*4 x(N), e(N) - - DO i=1,N - x(i) = exp(e(i)); - END DO - RETURN - END - - - SUBROUTINE floop36_F77Overhead(N, x, e) - INTEGER i, N - REAL*4 x(N), e(N) - RETURN - END diff --git a/benchmarks/floop36f90.f90 b/benchmarks/floop36f90.f90 deleted file mode 100644 index ba4b5586..00000000 --- a/benchmarks/floop36f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE floop36_F90(N, x, e) - INTEGER i, N - REAL*4 x(N), e(N) - - x = exp(e) - RETURN - END - - - SUBROUTINE floop36_F90Overhead(N, x, e) - INTEGER i, N - REAL*4 x(N), e(N) - - RETURN - END diff --git a/benchmarks/floop3f.f b/benchmarks/floop3f.f deleted file mode 100644 index 
22b53534..00000000 --- a/benchmarks/floop3f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE floop3_F77(N, x, y, a) - INTEGER i, N - REAL*4 x(N), y(N), a - - DO i=1,N - y(i) = y(i) + a*x(i); - END DO - RETURN - END - - - SUBROUTINE floop3_F77Overhead(N, x, y, a) - INTEGER i, N - REAL*4 x(N), y(N), a - RETURN - END diff --git a/benchmarks/floop3f90.f90 b/benchmarks/floop3f90.f90 deleted file mode 100644 index 17c976a4..00000000 --- a/benchmarks/floop3f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE floop3_F90(N, x, y, a) - INTEGER i, N - REAL*4 x(N), y(N), a - - y = y + a*x - RETURN - END - - - SUBROUTINE floop3_F90Overhead(N, x, y, a) - INTEGER i, N - REAL*4 x(N), y(N), a - - RETURN - END diff --git a/benchmarks/floop5.cpp b/benchmarks/floop5.cpp deleted file mode 100644 index 23e69c6e..00000000 --- a/benchmarks/floop5.cpp +++ /dev/null @@ -1,460 +0,0 @@ - -// floop5 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define floop5_f77 floop5_f77_ - #define floop5_f77overhead floop5_f77overhead_ - #define floop5_f90 floop5_f90_ - #define floop5_f90overhead floop5_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define floop5_f77 floop5_f77__ - #define floop5_f77overhead floop5_f77overhead__ - #define floop5_f90 floop5_f90__ - #define floop5_f90overhead floop5_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define floop5_f77 FLOOP5_F77 - #define floop5_f77overhead FLOOP5_F77OVERHEAD - #define floop5_f90 FLOOP5_F90 - #define floop5_f90overhead FLOOP5_F90OVERHEAD -#endif - -extern "C" { - void floop5_f77(const int& N, float* x, float* a, float* b); - void floop5_f77overhead(const int& N, float* x, float* a, float* b); 
- void floop5_f90(const int& N, float* x, float* a, float* b); - void floop5_f90overhead(const int& N, float* x, float* a, float* b); - -} - -void VectorVersion(BenchmarkExt& bench); -void ArrayVersion(BenchmarkExt& bench); -void ArrayVersion_unaligned(BenchmarkExt& bench); -void ArrayVersion_misaligned(BenchmarkExt& bench); -void ArrayVersion_index(BenchmarkExt& bench); -void doTinyVectorVersion(BenchmarkExt& bench); -void F77Version(BenchmarkExt& bench); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("floop5: $x = $a+$b", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 1 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - - - ArrayVersion(bench); - ArrayVersion_unaligned(bench); - ArrayVersion_misaligned(bench); - ArrayVersion_index(bench); - //doTinyVectorVersion(bench); - F77Version(bench); -#ifdef FORTRAN_90 - F90Version(bench); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench); -#endif - - if(runvector) - VectorVersion(bench); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("floop5.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = 
rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), 
N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a(tensor::i)+b(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+3); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+3); - Array a(afill(Range(1,N+1-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+3); - Array b(bfill(Range(2,N+2-1))); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef 
BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop5_f77(N, x, a, b); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop5_f77overhead(N, x, a, b); - - bench.stopOverhead(); - - delete [] x; - delete [] a; - delete [] b; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - - - bench.start(); - for (int iter=0; iter < iters; 
++iter) - floop5_f90(N, x, a, b); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop5_f90overhead(N, x, a, b); - - bench.stopOverhead(); - delete [] x; - delete [] a; - delete [] b; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/floop5f.f b/benchmarks/floop5f.f deleted file mode 100644 index fca4a10a..00000000 --- a/benchmarks/floop5f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE floop5_F77(N, x, a, b) - INTEGER i, N - REAL*4 x(N), a(N), b(N) - - DO i=1,N - x(i) = a(i)+b(i); - END DO - RETURN - END - - - SUBROUTINE floop5_F77Overhead(N, x, a, b) - INTEGER i, N - REAL*4 x(N), a(N), b(N) - RETURN - END diff --git a/benchmarks/floop5f90.f90 b/benchmarks/floop5f90.f90 deleted file mode 100644 index f93915c7..00000000 --- a/benchmarks/floop5f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE floop5_F90(N, x, a, b) - INTEGER i, N - REAL*4 x(N), a(N), b(N) - - x = a+b - RETURN - END - - - SUBROUTINE floop5_F90Overhead(N, x, a, b) - INTEGER i, N - REAL*4 x(N), a(N), b(N) - - RETURN - END diff --git a/benchmarks/floop6.cpp b/benchmarks/floop6.cpp deleted file mode 100644 index 59710fe2..00000000 --- a/benchmarks/floop6.cpp +++ /dev/null @@ -1,460 +0,0 @@ - -// floop6 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define floop6_f77 floop6_f77_ - #define floop6_f77overhead floop6_f77overhead_ - #define floop6_f90 floop6_f90_ - #define floop6_f90overhead floop6_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define floop6_f77 floop6_f77__ - #define floop6_f77overhead floop6_f77overhead__ - #define floop6_f90 floop6_f90__ - #define floop6_f90overhead 
floop6_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define floop6_f77 FLOOP6_F77 - #define floop6_f77overhead FLOOP6_F77OVERHEAD - #define floop6_f90 FLOOP6_F90 - #define floop6_f90overhead FLOOP6_F90OVERHEAD -#endif - -extern "C" { - void floop6_f77(const int& N, float* x, float* a, float* b); - void floop6_f77overhead(const int& N, float* x, float* a, float* b); - void floop6_f90(const int& N, float* x, float* a, float* b); - void floop6_f90overhead(const int& N, float* x, float* a, float* b); - -} - -void VectorVersion(BenchmarkExt& bench); -void ArrayVersion(BenchmarkExt& bench); -void ArrayVersion_unaligned(BenchmarkExt& bench); -void ArrayVersion_misaligned(BenchmarkExt& bench); -void ArrayVersion_index(BenchmarkExt& bench); -void doTinyVectorVersion(BenchmarkExt& bench); -void F77Version(BenchmarkExt& bench); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("floop6: $x = $a*$b", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 1 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - - - ArrayVersion(bench); - ArrayVersion_unaligned(bench); - ArrayVersion_misaligned(bench); - ArrayVersion_index(bench); - //doTinyVectorVersion(bench); - F77Version(bench); -#ifdef FORTRAN_90 - 
F90Version(bench); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench); -#endif - - if(runvector) - VectorVersion(bench); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("floop6.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void 
ArrayVersion_index(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a(tensor::i)*b(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+3); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+3); - Array a(afill(Range(1,N+1-1))); - 
initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+3); - Array b(bfill(Range(2,N+2-1))); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop6_f77(N, x, a, b); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop6_f77overhead(N, x, a, b); - - bench.stopOverhead(); - - delete [] x; - delete [] a; - delete [] b; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 90"); - - while 
(!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop6_f90(N, x, a, b); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop6_f90overhead(N, x, a, b); - - bench.stopOverhead(); - delete [] x; - delete [] a; - delete [] b; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/floop6f.f b/benchmarks/floop6f.f deleted file mode 100644 index 44e34043..00000000 --- a/benchmarks/floop6f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE floop6_F77(N, x, a, b) - INTEGER i, N - REAL*4 x(N), a(N), b(N) - - DO i=1,N - x(i) = a(i)*b(i); - END DO - RETURN - END - - - SUBROUTINE floop6_F77Overhead(N, x, a, b) - INTEGER i, N - REAL*4 x(N), a(N), b(N) - RETURN - END diff --git a/benchmarks/floop6f90.f90 b/benchmarks/floop6f90.f90 deleted file mode 100644 index 12bb2e5b..00000000 --- a/benchmarks/floop6f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE floop6_F90(N, x, a, b) - INTEGER i, N - REAL*4 x(N), a(N), b(N) - - x = a*b - RETURN - END - - - SUBROUTINE floop6_F90Overhead(N, x, a, b) - INTEGER i, N - REAL*4 x(N), a(N), b(N) - - RETURN - END diff --git a/benchmarks/floop8.cpp b/benchmarks/floop8.cpp deleted file mode 100644 index afc2e678..00000000 --- a/benchmarks/floop8.cpp +++ /dev/null @@ -1,439 +0,0 @@ - -// floop8 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if 
defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define floop8_f77 floop8_f77_ - #define floop8_f77overhead floop8_f77overhead_ - #define floop8_f90 floop8_f90_ - #define floop8_f90overhead floop8_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define floop8_f77 floop8_f77__ - #define floop8_f77overhead floop8_f77overhead__ - #define floop8_f90 floop8_f90__ - #define floop8_f90overhead floop8_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define floop8_f77 FLOOP8_F77 - #define floop8_f77overhead FLOOP8_F77OVERHEAD - #define floop8_f90 FLOOP8_F90 - #define floop8_f90overhead FLOOP8_F90OVERHEAD -#endif - -extern "C" { - void floop8_f77(const int& N, float* a, float* x, const float& u); - void floop8_f77overhead(const int& N, float* a, float* x, const float& u); - void floop8_f90(const int& N, float* a, float* x, const float& u); - void floop8_f90overhead(const int& N, float* a, float* x, const float& u); - -} - -void VectorVersion(BenchmarkExt& bench, float u); -void ArrayVersion(BenchmarkExt& bench, float u); -void ArrayVersion_unaligned(BenchmarkExt& bench, float u); -void ArrayVersion_misaligned(BenchmarkExt& bench, float u); -void ArrayVersion_index(BenchmarkExt& bench, float u); -void doTinyVectorVersion(BenchmarkExt& bench, float u); -void F77Version(BenchmarkExt& bench, float u); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, float u); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, float u); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("floop8: $x = u/$a", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - 
parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 1 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - float u = 0.39123982498157938742; - - - ArrayVersion(bench, u); - ArrayVersion_unaligned(bench, u); - ArrayVersion_misaligned(bench, u); - ArrayVersion_index(bench, u); - //doTinyVectorVersion(bench, u); - F77Version(bench, u); -#ifdef FORTRAN_90 - F90Version(bench, u); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench, u); -#endif - - if(runvector) - VectorVersion(bench, u); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("floop8.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector x(N); - initializeRandomDouble(x.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u/a; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N 
= bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u/a; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u/a(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u/a; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Array 
(misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array afill(N+2); - Array a(afill(Range(0,N+0-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array xfill(N+2); - Array x(xfill(Range(1,N+1-1))); - initializeRandomDouble(x.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u/a; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray a(N); - initializeRandomDouble(a, N); - valarray x(N); - initializeRandomDouble(x, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u/a; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* a = new float[N]; - initializeRandomDouble(a, N); - float* x = new float[N]; - initializeRandomDouble(x, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop8_f77(N, a, x, u); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop8_f77overhead(N, a, x, u); - - bench.stopOverhead(); - - delete [] a; - delete [] x; - - } - - 
bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* a = new float[N]; - initializeRandomDouble(a, N); - float* x = new float[N]; - initializeRandomDouble(x, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop8_f90(N, a, x, u); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop8_f90overhead(N, a, x, u); - - bench.stopOverhead(); - delete [] a; - delete [] x; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/floop8f.f b/benchmarks/floop8f.f deleted file mode 100644 index 709e3dc6..00000000 --- a/benchmarks/floop8f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE floop8_F77(N, a, x, u) - INTEGER i, N - REAL*4 a(N), x(N), u - - DO i=1,N - x(i) = u/a(i); - END DO - RETURN - END - - - SUBROUTINE floop8_F77Overhead(N, a, x, u) - INTEGER i, N - REAL*4 a(N), x(N), u - RETURN - END diff --git a/benchmarks/floop8f90.f90 b/benchmarks/floop8f90.f90 deleted file mode 100644 index 12467c9f..00000000 --- a/benchmarks/floop8f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE floop8_F90(N, a, x, u) - INTEGER i, N - REAL*4 a(N), x(N), u - - x = u/a - RETURN - END - - - SUBROUTINE floop8_F90Overhead(N, a, x, u) - INTEGER i, N - REAL*4 a(N), x(N), u - - RETURN - END diff --git a/benchmarks/floop9.cpp b/benchmarks/floop9.cpp deleted file mode 100644 index 3b8d95f3..00000000 --- a/benchmarks/floop9.cpp +++ /dev/null @@ -1,438 +0,0 @@ - -// floop9 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace 
blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define floop9_f77 floop9_f77_ - #define floop9_f77overhead floop9_f77overhead_ - #define floop9_f90 floop9_f90_ - #define floop9_f90overhead floop9_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define floop9_f77 floop9_f77__ - #define floop9_f77overhead floop9_f77overhead__ - #define floop9_f90 floop9_f90__ - #define floop9_f90overhead floop9_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define floop9_f77 FLOOP9_F77 - #define floop9_f77overhead FLOOP9_F77OVERHEAD - #define floop9_f90 FLOOP9_F90 - #define floop9_f90overhead FLOOP9_F90OVERHEAD -#endif - -extern "C" { - void floop9_f77(const int& N, float* a, float* x); - void floop9_f77overhead(const int& N, float* a, float* x); - void floop9_f90(const int& N, float* a, float* x); - void floop9_f90overhead(const int& N, float* a, float* x); - -} - -void VectorVersion(BenchmarkExt& bench); -void ArrayVersion(BenchmarkExt& bench); -void ArrayVersion_unaligned(BenchmarkExt& bench); -void ArrayVersion_misaligned(BenchmarkExt& bench); -void ArrayVersion_index(BenchmarkExt& bench); -void doTinyVectorVersion(BenchmarkExt& bench); -void F77Version(BenchmarkExt& bench); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("floop9: $x = $x+$a", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 1 * parameters; - iters = 100000000L / flops; - iters = 
where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - - - ArrayVersion(bench); - ArrayVersion_unaligned(bench); - ArrayVersion_misaligned(bench); - ArrayVersion_index(bench); - //doTinyVectorVersion(bench); - F77Version(bench); -#ifdef FORTRAN_90 - F90Version(bench); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench); -#endif - - if(runvector) - VectorVersion(bench); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("floop9.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector x(N); - initializeRandomDouble(x.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = x+a; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - 
Array x(N); - initializeRandomDouble(x.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = x+a; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = x(tensor::i)+a(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = x+a; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - 
Array afill(N+2); - Array a(afill(Range(0,N+0-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array xfill(N+2); - Array x(xfill(Range(1,N+1-1))); - initializeRandomDouble(x.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = x+a; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray a(N); - initializeRandomDouble(a, N); - valarray x(N); - initializeRandomDouble(x, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = x+a; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* a = new float[N]; - initializeRandomDouble(a, N); - float* x = new float[N]; - initializeRandomDouble(x, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop9_f77(N, a, x); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop9_f77overhead(N, a, x); - - bench.stopOverhead(); - - delete [] a; - delete [] x; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << 
bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* a = new float[N]; - initializeRandomDouble(a, N); - float* x = new float[N]; - initializeRandomDouble(x, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop9_f90(N, a, x); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop9_f90overhead(N, a, x); - - bench.stopOverhead(); - delete [] a; - delete [] x; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/floop9f.f b/benchmarks/floop9f.f deleted file mode 100644 index c6f00d9f..00000000 --- a/benchmarks/floop9f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE floop9_F77(N, a, x) - INTEGER i, N - REAL*4 a(N), x(N) - - DO i=1,N - x(i) = x(i)+a(i); - END DO - RETURN - END - - - SUBROUTINE floop9_F77Overhead(N, a, x) - INTEGER i, N - REAL*4 a(N), x(N) - RETURN - END diff --git a/benchmarks/floop9f90.f90 b/benchmarks/floop9f90.f90 deleted file mode 100644 index 8055aa94..00000000 --- a/benchmarks/floop9f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE floop9_F90(N, a, x) - INTEGER i, N - REAL*4 a(N), x(N) - - x = x+a - RETURN - END - - - SUBROUTINE floop9_F90Overhead(N, a, x) - INTEGER i, N - REAL*4 a(N), x(N) - - RETURN - END diff --git a/benchmarks/frek.m b/benchmarks/frek.m deleted file mode 100644 index efe9885e..00000000 --- a/benchmarks/frek.m +++ /dev/null @@ -1,7 +0,0 @@ -function frek(Mf) - A = zeros(1,4); - A = max(Mf); - A./A(1,3) - A = Mf(19,:); - A./A(1,3) - diff --git a/benchmarks/haney.cpp b/benchmarks/haney.cpp deleted file mode 100644 index 9226272a..00000000 --- a/benchmarks/haney.cpp +++ /dev/null @@ -1,207 +0,0 @@ -// Haney's induction calculation benchmark. -// -// See: Scott W. Haney, Is C++ Fast Enough for Scientific Computing? -// Computers in Physics Vol. 8 No. 6 (1994), p. 690 -// -// Arch D. Robison, C++ Gets Faster for Scientific Computing, -// Computers in Physics Vol. 10 No. 5 (1996), p. 
458 -// - -#include -#include -#include -#ifdef BZ_HAVE_STD -#include -#else -#include -#endif - -using namespace blitz; - -#ifndef M_PI - #define M_PI 3.14159265358979323846 -#endif - -#ifdef BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES -#define vecopsf vecopsf_ -#define vecopsfo vecopsfo_ -#endif - -extern "C" -{ - void vecopsf(float *li, const float *R, const float *w, const int &N, - const int& iters); - void vecopsfo(float *li, const float *R, const float *w, const int &N, - const int& iters); -} - -inline float sqr(float x) -{ - return x*x; -} - -const float Mu0 = 4.0 * M_PI * 1.0e-7; - -void HaneyCVersion(BenchmarkExt& bench); -void HaneyFortranVersion(BenchmarkExt& bench); -void HaneyBlitzVersion(BenchmarkExt& bench); - -int main() -{ - BenchmarkExt bench("Haney Inductance Calculation", 3); - - bench.setRateDescription("Operations/s"); - - bench.beginBenchmarking(); - - HaneyCVersion(bench); - HaneyFortranVersion(bench); - HaneyBlitzVersion(bench); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("haney.m"); - - return 0; -} - -void initializeRandom(float* data, int length) -{ - Random unif(1.0, 2.0); - for (int i=0; i < length; ++i) - data[i] = unif.random(); -} - -void HaneyCVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Inlined C"); - - while (!bench.doneImplementationBenchmark()) { - int length = bench.getParameter(); - long iters = bench.getIterations(); - - cout << "length = " << length << " iters = " << iters << endl; - - float* li = new float[length]; - float* R = new float[length]; - float* w = new float[length]; - - initializeRandom(li, length); - initializeRandom(R, length); - initializeRandom(w, length); - - // Tickle the cache - for (int i=0; i < length; ++i) - li[i] = R[i] + log(w[i]); - - bench.start(); - - for (long j=0; j < iters; ++j) { - for (int i=0; i < length; ++i) { - li[i] = Mu0 * R[i] * - (0.5 * (1.0 + (1.0/24.0) - * sqr(w[i]/R[i])) * log(32.0 * sqr(R[i]/w[i])) - + 0.05 * sqr(w[i]/R[i]) - 0.85); - } - } - - 
bench.stop(); - - // Subtract the loop overhead - bench.startOverhead(); - - for (long j=0; j < iters; ++j) {} - - - - bench.stopOverhead(); - - delete [] li; - delete [] w; - delete [] R; - } - - bench.endImplementation(); -} - -void HaneyFortranVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran"); - - while (!bench.doneImplementationBenchmark()) { - int length = bench.getParameter(); - int iters = (int)bench.getIterations(); - - cout << "length = " << length << " iters = " << iters << endl; - - float* li = new float[length]; - float* R = new float[length]; - float* w = new float[length]; - - initializeRandom(li, length); - initializeRandom(R, length); - initializeRandom(w, length); - - // Tickle - int oneIter = 1; - vecopsf(li, R, w, length, oneIter); - - // Time - bench.start(); - vecopsf(li, R, w, length, iters); - bench.stop(); - - // Time overhead - bench.startOverhead(); - vecopsfo(li, R, w, length, iters); - bench.stopOverhead(); - - delete [] li; - delete [] w; - delete [] R; - } - - bench.endImplementation(); -} - -void HaneyBlitzVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Blitz++"); - - while (!bench.doneImplementationBenchmark()) { - int length = bench.getParameter(); - int iters = (int)bench.getIterations(); - - Vector li(length), R(length), w(length); - initializeRandom(li.data(), length); - initializeRandom(R.data(), length); - initializeRandom(w.data(), length); - - cout << "length = " << length << " iters = " << iters << endl; - - // Tickle - li = w + log(R); - - // Time - bench.start(); - for (long i=0; i < iters; ++i) { -#if defined(__GNUC__) && (__GNUC__ < 3) - li = Mu0 * R * ( (0.5 + (0.5/24.0) * sqr(w/R) ) - * log(32.0 * sqr(R/w)) + 0.05 * sqr(w/R) - 0.85); -#else - li = Mu0 * R * (0.5 * (1.0 + (1.0/24.0) * sqr(w/R)) - * log(32.0 * sqr(R/w)) + 0.05 * sqr(w/R) - 0.85); -#endif - } - bench.stop(); - - // Time overhead - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - } - bench.stopOverhead(); - } - 
- bench.endImplementation(); -} - diff --git a/benchmarks/haneyf.f b/benchmarks/haneyf.f deleted file mode 100644 index 4731d716..00000000 --- a/benchmarks/haneyf.f +++ /dev/null @@ -1,87 +0,0 @@ -CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC -C -C AUTHOR -C Scott Haney, LLNL, swhaney@llnl.gov, (510) 423-6308 -C -C COPYRIGHT -C Copyright (c) 1995. The Regents of the University of California. -C All rights reserved. -C -C Permission to use, copy, modify, and distribute this software for any -C purpose without fee is hereby granted, provided that this entire -C notice is included in all copies of any software which is or includes -C a copy or modification of this software and in all copies of the -C a copy or modification of this software and in all copies of the -C supporting documentation for such software. -C -C This work was produced at the University of California, Lawrence -C Livermore National Laboratory under contract no. W-7405-ENG-48 between -C the U.S. Department of Energy and The Regents of the University of -C California for the operation of UC LLNL. -C -C DISCLAIMER -C This software was prepared as an account of work sponsored by an -C agency of the United States Government. Neither the United States -C Government nor the University of California nor any of their -C employees, makes any warranty, express or implied, or assumes any -C liability or responsibility for the accuracy, completeness, or -C usefulness of any information, apparatus, product, or process -C disclosed, or represents that its use would not infringe -C privately-owned rights. Reference herein to any specific commercial -C products, process, or service by trade name, trademark, manufacturer, -C privately-owned rights. 
Reference herein to any specific commercial -C products, process, or service by trade name, trademark, manufacturer, -C or otherwise, does not necessarily constitute or imply its -C endorsement, recommendation, or favoring by the United States -C Government or the University of California. The views and opinions of -C authors expressed herein do not necessarily state or reflect those of -C the United States Government or the University of California, and -C shall not be used for advertising or product endorsement purposes. -C -C -C CREATED -C July 22, 1994 -C -CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC - - subroutine vecopsf(li, R, a, n, iters) - - integer i - real li(n),R(n),a(n) - real xmu0, xm24 - parameter(xmu0 = 4.0 * 3.1415926535 * 1.0e-7) - parameter(xm24 = 1.0 / 24.0) - - do 2000 j = 1, iters - do 1000 i = 1, n - li(i) = xmu0 * R(i) * - . (0.5 * (1.0 + xm24 * (a(i) / R(i))**2) * - . log(32.0 * (R(i) / a(i))**2) + - . 0.84 * (a(i) / R(i))**2 - 0.2) - - 1000 continue - 2000 continue - - return - end - -CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC -C -C Overhead timing -C Added January 1997, Todd Veldhuizen -C -CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC - - subroutine vecopsfo(li, R, a, n, iters) - - integer i - real li(n),R(n),a(n) - real xmu0, xm24 - parameter(xmu0 = 4.0 * 3.1415926535 * 1.0e-7) - parameter(xm24 = 1.0 / 24.0) - - do 2000 j = 1, iters - 2000 continue - - return - end diff --git a/benchmarks/hao-he-mark.cpp b/benchmarks/hao-he-mark.cpp deleted file mode 100644 index b6c9577a..00000000 --- a/benchmarks/hao-he-mark.cpp +++ /dev/null @@ -1,56 +0,0 @@ -struct Complex { - - Complex(double _re, double _im) - { - re = _re; - im = _im; - } - - Complex& operator+=(const Complex& a) - { - re += a.re; - im += a.im; - } - - Complex operator*(const Complex& a) const - { - return Complex(re*a.re-im*a.im, re*a.im+im*a.re); - } - - double real() const { return re; } - double imag() const { return im; } - - 
double re, im; -}; - -#if 0 -inline Complex sqr1(const Complex& a) -{ - return Complex(a.real() * a.real() - a.imag() * a.imag(), - 2 * a.real() * a.imag()); -} - -inline Complex sqr2(const Complex& a) -{ - return Complex(a.re * a.re - a.im * a.im, 2 * a.re * a.im); -} - -inline Complex sqr3(const Complex& a) -{ - double r = a.re; - double i = a.im; - return Complex(r*r-i*i, 2*r*i); -} -#endif - -void foo(Complex& a, const Complex& b) -{ - a += sqr(b); -} - -void foo2(Complex* __restrict__ a, Complex* __restrict__ b, int N) -{ - for (int i=0; i < N; ++i) - a[i] += sqr(b[i]); -} - diff --git a/benchmarks/hao-he.cpp b/benchmarks/hao-he.cpp deleted file mode 100644 index 601fb989..00000000 --- a/benchmarks/hao-he.cpp +++ /dev/null @@ -1,150 +0,0 @@ -/* - * This benchmark times the performance of B += sqr(A), where A and B - * are complex arrays. - * - * Note: need to use -mv8 for SPARC v8. - */ - -#include -#include - -using namespace blitz; - -#include - -typedef Array,1> CArray; - -void setup(Array,1>& A, Array,1>& B) -{ - int n = A.extent(firstDim); - - for (int i=0; i < n; ++i) - { - double x=-10.+20./(n-1.0)*i; - A(i)=sin(x); - B(i)=sin(x); - } -} - -void version1(CArray& A, CArray& B, int nIters) -{ - Timer timer; - - // Array notation - setup(A, B); - timer.start(); - for (int i=0; i < nIters; ++i) - { - B += A*A; - } - timer.stop(); - cout << "Time using array notation b += a*a: " << timer.elapsedSeconds() - << endl; -} - -void version2(CArray& A, CArray& B, int nIters) -{ - // Array notation, using sqr(a) - Timer timer; - - setup(A,B); - timer.start(); - for (int i=0; i < nIters; ++i) - { - B += sqr(A); - } - timer.stop(); - cout << "Time using array notation b += sqr(a): " << timer.elapsedSeconds() - << endl; -} - -void version2c(CArray& A, CArray& B) -{ - B += sqr(A); -} - -void version2b(CArray& A, CArray& B, int nIters) -{ - // Array notation, using sqr(a) - Timer timer; - - setup(A,B); - timer.start(); - for (int i=0; i < nIters; ++i) - { - 
version2c(A,B); - } - timer.stop(); - cout << "Time using array notation b += sqr(a): " << timer.elapsedSeconds() - << endl; -} - -void version3(CArray& A, CArray& B, int nIters) -{ - Timer timer; - - int N = A.extent(firstDim); - - // Low-level implementation - setup(A,B); - timer.start(); - for (int i=0; i < nIters; ++i) - { - for (int j=0; j < N; ++j) - B(j) += A(j) * A(j); - } - timer.stop(); - cout << "Time using low-level version: " << timer.elapsedSeconds() - << endl; -} - -void version4(CArray& A, CArray& B, int nIters) -{ - Timer timer; - - struct cmplx { - double re, im; - }; - cmplx* a = (cmplx*)A.data(); - cmplx* b = (cmplx*)B.data(); - setup(A,B); - int N = A.extent(firstDim); - - timer.start(); - for (int i=0; i < nIters; ++i) - { - for (int j=0; j < N; ++j) - { - double ar = a[j].re; - double ai = a[j].im; - b[j].re += ar*ar - ai*ai; - b[j].im += 2 * ar * ai; - } - } - timer.stop(); - cout << "Time using really low-level version: " << timer.elapsedSeconds() - << endl; -} - -int run(int N, int nIters) -{ - Array,1> A(N), B(N); - - version1(A,B,nIters); - version2(A,B,nIters); - version2b(A,B,nIters); - version3(A,B,nIters); - version4(A,B,nIters); - - return 0; -} - -int main() -{ - cout << "In-cache:" << endl; - run(256,39063); - - cout << endl << "Out-of-cache:" << endl; - run(1000000,10); -} - diff --git a/benchmarks/iter.cpp b/benchmarks/iter.cpp deleted file mode 100644 index 352bb602..00000000 --- a/benchmarks/iter.cpp +++ /dev/null @@ -1,33 +0,0 @@ -#include -#include -#include - -int main() { - using namespace blitz; - - typedef Array Image; - Image A(512,512,512); - Timer timer; - - A = 0.0; - timer.start(); - for (unsigned j=0;j<10;++j) { - for (Image::iterator i=A.begin(),end=A.end();i!=end;++i) { - const TinyVector pos = i.position(); - *i += pos(0)+pos(1)+pos(2); - } - } - timer.stop(); - double flops = 10.0*512*512*512*2; - double seconds = timer.elapsed(); - - double timePerOp = seconds / flops; - - cout << "ops = " << flops << endl - 
<< "seconds = " << seconds << endl; - - double Mflops = flops / seconds / 1.0e+6; - cout << "Mflops = " << Mflops << endl; - - return 0; -} diff --git a/benchmarks/kepler.cpp b/benchmarks/kepler.cpp deleted file mode 100644 index b3b2d791..00000000 --- a/benchmarks/kepler.cpp +++ /dev/null @@ -1,243 +0,0 @@ -#include -#include - -using namespace blitz; - -/* - * Test a 12th order symmetric multistep method for solving the equations - * of motion of a single planet circling the Sun. The Sun is fixed in - * space. - * - * Original F77 version written by John K. Prentice, Quetzal Computational - * Associates, 21 Decmber 1992 - * - * Blitz++ version by Todd Veldhuizen, 17 August 1997 - * The C++ version is a faithful translation of the Fortran 90 version, - * so apologies for the "C++Tran" style. - */ - -inline double relativeError(double a, double b) -{ - if (b != 0.0) - return (a - b) / b; - else - return a; -} - -int main() -{ - Vector x_position_numerical(13), y_position_numerical(13), - alpha(13), beta(13), gamma(13), x_acceleration(13), y_acceleration(13); - - /* - * 12th order symmetric method coefficients - * - * Reference: "Symmetric Multistep Methods for the Numerical - * Integration of Planetary Orbits", G. D. Quinlan and - * S. Tremaine, The Astronomical Journal, 100 (1990), page 1695. - * - * Note!! The beta below are actually 53,222,400 times the - * real beta. This common factor is divided out in the - * symmetric multistep calculation itself, in order to minimize - * round-off - */ - - const double beta_factor = 53222400.0; - alpha = 1.0, -2.0, 2.0, -1.0, 0.0, 0.0, 0.0, 0.0, 0.0, -1.0, 2.0, -2.0, 1.0; - beta = 0.0, 90987349.0, -229596838.0, 812627169.0, -1628539944.0, - 2714971338.0, -3041896548.0, 2714971338.0, -1628539944.0, - 812627169.0, -229596838.0, 90987349.0, 0.0; - - /* - * 12th order Cowell predictor coefficients - * - * Reference: "Astronomical Papers Prepared for the Use of the - * American Ephemeris and Nautical Almanac", C. J. Cohen, E. C. 
- * Hubbard, and C. Oesterwinter, 22 (1973), page 20-21. - * - * Note!! The gamma below are actually 1,743,565,824,000 times - * the real gamma. This common factor is divided out in the - * Cowell predictor calculation itself, in order to minimize - * round-off - */ - - const double gamma_factor = 1743565824000.0; - gamma = 9072652009253.0, -39726106418680.0, 140544566352762.0, - -344579280210129.0, 613137294629235.0, -811345852376496.0, - 807012356281740.0, -602852367932304.0, 333888089374395.0, - -133228219027160.0, 36262456774618.0, -6033724094760.0, - 463483373517.0; - - // Initialize variables - - const double time_step = 0.25, - stop_time = 365000.0, - radius = 1.0; - double time = - time_step; - - cout << " Position solution via 12th order symmetric multistep method\n" - << " Velocity solution via 12th order Cowell predictor method\n" - << " radius = " << radius << ", time step = " << time_step - << endl; - - // Define a constant which is needed later by the exact solution - const double gaussian_constant = 0.01720209895; - const double gravitational_constant = pow(gaussian_constant,2); - const double constant = sqrt(gravitational_constant/pow(radius,3)); - - // Initialize the first 12 numerical values using the exact values - - double x_position_exact, y_position_exact; - - for (int j=-1; j <= 11; ++j) - { - if (j >= 0) - time += time_step; - - x_position_exact = radius * cos(constant * time); - y_position_exact = radius * sin(constant * time); - - if (j >= 0) - { - x_position_numerical(j) = x_position_exact; - y_position_numerical(j) = y_position_exact; - } - - x_acceleration(j+1) = -gravitational_constant/pow(radius,3) - * x_position_exact; - y_acceleration(j+1) = -gravitational_constant/pow(radius,3) - * y_position_exact; - } - - /* - * Compute exact kinetic and potential energies, and the - * angular momentum. These values are all divided by the mass - * of the object. 
Since they are conserved, they will never change - * and hence do not have to be recalculated later. - */ - - double x_dot_exact = -radius * constant * sin(constant*time), - y_dot_exact = radius * constant * cos(constant*time), - exact_velocity_squared = pow(x_dot_exact,2) + pow(y_dot_exact,2), - exact_kinetic_energy = 0.5 * exact_velocity_squared, - exact_potential_energy = -gravitational_constant / radius, - exact_total_energy = exact_potential_energy + exact_kinetic_energy, - exact_angular_momentum = x_position_exact * y_dot_exact - - y_position_exact * x_dot_exact; - - double x_dot_numerical, y_dot_numerical; - - // Perform loop over time - - while (time <= stop_time) - { - // Advance time step (eek!) - time += time_step; - - // Calculate new acceleration of body at time=time-time_step - double numerical_radius_squared = pow(x_position_numerical(11),2) - + pow(y_position_numerical(11),2); - x_acceleration(12) = -gravitational_constant - / pow(numerical_radius_squared, 1.5) * x_position_numerical(11); - y_acceleration(12) = -gravitational_constant - / pow(numerical_radius_squared, 1.5) * y_position_numerical(11); - - // Numerically solve for the new positions using a 12th order - // symmetric multistep method. - - // First sum the first and second terms - - double x_alpha_sum = dot(alpha(Range(0,11)), - x_position_numerical(Range(0,11))); - double y_alpha_sum = dot(alpha(Range(0,11)), - y_position_numerical(Range(0,11))); - - double x_beta_sum = dot(beta(Range(0,11)), x_acceleration(Range(1,12))); - double y_beta_sum = dot(beta(Range(0,11)), y_acceleration(Range(1,12))); - x_position_numerical(12) = (-x_alpha_sum) + pow(time_step,2) - * (x_beta_sum / beta_factor); - y_position_numerical(12) = (-y_alpha_sum) + pow(time_step,2) - * (y_beta_sum / beta_factor); - - // Numerically solve for the new velocities using a 12th order - // Cowell predictor method. 
- - // First sum the gamma terms - - double x_gamma_sum = dot(gamma, x_acceleration.reverse()), - y_gamma_sum = dot(gamma, y_acceleration.reverse()); - - x_dot_numerical = (x_position_numerical(11) - - x_position_numerical(10)) / time_step + time_step - * (x_gamma_sum / gamma_factor); - y_dot_numerical = (y_position_numerical(11) - - y_position_numerical(10)) / time_step + time_step - * (y_gamma_sum / gamma_factor); - - // Push the stack down one - - for (int j=0; j <= 11; ++j) - { - x_position_numerical(j) = x_position_numerical(j+1); - y_position_numerical(j) = y_position_numerical(j+1); - x_acceleration(j) = x_acceleration(j+1); - y_acceleration(j) = y_acceleration(j+1); - } - } - - // Print results - - // First compute energies and angular momenta (add divided by the mass - // of the object) - - double numerical_velocity_squared = pow(x_dot_numerical,2) + - pow(y_dot_numerical,2), - numerical_radius = sqrt(pow(x_position_numerical(12),2) - + pow(y_position_numerical(12),2)), - numerical_kinetic_energy = 0.5 * numerical_velocity_squared, - numerical_potential_energy = -gravitational_constant - / numerical_radius, - numerical_total_energy = numerical_potential_energy - + numerical_kinetic_energy, - numerical_angular_momentum = x_position_numerical(12) - * y_dot_numerical - y_position_numerical(12) * x_dot_numerical; - - // Compute exact results for comparison to the numerical results - - x_position_exact = radius * cos(constant * time); - y_position_exact = radius * sin(constant * time); - x_dot_exact = -radius * constant * sin(constant * time); - y_dot_exact = radius * constant * cos(constant * time); - - // Next compute relative errors - - double radius_error = relativeError(numerical_radius, radius), - x_error = relativeError(x_position_numerical(12), x_position_exact), - y_error = relativeError(y_position_numerical(12), y_position_exact), - x_dot_error = relativeError(x_dot_numerical, x_dot_exact), - y_dot_error = relativeError(y_dot_numerical, y_dot_exact); 
- - double kinetic_energy_error = relativeError(numerical_kinetic_energy, - exact_kinetic_energy), - potential_energy_error = relativeError(numerical_potential_energy, - exact_potential_energy), - total_energy_error = relativeError(numerical_total_energy, - exact_total_energy), - angular_momentum_error = relativeError(numerical_angular_momentum, - exact_angular_momentum); - - cout << " Time = " << time << endl - << " x rel error = " << x_error << " y rel error = " << y_error - << endl - << " vx rel error = " << x_dot_error << " vy rel error = " - << y_dot_error << endl - << " KE rel error = " << kinetic_energy_error - << " PE rel error = " << potential_energy_error << endl - << " TE rel error = " << total_energy_error << " AM rel error = " - << angular_momentum_error << endl - << " numerical radius = " << numerical_radius - << " radius rel error = " << radius_error << endl; - - return 0; -} - diff --git a/benchmarks/loop1-bug.cpp b/benchmarks/loop1-bug.cpp deleted file mode 100644 index c136ff06..00000000 --- a/benchmarks/loop1-bug.cpp +++ /dev/null @@ -1,22 +0,0 @@ -// In KAI C++ 3.2, restrict causes problems for copy propagation. 
-// Temporary fix: disable restrict - -#define BZ_DISABLE_RESTRICT - -#include - -using namespace blitz; - -template void sink(T&) { } - -void foo(int N) -{ - Vector x(N); - Vector y(N); - - x=sqrt(y); - - sink(x); - sink(y); -} - diff --git a/benchmarks/loop1.cpp b/benchmarks/loop1.cpp deleted file mode 100644 index a23a2a14..00000000 --- a/benchmarks/loop1.cpp +++ /dev/null @@ -1,438 +0,0 @@ - -// loop1 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define loop1_f77 loop1_f77_ - #define loop1_f77overhead loop1_f77overhead_ - #define loop1_f90 loop1_f90_ - #define loop1_f90overhead loop1_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define loop1_f77 loop1_f77__ - #define loop1_f77overhead loop1_f77overhead__ - #define loop1_f90 loop1_f90__ - #define loop1_f90overhead loop1_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define loop1_f77 LOOP1_F77 - #define loop1_f77overhead LOOP1_F77OVERHEAD - #define loop1_f90 LOOP1_F90 - #define loop1_f90overhead LOOP1_F90OVERHEAD -#endif - -extern "C" { - void loop1_f77(const int& N, double* x, double* y); - void loop1_f77overhead(const int& N, double* x, double* y); - void loop1_f90(const int& N, double* x, double* y); - void loop1_f90overhead(const int& N, double* x, double* y); - -} - -void VectorVersion(BenchmarkExt& bench); -void ArrayVersion(BenchmarkExt& bench); -void ArrayVersion_unaligned(BenchmarkExt& bench); -void ArrayVersion_misaligned(BenchmarkExt& bench); -void ArrayVersion_index(BenchmarkExt& bench); -void doTinyVectorVersion(BenchmarkExt& bench); -void F77Version(BenchmarkExt& bench); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench); -#endif -#ifdef 
BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("loop1: $x = sqrt($y)", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 1 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - - - ArrayVersion(bench); - ArrayVersion_unaligned(bench); - ArrayVersion_misaligned(bench); - ArrayVersion_index(bench); - //doTinyVectorVersion(bench); - F77Version(bench); -#ifdef FORTRAN_90 - F90Version(bench); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench); -#endif - - if(runvector) - VectorVersion(bench); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("loop1.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - 
initializeRandomDouble(x.data(), N); - Vector y(N); - initializeRandomDouble(y.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = sqrt(y); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = sqrt(y); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = sqrt(y(tensor::i));; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - 
initializeRandomDouble(x.dataFirst(), N); - - Array yfill(N+1); - Array y(yfill(Range(1,N))); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = sqrt(y); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+2); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array yfill(N+2); - Array y(yfill(Range(1,N+1-1))); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = sqrt(y); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray y(N); - initializeRandomDouble(y, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = sqrt(y); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " 
<< N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* y = new double[N]; - initializeRandomDouble(y, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop1_f77(N, x, y); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop1_f77overhead(N, x, y); - - bench.stopOverhead(); - - delete [] x; - delete [] y; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* y = new double[N]; - initializeRandomDouble(y, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop1_f90(N, x, y); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop1_f90overhead(N, x, y); - - bench.stopOverhead(); - delete [] x; - delete [] y; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/loop10.cpp b/benchmarks/loop10.cpp deleted file mode 100644 index d1c581a1..00000000 --- a/benchmarks/loop10.cpp +++ /dev/null @@ -1,483 +0,0 @@ - -// loop10 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define loop10_f77 loop10_f77_ - #define loop10_f77overhead loop10_f77overhead_ - #define loop10_f90 loop10_f90_ - #define loop10_f90overhead loop10_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define loop10_f77 
loop10_f77__ - #define loop10_f77overhead loop10_f77overhead__ - #define loop10_f90 loop10_f90__ - #define loop10_f90overhead loop10_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define loop10_f77 LOOP10_F77 - #define loop10_f77overhead LOOP10_F77OVERHEAD - #define loop10_f90 LOOP10_F90 - #define loop10_f90overhead LOOP10_F90OVERHEAD -#endif - -extern "C" { - void loop10_f77(const int& N, double* x, double* a, double* b, double* c, const double& u); - void loop10_f77overhead(const int& N, double* x, double* a, double* b, double* c, const double& u); - void loop10_f90(const int& N, double* x, double* a, double* b, double* c, const double& u); - void loop10_f90overhead(const int& N, double* x, double* a, double* b, double* c, const double& u); - -} - -void VectorVersion(BenchmarkExt& bench, double u); -void ArrayVersion(BenchmarkExt& bench, double u); -void ArrayVersion_unaligned(BenchmarkExt& bench, double u); -void ArrayVersion_misaligned(BenchmarkExt& bench, double u); -void ArrayVersion_index(BenchmarkExt& bench, double u); -void doTinyVectorVersion(BenchmarkExt& bench, double u); -void F77Version(BenchmarkExt& bench, double u); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, double u); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, double u); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("loop10: $x = u+$a+$b+$c", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 3 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); 
- bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - double u = 0.39123982498157938742; - - - ArrayVersion(bench, u); - ArrayVersion_unaligned(bench, u); - ArrayVersion_misaligned(bench, u); - ArrayVersion_index(bench, u); - //doTinyVectorVersion(bench, u); - F77Version(bench, u); -#ifdef FORTRAN_90 - F90Version(bench, u); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench, u); -#endif - - if(runvector) - VectorVersion(bench, u); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("loop10.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - Vector c(N); - initializeRandomDouble(c.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u+a+b+c; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << 
bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u+a+b+c; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u+a(tensor::i)+b(tensor::i)+c(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+1); - Array c(cfill(Range(1,N))); - 
initializeRandomDouble(c.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u+a+b+c; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+4); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+4); - Array a(afill(Range(1,N+1-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+4); - Array b(bfill(Range(2,N+2-1))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+4); - Array c(cfill(Range(3,N+3-1))); - initializeRandomDouble(c.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u+a+b+c; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - valarray c(N); - initializeRandomDouble(c, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u+a+b+c; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif 
- -void F77Version(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - double* c = new double[N]; - initializeRandomDouble(c, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop10_f77(N, x, a, b, c, u); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop10_f77overhead(N, x, a, b, c, u); - - bench.stopOverhead(); - - delete [] x; - delete [] a; - delete [] b; - delete [] c; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - double* c = new double[N]; - initializeRandomDouble(c, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop10_f90(N, x, a, b, c, u); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop10_f90overhead(N, x, a, b, c, u); - - bench.stopOverhead(); - delete [] x; - delete [] a; - delete [] b; - delete [] c; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/loop100.cpp b/benchmarks/loop100.cpp deleted file mode 100644 index f8f44c86..00000000 --- a/benchmarks/loop100.cpp +++ /dev/null @@ -1,508 +0,0 @@ - -// loop100 generated by makeloops.py Thu 
Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define loop100_f77 loop100_f77_ - #define loop100_f77overhead loop100_f77overhead_ - #define loop100_f90 loop100_f90_ - #define loop100_f90overhead loop100_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define loop100_f77 loop100_f77__ - #define loop100_f77overhead loop100_f77overhead__ - #define loop100_f90 loop100_f90__ - #define loop100_f90overhead loop100_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define loop100_f77 LOOP100_F77 - #define loop100_f77overhead LOOP100_F77OVERHEAD - #define loop100_f90 LOOP100_F90 - #define loop100_f90overhead LOOP100_F90OVERHEAD -#endif - -extern "C" { - void loop100_f77(const int& N, double* a, double* b, double* c, double* d, double* x, const double& u, const double& v, const double& w, const double& z); - void loop100_f77overhead(const int& N, double* a, double* b, double* c, double* d, double* x, const double& u, const double& v, const double& w, const double& z); - void loop100_f90(const int& N, double* a, double* b, double* c, double* d, double* x, const double& u, const double& v, const double& w, const double& z); - void loop100_f90overhead(const int& N, double* a, double* b, double* c, double* d, double* x, const double& u, const double& v, const double& w, const double& z); - -} - -void VectorVersion(BenchmarkExt& bench, double u, double v, double w, double z); -void ArrayVersion(BenchmarkExt& bench, double u, double v, double w, double z); -void ArrayVersion_unaligned(BenchmarkExt& bench, double u, double v, double w, double z); -void ArrayVersion_misaligned(BenchmarkExt& bench, double u, double v, double w, double z); -void 
ArrayVersion_index(BenchmarkExt& bench, double u, double v, double w, double z); -void doTinyVectorVersion(BenchmarkExt& bench, double u, double v, double w, double z); -void F77Version(BenchmarkExt& bench, double u, double v, double w, double z); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, double u, double v, double w, double z); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, double u, double v, double w, double z); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("loop100: $x=(1.0-$c*$c)/((4*w)*sin(1.0+$c*$c-2*v*$c))*$a*$b*u*exp(-z*$d)", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 18 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - double u = 0.39123982498157938742; - double v = 0.39123982498157938742; - double w = 0.39123982498157938742; - double z = 0.39123982498157938742; - - - ArrayVersion(bench, u, v, w, z); - ArrayVersion_unaligned(bench, u, v, w, z); - ArrayVersion_misaligned(bench, u, v, w, z); - ArrayVersion_index(bench, u, v, w, z); - //doTinyVectorVersion(bench, u, v, w, z); - F77Version(bench, u, v, w, z); -#ifdef FORTRAN_90 - F90Version(bench, u, v, w, z); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench, u, v, w, z); -#endif - - if(runvector) - VectorVersion(bench, u, v, w, z); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("loop100.m"); - return 0; -} 
- -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench, double u, double v, double w, double z) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - Vector c(N); - initializeRandomDouble(c.data(), N); - Vector d(N); - initializeRandomDouble(d.data(), N); - Vector x(N); - initializeRandomDouble(x.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x=(1.0-c*c)/((4*w)*sin(1.0+c*c-2*v*c))*a*b*u*exp(-z*d); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench, double u, double v, double w, double z) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x=(1.0-c*c)/((4*w)*sin(1.0+c*c-2*v*c))*a*b*u*exp(-z*d); - 
sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench, double u, double v, double w, double z) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x=(1.0-c(tensor::i)*c(tensor::i))/((4*w)*sin(1.0+c(tensor::i)*c(tensor::i)-2*v*c(tensor::i)))*a(tensor::i)*b(tensor::i)*u*exp(-z*d(tensor::i));; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench, double u, double v, double w, double z) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+1); - Array c(cfill(Range(1,N))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+1); - Array d(dfill(Range(1,N))); - initializeRandomDouble(d.dataFirst(), N); - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - - 
bench.start(); - for (long i=0; i < iters; ++i) - { - x=(1.0-c*c)/((4*w)*sin(1.0+c*c-2*v*c))*a*b*u*exp(-z*d); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench, double u, double v, double w, double z) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array afill(N+5); - Array a(afill(Range(0,N+0-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+5); - Array b(bfill(Range(1,N+1-1))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+5); - Array c(cfill(Range(2,N+2-1))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+5); - Array d(dfill(Range(3,N+3-1))); - initializeRandomDouble(d.dataFirst(), N); - - Array xfill(N+5); - Array x(xfill(Range(4,N+4-1))); - initializeRandomDouble(x.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x=(1.0-c*c)/((4*w)*sin(1.0+c*c-2*v*c))*a*b*u*exp(-z*d); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, double u, double v, double w, double z) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - valarray c(N); - initializeRandomDouble(c, N); - valarray d(N); - initializeRandomDouble(d, N); - valarray x(N); - initializeRandomDouble(x, N); - 
- - bench.start(); - for (long i=0; i < iters; ++i) - { - x=(1.0-c*c)/((4*w)*sin(1.0+c*c-2*v*c))*a*b*u*exp(-z*d); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench, double u, double v, double w, double z) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - double* c = new double[N]; - initializeRandomDouble(c, N); - double* d = new double[N]; - initializeRandomDouble(d, N); - double* x = new double[N]; - initializeRandomDouble(x, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop100_f77(N, a, b, c, d, x, u, v, w, z); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop100_f77overhead(N, a, b, c, d, x, u, v, w, z); - - bench.stopOverhead(); - - delete [] a; - delete [] b; - delete [] c; - delete [] d; - delete [] x; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, double u, double v, double w, double z) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - double* c = new double[N]; - initializeRandomDouble(c, N); - double* d = new double[N]; - initializeRandomDouble(d, N); - double* x = new double[N]; - initializeRandomDouble(x, N); - - - bench.start(); - for (int iter=0; iter < iters; 
++iter) - loop100_f90(N, a, b, c, d, x, u, v, w, z); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop100_f90overhead(N, a, b, c, d, x, u, v, w, z); - - bench.stopOverhead(); - delete [] a; - delete [] b; - delete [] c; - delete [] d; - delete [] x; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/loop100f.f b/benchmarks/loop100f.f deleted file mode 100644 index e5d29204..00000000 --- a/benchmarks/loop100f.f +++ /dev/null @@ -1,18 +0,0 @@ - - SUBROUTINE loop100_F77(N, a, b, c, d, x, u, v, w, z) - INTEGER i, N - REAL*8 a(N), b(N), c(N), d(N), x(N), u, v, w, z - - DO i=1,N - x(i)=(1.0-c(i)*c(i))/((4*w)*sin(1.0+c(i)*c(i)-2*v*c(i)))*a(i - !)*b(i)*u*exp(-z*d(i)); - END DO - RETURN - END - - - SUBROUTINE loop100_F77Overhead(N, a, b, c, d, x, u, v, w, z) - INTEGER i, N - REAL*8 a(N), b(N), c(N), d(N), x(N), u, v, w, z - RETURN - END diff --git a/benchmarks/loop100f90.f90 b/benchmarks/loop100f90.f90 deleted file mode 100644 index 2df34734..00000000 --- a/benchmarks/loop100f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE loop100_F90(N, a, b, c, d, x, u, v, w, z) - INTEGER i, N - REAL*8 a(N), b(N), c(N), d(N), x(N), u, v, w, z - - x=(1.0-c*c)/((4*w)*sin(1.0+c*c-2*v*c))*a*b*u*exp(-z*d) - RETURN - END - - - SUBROUTINE loop100_F90Overhead(N, a, b, c, d, x, u, v, w, z) - INTEGER i, N - REAL*8 a(N), b(N), c(N), d(N), x(N), u, v, w, z - - RETURN - END diff --git a/benchmarks/loop10f.f b/benchmarks/loop10f.f deleted file mode 100644 index fb4a71c9..00000000 --- a/benchmarks/loop10f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE loop10_F77(N, x, a, b, c, u) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), u - - DO i=1,N - x(i) = u+a(i)+b(i)+c(i); - END DO - RETURN - END - - - SUBROUTINE loop10_F77Overhead(N, x, a, b, c, u) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), u - RETURN - END diff --git a/benchmarks/loop10f90.f90 b/benchmarks/loop10f90.f90 deleted file mode 100644 index 7db1ba9f..00000000 --- 
a/benchmarks/loop10f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE loop10_F90(N, x, a, b, c, u) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), u - - x = u+a+b+c - RETURN - END - - - SUBROUTINE loop10_F90Overhead(N, x, a, b, c, u) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), u - - RETURN - END diff --git a/benchmarks/loop11.cpp b/benchmarks/loop11.cpp deleted file mode 100644 index 785dd498..00000000 --- a/benchmarks/loop11.cpp +++ /dev/null @@ -1,504 +0,0 @@ - -// loop11 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define loop11_f77 loop11_f77_ - #define loop11_f77overhead loop11_f77overhead_ - #define loop11_f90 loop11_f90_ - #define loop11_f90overhead loop11_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define loop11_f77 loop11_f77__ - #define loop11_f77overhead loop11_f77overhead__ - #define loop11_f90 loop11_f90__ - #define loop11_f90overhead loop11_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define loop11_f77 LOOP11_F77 - #define loop11_f77overhead LOOP11_F77OVERHEAD - #define loop11_f90 LOOP11_F90 - #define loop11_f90overhead LOOP11_F90OVERHEAD -#endif - -extern "C" { - void loop11_f77(const int& N, double* x, double* a, double* b, double* c, double* d); - void loop11_f77overhead(const int& N, double* x, double* a, double* b, double* c, double* d); - void loop11_f90(const int& N, double* x, double* a, double* b, double* c, double* d); - void loop11_f90overhead(const int& N, double* x, double* a, double* b, double* c, double* d); - -} - -void VectorVersion(BenchmarkExt& bench); -void ArrayVersion(BenchmarkExt& bench); -void ArrayVersion_unaligned(BenchmarkExt& bench); -void 
ArrayVersion_misaligned(BenchmarkExt& bench); -void ArrayVersion_index(BenchmarkExt& bench); -void doTinyVectorVersion(BenchmarkExt& bench); -void F77Version(BenchmarkExt& bench); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("loop11: $x = $a+$b+$c+$d", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 3 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - - - ArrayVersion(bench); - ArrayVersion_unaligned(bench); - ArrayVersion_misaligned(bench); - ArrayVersion_index(bench); - //doTinyVectorVersion(bench); - F77Version(bench); -#ifdef FORTRAN_90 - F90Version(bench); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench); -#endif - - if(runvector) - VectorVersion(bench); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("loop11.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench) -{ - 
bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - Vector c(N); - initializeRandomDouble(c.data(), N); - Vector d(N); - initializeRandomDouble(d.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b+c+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b+c+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - 
initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a(tensor::i)+b(tensor::i)+c(tensor::i)+d(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+1); - Array c(cfill(Range(1,N))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+1); - Array d(dfill(Range(1,N))); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b+c+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+5); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+5); - Array a(afill(Range(1,N+1-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+5); - Array 
b(bfill(Range(2,N+2-1))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+5); - Array c(cfill(Range(3,N+3-1))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+5); - Array d(dfill(Range(4,N+4-1))); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b+c+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - valarray c(N); - initializeRandomDouble(c, N); - valarray d(N); - initializeRandomDouble(d, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b+c+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - double* c = new double[N]; - initializeRandomDouble(c, N); - double* d = new double[N]; - initializeRandomDouble(d, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop11_f77(N, x, a, b, c, d); - 
bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop11_f77overhead(N, x, a, b, c, d); - - bench.stopOverhead(); - - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - double* c = new double[N]; - initializeRandomDouble(c, N); - double* d = new double[N]; - initializeRandomDouble(d, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop11_f90(N, x, a, b, c, d); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop11_f90overhead(N, x, a, b, c, d); - - bench.stopOverhead(); - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/loop11f.f b/benchmarks/loop11f.f deleted file mode 100644 index 79a2cce1..00000000 --- a/benchmarks/loop11f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE loop11_F77(N, x, a, b, c, d) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), d(N) - - DO i=1,N - x(i) = a(i)+b(i)+c(i)+d(i); - END DO - RETURN - END - - - SUBROUTINE loop11_F77Overhead(N, x, a, b, c, d) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), d(N) - RETURN - END diff --git a/benchmarks/loop11f90.f90 b/benchmarks/loop11f90.f90 deleted file mode 100644 index f8066b3c..00000000 --- a/benchmarks/loop11f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE loop11_F90(N, x, a, b, c, d) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), d(N) - - x = a+b+c+d - RETURN - END - - - 
SUBROUTINE loop11_F90Overhead(N, x, a, b, c, d) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), d(N) - - RETURN - END diff --git a/benchmarks/loop12.cpp b/benchmarks/loop12.cpp deleted file mode 100644 index 5a8aaf5f..00000000 --- a/benchmarks/loop12.cpp +++ /dev/null @@ -1,527 +0,0 @@ - -// loop12 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define loop12_f77 loop12_f77_ - #define loop12_f77overhead loop12_f77overhead_ - #define loop12_f90 loop12_f90_ - #define loop12_f90overhead loop12_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define loop12_f77 loop12_f77__ - #define loop12_f77overhead loop12_f77overhead__ - #define loop12_f90 loop12_f90__ - #define loop12_f90overhead loop12_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define loop12_f77 LOOP12_F77 - #define loop12_f77overhead LOOP12_F77OVERHEAD - #define loop12_f90 LOOP12_F90 - #define loop12_f90overhead LOOP12_F90OVERHEAD -#endif - -extern "C" { - void loop12_f77(const int& N, double* y, double* x, double* a, double* b, double* c, double* d, const double& u); - void loop12_f77overhead(const int& N, double* y, double* x, double* a, double* b, double* c, double* d, const double& u); - void loop12_f90(const int& N, double* y, double* x, double* a, double* b, double* c, double* d, const double& u); - void loop12_f90overhead(const int& N, double* y, double* x, double* a, double* b, double* c, double* d, const double& u); - -} - -void VectorVersion(BenchmarkExt& bench, double u); -void ArrayVersion(BenchmarkExt& bench, double u); -void ArrayVersion_unaligned(BenchmarkExt& bench, double u); -void ArrayVersion_misaligned(BenchmarkExt& bench, double u); 
-void ArrayVersion_index(BenchmarkExt& bench, double u); -void doTinyVectorVersion(BenchmarkExt& bench, double u); -void F77Version(BenchmarkExt& bench, double u); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, double u); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, double u); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("loop12: $y = u+$a; $x = $a+$b+$c+$d", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 4 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - double u = 0.39123982498157938742; - - - ArrayVersion(bench, u); - ArrayVersion_unaligned(bench, u); - ArrayVersion_misaligned(bench, u); - ArrayVersion_index(bench, u); - //doTinyVectorVersion(bench, u); - F77Version(bench, u); -#ifdef FORTRAN_90 - F90Version(bench, u); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench, u); -#endif - - if(runvector) - VectorVersion(bench, u); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("loop12.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = 
rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector y(N); - initializeRandomDouble(y.data(), N); - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - Vector c(N); - initializeRandomDouble(c.data(), N); - Vector d(N); - initializeRandomDouble(d.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - y = u+a; x = a+b+c+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - y = u+a; x = a+b+c+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = 
bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - y = u+a(tensor::i); x = a(tensor::i)+b(tensor::i)+c(tensor::i)+d(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array yfill(N+1); - Array y(yfill(Range(1,N))); - initializeRandomDouble(y.dataFirst(), N); - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+1); - Array c(cfill(Range(1,N))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+1); - Array d(dfill(Range(1,N))); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - y = u+a; x = a+b+c+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Array (misal.)"); - - 
while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array yfill(N+6); - Array y(yfill(Range(0,N+0-1))); - initializeRandomDouble(y.dataFirst(), N); - - Array xfill(N+6); - Array x(xfill(Range(1,N+1-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+6); - Array a(afill(Range(2,N+2-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+6); - Array b(bfill(Range(3,N+3-1))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+6); - Array c(cfill(Range(4,N+4-1))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+6); - Array d(dfill(Range(5,N+5-1))); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - y = u+a; x = a+b+c+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray y(N); - initializeRandomDouble(y, N); - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - valarray c(N); - initializeRandomDouble(c, N); - valarray d(N); - initializeRandomDouble(d, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - y = u+a; x = a+b+c+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench, double u) -{ - 
bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* y = new double[N]; - initializeRandomDouble(y, N); - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - double* c = new double[N]; - initializeRandomDouble(c, N); - double* d = new double[N]; - initializeRandomDouble(d, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop12_f77(N, y, x, a, b, c, d, u); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop12_f77overhead(N, y, x, a, b, c, d, u); - - bench.stopOverhead(); - - delete [] y; - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* y = new double[N]; - initializeRandomDouble(y, N); - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - double* c = new double[N]; - initializeRandomDouble(c, N); - double* d = new double[N]; - initializeRandomDouble(d, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop12_f90(N, y, x, a, b, c, d, u); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop12_f90overhead(N, y, x, a, b, c, d, u); - - bench.stopOverhead(); - delete [] y; - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete 
[] d; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/loop12f.f b/benchmarks/loop12f.f deleted file mode 100644 index 076c9ebf..00000000 --- a/benchmarks/loop12f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE loop12_F77(N, y, x, a, b, c, d, u) - INTEGER i, N - REAL*8 y(N), x(N), a(N), b(N), c(N), d(N), u - - DO i=1,N - y(i) = u+a(i); x(i) = a(i)+b(i)+c(i)+d(i); - END DO - RETURN - END - - - SUBROUTINE loop12_F77Overhead(N, y, x, a, b, c, d, u) - INTEGER i, N - REAL*8 y(N), x(N), a(N), b(N), c(N), d(N), u - RETURN - END diff --git a/benchmarks/loop12f90.f90 b/benchmarks/loop12f90.f90 deleted file mode 100644 index 479305a6..00000000 --- a/benchmarks/loop12f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE loop12_F90(N, y, x, a, b, c, d, u) - INTEGER i, N - REAL*8 y(N), x(N), a(N), b(N), c(N), d(N), u - - y = u+a; x = a+b+c+d - RETURN - END - - - SUBROUTINE loop12_F90Overhead(N, y, x, a, b, c, d, u) - INTEGER i, N - REAL*8 y(N), x(N), a(N), b(N), c(N), d(N), u - - RETURN - END diff --git a/benchmarks/loop13.cpp b/benchmarks/loop13.cpp deleted file mode 100644 index 7688b5e3..00000000 --- a/benchmarks/loop13.cpp +++ /dev/null @@ -1,527 +0,0 @@ - -// loop13 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define loop13_f77 loop13_f77_ - #define loop13_f77overhead loop13_f77overhead_ - #define loop13_f90 loop13_f90_ - #define loop13_f90overhead loop13_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define loop13_f77 loop13_f77__ - #define loop13_f77overhead loop13_f77overhead__ - #define loop13_f90 loop13_f90__ - #define loop13_f90overhead loop13_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) 
- #define loop13_f77 LOOP13_F77 - #define loop13_f77overhead LOOP13_F77OVERHEAD - #define loop13_f90 LOOP13_F90 - #define loop13_f90overhead LOOP13_F90OVERHEAD -#endif - -extern "C" { - void loop13_f77(const int& N, double* y, double* x, double* a, double* b, double* c, double* d, const double& u); - void loop13_f77overhead(const int& N, double* y, double* x, double* a, double* b, double* c, double* d, const double& u); - void loop13_f90(const int& N, double* y, double* x, double* a, double* b, double* c, double* d, const double& u); - void loop13_f90overhead(const int& N, double* y, double* x, double* a, double* b, double* c, double* d, const double& u); - -} - -void VectorVersion(BenchmarkExt& bench, double u); -void ArrayVersion(BenchmarkExt& bench, double u); -void ArrayVersion_unaligned(BenchmarkExt& bench, double u); -void ArrayVersion_misaligned(BenchmarkExt& bench, double u); -void ArrayVersion_index(BenchmarkExt& bench, double u); -void doTinyVectorVersion(BenchmarkExt& bench, double u); -void F77Version(BenchmarkExt& bench, double u); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, double u); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, double u); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("loop13: $x = $a+$b+$c+$d; $y = u+$d", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 4 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - 
bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - double u = 0.39123982498157938742; - - - ArrayVersion(bench, u); - ArrayVersion_unaligned(bench, u); - ArrayVersion_misaligned(bench, u); - ArrayVersion_index(bench, u); - //doTinyVectorVersion(bench, u); - F77Version(bench, u); -#ifdef FORTRAN_90 - F90Version(bench, u); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench, u); -#endif - - if(runvector) - VectorVersion(bench, u); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("loop13.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector y(N); - initializeRandomDouble(y.data(), N); - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - Vector c(N); - initializeRandomDouble(c.data(), N); - Vector d(N); - initializeRandomDouble(d.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b+c+d; y = u+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = 
bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b+c+d; y = u+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a(tensor::i)+b(tensor::i)+c(tensor::i)+d(tensor::i); y = u+d(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array yfill(N+1); - Array 
y(yfill(Range(1,N))); - initializeRandomDouble(y.dataFirst(), N); - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+1); - Array c(cfill(Range(1,N))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+1); - Array d(dfill(Range(1,N))); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b+c+d; y = u+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array yfill(N+6); - Array y(yfill(Range(0,N+0-1))); - initializeRandomDouble(y.dataFirst(), N); - - Array xfill(N+6); - Array x(xfill(Range(1,N+1-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+6); - Array a(afill(Range(2,N+2-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+6); - Array b(bfill(Range(3,N+3-1))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+6); - Array c(cfill(Range(4,N+4-1))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+6); - Array d(dfill(Range(5,N+5-1))); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b+c+d; y = u+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void 
ValarrayVersion(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray y(N); - initializeRandomDouble(y, N); - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - valarray c(N); - initializeRandomDouble(c, N); - valarray d(N); - initializeRandomDouble(d, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b+c+d; y = u+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* y = new double[N]; - initializeRandomDouble(y, N); - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - double* c = new double[N]; - initializeRandomDouble(c, N); - double* d = new double[N]; - initializeRandomDouble(d, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop13_f77(N, y, x, a, b, c, d, u); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop13_f77overhead(N, y, x, a, b, c, d, u); - - bench.stopOverhead(); - - delete [] y; - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Fortran 90"); - - 
while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* y = new double[N]; - initializeRandomDouble(y, N); - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - double* c = new double[N]; - initializeRandomDouble(c, N); - double* d = new double[N]; - initializeRandomDouble(d, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop13_f90(N, y, x, a, b, c, d, u); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop13_f90overhead(N, y, x, a, b, c, d, u); - - bench.stopOverhead(); - delete [] y; - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/loop13f.f b/benchmarks/loop13f.f deleted file mode 100644 index b9b97814..00000000 --- a/benchmarks/loop13f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE loop13_F77(N, y, x, a, b, c, d, u) - INTEGER i, N - REAL*8 y(N), x(N), a(N), b(N), c(N), d(N), u - - DO i=1,N - x(i) = a(i)+b(i)+c(i)+d(i); y(i) = u+d(i); - END DO - RETURN - END - - - SUBROUTINE loop13_F77Overhead(N, y, x, a, b, c, d, u) - INTEGER i, N - REAL*8 y(N), x(N), a(N), b(N), c(N), d(N), u - RETURN - END diff --git a/benchmarks/loop13f90.f90 b/benchmarks/loop13f90.f90 deleted file mode 100644 index dbfa0218..00000000 --- a/benchmarks/loop13f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE loop13_F90(N, y, x, a, b, c, d, u) - INTEGER i, N - REAL*8 y(N), x(N), a(N), b(N), c(N), d(N), u - - x = a+b+c+d; y = u+d - RETURN - END - - - SUBROUTINE loop13_F90Overhead(N, y, x, a, b, c, d, u) - INTEGER i, N - REAL*8 y(N), x(N), a(N), b(N), c(N), d(N), u - - RETURN - END diff --git a/benchmarks/loop14.cpp b/benchmarks/loop14.cpp deleted file mode 100644 
index ba1cbbea..00000000 --- a/benchmarks/loop14.cpp +++ /dev/null @@ -1,482 +0,0 @@ - -// loop14 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define loop14_f77 loop14_f77_ - #define loop14_f77overhead loop14_f77overhead_ - #define loop14_f90 loop14_f90_ - #define loop14_f90overhead loop14_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define loop14_f77 loop14_f77__ - #define loop14_f77overhead loop14_f77overhead__ - #define loop14_f90 loop14_f90__ - #define loop14_f90overhead loop14_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define loop14_f77 LOOP14_F77 - #define loop14_f77overhead LOOP14_F77OVERHEAD - #define loop14_f90 LOOP14_F90 - #define loop14_f90overhead LOOP14_F90OVERHEAD -#endif - -extern "C" { - void loop14_f77(const int& N, double* y, double* x, double* a, double* b); - void loop14_f77overhead(const int& N, double* y, double* x, double* a, double* b); - void loop14_f90(const int& N, double* y, double* x, double* a, double* b); - void loop14_f90overhead(const int& N, double* y, double* x, double* a, double* b); - -} - -void VectorVersion(BenchmarkExt& bench); -void ArrayVersion(BenchmarkExt& bench); -void ArrayVersion_unaligned(BenchmarkExt& bench); -void ArrayVersion_misaligned(BenchmarkExt& bench); -void ArrayVersion_index(BenchmarkExt& bench); -void doTinyVectorVersion(BenchmarkExt& bench); -void F77Version(BenchmarkExt& bench); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int 
numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("loop14: $x = $a+$b; $y = $a-$b", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 2 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - - - ArrayVersion(bench); - ArrayVersion_unaligned(bench); - ArrayVersion_misaligned(bench); - ArrayVersion_index(bench); - //doTinyVectorVersion(bench); - F77Version(bench); -#ifdef FORTRAN_90 - F90Version(bench); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench); -#endif - - if(runvector) - VectorVersion(bench); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("loop14.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector y(N); - initializeRandomDouble(y.data(), N); - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - - - 
bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b; y = a-b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b; y = a-b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a(tensor::i)+b(tensor::i); y = a(tensor::i)-b(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = 
bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array yfill(N+1); - Array y(yfill(Range(1,N))); - initializeRandomDouble(y.dataFirst(), N); - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b; y = a-b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array yfill(N+4); - Array y(yfill(Range(0,N+0-1))); - initializeRandomDouble(y.dataFirst(), N); - - Array xfill(N+4); - Array x(xfill(Range(1,N+1-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+4); - Array a(afill(Range(2,N+2-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+4); - Array b(bfill(Range(3,N+3-1))); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b; y = a-b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = 
bench.getIterations(); - - valarray y(N); - initializeRandomDouble(y, N); - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b; y = a-b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* y = new double[N]; - initializeRandomDouble(y, N); - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop14_f77(N, y, x, a, b); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop14_f77overhead(N, y, x, a, b); - - bench.stopOverhead(); - - delete [] y; - delete [] x; - delete [] a; - delete [] b; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* y = new double[N]; - initializeRandomDouble(y, N); - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop14_f90(N, y, x, a, b); - bench.stop(); - - 
bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop14_f90overhead(N, y, x, a, b); - - bench.stopOverhead(); - delete [] y; - delete [] x; - delete [] a; - delete [] b; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/loop14f.f b/benchmarks/loop14f.f deleted file mode 100644 index f4c348bd..00000000 --- a/benchmarks/loop14f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE loop14_F77(N, y, x, a, b) - INTEGER i, N - REAL*8 y(N), x(N), a(N), b(N) - - DO i=1,N - x(i) = a(i)+b(i); y(i) = a(i)-b(i); - END DO - RETURN - END - - - SUBROUTINE loop14_F77Overhead(N, y, x, a, b) - INTEGER i, N - REAL*8 y(N), x(N), a(N), b(N) - RETURN - END diff --git a/benchmarks/loop14f90.f90 b/benchmarks/loop14f90.f90 deleted file mode 100644 index d513b0ae..00000000 --- a/benchmarks/loop14f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE loop14_F90(N, y, x, a, b) - INTEGER i, N - REAL*8 y(N), x(N), a(N), b(N) - - x = a+b; y = a-b - RETURN - END - - - SUBROUTINE loop14_F90Overhead(N, y, x, a, b) - INTEGER i, N - REAL*8 y(N), x(N), a(N), b(N) - - RETURN - END diff --git a/benchmarks/loop15.cpp b/benchmarks/loop15.cpp deleted file mode 100644 index 10d05874..00000000 --- a/benchmarks/loop15.cpp +++ /dev/null @@ -1,482 +0,0 @@ - -// loop15 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define loop15_f77 loop15_f77_ - #define loop15_f77overhead loop15_f77overhead_ - #define loop15_f90 loop15_f90_ - #define loop15_f90overhead loop15_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define loop15_f77 loop15_f77__ - #define loop15_f77overhead loop15_f77overhead__ - #define loop15_f90 loop15_f90__ - #define 
loop15_f90overhead loop15_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define loop15_f77 LOOP15_F77 - #define loop15_f77overhead LOOP15_F77OVERHEAD - #define loop15_f90 LOOP15_F90 - #define loop15_f90overhead LOOP15_F90OVERHEAD -#endif - -extern "C" { - void loop15_f77(const int& N, double* x, double* a, double* b, double* c); - void loop15_f77overhead(const int& N, double* x, double* a, double* b, double* c); - void loop15_f90(const int& N, double* x, double* a, double* b, double* c); - void loop15_f90overhead(const int& N, double* x, double* a, double* b, double* c); - -} - -void VectorVersion(BenchmarkExt& bench); -void ArrayVersion(BenchmarkExt& bench); -void ArrayVersion_unaligned(BenchmarkExt& bench); -void ArrayVersion_misaligned(BenchmarkExt& bench); -void ArrayVersion_index(BenchmarkExt& bench); -void doTinyVectorVersion(BenchmarkExt& bench); -void F77Version(BenchmarkExt& bench); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("loop15: $x = $c + $a*$b", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 3 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - - - ArrayVersion(bench); - ArrayVersion_unaligned(bench); - ArrayVersion_misaligned(bench); - ArrayVersion_index(bench); - 
//doTinyVectorVersion(bench); - F77Version(bench); -#ifdef FORTRAN_90 - F90Version(bench); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench); -#endif - - if(runvector) - VectorVersion(bench); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("loop15.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - Vector c(N); - initializeRandomDouble(c.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = c + a*b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = c + a*b; 
- sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = c(tensor::i) + a(tensor::i)*b(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+1); - Array c(cfill(Range(1,N))); - initializeRandomDouble(c.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = c + a*b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array 
(misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+4); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+4); - Array a(afill(Range(1,N+1-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+4); - Array b(bfill(Range(2,N+2-1))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+4); - Array c(cfill(Range(3,N+3-1))); - initializeRandomDouble(c.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = c + a*b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - valarray c(N); - initializeRandomDouble(c, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = c + a*b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, 
N); - double* b = new double[N]; - initializeRandomDouble(b, N); - double* c = new double[N]; - initializeRandomDouble(c, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop15_f77(N, x, a, b, c); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop15_f77overhead(N, x, a, b, c); - - bench.stopOverhead(); - - delete [] x; - delete [] a; - delete [] b; - delete [] c; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - double* c = new double[N]; - initializeRandomDouble(c, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop15_f90(N, x, a, b, c); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop15_f90overhead(N, x, a, b, c); - - bench.stopOverhead(); - delete [] x; - delete [] a; - delete [] b; - delete [] c; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/loop15f.f b/benchmarks/loop15f.f deleted file mode 100644 index aa3622b5..00000000 --- a/benchmarks/loop15f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE loop15_F77(N, x, a, b, c) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N) - - DO i=1,N - x(i) = c(i) + a(i)*b(i); - END DO - RETURN - END - - - SUBROUTINE loop15_F77Overhead(N, x, a, b, c) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N) - RETURN - END diff --git a/benchmarks/loop15f90.f90 b/benchmarks/loop15f90.f90 deleted file mode 100644 index 45c33e18..00000000 --- a/benchmarks/loop15f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE loop15_F90(N, x, a, b, 
c) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N) - - x = c + a*b - RETURN - END - - - SUBROUTINE loop15_F90Overhead(N, x, a, b, c) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N) - - RETURN - END diff --git a/benchmarks/loop16.cpp b/benchmarks/loop16.cpp deleted file mode 100644 index 37e0cd53..00000000 --- a/benchmarks/loop16.cpp +++ /dev/null @@ -1,505 +0,0 @@ - -// loop16 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define loop16_f77 loop16_f77_ - #define loop16_f77overhead loop16_f77overhead_ - #define loop16_f90 loop16_f90_ - #define loop16_f90overhead loop16_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define loop16_f77 loop16_f77__ - #define loop16_f77overhead loop16_f77overhead__ - #define loop16_f90 loop16_f90__ - #define loop16_f90overhead loop16_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define loop16_f77 LOOP16_F77 - #define loop16_f77overhead LOOP16_F77OVERHEAD - #define loop16_f90 LOOP16_F90 - #define loop16_f90overhead LOOP16_F90OVERHEAD -#endif - -extern "C" { - void loop16_f77(const int& N, double* y, double* x, double* a, double* b, double* c, const double& u); - void loop16_f77overhead(const int& N, double* y, double* x, double* a, double* b, double* c, const double& u); - void loop16_f90(const int& N, double* y, double* x, double* a, double* b, double* c, const double& u); - void loop16_f90overhead(const int& N, double* y, double* x, double* a, double* b, double* c, const double& u); - -} - -void VectorVersion(BenchmarkExt& bench, double u); -void ArrayVersion(BenchmarkExt& bench, double u); -void ArrayVersion_unaligned(BenchmarkExt& bench, double u); -void 
ArrayVersion_misaligned(BenchmarkExt& bench, double u); -void ArrayVersion_index(BenchmarkExt& bench, double u); -void doTinyVectorVersion(BenchmarkExt& bench, double u); -void F77Version(BenchmarkExt& bench, double u); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, double u); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, double u); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("loop16: $x = $a+$b+$c; $y = $x+$c+u", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 4 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - double u = 0.39123982498157938742; - - - ArrayVersion(bench, u); - ArrayVersion_unaligned(bench, u); - ArrayVersion_misaligned(bench, u); - ArrayVersion_index(bench, u); - //doTinyVectorVersion(bench, u); - F77Version(bench, u); -#ifdef FORTRAN_90 - F90Version(bench, u); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench, u); -#endif - - if(runvector) - VectorVersion(bench, u); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("loop16.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for 
(int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector y(N); - initializeRandomDouble(y.data(), N); - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - Vector c(N); - initializeRandomDouble(c.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b+c; y = x+c+u; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b+c; y = x+c+u; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << 
": N = " << N << endl; - - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a(tensor::i)+b(tensor::i)+c(tensor::i); y = x(tensor::i)+c(tensor::i)+u;; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array yfill(N+1); - Array y(yfill(Range(1,N))); - initializeRandomDouble(y.dataFirst(), N); - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+1); - Array c(cfill(Range(1,N))); - initializeRandomDouble(c.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b+c; y = x+c+u; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array yfill(N+5); - Array 
y(yfill(Range(0,N+0-1))); - initializeRandomDouble(y.dataFirst(), N); - - Array xfill(N+5); - Array x(xfill(Range(1,N+1-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+5); - Array a(afill(Range(2,N+2-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+5); - Array b(bfill(Range(3,N+3-1))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+5); - Array c(cfill(Range(4,N+4-1))); - initializeRandomDouble(c.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b+c; y = x+c+u; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray y(N); - initializeRandomDouble(y, N); - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - valarray c(N); - initializeRandomDouble(c, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b+c; y = x+c+u; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* y = new double[N]; - initializeRandomDouble(y, N); - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - 
initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - double* c = new double[N]; - initializeRandomDouble(c, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop16_f77(N, y, x, a, b, c, u); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop16_f77overhead(N, y, x, a, b, c, u); - - bench.stopOverhead(); - - delete [] y; - delete [] x; - delete [] a; - delete [] b; - delete [] c; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* y = new double[N]; - initializeRandomDouble(y, N); - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - double* c = new double[N]; - initializeRandomDouble(c, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop16_f90(N, y, x, a, b, c, u); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop16_f90overhead(N, y, x, a, b, c, u); - - bench.stopOverhead(); - delete [] y; - delete [] x; - delete [] a; - delete [] b; - delete [] c; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/loop16f.f b/benchmarks/loop16f.f deleted file mode 100644 index f96274db..00000000 --- a/benchmarks/loop16f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE loop16_F77(N, y, x, a, b, c, u) - INTEGER i, N - REAL*8 y(N), x(N), a(N), b(N), c(N), u - - DO i=1,N - x(i) = a(i)+b(i)+c(i); y(i) = x(i)+c(i)+u; - END DO - RETURN - END - - - SUBROUTINE loop16_F77Overhead(N, y, x, a, b, c, u) - INTEGER i, N - REAL*8 y(N), x(N), a(N), b(N), c(N), u - RETURN - END diff --git 
a/benchmarks/loop16f90.f90 b/benchmarks/loop16f90.f90 deleted file mode 100644 index f81226c2..00000000 --- a/benchmarks/loop16f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE loop16_F90(N, y, x, a, b, c, u) - INTEGER i, N - REAL*8 y(N), x(N), a(N), b(N), c(N), u - - x = a+b+c; y = x+c+u - RETURN - END - - - SUBROUTINE loop16_F90Overhead(N, y, x, a, b, c, u) - INTEGER i, N - REAL*8 y(N), x(N), a(N), b(N), c(N), u - - RETURN - END diff --git a/benchmarks/loop17.cpp b/benchmarks/loop17.cpp deleted file mode 100644 index 108110b1..00000000 --- a/benchmarks/loop17.cpp +++ /dev/null @@ -1,504 +0,0 @@ - -// loop17 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define loop17_f77 loop17_f77_ - #define loop17_f77overhead loop17_f77overhead_ - #define loop17_f90 loop17_f90_ - #define loop17_f90overhead loop17_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define loop17_f77 loop17_f77__ - #define loop17_f77overhead loop17_f77overhead__ - #define loop17_f90 loop17_f90__ - #define loop17_f90overhead loop17_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define loop17_f77 LOOP17_F77 - #define loop17_f77overhead LOOP17_F77OVERHEAD - #define loop17_f90 LOOP17_F90 - #define loop17_f90overhead LOOP17_F90OVERHEAD -#endif - -extern "C" { - void loop17_f77(const int& N, double* x, double* a, double* b, double* c, double* d); - void loop17_f77overhead(const int& N, double* x, double* a, double* b, double* c, double* d); - void loop17_f90(const int& N, double* x, double* a, double* b, double* c, double* d); - void loop17_f90overhead(const int& N, double* x, double* a, double* b, double* c, double* d); - -} - -void 
VectorVersion(BenchmarkExt& bench); -void ArrayVersion(BenchmarkExt& bench); -void ArrayVersion_unaligned(BenchmarkExt& bench); -void ArrayVersion_misaligned(BenchmarkExt& bench); -void ArrayVersion_index(BenchmarkExt& bench); -void doTinyVectorVersion(BenchmarkExt& bench); -void F77Version(BenchmarkExt& bench); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("loop17: $x = ($a+$b)*($c+$d)", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 3 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - - - ArrayVersion(bench); - ArrayVersion_unaligned(bench); - ArrayVersion_misaligned(bench); - ArrayVersion_index(bench); - //doTinyVectorVersion(bench); - F77Version(bench); -#ifdef FORTRAN_90 - F90Version(bench); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench); -#endif - - if(runvector) - VectorVersion(bench); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("loop17.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for 
(int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - Vector c(N); - initializeRandomDouble(c.data(), N); - Vector d(N); - initializeRandomDouble(d.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = (a+b)*(c+d); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = (a+b)*(c+d); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); 
- initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = (a(tensor::i)+b(tensor::i))*(c(tensor::i)+d(tensor::i));; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+1); - Array c(cfill(Range(1,N))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+1); - Array d(dfill(Range(1,N))); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = (a+b)*(c+d); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+5); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array 
afill(N+5); - Array a(afill(Range(1,N+1-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+5); - Array b(bfill(Range(2,N+2-1))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+5); - Array c(cfill(Range(3,N+3-1))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+5); - Array d(dfill(Range(4,N+4-1))); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = (a+b)*(c+d); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - valarray c(N); - initializeRandomDouble(c, N); - valarray d(N); - initializeRandomDouble(d, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = (a+b)*(c+d); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - double* c = new double[N]; - initializeRandomDouble(c, N); - double* d = new double[N]; - 
initializeRandomDouble(d, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop17_f77(N, x, a, b, c, d); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop17_f77overhead(N, x, a, b, c, d); - - bench.stopOverhead(); - - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - double* c = new double[N]; - initializeRandomDouble(c, N); - double* d = new double[N]; - initializeRandomDouble(d, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop17_f90(N, x, a, b, c, d); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop17_f90overhead(N, x, a, b, c, d); - - bench.stopOverhead(); - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/loop17f.f b/benchmarks/loop17f.f deleted file mode 100644 index 795c1f93..00000000 --- a/benchmarks/loop17f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE loop17_F77(N, x, a, b, c, d) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), d(N) - - DO i=1,N - x(i) = (a(i)+b(i))*(c(i)+d(i)); - END DO - RETURN - END - - - SUBROUTINE loop17_F77Overhead(N, x, a, b, c, d) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), d(N) - RETURN - END diff --git a/benchmarks/loop17f90.f90 b/benchmarks/loop17f90.f90 deleted file mode 100644 index 64beabe2..00000000 --- a/benchmarks/loop17f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - 
SUBROUTINE loop17_F90(N, x, a, b, c, d) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), d(N) - - x = (a+b)*(c+d) - RETURN - END - - - SUBROUTINE loop17_F90Overhead(N, x, a, b, c, d) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), d(N) - - RETURN - END diff --git a/benchmarks/loop18.cpp b/benchmarks/loop18.cpp deleted file mode 100644 index b7890461..00000000 --- a/benchmarks/loop18.cpp +++ /dev/null @@ -1,462 +0,0 @@ - -// loop18 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define loop18_f77 loop18_f77_ - #define loop18_f77overhead loop18_f77overhead_ - #define loop18_f90 loop18_f90_ - #define loop18_f90overhead loop18_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define loop18_f77 loop18_f77__ - #define loop18_f77overhead loop18_f77overhead__ - #define loop18_f90 loop18_f90__ - #define loop18_f90overhead loop18_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define loop18_f77 LOOP18_F77 - #define loop18_f77overhead LOOP18_F77OVERHEAD - #define loop18_f90 LOOP18_F90 - #define loop18_f90overhead LOOP18_F90OVERHEAD -#endif - -extern "C" { - void loop18_f77(const int& N, double* x, double* a, double* b, const double& u, const double& v); - void loop18_f77overhead(const int& N, double* x, double* a, double* b, const double& u, const double& v); - void loop18_f90(const int& N, double* x, double* a, double* b, const double& u, const double& v); - void loop18_f90overhead(const int& N, double* x, double* a, double* b, const double& u, const double& v); - -} - -void VectorVersion(BenchmarkExt& bench, double u, double v); -void ArrayVersion(BenchmarkExt& bench, double u, double v); -void 
ArrayVersion_unaligned(BenchmarkExt& bench, double u, double v); -void ArrayVersion_misaligned(BenchmarkExt& bench, double u, double v); -void ArrayVersion_index(BenchmarkExt& bench, double u, double v); -void doTinyVectorVersion(BenchmarkExt& bench, double u, double v); -void F77Version(BenchmarkExt& bench, double u, double v); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, double u, double v); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, double u, double v); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("loop18: $x = (u+$a)*(v+$b)", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 3 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - double u = 0.39123982498157938742; - double v = 0.39123982498157938742; - - - ArrayVersion(bench, u, v); - ArrayVersion_unaligned(bench, u, v); - ArrayVersion_misaligned(bench, u, v); - ArrayVersion_index(bench, u, v); - //doTinyVectorVersion(bench, u, v); - F77Version(bench, u, v); -#ifdef FORTRAN_90 - F90Version(bench, u, v); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench, u, v); -#endif - - if(runvector) - VectorVersion(bench, u, v); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("loop18.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < 
numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench, double u, double v) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = (u+a)*(v+b); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench, double u, double v) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = (u+a)*(v+b); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench, double u, double v) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << 
endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = (u+a(tensor::i))*(v+b(tensor::i));; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench, double u, double v) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = (u+a)*(v+b); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench, double u, double v) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+3); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+3); - Array a(afill(Range(1,N+1-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+3); - Array b(bfill(Range(2,N+2-1))); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = (u+a)*(v+b); - 
sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, double u, double v) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = (u+a)*(v+b); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench, double u, double v) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop18_f77(N, x, a, b, u, v); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop18_f77overhead(N, x, a, b, u, v); - - bench.stopOverhead(); - - delete [] x; - delete [] a; - delete [] b; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, double u, double v) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters 
= bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop18_f90(N, x, a, b, u, v); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop18_f90overhead(N, x, a, b, u, v); - - bench.stopOverhead(); - delete [] x; - delete [] a; - delete [] b; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/loop18f.f b/benchmarks/loop18f.f deleted file mode 100644 index 9bc77997..00000000 --- a/benchmarks/loop18f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE loop18_F77(N, x, a, b, u, v) - INTEGER i, N - REAL*8 x(N), a(N), b(N), u, v - - DO i=1,N - x(i) = (u+a(i))*(v+b(i)); - END DO - RETURN - END - - - SUBROUTINE loop18_F77Overhead(N, x, a, b, u, v) - INTEGER i, N - REAL*8 x(N), a(N), b(N), u, v - RETURN - END diff --git a/benchmarks/loop18f90.f90 b/benchmarks/loop18f90.f90 deleted file mode 100644 index 1773a4a0..00000000 --- a/benchmarks/loop18f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE loop18_F90(N, x, a, b, u, v) - INTEGER i, N - REAL*8 x(N), a(N), b(N), u, v - - x = (u+a)*(v+b) - RETURN - END - - - SUBROUTINE loop18_F90Overhead(N, x, a, b, u, v) - INTEGER i, N - REAL*8 x(N), a(N), b(N), u, v - - RETURN - END diff --git a/benchmarks/loop19.cpp b/benchmarks/loop19.cpp deleted file mode 100644 index 10aa3136..00000000 --- a/benchmarks/loop19.cpp +++ /dev/null @@ -1,484 +0,0 @@ - -// loop19 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define loop19_f77 loop19_f77_ - #define 
loop19_f77overhead loop19_f77overhead_ - #define loop19_f90 loop19_f90_ - #define loop19_f90overhead loop19_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define loop19_f77 loop19_f77__ - #define loop19_f77overhead loop19_f77overhead__ - #define loop19_f90 loop19_f90__ - #define loop19_f90overhead loop19_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define loop19_f77 LOOP19_F77 - #define loop19_f77overhead LOOP19_F77OVERHEAD - #define loop19_f90 LOOP19_F90 - #define loop19_f90overhead LOOP19_F90OVERHEAD -#endif - -extern "C" { - void loop19_f77(const int& N, double* y, double* x, double* a, double* b, const double& u, const double& v); - void loop19_f77overhead(const int& N, double* y, double* x, double* a, double* b, const double& u, const double& v); - void loop19_f90(const int& N, double* y, double* x, double* a, double* b, const double& u, const double& v); - void loop19_f90overhead(const int& N, double* y, double* x, double* a, double* b, const double& u, const double& v); - -} - -void VectorVersion(BenchmarkExt& bench, double u, double v); -void ArrayVersion(BenchmarkExt& bench, double u, double v); -void ArrayVersion_unaligned(BenchmarkExt& bench, double u, double v); -void ArrayVersion_misaligned(BenchmarkExt& bench, double u, double v); -void ArrayVersion_index(BenchmarkExt& bench, double u, double v); -void doTinyVectorVersion(BenchmarkExt& bench, double u, double v); -void F77Version(BenchmarkExt& bench, double u, double v); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, double u, double v); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, double u, double v); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("loop19: 
$x = u*$a; $y = v*$b", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 2 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - double u = 0.39123982498157938742; - double v = 0.39123982498157938742; - - - ArrayVersion(bench, u, v); - ArrayVersion_unaligned(bench, u, v); - ArrayVersion_misaligned(bench, u, v); - ArrayVersion_index(bench, u, v); - //doTinyVectorVersion(bench, u, v); - F77Version(bench, u, v); -#ifdef FORTRAN_90 - F90Version(bench, u, v); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench, u, v); -#endif - - if(runvector) - VectorVersion(bench, u, v); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("loop19.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench, double u, double v) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector y(N); - initializeRandomDouble(y.data(), N); - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - - - bench.start(); - for (long i=0; i < 
iters; ++i) - { - x = u*a; y = v*b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench, double u, double v) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u*a; y = v*b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench, double u, double v) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u*a(tensor::i); y = v*b(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench, double u, double v) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = 
bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array yfill(N+1); - Array y(yfill(Range(1,N))); - initializeRandomDouble(y.dataFirst(), N); - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u*a; y = v*b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench, double u, double v) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array yfill(N+4); - Array y(yfill(Range(0,N+0-1))); - initializeRandomDouble(y.dataFirst(), N); - - Array xfill(N+4); - Array x(xfill(Range(1,N+1-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+4); - Array a(afill(Range(2,N+2-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+4); - Array b(bfill(Range(3,N+3-1))); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u*a; y = v*b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, double u, double v) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " 
<< N << endl; - - long iters = bench.getIterations(); - - valarray y(N); - initializeRandomDouble(y, N); - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u*a; y = v*b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench, double u, double v) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* y = new double[N]; - initializeRandomDouble(y, N); - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop19_f77(N, y, x, a, b, u, v); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop19_f77overhead(N, y, x, a, b, u, v); - - bench.stopOverhead(); - - delete [] y; - delete [] x; - delete [] a; - delete [] b; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, double u, double v) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* y = new double[N]; - initializeRandomDouble(y, N); - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - - - bench.start(); - for (int 
iter=0; iter < iters; ++iter) - loop19_f90(N, y, x, a, b, u, v); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop19_f90overhead(N, y, x, a, b, u, v); - - bench.stopOverhead(); - delete [] y; - delete [] x; - delete [] a; - delete [] b; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/loop19f.f b/benchmarks/loop19f.f deleted file mode 100644 index d27ee541..00000000 --- a/benchmarks/loop19f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE loop19_F77(N, y, x, a, b, u, v) - INTEGER i, N - REAL*8 y(N), x(N), a(N), b(N), u, v - - DO i=1,N - x(i) = u*a(i); y(i) = v*b(i); - END DO - RETURN - END - - - SUBROUTINE loop19_F77Overhead(N, y, x, a, b, u, v) - INTEGER i, N - REAL*8 y(N), x(N), a(N), b(N), u, v - RETURN - END diff --git a/benchmarks/loop19f90.f90 b/benchmarks/loop19f90.f90 deleted file mode 100644 index ab134129..00000000 --- a/benchmarks/loop19f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE loop19_F90(N, y, x, a, b, u, v) - INTEGER i, N - REAL*8 y(N), x(N), a(N), b(N), u, v - - x = u*a; y = v*b - RETURN - END - - - SUBROUTINE loop19_F90Overhead(N, y, x, a, b, u, v) - INTEGER i, N - REAL*8 y(N), x(N), a(N), b(N), u, v - - RETURN - END diff --git a/benchmarks/loop1f.f b/benchmarks/loop1f.f deleted file mode 100644 index 0f215ed7..00000000 --- a/benchmarks/loop1f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE loop1_F77(N, x, y) - INTEGER i, N - REAL*8 x(N), y(N) - - DO i=1,N - x(i) = sqrt(y(i)); - END DO - RETURN - END - - - SUBROUTINE loop1_F77Overhead(N, x, y) - INTEGER i, N - REAL*8 x(N), y(N) - RETURN - END diff --git a/benchmarks/loop1f90.f90 b/benchmarks/loop1f90.f90 deleted file mode 100644 index a788a5af..00000000 --- a/benchmarks/loop1f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE loop1_F90(N, x, y) - INTEGER i, N - REAL*8 x(N), y(N) - - x = sqrt(y) - RETURN - END - - - SUBROUTINE loop1_F90Overhead(N, x, y) - INTEGER i, N - REAL*8 x(N), y(N) - - RETURN - END diff --git 
a/benchmarks/loop2.cpp b/benchmarks/loop2.cpp deleted file mode 100644 index 1ea3e34e..00000000 --- a/benchmarks/loop2.cpp +++ /dev/null @@ -1,439 +0,0 @@ - -// loop2 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define loop2_f77 loop2_f77_ - #define loop2_f77overhead loop2_f77overhead_ - #define loop2_f90 loop2_f90_ - #define loop2_f90overhead loop2_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define loop2_f77 loop2_f77__ - #define loop2_f77overhead loop2_f77overhead__ - #define loop2_f90 loop2_f90__ - #define loop2_f90overhead loop2_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define loop2_f77 LOOP2_F77 - #define loop2_f77overhead LOOP2_F77OVERHEAD - #define loop2_f90 LOOP2_F90 - #define loop2_f90overhead LOOP2_F90OVERHEAD -#endif - -extern "C" { - void loop2_f77(const int& N, double* x, double* y, const double& u); - void loop2_f77overhead(const int& N, double* x, double* y, const double& u); - void loop2_f90(const int& N, double* x, double* y, const double& u); - void loop2_f90overhead(const int& N, double* x, double* y, const double& u); - -} - -void VectorVersion(BenchmarkExt& bench, double u); -void ArrayVersion(BenchmarkExt& bench, double u); -void ArrayVersion_unaligned(BenchmarkExt& bench, double u); -void ArrayVersion_misaligned(BenchmarkExt& bench, double u); -void ArrayVersion_index(BenchmarkExt& bench, double u); -void doTinyVectorVersion(BenchmarkExt& bench, double u); -void F77Version(BenchmarkExt& bench, double u); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, double u); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, double u); -#endif - -const int 
numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("loop2: $x = $y/u", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 1 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - double u = 0.39123982498157938742; - - - ArrayVersion(bench, u); - ArrayVersion_unaligned(bench, u); - ArrayVersion_misaligned(bench, u); - ArrayVersion_index(bench, u); - //doTinyVectorVersion(bench, u); - F77Version(bench, u); -#ifdef FORTRAN_90 - F90Version(bench, u); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench, u); -#endif - - if(runvector) - VectorVersion(bench, u); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("loop2.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); - 
Vector y(N); - initializeRandomDouble(y.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = y/u; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = y/u; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = y(tensor::i)/u;; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - 
initializeRandomDouble(x.dataFirst(), N); - - Array yfill(N+1); - Array y(yfill(Range(1,N))); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = y/u; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+2); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array yfill(N+2); - Array y(yfill(Range(1,N+1-1))); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = y/u; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray y(N); - initializeRandomDouble(y, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = y/u; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << 
bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* y = new double[N]; - initializeRandomDouble(y, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop2_f77(N, x, y, u); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop2_f77overhead(N, x, y, u); - - bench.stopOverhead(); - - delete [] x; - delete [] y; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* y = new double[N]; - initializeRandomDouble(y, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop2_f90(N, x, y, u); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop2_f90overhead(N, x, y, u); - - bench.stopOverhead(); - delete [] x; - delete [] y; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/loop21.cpp b/benchmarks/loop21.cpp deleted file mode 100644 index 43d6fd2a..00000000 --- a/benchmarks/loop21.cpp +++ /dev/null @@ -1,504 +0,0 @@ - -// loop21 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define loop21_f77 loop21_f77_ - #define loop21_f77overhead loop21_f77overhead_ - #define loop21_f90 loop21_f90_ - #define loop21_f90overhead loop21_f90overhead_ -#elif 
defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define loop21_f77 loop21_f77__ - #define loop21_f77overhead loop21_f77overhead__ - #define loop21_f90 loop21_f90__ - #define loop21_f90overhead loop21_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define loop21_f77 LOOP21_F77 - #define loop21_f77overhead LOOP21_F77OVERHEAD - #define loop21_f90 LOOP21_F90 - #define loop21_f90overhead LOOP21_F90OVERHEAD -#endif - -extern "C" { - void loop21_f77(const int& N, double* x, double* a, double* b, double* c, double* d); - void loop21_f77overhead(const int& N, double* x, double* a, double* b, double* c, double* d); - void loop21_f90(const int& N, double* x, double* a, double* b, double* c, double* d); - void loop21_f90overhead(const int& N, double* x, double* a, double* b, double* c, double* d); - -} - -void VectorVersion(BenchmarkExt& bench); -void ArrayVersion(BenchmarkExt& bench); -void ArrayVersion_unaligned(BenchmarkExt& bench); -void ArrayVersion_misaligned(BenchmarkExt& bench); -void ArrayVersion_index(BenchmarkExt& bench); -void doTinyVectorVersion(BenchmarkExt& bench); -void F77Version(BenchmarkExt& bench); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("loop21: $x = $a*$b + $c*$d", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 3 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); 
- bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - - - ArrayVersion(bench); - ArrayVersion_unaligned(bench); - ArrayVersion_misaligned(bench); - ArrayVersion_index(bench); - //doTinyVectorVersion(bench); - F77Version(bench); -#ifdef FORTRAN_90 - F90Version(bench); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench); -#endif - - if(runvector) - VectorVersion(bench); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("loop21.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - Vector c(N); - initializeRandomDouble(c.data(), N); - Vector d(N); - initializeRandomDouble(d.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b + c*d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - 
initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b + c*d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a(tensor::i)*b(tensor::i) + c(tensor::i)*d(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - Array 
cfill(N+1); - Array c(cfill(Range(1,N))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+1); - Array d(dfill(Range(1,N))); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b + c*d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+5); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+5); - Array a(afill(Range(1,N+1-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+5); - Array b(bfill(Range(2,N+2-1))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+5); - Array c(cfill(Range(3,N+3-1))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+5); - Array d(dfill(Range(4,N+4-1))); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b + c*d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - valarray c(N); - initializeRandomDouble(c, N); - valarray d(N); - 
initializeRandomDouble(d, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b + c*d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - double* c = new double[N]; - initializeRandomDouble(c, N); - double* d = new double[N]; - initializeRandomDouble(d, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop21_f77(N, x, a, b, c, d); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop21_f77overhead(N, x, a, b, c, d); - - bench.stopOverhead(); - - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - double* c = new double[N]; - initializeRandomDouble(c, N); - double* d = new double[N]; - initializeRandomDouble(d, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop21_f90(N, x, a, b, c, d); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; 
++iter) - loop21_f90overhead(N, x, a, b, c, d); - - bench.stopOverhead(); - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/loop21f.f b/benchmarks/loop21f.f deleted file mode 100644 index ce6ed7a1..00000000 --- a/benchmarks/loop21f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE loop21_F77(N, x, a, b, c, d) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), d(N) - - DO i=1,N - x(i) = a(i)*b(i) + c(i)*d(i); - END DO - RETURN - END - - - SUBROUTINE loop21_F77Overhead(N, x, a, b, c, d) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), d(N) - RETURN - END diff --git a/benchmarks/loop21f90.f90 b/benchmarks/loop21f90.f90 deleted file mode 100644 index bc792a81..00000000 --- a/benchmarks/loop21f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE loop21_F90(N, x, a, b, c, d) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), d(N) - - x = a*b + c*d - RETURN - END - - - SUBROUTINE loop21_F90Overhead(N, x, a, b, c, d) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), d(N) - - RETURN - END diff --git a/benchmarks/loop22.cpp b/benchmarks/loop22.cpp deleted file mode 100644 index 60a2cfdb..00000000 --- a/benchmarks/loop22.cpp +++ /dev/null @@ -1,504 +0,0 @@ - -// loop22 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define loop22_f77 loop22_f77_ - #define loop22_f77overhead loop22_f77overhead_ - #define loop22_f90 loop22_f90_ - #define loop22_f90overhead loop22_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define loop22_f77 loop22_f77__ - #define loop22_f77overhead loop22_f77overhead__ - #define loop22_f90 loop22_f90__ - #define 
loop22_f90overhead loop22_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define loop22_f77 LOOP22_F77 - #define loop22_f77overhead LOOP22_F77OVERHEAD - #define loop22_f90 LOOP22_F90 - #define loop22_f90overhead LOOP22_F90OVERHEAD -#endif - -extern "C" { - void loop22_f77(const int& N, double* x, double* a, double* b, double* c, double* d); - void loop22_f77overhead(const int& N, double* x, double* a, double* b, double* c, double* d); - void loop22_f90(const int& N, double* x, double* a, double* b, double* c, double* d); - void loop22_f90overhead(const int& N, double* x, double* a, double* b, double* c, double* d); - -} - -void VectorVersion(BenchmarkExt& bench); -void ArrayVersion(BenchmarkExt& bench); -void ArrayVersion_unaligned(BenchmarkExt& bench); -void ArrayVersion_misaligned(BenchmarkExt& bench); -void ArrayVersion_index(BenchmarkExt& bench); -void doTinyVectorVersion(BenchmarkExt& bench); -void F77Version(BenchmarkExt& bench); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("loop22: $x = $x + $a*$b + $c*$d", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 4 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - - - ArrayVersion(bench); - ArrayVersion_unaligned(bench); - 
ArrayVersion_misaligned(bench); - ArrayVersion_index(bench); - //doTinyVectorVersion(bench); - F77Version(bench); -#ifdef FORTRAN_90 - F90Version(bench); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench); -#endif - - if(runvector) - VectorVersion(bench); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("loop22.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - Vector c(N); - initializeRandomDouble(c.data(), N); - Vector d(N); - initializeRandomDouble(d.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = x + a*b + c*d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array 
c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = x + a*b + c*d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = x(tensor::i) + a(tensor::i)*b(tensor::i) + c(tensor::i)*d(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+1); - Array c(cfill(Range(1,N))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+1); - Array d(dfill(Range(1,N))); - 
initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = x + a*b + c*d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+5); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+5); - Array a(afill(Range(1,N+1-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+5); - Array b(bfill(Range(2,N+2-1))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+5); - Array c(cfill(Range(3,N+3-1))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+5); - Array d(dfill(Range(4,N+4-1))); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = x + a*b + c*d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - valarray c(N); - initializeRandomDouble(c, N); - valarray d(N); - initializeRandomDouble(d, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = x + a*b + c*d; - sink(); - } - bench.stop(); 
- - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - double* c = new double[N]; - initializeRandomDouble(c, N); - double* d = new double[N]; - initializeRandomDouble(d, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop22_f77(N, x, a, b, c, d); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop22_f77overhead(N, x, a, b, c, d); - - bench.stopOverhead(); - - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - double* c = new double[N]; - initializeRandomDouble(c, N); - double* d = new double[N]; - initializeRandomDouble(d, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop22_f90(N, x, a, b, c, d); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop22_f90overhead(N, x, a, b, c, d); - - bench.stopOverhead(); - delete [] x; - delete [] a; - delete [] b; - delete [] c; - 
delete [] d; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/loop22f.f b/benchmarks/loop22f.f deleted file mode 100644 index 367bce0a..00000000 --- a/benchmarks/loop22f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE loop22_F77(N, x, a, b, c, d) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), d(N) - - DO i=1,N - x(i) = x(i) + a(i)*b(i) + c(i)*d(i); - END DO - RETURN - END - - - SUBROUTINE loop22_F77Overhead(N, x, a, b, c, d) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), d(N) - RETURN - END diff --git a/benchmarks/loop22f90.f90 b/benchmarks/loop22f90.f90 deleted file mode 100644 index 516f5260..00000000 --- a/benchmarks/loop22f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE loop22_F90(N, x, a, b, c, d) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), d(N) - - x = x + a*b + c*d - RETURN - END - - - SUBROUTINE loop22_F90Overhead(N, x, a, b, c, d) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), d(N) - - RETURN - END diff --git a/benchmarks/loop23.cpp b/benchmarks/loop23.cpp deleted file mode 100644 index 6ecc89a4..00000000 --- a/benchmarks/loop23.cpp +++ /dev/null @@ -1,526 +0,0 @@ - -// loop23 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define loop23_f77 loop23_f77_ - #define loop23_f77overhead loop23_f77overhead_ - #define loop23_f90 loop23_f90_ - #define loop23_f90overhead loop23_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define loop23_f77 loop23_f77__ - #define loop23_f77overhead loop23_f77overhead__ - #define loop23_f90 loop23_f90__ - #define loop23_f90overhead loop23_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define loop23_f77 LOOP23_F77 - #define loop23_f77overhead 
LOOP23_F77OVERHEAD - #define loop23_f90 LOOP23_F90 - #define loop23_f90overhead LOOP23_F90OVERHEAD -#endif - -extern "C" { - void loop23_f77(const int& N, double* x, double* a, double* b, double* c, double* d, double* y); - void loop23_f77overhead(const int& N, double* x, double* a, double* b, double* c, double* d, double* y); - void loop23_f90(const int& N, double* x, double* a, double* b, double* c, double* d, double* y); - void loop23_f90overhead(const int& N, double* x, double* a, double* b, double* c, double* d, double* y); - -} - -void VectorVersion(BenchmarkExt& bench); -void ArrayVersion(BenchmarkExt& bench); -void ArrayVersion_unaligned(BenchmarkExt& bench); -void ArrayVersion_misaligned(BenchmarkExt& bench); -void ArrayVersion_index(BenchmarkExt& bench); -void doTinyVectorVersion(BenchmarkExt& bench); -void F77Version(BenchmarkExt& bench); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("loop23: $x = $a*$b + $c*$d; $y = $b+$d", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 4 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - - - ArrayVersion(bench); - ArrayVersion_unaligned(bench); - ArrayVersion_misaligned(bench); - ArrayVersion_index(bench); - //doTinyVectorVersion(bench); - 
F77Version(bench); -#ifdef FORTRAN_90 - F90Version(bench); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench); -#endif - - if(runvector) - VectorVersion(bench); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("loop23.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - Vector c(N); - initializeRandomDouble(c.data(), N); - Vector d(N); - initializeRandomDouble(d.data(), N); - Vector y(N); - initializeRandomDouble(y.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b + c*d; y = b+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - 
initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b + c*d; y = b+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a(tensor::i)*b(tensor::i) + c(tensor::i)*d(tensor::i); y = b(tensor::i)+d(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+1); - Array 
c(cfill(Range(1,N))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+1); - Array d(dfill(Range(1,N))); - initializeRandomDouble(d.dataFirst(), N); - - Array yfill(N+1); - Array y(yfill(Range(1,N))); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b + c*d; y = b+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+6); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+6); - Array a(afill(Range(1,N+1-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+6); - Array b(bfill(Range(2,N+2-1))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+6); - Array c(cfill(Range(3,N+3-1))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+6); - Array d(dfill(Range(4,N+4-1))); - initializeRandomDouble(d.dataFirst(), N); - - Array yfill(N+6); - Array y(yfill(Range(5,N+5-1))); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b + c*d; y = b+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - 
initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - valarray c(N); - initializeRandomDouble(c, N); - valarray d(N); - initializeRandomDouble(d, N); - valarray y(N); - initializeRandomDouble(y, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b + c*d; y = b+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - double* c = new double[N]; - initializeRandomDouble(c, N); - double* d = new double[N]; - initializeRandomDouble(d, N); - double* y = new double[N]; - initializeRandomDouble(y, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop23_f77(N, x, a, b, c, d, y); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop23_f77overhead(N, x, a, b, c, d, y); - - bench.stopOverhead(); - - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - delete [] y; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new 
double[N]; - initializeRandomDouble(b, N); - double* c = new double[N]; - initializeRandomDouble(c, N); - double* d = new double[N]; - initializeRandomDouble(d, N); - double* y = new double[N]; - initializeRandomDouble(y, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop23_f90(N, x, a, b, c, d, y); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop23_f90overhead(N, x, a, b, c, d, y); - - bench.stopOverhead(); - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - delete [] y; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/loop23f.f b/benchmarks/loop23f.f deleted file mode 100644 index 917847f4..00000000 --- a/benchmarks/loop23f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE loop23_F77(N, x, a, b, c, d, y) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), d(N), y(N) - - DO i=1,N - x(i) = a(i)*b(i) + c(i)*d(i); y(i) = b(i)+d(i); - END DO - RETURN - END - - - SUBROUTINE loop23_F77Overhead(N, x, a, b, c, d, y) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), d(N), y(N) - RETURN - END diff --git a/benchmarks/loop23f90.f90 b/benchmarks/loop23f90.f90 deleted file mode 100644 index b0969fe4..00000000 --- a/benchmarks/loop23f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE loop23_F90(N, x, a, b, c, d, y) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), d(N), y(N) - - x = a*b + c*d; y = b+d - RETURN - END - - - SUBROUTINE loop23_F90Overhead(N, x, a, b, c, d, y) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), d(N), y(N) - - RETURN - END diff --git a/benchmarks/loop24.cpp b/benchmarks/loop24.cpp deleted file mode 100644 index ea64f9af..00000000 --- a/benchmarks/loop24.cpp +++ /dev/null @@ -1,526 +0,0 @@ - -// loop24 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - 
-using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define loop24_f77 loop24_f77_ - #define loop24_f77overhead loop24_f77overhead_ - #define loop24_f90 loop24_f90_ - #define loop24_f90overhead loop24_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define loop24_f77 loop24_f77__ - #define loop24_f77overhead loop24_f77overhead__ - #define loop24_f90 loop24_f90__ - #define loop24_f90overhead loop24_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define loop24_f77 LOOP24_F77 - #define loop24_f77overhead LOOP24_F77OVERHEAD - #define loop24_f90 LOOP24_F90 - #define loop24_f90overhead LOOP24_F90OVERHEAD -#endif - -extern "C" { - void loop24_f77(const int& N, double* x, double* a, double* b, double* c, double* d, double* y); - void loop24_f77overhead(const int& N, double* x, double* a, double* b, double* c, double* d, double* y); - void loop24_f90(const int& N, double* x, double* a, double* b, double* c, double* d, double* y); - void loop24_f90overhead(const int& N, double* x, double* a, double* b, double* c, double* d, double* y); - -} - -void VectorVersion(BenchmarkExt& bench); -void ArrayVersion(BenchmarkExt& bench); -void ArrayVersion_unaligned(BenchmarkExt& bench); -void ArrayVersion_misaligned(BenchmarkExt& bench); -void ArrayVersion_index(BenchmarkExt& bench); -void doTinyVectorVersion(BenchmarkExt& bench); -void F77Version(BenchmarkExt& bench); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("loop24: $x = $a*$c - $b*$c; $y = $a*$d + $b+$c", numBenchmarks); - - 
bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 6 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - - - ArrayVersion(bench); - ArrayVersion_unaligned(bench); - ArrayVersion_misaligned(bench); - ArrayVersion_index(bench); - //doTinyVectorVersion(bench); - F77Version(bench); -#ifdef FORTRAN_90 - F90Version(bench); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench); -#endif - - if(runvector) - VectorVersion(bench); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("loop24.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - Vector c(N); - initializeRandomDouble(c.data(), N); - Vector d(N); - initializeRandomDouble(d.data(), N); - Vector y(N); - initializeRandomDouble(y.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*c - b*c; y = a*d + b+c; - sink(); - } - bench.stop(); - - 
bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*c - b*c; y = a*d + b+c; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a(tensor::i)*c(tensor::i) - b(tensor::i)*c(tensor::i); y = a(tensor::i)*d(tensor::i) + b(tensor::i)+c(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - 
bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+1); - Array c(cfill(Range(1,N))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+1); - Array d(dfill(Range(1,N))); - initializeRandomDouble(d.dataFirst(), N); - - Array yfill(N+1); - Array y(yfill(Range(1,N))); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*c - b*c; y = a*d + b+c; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+6); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+6); - Array a(afill(Range(1,N+1-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+6); - Array b(bfill(Range(2,N+2-1))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+6); - Array c(cfill(Range(3,N+3-1))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+6); - Array d(dfill(Range(4,N+4-1))); - initializeRandomDouble(d.dataFirst(), N); - - Array yfill(N+6); - Array 
y(yfill(Range(5,N+5-1))); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*c - b*c; y = a*d + b+c; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - valarray c(N); - initializeRandomDouble(c, N); - valarray d(N); - initializeRandomDouble(d, N); - valarray y(N); - initializeRandomDouble(y, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*c - b*c; y = a*d + b+c; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - double* c = new double[N]; - initializeRandomDouble(c, N); - double* d = new double[N]; - initializeRandomDouble(d, N); - double* y = new double[N]; - initializeRandomDouble(y, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop24_f77(N, x, a, b, c, d, y); - bench.stop(); - - bench.startOverhead(); - for (int 
iter=0; iter < iters; ++iter) - loop24_f77overhead(N, x, a, b, c, d, y); - - bench.stopOverhead(); - - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - delete [] y; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - double* c = new double[N]; - initializeRandomDouble(c, N); - double* d = new double[N]; - initializeRandomDouble(d, N); - double* y = new double[N]; - initializeRandomDouble(y, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop24_f90(N, x, a, b, c, d, y); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop24_f90overhead(N, x, a, b, c, d, y); - - bench.stopOverhead(); - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - delete [] y; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/loop24f.f b/benchmarks/loop24f.f deleted file mode 100644 index bd66aaca..00000000 --- a/benchmarks/loop24f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE loop24_F77(N, x, a, b, c, d, y) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), d(N), y(N) - - DO i=1,N - x(i) = a(i)*c(i) - b(i)*c(i); y(i) = a(i)*d(i) + b(i)+c(i); - END DO - RETURN - END - - - SUBROUTINE loop24_F77Overhead(N, x, a, b, c, d, y) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), d(N), y(N) - RETURN - END diff --git a/benchmarks/loop24f90.f90 b/benchmarks/loop24f90.f90 deleted file mode 100644 index 436950a1..00000000 --- a/benchmarks/loop24f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE loop24_F90(N, x, a, b, 
c, d, y) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), d(N), y(N) - - x = a*c - b*c; y = a*d + b+c - RETURN - END - - - SUBROUTINE loop24_F90Overhead(N, x, a, b, c, d, y) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), d(N), y(N) - - RETURN - END diff --git a/benchmarks/loop25.cpp b/benchmarks/loop25.cpp deleted file mode 100644 index e1ec38f2..00000000 --- a/benchmarks/loop25.cpp +++ /dev/null @@ -1,507 +0,0 @@ - -// loop25 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define loop25_f77 loop25_f77_ - #define loop25_f77overhead loop25_f77overhead_ - #define loop25_f90 loop25_f90_ - #define loop25_f90overhead loop25_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define loop25_f77 loop25_f77__ - #define loop25_f77overhead loop25_f77overhead__ - #define loop25_f90 loop25_f90__ - #define loop25_f90overhead loop25_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define loop25_f77 LOOP25_F77 - #define loop25_f77overhead LOOP25_F77OVERHEAD - #define loop25_f90 LOOP25_F90 - #define loop25_f90overhead LOOP25_F90OVERHEAD -#endif - -extern "C" { - void loop25_f77(const int& N, double* x, double* a, double* b, double* c, double* y, const double& u, const double& v, const double& w); - void loop25_f77overhead(const int& N, double* x, double* a, double* b, double* c, double* y, const double& u, const double& v, const double& w); - void loop25_f90(const int& N, double* x, double* a, double* b, double* c, double* y, const double& u, const double& v, const double& w); - void loop25_f90overhead(const int& N, double* x, double* a, double* b, double* c, double* y, const double& u, const double& v, const double& w); - -} - -void 
VectorVersion(BenchmarkExt& bench, double u, double v, double w); -void ArrayVersion(BenchmarkExt& bench, double u, double v, double w); -void ArrayVersion_unaligned(BenchmarkExt& bench, double u, double v, double w); -void ArrayVersion_misaligned(BenchmarkExt& bench, double u, double v, double w); -void ArrayVersion_index(BenchmarkExt& bench, double u, double v, double w); -void doTinyVectorVersion(BenchmarkExt& bench, double u, double v, double w); -void F77Version(BenchmarkExt& bench, double u, double v, double w); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, double u, double v, double w); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, double u, double v, double w); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("loop25: $x = u*$b; $y = v*$b + w*$a + u*$c", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 6 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - double u = 0.39123982498157938742; - double v = 0.39123982498157938742; - double w = 0.39123982498157938742; - - - ArrayVersion(bench, u, v, w); - ArrayVersion_unaligned(bench, u, v, w); - ArrayVersion_misaligned(bench, u, v, w); - ArrayVersion_index(bench, u, v, w); - //doTinyVectorVersion(bench, u, v, w); - F77Version(bench, u, v, w); -#ifdef FORTRAN_90 - F90Version(bench, u, v, w); -#endif -#ifdef BENCHMARK_VALARRAY - 
ValarrayVersion(bench, u, v, w); -#endif - - if(runvector) - VectorVersion(bench, u, v, w); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("loop25.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench, double u, double v, double w) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - Vector c(N); - initializeRandomDouble(c.data(), N); - Vector y(N); - initializeRandomDouble(y.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u*b; y = v*b + w*a + u*c; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench, double u, double v, double w) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array y(N); - 
initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u*b; y = v*b + w*a + u*c; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench, double u, double v, double w) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u*b(tensor::i); y = v*b(tensor::i) + w*a(tensor::i) + u*c(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench, double u, double v, double w) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+1); - Array c(cfill(Range(1,N))); - initializeRandomDouble(c.dataFirst(), N); - - Array yfill(N+1); - Array y(yfill(Range(1,N))); - 
initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u*b; y = v*b + w*a + u*c; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench, double u, double v, double w) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+5); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+5); - Array a(afill(Range(1,N+1-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+5); - Array b(bfill(Range(2,N+2-1))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+5); - Array c(cfill(Range(3,N+3-1))); - initializeRandomDouble(c.dataFirst(), N); - - Array yfill(N+5); - Array y(yfill(Range(4,N+4-1))); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u*b; y = v*b + w*a + u*c; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, double u, double v, double w) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - valarray c(N); - initializeRandomDouble(c, N); - valarray y(N); - initializeRandomDouble(y, N); - - - bench.start(); - for 
(long i=0; i < iters; ++i) - { - x = u*b; y = v*b + w*a + u*c; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench, double u, double v, double w) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - double* c = new double[N]; - initializeRandomDouble(c, N); - double* y = new double[N]; - initializeRandomDouble(y, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop25_f77(N, x, a, b, c, y, u, v, w); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop25_f77overhead(N, x, a, b, c, y, u, v, w); - - bench.stopOverhead(); - - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] y; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, double u, double v, double w) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - double* c = new double[N]; - initializeRandomDouble(c, N); - double* y = new double[N]; - initializeRandomDouble(y, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop25_f90(N, x, a, b, c, y, u, v, w); - bench.stop(); - - 
bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop25_f90overhead(N, x, a, b, c, y, u, v, w); - - bench.stopOverhead(); - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] y; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/loop25f.f b/benchmarks/loop25f.f deleted file mode 100644 index fac6bac0..00000000 --- a/benchmarks/loop25f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE loop25_F77(N, x, a, b, c, y, u, v, w) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), y(N), u, v, w - - DO i=1,N - x(i) = u*b(i); y(i) = v*b(i) + w*a(i) + u*c(i); - END DO - RETURN - END - - - SUBROUTINE loop25_F77Overhead(N, x, a, b, c, y, u, v, w) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), y(N), u, v, w - RETURN - END diff --git a/benchmarks/loop25f90.f90 b/benchmarks/loop25f90.f90 deleted file mode 100644 index 8835ceed..00000000 --- a/benchmarks/loop25f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE loop25_F90(N, x, a, b, c, y, u, v, w) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), y(N), u, v, w - - x = u*b; y = v*b + w*a + u*c - RETURN - END - - - SUBROUTINE loop25_F90Overhead(N, x, a, b, c, y, u, v, w) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), y(N), u, v, w - - RETURN - END diff --git a/benchmarks/loop2f.f b/benchmarks/loop2f.f deleted file mode 100644 index 36d25931..00000000 --- a/benchmarks/loop2f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE loop2_F77(N, x, y, u) - INTEGER i, N - REAL*8 x(N), y(N), u - - DO i=1,N - x(i) = y(i)/u; - END DO - RETURN - END - - - SUBROUTINE loop2_F77Overhead(N, x, y, u) - INTEGER i, N - REAL*8 x(N), y(N), u - RETURN - END diff --git a/benchmarks/loop2f90.f90 b/benchmarks/loop2f90.f90 deleted file mode 100644 index d82513d3..00000000 --- a/benchmarks/loop2f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE loop2_F90(N, x, y, u) - INTEGER i, N - REAL*8 x(N), y(N), u - - x = y/u - RETURN - END - - - SUBROUTINE loop2_F90Overhead(N, x, y, u) - INTEGER i, N - REAL*8 x(N), 
y(N), u - - RETURN - END diff --git a/benchmarks/loop3.cpp b/benchmarks/loop3.cpp deleted file mode 100644 index ef5bc3a3..00000000 --- a/benchmarks/loop3.cpp +++ /dev/null @@ -1,439 +0,0 @@ - -// loop3 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define loop3_f77 loop3_f77_ - #define loop3_f77overhead loop3_f77overhead_ - #define loop3_f90 loop3_f90_ - #define loop3_f90overhead loop3_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define loop3_f77 loop3_f77__ - #define loop3_f77overhead loop3_f77overhead__ - #define loop3_f90 loop3_f90__ - #define loop3_f90overhead loop3_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define loop3_f77 LOOP3_F77 - #define loop3_f77overhead LOOP3_F77OVERHEAD - #define loop3_f90 LOOP3_F90 - #define loop3_f90overhead LOOP3_F90OVERHEAD -#endif - -extern "C" { - void loop3_f77(const int& N, double* x, double* y, const double& a); - void loop3_f77overhead(const int& N, double* x, double* y, const double& a); - void loop3_f90(const int& N, double* x, double* y, const double& a); - void loop3_f90overhead(const int& N, double* x, double* y, const double& a); - -} - -void VectorVersion(BenchmarkExt& bench, double a); -void ArrayVersion(BenchmarkExt& bench, double a); -void ArrayVersion_unaligned(BenchmarkExt& bench, double a); -void ArrayVersion_misaligned(BenchmarkExt& bench, double a); -void ArrayVersion_index(BenchmarkExt& bench, double a); -void doTinyVectorVersion(BenchmarkExt& bench, double a); -void F77Version(BenchmarkExt& bench, double a); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, double a); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& 
bench, double a); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("loop3: $y = $y + a*$x", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 2 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - double a = 0.39123982498157938742; - - - ArrayVersion(bench, a); - ArrayVersion_unaligned(bench, a); - ArrayVersion_misaligned(bench, a); - ArrayVersion_index(bench, a); - //doTinyVectorVersion(bench, a); - F77Version(bench, a); -#ifdef FORTRAN_90 - F90Version(bench, a); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench, a); -#endif - - if(runvector) - VectorVersion(bench, a); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("loop3.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench, double a) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector 
x(N); - initializeRandomDouble(x.data(), N); - Vector y(N); - initializeRandomDouble(y.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - y = y + a*x; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench, double a) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - y = y + a*x; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench, double a) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - y = y(tensor::i) + a*x(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench, double a) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array 
xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array yfill(N+1); - Array y(yfill(Range(1,N))); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - y = y + a*x; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench, double a) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+2); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array yfill(N+2); - Array y(yfill(Range(1,N+1-1))); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - y = y + a*x; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, double a) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray y(N); - initializeRandomDouble(y, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - y = y + a*x; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench, double a) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = 
bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* y = new double[N]; - initializeRandomDouble(y, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop3_f77(N, x, y, a); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop3_f77overhead(N, x, y, a); - - bench.stopOverhead(); - - delete [] x; - delete [] y; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, double a) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* y = new double[N]; - initializeRandomDouble(y, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop3_f90(N, x, y, a); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop3_f90overhead(N, x, y, a); - - bench.stopOverhead(); - delete [] x; - delete [] y; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/loop36.cpp b/benchmarks/loop36.cpp deleted file mode 100644 index 1bf13c21..00000000 --- a/benchmarks/loop36.cpp +++ /dev/null @@ -1,438 +0,0 @@ - -// loop36 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define loop36_f77 loop36_f77_ - #define loop36_f77overhead loop36_f77overhead_ - #define loop36_f90 loop36_f90_ - #define loop36_f90overhead loop36_f90overhead_ 
-#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define loop36_f77 loop36_f77__ - #define loop36_f77overhead loop36_f77overhead__ - #define loop36_f90 loop36_f90__ - #define loop36_f90overhead loop36_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define loop36_f77 LOOP36_F77 - #define loop36_f77overhead LOOP36_F77OVERHEAD - #define loop36_f90 LOOP36_F90 - #define loop36_f90overhead LOOP36_F90OVERHEAD -#endif - -extern "C" { - void loop36_f77(const int& N, double* x, double* e); - void loop36_f77overhead(const int& N, double* x, double* e); - void loop36_f90(const int& N, double* x, double* e); - void loop36_f90overhead(const int& N, double* x, double* e); - -} - -void VectorVersion(BenchmarkExt& bench); -void ArrayVersion(BenchmarkExt& bench); -void ArrayVersion_unaligned(BenchmarkExt& bench); -void ArrayVersion_misaligned(BenchmarkExt& bench); -void ArrayVersion_index(BenchmarkExt& bench); -void doTinyVectorVersion(BenchmarkExt& bench); -void F77Version(BenchmarkExt& bench); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("loop36: $x = exp($e)", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 1 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - - - 
ArrayVersion(bench); - ArrayVersion_unaligned(bench); - ArrayVersion_misaligned(bench); - ArrayVersion_index(bench); - //doTinyVectorVersion(bench); - F77Version(bench); -#ifdef FORTRAN_90 - F90Version(bench); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench); -#endif - - if(runvector) - VectorVersion(bench); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("loop36.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector e(N); - initializeRandomDouble(e.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = exp(e); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array e(N); - initializeRandomDouble(e.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = exp(e); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - 
- bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array e(N); - initializeRandomDouble(e.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = exp(e(tensor::i));; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array efill(N+1); - Array e(efill(Range(1,N))); - initializeRandomDouble(e.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = exp(e); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+2); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array efill(N+2); - Array e(efill(Range(1,N+1-1))); - initializeRandomDouble(e.dataFirst(), N); - - - bench.start(); - for (long i=0; i < 
iters; ++i) - { - x = exp(e); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray e(N); - initializeRandomDouble(e, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = exp(e); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* e = new double[N]; - initializeRandomDouble(e, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop36_f77(N, x, e); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop36_f77overhead(N, x, e); - - bench.stopOverhead(); - - delete [] x; - delete [] e; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* e = new double[N]; - initializeRandomDouble(e, N); - - - bench.start(); - for 
(int iter=0; iter < iters; ++iter) - loop36_f90(N, x, e); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop36_f90overhead(N, x, e); - - bench.stopOverhead(); - delete [] x; - delete [] e; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/loop36f.f b/benchmarks/loop36f.f deleted file mode 100644 index 225e734c..00000000 --- a/benchmarks/loop36f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE loop36_F77(N, x, e) - INTEGER i, N - REAL*8 x(N), e(N) - - DO i=1,N - x(i) = exp(e(i)); - END DO - RETURN - END - - - SUBROUTINE loop36_F77Overhead(N, x, e) - INTEGER i, N - REAL*8 x(N), e(N) - RETURN - END diff --git a/benchmarks/loop36f90.f90 b/benchmarks/loop36f90.f90 deleted file mode 100644 index af9c4b10..00000000 --- a/benchmarks/loop36f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE loop36_F90(N, x, e) - INTEGER i, N - REAL*8 x(N), e(N) - - x = exp(e) - RETURN - END - - - SUBROUTINE loop36_F90Overhead(N, x, e) - INTEGER i, N - REAL*8 x(N), e(N) - - RETURN - END diff --git a/benchmarks/loop3f.f b/benchmarks/loop3f.f deleted file mode 100644 index ca99521c..00000000 --- a/benchmarks/loop3f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE loop3_F77(N, x, y, a) - INTEGER i, N - REAL*8 x(N), y(N), a - - DO i=1,N - y(i) = y(i) + a*x(i); - END DO - RETURN - END - - - SUBROUTINE loop3_F77Overhead(N, x, y, a) - INTEGER i, N - REAL*8 x(N), y(N), a - RETURN - END diff --git a/benchmarks/loop3f90.f90 b/benchmarks/loop3f90.f90 deleted file mode 100644 index 0a676018..00000000 --- a/benchmarks/loop3f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE loop3_F90(N, x, y, a) - INTEGER i, N - REAL*8 x(N), y(N), a - - y = y + a*x - RETURN - END - - - SUBROUTINE loop3_F90Overhead(N, x, y, a) - INTEGER i, N - REAL*8 x(N), y(N), a - - RETURN - END diff --git a/benchmarks/loop4.cpp b/benchmarks/loop4.cpp deleted file mode 100644 index eac7b78d..00000000 --- a/benchmarks/loop4.cpp +++ /dev/null @@ -1,240 +0,0 @@ -// In KAI 
C++ 3.2, restrict causes problems for copy propagation. -// Temporary fix: disable restrict - -#define BZ_DISABLE_RESTRICT - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -using namespace blitz; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define loop4_f77 loop4_f77_ - #define loop4_f90 loop4_f90_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define loop4_f77 loop4_f77__ - #define loop4_f90 loop4_f90__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define loop4_f77 LOOP4_F77 - #define loop4_f90 LOOP4_F90 -#endif - -extern "C" { - void loop4_f77(const int& niters, const int& N, double* x, double* a, double* b); - - void loop4_f90(const int& niters, const int& N, double* x, double* a, double* b); - -} - -void VectorVersion(BenchmarkExt& bench); -void ArrayVersion(BenchmarkExt& bench); -void F77Version(BenchmarkExt& bench); -void F90Version(BenchmarkExt& bench); - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench); -#endif - -int main() -{ -#ifdef BENCHMARK_VALARRAY - int numBenchmarks = 5; -#else - int numBenchmarks = 4; -#endif - - BenchmarkExt bench("$x=$a+$b", numBenchmarks); - - const int numSizes = 19; - bench.setNumParameters(numSizes); - bench.setRateDescription("Mflops/s"); - - Vector parameters(numSizes); - Vector iters(numSizes); - Vector flops(numSizes); - - for (int i=0; i < numSizes; ++i) - { - parameters[i] = pow(10.0, (i+1)/4.0); - iters[i] = 50000000L / parameters[i]; - if (iters[i] < 2) - iters[i] = 2; - flops[i] = 1 * parameters[i]; - } - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - - bench.beginBenchmarking(); - - - VectorVersion(bench); - ArrayVersion(bench); - F77Version(bench); -#ifdef FORTRAN_90 - F90Version(bench); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench); -#endif - - bench.endBenchmarking(); - - 
bench.saveMatlabGraph("loop4.m"); - - return 0; -} - -void initializeRandomDouble(double* data, int numElements, int stride = 1) -{ - static Random rnd; - - for (int i=0; i < numElements; ++i) - data[i*stride] = rnd.random(); -} - -template -void initializeArray(T& array, int numElements) -{ - static Random rnd; - - for (size_t i=0; i < numElements; ++i) - array[i] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << "Vector: N = " << N << endl; - cout.flush(); - - long iters = bench.getIterations(); - - Vector x(N); - initializeArray(x); - Vector a(N); - initializeArray(a); - Vector b(N); - initializeArray(b); - - bench.start(); - for (long i=0; i < iters; ++i) - { - x=a+b; - } - bench.stop(); - } - - bench.endImplementation(); -} - -void ArrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << "Array: N = " << N << endl; - cout.flush(); - - long iters = bench.getIterations(); - - Array x(N); - initializeArray(x); - Array a(N); - initializeArray(a); - Array b(N); - initializeArray(b); - - bench.start(); - for (long i=0; i < iters; ++i) - { - x=a+b; - } - bench.stop(); - } - - bench.endImplementation(); -} - -void F77Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - - cout << "Fortran 77: N = " << N << endl; - cout.flush(); - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - - bench.start(); - loop4_f77(iters, N, x, a, b); - bench.stop(); - - delete [] x; - delete [] a; - delete [] b; - } - - bench.endImplementation(); -} - 
-#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - - cout << "Fortran 90: N = " << N << endl; - cout.flush(); - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - - bench.start(); - loop4_f90(iters, N, x, a, b); - bench.stop(); - - delete [] x; - delete [] a; - delete [] b; - } - - bench.endImplementation(); -} -#endif diff --git a/benchmarks/loop4f.f b/benchmarks/loop4f.f deleted file mode 100644 index 234d46af..00000000 --- a/benchmarks/loop4f.f +++ /dev/null @@ -1,12 +0,0 @@ - SUBROUTINE loop4_F77(niters, N, x, a, b) - INTEGER i, iter, niters, N - REAL*8 x(N), a(N), b(N) - - DO iter=1,niters - DO i=1,N - x(i)=a(i)+b(i) - END DO - END DO - - RETURN - END diff --git a/benchmarks/loop4f90.f90 b/benchmarks/loop4f90.f90 deleted file mode 100644 index b611208e..00000000 --- a/benchmarks/loop4f90.f90 +++ /dev/null @@ -1,10 +0,0 @@ - SUBROUTINE loop4_F90(niters, N, x, a, b) - INTEGER i, iter, niters, N - REAL*8 x(N), a(N), b(N) - - DO iter=1,niters - x=a+b - END DO - - RETURN - END diff --git a/benchmarks/loop5.cpp b/benchmarks/loop5.cpp deleted file mode 100644 index 6574baad..00000000 --- a/benchmarks/loop5.cpp +++ /dev/null @@ -1,460 +0,0 @@ - -// loop5 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define loop5_f77 loop5_f77_ - #define loop5_f77overhead loop5_f77overhead_ - #define loop5_f90 loop5_f90_ - #define loop5_f90overhead loop5_f90overhead_ 
-#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define loop5_f77 loop5_f77__ - #define loop5_f77overhead loop5_f77overhead__ - #define loop5_f90 loop5_f90__ - #define loop5_f90overhead loop5_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define loop5_f77 LOOP5_F77 - #define loop5_f77overhead LOOP5_F77OVERHEAD - #define loop5_f90 LOOP5_F90 - #define loop5_f90overhead LOOP5_F90OVERHEAD -#endif - -extern "C" { - void loop5_f77(const int& N, double* x, double* a, double* b); - void loop5_f77overhead(const int& N, double* x, double* a, double* b); - void loop5_f90(const int& N, double* x, double* a, double* b); - void loop5_f90overhead(const int& N, double* x, double* a, double* b); - -} - -void VectorVersion(BenchmarkExt& bench); -void ArrayVersion(BenchmarkExt& bench); -void ArrayVersion_unaligned(BenchmarkExt& bench); -void ArrayVersion_misaligned(BenchmarkExt& bench); -void ArrayVersion_index(BenchmarkExt& bench); -void doTinyVectorVersion(BenchmarkExt& bench); -void F77Version(BenchmarkExt& bench); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("loop5: $x = $a+$b", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 1 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - - 
- ArrayVersion(bench); - ArrayVersion_unaligned(bench); - ArrayVersion_misaligned(bench); - ArrayVersion_index(bench); - //doTinyVectorVersion(bench); - F77Version(bench); -#ifdef FORTRAN_90 - F90Version(bench); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench); -#endif - - if(runvector) - VectorVersion(bench); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("loop5.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b; 
- sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a(tensor::i)+b(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << 
": N = " << N << endl; - - - Array xfill(N+3); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+3); - Array a(afill(Range(1,N+1-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+3); - Array b(bfill(Range(2,N+2-1))); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop5_f77(N, x, a, b); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop5_f77overhead(N, x, a, b); - - bench.stopOverhead(); - - delete [] x; - delete [] a; - delete [] 
b; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop5_f90(N, x, a, b); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop5_f90overhead(N, x, a, b); - - bench.stopOverhead(); - delete [] x; - delete [] a; - delete [] b; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/loop5f.f b/benchmarks/loop5f.f deleted file mode 100644 index 8e22b5b2..00000000 --- a/benchmarks/loop5f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE loop5_F77(N, x, a, b) - INTEGER i, N - REAL*8 x(N), a(N), b(N) - - DO i=1,N - x(i) = a(i)+b(i); - END DO - RETURN - END - - - SUBROUTINE loop5_F77Overhead(N, x, a, b) - INTEGER i, N - REAL*8 x(N), a(N), b(N) - RETURN - END diff --git a/benchmarks/loop5f90.f90 b/benchmarks/loop5f90.f90 deleted file mode 100644 index d770bac0..00000000 --- a/benchmarks/loop5f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE loop5_F90(N, x, a, b) - INTEGER i, N - REAL*8 x(N), a(N), b(N) - - x = a+b - RETURN - END - - - SUBROUTINE loop5_F90Overhead(N, x, a, b) - INTEGER i, N - REAL*8 x(N), a(N), b(N) - - RETURN - END diff --git a/benchmarks/loop6.cpp b/benchmarks/loop6.cpp deleted file mode 100644 index 956f3f55..00000000 --- a/benchmarks/loop6.cpp +++ /dev/null @@ -1,460 +0,0 @@ - -// loop6 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY 
-#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define loop6_f77 loop6_f77_ - #define loop6_f77overhead loop6_f77overhead_ - #define loop6_f90 loop6_f90_ - #define loop6_f90overhead loop6_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define loop6_f77 loop6_f77__ - #define loop6_f77overhead loop6_f77overhead__ - #define loop6_f90 loop6_f90__ - #define loop6_f90overhead loop6_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define loop6_f77 LOOP6_F77 - #define loop6_f77overhead LOOP6_F77OVERHEAD - #define loop6_f90 LOOP6_F90 - #define loop6_f90overhead LOOP6_F90OVERHEAD -#endif - -extern "C" { - void loop6_f77(const int& N, double* x, double* a, double* b); - void loop6_f77overhead(const int& N, double* x, double* a, double* b); - void loop6_f90(const int& N, double* x, double* a, double* b); - void loop6_f90overhead(const int& N, double* x, double* a, double* b); - -} - -void VectorVersion(BenchmarkExt& bench); -void ArrayVersion(BenchmarkExt& bench); -void ArrayVersion_unaligned(BenchmarkExt& bench); -void ArrayVersion_misaligned(BenchmarkExt& bench); -void ArrayVersion_index(BenchmarkExt& bench); -void doTinyVectorVersion(BenchmarkExt& bench); -void F77Version(BenchmarkExt& bench); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("loop6: $x = $a*$b", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - 
parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 1 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - - - ArrayVersion(bench); - ArrayVersion_unaligned(bench); - ArrayVersion_misaligned(bench); - ArrayVersion_index(bench); - //doTinyVectorVersion(bench); - F77Version(bench); -#ifdef FORTRAN_90 - F90Version(bench); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench); -#endif - - if(runvector) - VectorVersion(bench); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("loop6.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - 
long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a(tensor::i)*b(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - 
sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+3); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+3); - Array a(afill(Range(1,N+1-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+3); - Array b(bfill(Range(2,N+2-1))); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - 
initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop6_f77(N, x, a, b); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop6_f77overhead(N, x, a, b); - - bench.stopOverhead(); - - delete [] x; - delete [] a; - delete [] b; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop6_f90(N, x, a, b); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop6_f90overhead(N, x, a, b); - - bench.stopOverhead(); - delete [] x; - delete [] a; - delete [] b; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/loop6f.f b/benchmarks/loop6f.f deleted file mode 100644 index 306ac241..00000000 --- a/benchmarks/loop6f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE loop6_F77(N, x, a, b) - INTEGER i, N - REAL*8 x(N), a(N), b(N) - - DO i=1,N - x(i) = a(i)*b(i); - END DO - RETURN - END - - - SUBROUTINE loop6_F77Overhead(N, x, a, b) - INTEGER i, N - REAL*8 x(N), a(N), b(N) - RETURN - END diff --git a/benchmarks/loop6f90.f90 b/benchmarks/loop6f90.f90 deleted file mode 100644 index 0b0800cc..00000000 --- a/benchmarks/loop6f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE loop6_F90(N, x, a, b) - INTEGER i, N - REAL*8 x(N), a(N), b(N) - - x = a*b - RETURN - END - - - SUBROUTINE loop6_F90Overhead(N, x, a, b) - INTEGER i, N - REAL*8 x(N), a(N), b(N) - - RETURN - END 
diff --git a/benchmarks/loop8.cpp b/benchmarks/loop8.cpp deleted file mode 100644 index 3c735f9e..00000000 --- a/benchmarks/loop8.cpp +++ /dev/null @@ -1,439 +0,0 @@ - -// loop8 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define loop8_f77 loop8_f77_ - #define loop8_f77overhead loop8_f77overhead_ - #define loop8_f90 loop8_f90_ - #define loop8_f90overhead loop8_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define loop8_f77 loop8_f77__ - #define loop8_f77overhead loop8_f77overhead__ - #define loop8_f90 loop8_f90__ - #define loop8_f90overhead loop8_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define loop8_f77 LOOP8_F77 - #define loop8_f77overhead LOOP8_F77OVERHEAD - #define loop8_f90 LOOP8_F90 - #define loop8_f90overhead LOOP8_F90OVERHEAD -#endif - -extern "C" { - void loop8_f77(const int& N, double* a, double* x, const double& u); - void loop8_f77overhead(const int& N, double* a, double* x, const double& u); - void loop8_f90(const int& N, double* a, double* x, const double& u); - void loop8_f90overhead(const int& N, double* a, double* x, const double& u); - -} - -void VectorVersion(BenchmarkExt& bench, double u); -void ArrayVersion(BenchmarkExt& bench, double u); -void ArrayVersion_unaligned(BenchmarkExt& bench, double u); -void ArrayVersion_misaligned(BenchmarkExt& bench, double u); -void ArrayVersion_index(BenchmarkExt& bench, double u); -void doTinyVectorVersion(BenchmarkExt& bench, double u); -void F77Version(BenchmarkExt& bench, double u); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, double u); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, double u); -#endif - 
-const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("loop8: $x = u/$a", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 1 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - double u = 0.39123982498157938742; - - - ArrayVersion(bench, u); - ArrayVersion_unaligned(bench, u); - ArrayVersion_misaligned(bench, u); - ArrayVersion_index(bench, u); - //doTinyVectorVersion(bench, u); - F77Version(bench, u); -#ifdef FORTRAN_90 - F90Version(bench, u); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench, u); -#endif - - if(runvector) - VectorVersion(bench, u); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("loop8.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector a(N); - 
initializeRandomDouble(a.data(), N); - Vector x(N); - initializeRandomDouble(x.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u/a; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u/a; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u/a(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array afill(N+1); - Array 
a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u/a; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array afill(N+2); - Array a(afill(Range(0,N+0-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array xfill(N+2); - Array x(xfill(Range(1,N+1-1))); - initializeRandomDouble(x.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u/a; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray a(N); - initializeRandomDouble(a, N); - valarray x(N); - initializeRandomDouble(x, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u/a; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << 
bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* a = new double[N]; - initializeRandomDouble(a, N); - double* x = new double[N]; - initializeRandomDouble(x, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop8_f77(N, a, x, u); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop8_f77overhead(N, a, x, u); - - bench.stopOverhead(); - - delete [] a; - delete [] x; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* a = new double[N]; - initializeRandomDouble(a, N); - double* x = new double[N]; - initializeRandomDouble(x, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop8_f90(N, a, x, u); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop8_f90overhead(N, a, x, u); - - bench.stopOverhead(); - delete [] a; - delete [] x; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/loop8f.f b/benchmarks/loop8f.f deleted file mode 100644 index 2dca941a..00000000 --- a/benchmarks/loop8f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE loop8_F77(N, a, x, u) - INTEGER i, N - REAL*8 a(N), x(N), u - - DO i=1,N - x(i) = u/a(i); - END DO - RETURN - END - - - SUBROUTINE loop8_F77Overhead(N, a, x, u) - INTEGER i, N - REAL*8 a(N), x(N), u - RETURN - END diff --git a/benchmarks/loop8f90.f90 b/benchmarks/loop8f90.f90 deleted file mode 100644 index 1c28528d..00000000 --- a/benchmarks/loop8f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE loop8_F90(N, a, x, u) - INTEGER i, N - REAL*8 a(N), x(N), u - - x = u/a - RETURN - END - - - SUBROUTINE loop8_F90Overhead(N, a, x, u) - INTEGER i, N - REAL*8 
a(N), x(N), u - - RETURN - END diff --git a/benchmarks/loop9.cpp b/benchmarks/loop9.cpp deleted file mode 100644 index c1774656..00000000 --- a/benchmarks/loop9.cpp +++ /dev/null @@ -1,438 +0,0 @@ - -// loop9 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define loop9_f77 loop9_f77_ - #define loop9_f77overhead loop9_f77overhead_ - #define loop9_f90 loop9_f90_ - #define loop9_f90overhead loop9_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define loop9_f77 loop9_f77__ - #define loop9_f77overhead loop9_f77overhead__ - #define loop9_f90 loop9_f90__ - #define loop9_f90overhead loop9_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define loop9_f77 LOOP9_F77 - #define loop9_f77overhead LOOP9_F77OVERHEAD - #define loop9_f90 LOOP9_F90 - #define loop9_f90overhead LOOP9_F90OVERHEAD -#endif - -extern "C" { - void loop9_f77(const int& N, double* a, double* x); - void loop9_f77overhead(const int& N, double* a, double* x); - void loop9_f90(const int& N, double* a, double* x); - void loop9_f90overhead(const int& N, double* a, double* x); - -} - -void VectorVersion(BenchmarkExt& bench); -void ArrayVersion(BenchmarkExt& bench); -void ArrayVersion_unaligned(BenchmarkExt& bench); -void ArrayVersion_misaligned(BenchmarkExt& bench); -void ArrayVersion_index(BenchmarkExt& bench); -void doTinyVectorVersion(BenchmarkExt& bench); -void F77Version(BenchmarkExt& bench); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int 
numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("loop9: $x = $x+$a", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 1 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - - - ArrayVersion(bench); - ArrayVersion_unaligned(bench); - ArrayVersion_misaligned(bench); - ArrayVersion_index(bench); - //doTinyVectorVersion(bench); - F77Version(bench); -#ifdef FORTRAN_90 - F90Version(bench); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench); -#endif - - if(runvector) - VectorVersion(bench); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("loop9.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector x(N); - initializeRandomDouble(x.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = x+a; - sink(); - } - bench.stop(); - - bench.startOverhead(); - 
for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = x+a; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = x(tensor::i)+a(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = x+a; - sink(); - } - 
bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array afill(N+2); - Array a(afill(Range(0,N+0-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array xfill(N+2); - Array x(xfill(Range(1,N+1-1))); - initializeRandomDouble(x.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = x+a; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray a(N); - initializeRandomDouble(a, N); - valarray x(N); - initializeRandomDouble(x, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = x+a; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* a = new double[N]; - initializeRandomDouble(a, N); - double* x = new double[N]; - initializeRandomDouble(x, N); - - - bench.start(); - for (int iter=0; iter < iters; 
++iter) - loop9_f77(N, a, x); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop9_f77overhead(N, a, x); - - bench.stopOverhead(); - - delete [] a; - delete [] x; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* a = new double[N]; - initializeRandomDouble(a, N); - double* x = new double[N]; - initializeRandomDouble(x, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop9_f90(N, a, x); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop9_f90overhead(N, a, x); - - bench.stopOverhead(); - delete [] a; - delete [] x; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/loop9f.f b/benchmarks/loop9f.f deleted file mode 100644 index 9d03021a..00000000 --- a/benchmarks/loop9f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE loop9_F77(N, a, x) - INTEGER i, N - REAL*8 a(N), x(N) - - DO i=1,N - x(i) = x(i)+a(i); - END DO - RETURN - END - - - SUBROUTINE loop9_F77Overhead(N, a, x) - INTEGER i, N - REAL*8 a(N), x(N) - RETURN - END diff --git a/benchmarks/loop9f90.f90 b/benchmarks/loop9f90.f90 deleted file mode 100644 index 1c435b66..00000000 --- a/benchmarks/loop9f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE loop9_F90(N, a, x) - INTEGER i, N - REAL*8 a(N), x(N) - - x = x+a - RETURN - END - - - SUBROUTINE loop9_F90Overhead(N, a, x) - INTEGER i, N - REAL*8 a(N), x(N) - - RETURN - END diff --git a/benchmarks/loops.data b/benchmarks/loops.data deleted file mode 100644 index 5948e033..00000000 --- a/benchmarks/loops.data +++ /dev/null @@ -1,147 +0,0 @@ -begin loop1 -array x y -flops 1 -loop $x = sqrt($y) -end - -begin loop2 -array x y -scalar u -flops 1 -loop $x=$y/$u 
-end - -begin loop3 -array x y -scalar a -flops 2 -loop $y=$y+$a*$x -end - -begin loop5 -array x a b -flops 1 -loop $x=$a+$b -end - -begin loop6 -array x a b -flops 1 -loop $x=$a*$b -end - -begin loop8 -array a x -scalar u -flops 1 -loop $x=u/$a -end - -begin loop9 -array a x -flops 1 -loop $x=$x+$a -end - -begin loop10 -array x a b c -scalar u -flops 3 -loop $x=u+$a+$b+$c -end - -begin loop11 -array x a b c d -flops 3 -loop $x=$a+$b+$c+$d -end - -begin loop12 -array y a x b c d -scalar u -flops 4 -loop $y = u + $a; $x = $a + $b + $c + $d -end - -begin loop13 -array x a b c d y -scalar u -loop $x=$a+$b+$c+$d; $y=u+$d -flops 4 -end - -begin loop14 -array x a b y -loop $x=$a+$b; $y=$a-$b -flops 2 -end - -begin loop15 -array x c a b -flops 2 -loop $x=$c+$a*$b -flops 3 -end - -begin loop16 -array x a b c y -scalar u -loop $x=$a+$b+$c; $y=$x+$c+u -flops 4 -end - -begin loop17 -array x a b c d -flops 3 -loop $x=($a+$b)*($c+$d) -end - -begin loop18 -array x a b -scalar u v -flops 3 -loop $x = (u + $a) * (v + $b) -end - -begin loop19 -array x a b y -scalar u v -flops 2 -loop $x=u*$a; $y=v*$b -end - -begin loop21 -array x a b c d -flops 3 -loop $x = $a * $b + $c * $d -end - -begin loop22 -array x a b c d -flops 4 -loop $x = $x + $a * $b + $c * $d -end - -begin loop23 -array x a b c d y -flops 4 -loop $x=$a*$b+$c*$d; $y=$b+$d -end - -begin loop24 -array x a b c d y -flops 6 -loop $x=$a*$c-$b*$d; $y=$a*$d+$b*$c -end - -begin loop25 -array x a b c y -scalar u v w -loop $x=u*$b; $y=v*$b+w*$a+u*$c -end - -begin loop36 -array x e -flops 1 -loop $x = exp($e) -end diff --git a/benchmarks/loopstruct.cpp b/benchmarks/loopstruct.cpp deleted file mode 100644 index 705b1145..00000000 --- a/benchmarks/loopstruct.cpp +++ /dev/null @@ -1,172 +0,0 @@ -// CC -64 -LANG:std -LANG:restrict -Ofast -PHASE:clist -IPA=off -IPA:INLINE=off -// -// When compiled with the above options, these are the results on convex: -// func1: 34.6484 -// func2: 24.6603 -// func3: 17.2822 -// -// func1 is a simple 
C-style loop. -// func2 has the pointers stuck inside a struct. Prefetching no longer occurs. -// func3 has the pointers inside "iterator" structs, and a read is done -// using an inline operator(). This somehow results in the loop -// being unrolled only twice, instead of 4 times. - -#include -#include - -// Paste the Timer class in here so you don't have to have Blitz++ - -class Timer { - -public: - Timer() - { - state_ = uninitialized; - } - - void start() - { - state_ = running; - t1_ = systemTime(); - } - - void stop() - { - t2_ = systemTime(); - state_ = stopped; - } - -/* Compaq cxx compiler in ansi mode cannot print out long double type! */ -#if defined(__DECCXX) - double elapsedSeconds() -#else - long double elapsedSeconds() -#endif - { - return t2_ - t1_; - } - -private: - Timer(Timer&) { } - void operator=(Timer&) { } - - long double systemTime() - { - getrusage(RUSAGE_SELF, &resourceUsage_); - double seconds = resourceUsage_.ru_utime.tv_sec - + resourceUsage_.ru_stime.tv_sec; - double micros = resourceUsage_.ru_utime.tv_usec - + resourceUsage_.ru_stime.tv_usec; - return seconds + micros/1.0e6; - } - - enum { uninitialized, running, stopped } state_; - - struct rusage resourceUsage_; - long double t1_, t2_; -}; - - -// func1: Simple version - -void func1(double* restrict x, double* restrict y, double a, int N) -{ - for (int i=0; i < N; ++i) - y[i] += a*x[i]; -} - - -// func2: With pointers inside a struct - -struct A { - double* restrict x; - double* restrict y; - double a; - int N; -}; - -void func2(A& z) -{ - for (int i=0; i < z.N; ++i) - z.y[i] += z.a * z.x[i]; -} - - -// func3: with very simple "iterators" (the B struct). 
- -struct B { - int q; - double* restrict data; - double operator()(int i) - { return data[i]; } -}; - -struct C { - B x; - B y; - double a; - int N; -}; - -void func3(C& z) -{ - for (int i=0; i < z.N; ++i) - z.y.data[i] += z.a * z.x(i); -} - - -// Initialize array - -void init(double* x, int N) -{ - for (int i=0; i < N; ++i) - x[i] = 1.0; -} - -int main() -{ - Timer timer; - - int N = 1000000; - int iters = 20; - double Mflops = N * iters * 2 / 1000000.; - - double* x = new double[N]; - double* y = new double[N]; - double a = .14989182; - init(x,N); - init(y,N); - - timer.start(); - for (int i=0; i < iters; ++i) - func1(x,y,a,N); - timer.stop(); - - cout << "func1: " << Mflops/timer.elapsedSeconds() << endl; - - timer.start(); - A z; - z.x = x; - z.y = y; - z.a = a; - z.N = N; - for (int i=0; i < iters; ++i) - func2(z); - timer.stop(); - - cout << "func2: " << Mflops/timer.elapsedSeconds() << endl; - - timer.start(); - { - C z; - z.x.data = x; - z.y.data = y; - z.a = a; - z.N = N; - for (int i=0; i < iters; ++i) - func3(z); - } - timer.stop(); - cout << "func3: " << Mflops/timer.elapsedSeconds() << endl; -} - diff --git a/benchmarks/looptest.cpp b/benchmarks/looptest.cpp deleted file mode 100644 index fcd23ffe..00000000 --- a/benchmarks/looptest.cpp +++ /dev/null @@ -1,707 +0,0 @@ -#include - -using namespace blitz; - -void initialize(double& c, double& d, double* a, double* b, int& N); - -template -void sink(T&) -{ } - -void benchmarkLoops(int, long); - -int main() -{ - cout << "This program measures the performance of DAXPY operations" - << endl << "using various C loop structures." 
<< endl << endl; - - cout << endl << "In-cache:" << endl; - - benchmarkLoops(400,50000); - - cout << endl << "Out of cache:" << endl; - - benchmarkLoops(1000000,50); - - return 0; -} - -void benchmarkLoops(int N, long iterations) -{ - double* _bz_restrict a = new double[N]; - double* _bz_restrict b = new double[N]; - double c, d; - double t1, t2; - - initialize(c, d, a, b, N); - - double mflops = iterations * 4.0 * N / (1024.0 * 1024.0); - - Timer timer; - - cout << "Mflops/s Description" << endl; - - long iter; - int i; - - /*********************************************************************/ - - timer.start(); - for (iter=0; iter < iterations; ++iter) - { - for (i=0; i < N; ++i) - a[i] += c * b[i]; - - for (i=0; i < N; ++i) - a[i] += d * b[i]; - } - timer.stop(); - - cout << setw(7) << setprecision(5) << (mflops/timer.elapsedSeconds()) - << " for, indirection, unit stride" << endl; - - /*********************************************************************/ - - timer.start(); - for (iter=0; iter < iterations; ++iter) - { - for (i=0; i < N; ++i) - a[i] = a[i] + c * b[i]; - - for (i=0; i < N; ++i) - a[i] = a[i] + d * b[i]; - } - timer.stop(); - - cout << setw(7) << setprecision(5) << (mflops/timer.elapsedSeconds()) - << " for, indirection, unit stride, no +=" << endl; - - /*********************************************************************/ - - timer.start(); - for (iter=0; iter < iterations; ++iter) - { - for (i=N-1; i >= 0; --i) - a[i] += c * b[i]; - - for (i=N-1; i >= 0; --i) - a[i] += d * b[i]; - } - timer.stop(); - - cout << setw(7) << setprecision(5) << (mflops/timer.elapsedSeconds()) - << " for, indirection, unit stride, backwards loops" << endl; - - /*********************************************************************/ - - timer.start(); - for (iter=0; iter < iterations; ++iter) - { - double c2 = c; - - int n1 = N & 3; - for (i=0; i < n1; ++i) - a[i] += c2 * b[i]; - - for (; i < N; i += 4) - { - a[i] += c2 * b[i]; - a[i+1] += c2 * b[i+1]; - a[i+2] += c2 
* b[i+2]; - a[i+3] += c2 * b[i+3]; - } - - double d2 = d; - int n2 = N & 3; - for (i=0; i < n2; ++i) - a[i] += d2 * b[i]; - - for (; i < N; i += 4) - { - a[i] += d2 * b[i]; - a[i+1] += d2 * b[i+1]; - a[i+2] += d2 * b[i+2]; - a[i+3] += d2 * b[i+3]; - } - } - timer.stop(); - - cout << setw(7) << setprecision(5) << (mflops/timer.elapsedSeconds()) - << " for, unroll=4, unit stride, constants loaded into temps" - << endl; - - /*********************************************************************/ - - timer.start(); - for (iter=0; iter < iterations; ++iter) - { - double c2 = c; - - int n1 = N & 3; - for (i=0; i < n1; ++i) - a[i] += c2 * b[i]; - - for (; i < N; i += 4) - { - double t1 = c2 * b[i]; - double t2 = c2 * b[i+1]; - double t3 = c2 * b[i+2]; - double t4 = c2 * b[i+3]; - - a[i] += t1; - a[i+1] += t2; - a[i+2] += t3; - a[i+3] += t4; - } - - double d2 = d; - int n2 = N & 3; - for (i=0; i < n2; ++i) - a[i] += d2 * b[i]; - - for (; i < N; i += 4) - { - double t1 = d2 * b[i]; - double t2 = d2 * b[i+1]; - double t3 = d2 * b[i+2]; - double t4 = d2 * b[i+3]; - - a[i] += t1; - a[i+1] += t2; - a[i+2] += t3; - a[i+3] += t4; - } - } - timer.stop(); - - cout << setw(7) << setprecision(5) << (mflops/timer.elapsedSeconds()) - << " for, unroll=4, unit stride, constants loaded into temps," - << endl << "\t\t4 read then 4 write" - << endl; - - /*********************************************************************/ - - timer.start(); - for (iter=0; iter < iterations; ++iter) - { - double c2 = c; - - int n1 = N & 3; - for (i=0; i < n1; ++i) - a[i] += c2 * b[i]; - - for (; i < N; i += 4) - { - a[i] = a[i] + c2 * b[i]; - a[i+1] = a[i+1] + c2 * b[i+1]; - a[i+2] = a[i+2] + c2 * b[i+2]; - a[i+3] = a[i+3] + c2 * b[i+3]; - } - - double d2 = d; - int n2 = N & 3; - for (i=0; i < n2; ++i) - a[i] += d2 * b[i]; - - for (; i < N; i += 4) - { - a[i] = a[i] + d2 * b[i]; - a[i+1] = a[i+1] + d2 * b[i+1]; - a[i+2] = a[i+2] + d2 * b[i+2]; - a[i+3] = a[i+3] + d2 * b[i+3]; - } - } - timer.stop(); - - 
cout << setw(7) << setprecision(5) << (mflops/timer.elapsedSeconds()) - << " for, unroll=4, unit stride, constants loaded into temps," - << endl << " no += " - << endl; - - /*********************************************************************/ - - timer.start(); - for (iter=0; iter < iterations; ++iter) - { - double c2 = c; - - int n1 = N & 3; - for (i=0; i < n1; ++i) - a[i] += c2 * b[i]; - - for (; i < N; i += 4) - { - int i1 = i + 1; - a[i] += c2 * b[i]; - int i2 = i + 2; - a[i1] += c2 * b[i1]; - int i3 = i + 3; - a[i2] += c2 * b[i2]; - a[i3] += c2 * b[i3]; - } - - double d2 = d; - int n2 = N & 3; - for (i=0; i < n2; ++i) - a[i] += d2 * b[i]; - - for (; i < N; i += 4) - { - int i1 = i + 1; - a[i] += d2 * b[i]; - int i2 = i + 2; - a[i1] += d2 * b[i1]; - int i3 = i + 3; - a[i2] += d2 * b[i2]; - a[i3] += d2 * b[i3]; - } - } - timer.stop(); - - cout << setw(7) << setprecision(5) << (mflops/timer.elapsedSeconds()) - << " for, unroll=4, unit stride, constants loaded into temps," - << endl << " CSE for index offsets" - << endl; - - /*********************************************************************/ - - timer.start(); - for (iter=0; iter < iterations; ++iter) - { - double c2 = c; - - int n1 = N & 3; - for (i=0; i < n1; ++i) - a[i] += c2 * b[i]; - - double* pa = a+n1; - double* pb = b+n1; - - int top = N - n1 - 4; - - for (i=top; i >= 0; i -= 4) - { - pa[i] += c2 * pb[i]; - pa[i+1] += c2 * pb[i+1]; - pa[i+2] += c2 * pb[i+2]; - pa[i+3] += c2 * pb[i+3]; - } - - double d2 = d; - int n2 = N & 3; - for (i=0; i < n2; ++i) - a[i] += d2 * b[i]; - - pa = a+n2; - pb = b+n2; - - top = N - n2 - 4; - for (i=top; i >= 0; i -= 4) - { - pa[i] += d2 * pb[i]; - pa[i+1] += d2 * pb[i+1]; - pa[i+2] += d2 * pb[i+2]; - pa[i+3] += d2 * pb[i+3]; - } - } - timer.stop(); - - cout << setw(7) << setprecision(5) << (mflops/timer.elapsedSeconds()) - << " for, unroll=4, unit stride, constants loaded into temps," - << " backwards" - << endl; - - 
/*********************************************************************/ - - timer.start(); - for (iter=0; iter < iterations; ++iter) - { - double c2 = c; - - int n1 = N & 7; - for (i=0; i < n1; ++i) - a[i] += c2 * b[i]; - - for (; i < N; i += 8) - { - a[i] += c2 * b[i]; - a[i+1] += c2 * b[i+1]; - a[i+2] += c2 * b[i+2]; - a[i+3] += c2 * b[i+3]; - a[i+4] += c2 * b[i+4]; - a[i+5] += c2 * b[i+5]; - a[i+6] += c2 * b[i+6]; - a[i+7] += c2 * b[i+7]; - } - - double d2 = d; - int n2 = N & 7; - for (i=0; i < n2; ++i) - a[i] += d2 * b[i]; - - for (; i < N; i += 8) - { - a[i] += d2 * b[i]; - a[i+1] += d2 * b[i+1]; - a[i+2] += d2 * b[i+2]; - a[i+3] += d2 * b[i+3]; - a[i+4] += d2 * b[i+4]; - a[i+5] += d2 * b[i+5]; - a[i+6] += d2 * b[i+6]; - a[i+7] += d2 * b[i+7]; - } - } - timer.stop(); - - cout << setw(7) << setprecision(5) << (mflops/timer.elapsedSeconds()) - << " for, unroll=8, unit stride, constants loaded into temps" - << endl; - - /*********************************************************************/ - - timer.start(); - for (iter=0; iter < iterations; ++iter) - { - double c2 = c; - for (i=0; i < N; ++i) - a[i] += c2 * b[i]; - - double d2 = d; - for (i=0; i < N; ++i) - a[i] += d2 * b[i]; - } - timer.stop(); - - cout << setw(7) << setprecision(5) << (mflops/timer.elapsedSeconds()) - << " for, indirection, unit stride, constants into temps" - << endl; - - /*********************************************************************/ - - timer.start(); - for (iter=0; iter < iterations; ++iter) - { - int stride = 1; - sink(stride); // Prevent copy propagation - - for (i=0; i < N; i += stride) - a[i] += c * b[i]; - - for (i=0; i < N; i += stride) - a[i] += d * b[i]; - } - timer.stop(); - - cout << setw(7) << setprecision(5) << (mflops/timer.elapsedSeconds()) - << " for, indirection, non-unit stride" << endl; - - /*********************************************************************/ - - timer.start(); - for (iter=0; iter < iterations; ++iter) - { - int stride = 1; - sink(stride); // 
Prevent copy propagation - - double c2 = c; - for (i=0; i < N; i += stride) - a[i] += c2 * b[i]; - - double d2 = d; - for (i=0; i < N; i += stride) - a[i] += d2 * b[i]; - } - timer.stop(); - - cout << setw(7) << setprecision(5) << (mflops/timer.elapsedSeconds()) - << " for, indirection, non-unit stride, constants " - "loaded into temps" << endl; - - /*********************************************************************/ - - timer.start(); - for (iter=0; iter < iterations; ++iter) - { - double * _bz_restrict pa1 = a, - * _bz_restrict pb1 = b; - double * _bz_restrict paend1 = a + N; - while (pa1 != paend1) - { - *pa1 += c * (*pb1); - ++pa1; - ++pb1; - } - - double * _bz_restrict pa2 = a, - * _bz_restrict pb2 = b; - double * _bz_restrict paend2 = a + N; - while (pa2 != paend2) - { - *pa2 += d * (*pb2); - ++pa2; - ++pb2; - } - } - timer.stop(); - - cout << setw(7) << setprecision(5) << (mflops/timer.elapsedSeconds()) - << " while, pointer increment, unit stride" << endl; - - /*********************************************************************/ - - timer.start(); - for (iter=0; iter < iterations; ++iter) - { - double * _bz_restrict pa1 = a, - * _bz_restrict pb1 = b; - double * _bz_restrict paend1 = a + N; - double c2 = c; - while (pa1 != paend1) - { - *pa1 += c2 * (*pb1); - ++pa1; - ++pb1; - } - - double * _bz_restrict pa2 = a, - * _bz_restrict pb2 = b; - double * _bz_restrict paend2 = a + N; - double d2 = d; - while (pa2 != paend2) - { - *pa2 += d2 * (*pb2); - ++pa2; - ++pb2; - } - } - timer.stop(); - - cout << setw(7) << setprecision(5) << (mflops/timer.elapsedSeconds()) - << " while, pointer increment, unit stride, " << endl - << " constants loaded into temps" - << endl; - - /*********************************************************************/ - - timer.start(); - for (iter=0; iter < iterations; ++iter) - { - int stride = 1; - sink(stride); - - double * _bz_restrict pa1 = a, - * _bz_restrict pb1 = b; - double * _bz_restrict paend1 = a + N * stride; - while (pa1 
!= paend1) - { - *pa1 += c * (*pb1); - pa1 += stride; - pb1 += stride; - } - - double * _bz_restrict pa2 = a, - * _bz_restrict pb2 = b; - double * _bz_restrict paend2 = a + N * stride; - while (pa2 != paend2) - { - *pa2 += d * (*pb2); - pa2 += stride; - pb2 += stride; - } - } - timer.stop(); - - cout << setw(7) << setprecision(5) << (mflops/timer.elapsedSeconds()) - << " while, pointer increment, non-unit stride" << endl; - - /*********************************************************************/ - - timer.start(); - for (iter=0; iter < iterations; ++iter) - { - int stride = 1; - sink(stride); - - double * _bz_restrict pa1 = a, - * _bz_restrict pb1 = b; - double * _bz_restrict paend1 = a + N * stride; - double c2 = c; - int n1 = N & 3; - - for (i=0; i < n1; ++i) - { - *pa1 += c2 * (*pb1); - pa1 += stride; - pb1 += stride; - } - - while (pa1 != paend1) - { - pa1[0] += c2 * pb1[0]; - pa1[1] += c2 * pb1[1]; - pa1[2] += c2 * pb1[2]; - pa1[3] += c2 * pb1[3]; - pa1 += 4 * stride; - pb1 += 4 * stride; - } - - double * _bz_restrict pa2 = a, - * _bz_restrict pb2 = b; - double * _bz_restrict paend2 = a + N * stride; - double d2 = d; - int n2 = N & 3; - - for (i=0; i < n2; ++i) - { - *pa2 += d2 * (*pb2); - pa2 += stride; - pb2 += stride; - } - - while (pa2 != paend2) - { - pa2[0] += d2 * pb2[0]; - pa2[1] += d2 * pb2[1]; - pa2[2] += d2 * pb2[2]; - pa2[3] += d2 * pb2[3]; - pa2 += 4 * stride; - pb2 += 4 * stride; - } - } - timer.stop(); - - cout << setw(7) << setprecision(5) << (mflops/timer.elapsedSeconds()) - << " while, pointer increment, unroll=4, non-unit stride," << endl - << " constants loaded into temps" << endl; - - /*********************************************************************/ - - timer.start(); - for (iter=0; iter < iterations; ++iter) - { - double c2 = c; - - int n1 = N & 3; - for (i=0; i < n1; ++i) - a[i] += c2 * b[i]; - - for (; i < N; i += 4) - { - t1 = a[i+4]; - a[i] += c2 * b[i]; - a[i+1] += c2 * b[i+1]; - t2 = b[i+4]; - a[i+2] += c2 * b[i+2]; - a[i+3] 
+= c2 * b[i+3]; - } - - double d2 = d; - int n2 = N & 3; - for (i=0; i < n2; ++i) - a[i] += d2 * b[i]; - - for (; i < N; i += 4) - { - t1 = a[i+4]; - a[i] += d2 * b[i]; - a[i+1] += d2 * b[i+1]; - t2 = b[i+4]; - a[i+2] += d2 * b[i+2]; - a[i+3] += d2 * b[i+3]; - } - } - timer.stop(); - - - sink(t1); - sink(t2); - - cout << setw(7) << setprecision(5) << (mflops/timer.elapsedSeconds()) - << " for, unroll=4, unit stride, constants loaded into temps," - << " prefetching" - << endl; - - /********************************************************************/ - - struct vectorPair { - double a; - double b; - }; - vectorPair* v = new vectorPair[N]; - int N2 = 2*N; - initialize(c, d, (double*)v, (double*)v, N2); - - timer.start(); - for (iter=0; iter < iterations; ++iter) - { - for (i=0; i < N; ++i) - v[i].a += c * v[i].b; - - for (i=0; i < N; ++i) - v[i].a += d * v[i].b; - } - timer.stop(); - - cout << setw(7) << setprecision(5) << (mflops/timer.elapsedSeconds()) - << " interlaced, for, indirection, unit stride" << endl; - - /*********************************************************************/ - - initialize(c, d, (double*)v, (double*)v, N2); - - timer.start(); - for (iter=0; iter < iterations; ++iter) - { - double c2 = c; - - int n1 = N & 3; - for (i=0; i < n1; ++i) - v[i].a += c2 * v[i].b; - - for (; i < N; i += 4) - { - v[i].a += c2 * v[i].b; - v[i+1].a += c2 * v[i+1].b; - v[i+2].a += c2 * v[i+2].b; - v[i+3].a += c2 * v[i+3].b; - } - - double d2 = d; - int n2 = N & 3; - for (i=0; i < n2; ++i) - v[i].a += d2 * v[i].b; - - for (; i < N; i += 4) - { - v[i].a += d2 * v[i].b; - v[i+1].a += d2 * v[i+1].b; - v[i+2].a += d2 * v[i+2].b; - v[i+3].a += d2 * v[i+3].b; - } - } - timer.stop(); - - cout << setw(7) << setprecision(5) << (mflops/timer.elapsedSeconds()) - << " for, unroll=4, unit stride, interlaced, " << endl - << "\t\tconstants loaded into temps" - << endl; - - delete [] v; - - /********************************************************************/ - - delete [] a; - 
delete [] b; -} - -void initialize(double& c, double& d, double* a, double* b, int& N) -{ - for (int i=0; i < N; ++i) - { - a[i] = 1/7.; - b[i] = 1/3.; - } - c = 0.398192839842; - d = - c; -} - diff --git a/benchmarks/makelogo.cpp b/benchmarks/makelogo.cpp deleted file mode 100644 index 73f9f87c..00000000 --- a/benchmarks/makelogo.cpp +++ /dev/null @@ -1,114 +0,0 @@ -#include -#include - -using namespace blitz; - -void makeLogo(); - -int main() -{ - makeLogo(); - return 0; -} - -void setInitialConditions(Array& c, Array& P1, - Array& P2, Array& P3, int N, int M); - -void snapshot(const Array& P, const Array& c); - -void makeLogo() -{ - const int N = 300, M = 900; - int niters = 3000; - - Array P1, P2, P3, c; - allocateArrays(shape(N,M), P1, P2, P3, c); - Range I(1,N-2), J(1,M-2); - - setInitialConditions(c, P1, P2, P3, N, M); - - for (int iter=0; iter < niters; ++iter) - { - P3(I,J) = (2-4*c(I,J)) * P2(I,J) - + c(I,J)*(P2(I-1,J) + P2(I+1,J) + P2(I,J-1) + P2(I,J+1)) - - P1(I,J); - - cycleArrays(P1,P2,P3); - - snapshot(P2, c); - } - -} - -void setInitialConditions(Array& c, Array& P1, - Array& P2, Array& P3, int N, int M) -{ - // Set the velocity field - c = 0.3; - - ifstream ifs("blitz3.pgm"); - char tmpBuf[128]; - int pixel; - ifs.getline(tmpBuf, 128); - ifs.getline(tmpBuf, 128); - ifs.getline(tmpBuf, 128); - - for (int pi=0; pi < 199; ++pi) - { - for (int pj=0; pj < 798; ++pj) - { - ifs >> pixel; - if (pixel) - c(pi+50,pj+56) = 0.02; - } - } - - // Initial pressure distribution: gaussian pulse - using namespace blitz::tensor; - int cr = N/6-1; -// int cc = 7.0*M/8.0-1; - float s2 = 64.0 * 9.0 / pow2(N/2.0); - P1 = 0.0; -// P2 = exp(-(pow2(i-cr)+pow2(j-cc)) * s2); - P2 = exp(-(pow2(i-cr)) * s2); - - P3 = 0.0; -} - - -void snapshot(const Array& P, const Array& c) -{ - static int count = 0, snapshotNum = 0; - if (++count < 50) - return; - - count = 0; - ++snapshotNum; - char filename[128]; - sprintf(filename, "logo%03d.m", snapshotNum); - - ofstream ofs(filename); - 
int N = P.length(firstDim); - int M = P.length(secondDim); - - float Pmin = -0.6; - float PScale = 1.0/1.2; - float VScale = 1.0; - - ofs << "P" << snapshotNum << " = [ "; - for (int i=0; i < N; ++i) - { - for (int j=0; j < M; ++j) - { - float value1 = (P(i,j)-Pmin)*PScale; - float value2 = c(i,j)*VScale; - int r1 = value1 * 4096; - int r2 = value2 * 4096; - ofs << r1 << " " << r2 << " "; - } - if (i < N-1) - ofs << ";" << endl; - } - ofs << "];" << endl; -} - diff --git a/benchmarks/makeloops.cpp b/benchmarks/makeloops.cpp deleted file mode 100644 index cab22342..00000000 --- a/benchmarks/makeloops.cpp +++ /dev/null @@ -1,791 +0,0 @@ - - -#include -#include -#include -#include -#include -#include -#include -#include - -class loop -{ - -public: - loop() - { - reset(); - } - - void reset(); - - void parseLoop(istream& is); - - int numArrays() const - { - return numArrays_; - } - - char arrayName(int i) const - { - return arrays_[i]; - } - - int numScalars() const - { - return numScalars_; - } - - char scalarName(int i) const - { - return scalars_[i]; - } - - const char* loopName() const - { - return loopName_; - } - - const char* loopBuffer() const - { - return loopBuffer_; - } - - int flops() const - { - return flops_; - } - - int isArray(char c) const - { - for (int i=0; i < numArrays_; ++i) - if (arrays_[i] == c) - return 1; - return 0; - } - - int isScalar(char c) const - { - for (int i=0; i < numScalars_; ++i) - if (scalars_[i] == c) - return 1; - return 0; - } - -private: - enum { maxArrays = 20, maxScalars = 20, bufLen = 128 }; - - char arrays_[maxArrays]; - int numArrays_; - char scalars_[maxScalars]; - int numScalars_; - char loopBuffer_[bufLen]; - char loopName_[bufLen]; - int flops_; -}; - -void loop::reset() -{ - numArrays_ = 0; - numScalars_ = 0; - loopBuffer_[0] = '\0'; - loopName_[0] = '\0'; - flops_ = 0; -} - -void loop::parseLoop(istream& is) -{ - const int bufLen = 128; - char buffer[bufLen]; - - const char* whitespace = " \t"; - - reset(); - - 
while (!is.eof()) { - is.getline(buffer, bufLen); - char* token = strtok(buffer, whitespace); - if (!token) - continue; - - if (!strcmp(token, "begin")) { - token = strtok(0, whitespace); - strcpy(loopName_, token); - cout << "Creating loop: " << loopName_ << endl; - } else if (!strcmp(token, "end")) - return; - else if (!strcmp(token, "array")) { - while (token = strtok(0, whitespace)) { - arrays_[numArrays_++] = token[0]; - cout << "Array: " << token[0] << endl; - } - } else if (!strcmp(token, "scalar")) { - while (token = strtok(0, whitespace)) { - scalars_[numScalars_++] = token[0]; - cout << "Scalar: " << token[0] << endl; - } - } else if (!strcmp(token, "flops")) { - token = strtok(0, whitespace); - flops_ = atoi(token); - cout << "Flops: " << flops_ << endl; - } else if (!strcmp(token, "loop")) { - loopBuffer_[0] = '\0'; - while (token = strtok(0, whitespace)) - strcat(loopBuffer_, token); - cout << "Loop: " << loopBuffer_ << endl; - } - } -} - -void fortranVersion(loop& lp) -{ - const char* numtype = "REAL*8"; - - char filename[128]; - sprintf(filename, "%sf.f", lp.loopName()); - ofstream ofs(filename); - - ofs << " SUBROUTINE " << lp.loopName() << "_F77(N"; - for (int i=0; i < lp.numArrays(); ++i) - ofs << ", " << lp.arrayName(i); - for (int i=0; i < lp.numScalars(); ++i) - ofs << ", " << lp.scalarName(i); - ofs << ")" << endl - << " INTEGER i, N" << endl - << " " << numtype << " " << lp.arrayName(0) << "(N)"; - - for (int i=1; i < lp.numArrays(); ++i) - ofs << ", " << lp.arrayName(i) << "(N)"; - for (int i=0; i < lp.numScalars(); ++i) - ofs << ", " << lp.scalarName(i); - - ofs << endl << endl - << " DO i=1,N" << endl - << " "; - - const char* loopBuffer = lp.loopBuffer(); - - for (int i=0; loopBuffer[i]; ++i) { - if (loopBuffer[i] == ';') - ofs << endl << " "; - else if (loopBuffer[i] != '$') - ofs << loopBuffer[i]; - else { - ++i; - if (lp.isArray(loopBuffer[i])) - ofs << loopBuffer[i] << "(i)"; - else if (lp.isScalar(loopBuffer[i])) - ofs << 
loopBuffer[i]; - } - } - - ofs << endl - << " END DO" << endl; - - ofs << " RETURN" << endl - << " END" << endl; - - ofs << endl << endl - << " SUBROUTINE " << lp.loopName() << "_F77Overhead(N"; - - for (int i=0; i < lp.numArrays(); ++i) - ofs << ", " << lp.arrayName(i); - for (int i=0; i < lp.numScalars(); ++i) - ofs << ", " << lp.scalarName(i); - ofs << ")" << endl - << " INTEGER i, N" << endl - << " " << numtype << " " << lp.arrayName(0) << "(N)"; - - for (int i=1; i < lp.numArrays(); ++i) - ofs << ", " << lp.arrayName(i) << "(N)"; - for (int i=0; i < lp.numScalars(); ++i) - ofs << ", " << lp.scalarName(i); - - ofs << endl - << " RETURN" << endl - << " END" << endl; -} - -void fortran90Version(loop& lp) -{ - const char* numtype = "REAL*8"; - - char filename[128]; - sprintf(filename, "%sf90.f90", lp.loopName()); - ofstream ofs(filename); - - ofs << " SUBROUTINE " << lp.loopName() << "_F90(N"; - for (int i=0; i < lp.numArrays(); ++i) - ofs << ", " << lp.arrayName(i); - for (int i=0; i < lp.numScalars(); ++i) - ofs << ", " << lp.scalarName(i); - ofs << ")" << endl - << " INTEGER i, N" << endl - << " " << numtype << " " << lp.arrayName(0) << "(N)"; - - for (int i=1; i < lp.numArrays(); ++i) - ofs << ", " << lp.arrayName(i) << "(N)"; - for (int i=0; i < lp.numScalars(); ++i) - ofs << ", " << lp.scalarName(i); - - ofs << endl << endl - << " "; - - const char* loopBuffer = lp.loopBuffer(); - - for (int i=0; loopBuffer[i]; ++i) { - if (loopBuffer[i] == ';') - ofs << endl << " "; - else if (loopBuffer[i] != '$') - ofs << loopBuffer[i]; - } - - ofs << endl - << " RETURN" << endl - << " END" << endl; - - ofs << endl << endl - << " SUBROUTINE " << lp.loopName() << "_F90Overhead(N"; - for (int i=0; i < lp.numArrays(); ++i) - ofs << ", " << lp.arrayName(i); - for (int i=0; i < lp.numScalars(); ++i) - ofs << ", " << lp.scalarName(i); - ofs << ")" << endl - << " INTEGER i, N" << endl - << " " << numtype << " " << lp.arrayName(0) << "(N)"; - - for (int i=1; i < lp.numArrays(); 
++i) - ofs << ", " << lp.arrayName(i) << "(N)"; - for (int i=0; i < lp.numScalars(); ++i) - ofs << ", " << lp.scalarName(i); - - ofs << endl << endl - << " RETURN" << endl - << " END" << endl; -} - -void writeFortranDecl(ofstream& ofs, const char* version, loop& lp, - const char* numtype); -void VectorVersion(ostream& os, loop& lp, const char* numtype, - const char* scalarArgs, const char* scalarArgs2); -void ArrayVersion(ostream& os, loop& lp, const char* numtype, - const char* scalarArgs, const char* scalarArgs2); -void ValarrayVersion(ostream& os, loop& lp, const char* numtype, - const char* scalarArgs, const char* scalarArgs2); -void F77Version(ostream& os, loop& lp, const char* numtype, - const char* scalarArgs, const char* scalarArgs2); -void F90Version(ostream& os, loop& lp, const char* numtype, - const char* scalarArgs, const char* scalarArgs2); - -void cppVersion(loop& lp) -{ - const char* numtype = "double"; - - char filename[128]; - sprintf(filename, "%s.cpp", lp.loopName()); - ofstream ofs(filename); - - char capsLoopName[128]; - for (int i=0; i <= strlen(lp.loopName()); ++i) - capsLoopName[i] = toupper(lp.loopName()[i]); - - ofs << "// Generated code (makeloops.cpp) -- do not edit." << endl << endl - << "// In KAI C++ 3.2, restrict causes problems for copy propagation." 
- << endl << "// Temporary fix: disable restrict" << endl << endl - << "#define BZ_DISABLE_RESTRICT" << endl << endl - << - "#include \n" - "#include \n" - "#include \n" - "#include \n" - "\n" - "// Generated: " << __FILE__ << " " << __DATE__ << endl << endl << - "#ifdef BZ_HAVE_VALARRAY\n" - " #define BENCHMARK_VALARRAY\n" - "#endif\n\n" - "#ifdef BENCHMARK_VALARRAY\n" - "#include \n" - "#endif\n" - "\n" - "using namespace blitz;\n" - "\n" - "#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES)\n" - " #define " << lp.loopName() << "_f77 " << lp.loopName() << "_f77_\n" - " #define " << lp.loopName() << "_f77overhead " << lp.loopName() << "_f77overhead_\n" - - " #define " << lp.loopName() << "_f90 " << lp.loopName() << "_f90_\n" - " #define " << lp.loopName() << "_f90overhead " << lp.loopName() << "_f90overhead_\n" - - "#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES)\n" - " #define " << lp.loopName() << "_f77 " << lp.loopName() << "_f77__\n" - " #define " << lp.loopName() << "_f77overhead " << lp.loopName() << "_f77overhead__\n" - - " #define " << lp.loopName() << "_f90 " << lp.loopName() << "_f90__\n" - " #define " << lp.loopName() << "_f90overhead " << lp.loopName() << "_f90overhead__\n" - - "#elif defined(BZ_FORTRAN_SYMBOLS_CAPS)\n" - " #define " << lp.loopName() << "_f77 " << capsLoopName << "_F77\n" - " #define " << lp.loopName() << "_f77overhead " << capsLoopName << "_F77OVERHEAD\n" - " #define " << lp.loopName() << "_f90 " << capsLoopName << "_F90\n" - " #define " << lp.loopName() << "_f90overhead " << capsLoopName << "_F90OVERHEAD\n" - "#endif\n" - "\n" - "extern \"C\" {" << endl; - - writeFortranDecl(ofs, "_f77", lp, numtype); - writeFortranDecl(ofs, "_f77overhead", lp, numtype); - writeFortranDecl(ofs, "_f90", lp, numtype); - writeFortranDecl(ofs, "_f90overhead", lp, numtype); - - ofs << "}" << endl << endl; - - // Create a string with a list of arguments for the scalars - ostrstream tmpbuf; - for (int i=0; i < lp.numScalars(); ++i) 
{ - tmpbuf << ", " << numtype << " " << lp.scalarName(i); - } - tmpbuf << '\0'; - const char* scalarArgs = tmpbuf.str(); - - ofs << "void VectorVersion(BenchmarkExt& bench" - << scalarArgs << ");" << endl - << "void ArrayVersion(BenchmarkExt& bench" - << scalarArgs << ");" << endl - << "void F77Version(BenchmarkExt& bench" - << scalarArgs << ");" << endl - << "#ifdef FORTRAN_90" << endl - << "void F90Version(BenchmarkExt& bench" - << scalarArgs << ");" << endl - << "#endif" << endl - << "#ifdef BENCHMARK_VALARRAY" << endl - << "void ValarrayVersion(BenchmarkExt& bench" - << scalarArgs << ");" << endl << "#endif" << endl << endl; - - ofs << "void sink() {}\n\n"; - - ofs << "int main()\n" - "{\n" - " int numBenchmarks = 5;\n" - "#ifndef BENCHMARK_VALARRAY\n" - " numBenchmarks--; // No valarray\n" - "#endif\n" - "#ifndef FORTRAN_90\n" - " numBenchmarks--; // No fortran 90\n" - "#endif\n" - - "\n" - " BenchmarkExt bench(\"" << lp.loopName() << ": " - << lp.loopBuffer() << "\", numBenchmarks);\n" - "\n" - " const int numSizes = 23;\n" - " bench.setNumParameters(numSizes);\n" - " bench.setRateDescription(\"Mflops/s\");\n" - "\n" - " Vector parameters(numSizes);\n" - " Vector iters(numSizes);\n" - " Vector flops(numSizes);\n" - "\n" - " for (int i=0; i < numSizes; ++i)\n" - " {\n" - " parameters[i] = (int)pow(10.0, (i+1)/4.0);\n" - " iters[i] = 10000000L / parameters[i];\n" - " if (iters[i] < 2)\n" - " iters[i] = 2;\n" - " flops[i] = " << lp.flops() << " * parameters[i];\n" - " }\n" - "\n" - " bench.setParameterVector(parameters);\n" - " bench.setIterations(iters);\n" - " bench.setOpsPerIteration(flops);\n" - "\n" - " bench.beginBenchmarking();" << endl << endl; - - // Create literals - for (int i=0; i < lp.numScalars(); ++i) { - ofs << " " << numtype << " " << lp.scalarName(i) - << " = 0.39123982498157938742;" << endl; - } - - ofs << endl; - - ofs.flush(); - - // Create a string with a list of arguments for the scalars - ostrstream tmpbuf2; - for (int i=0; i < 
lp.numScalars(); ++i) { - tmpbuf2 << ", " << lp.scalarName(i); - } - tmpbuf2 << '\0'; - char* scalarArgs2 = tmpbuf2.str(); - - ofs << " VectorVersion(bench" << scalarArgs2 << ");" << endl - << " ArrayVersion(bench" << scalarArgs2 << ");" << endl - << " F77Version(bench" << scalarArgs2 << ");" << endl - << "#ifdef FORTRAN_90" << endl - << " F90Version(bench" << scalarArgs2 << ");" << endl - << "#endif" << endl - << "#ifdef BENCHMARK_VALARRAY" << endl - << " ValarrayVersion(bench" << scalarArgs2 << ");" << endl - << "#endif" << endl << endl << - " bench.endBenchmarking();\n" - "\n" - " bench.saveMatlabGraph(\"" << lp.loopName() << ".m\");\n" - "\n" - " return 0;\n" - "}\n\n" - "template\n" - "void initializeRandomDouble(T data, int numElements, int stride = 1)\n" - "{\n" - " static Random rnd;\n" - "\n" - " for (int i=0; i < numElements; ++i)\n" - " data[size_t(i*stride)] = rnd.random();\n" - "}\n" - "\n" - "template\n" - "void initializeArray(T& array, int numElements)\n" - "{\n" - " static Random rnd;\n" - "\n" - " for (size_t i=0; i < numElements; ++i)\n" - " array[i] = rnd.random();\n" - "}\n\n"; - - VectorVersion(ofs, lp, numtype, scalarArgs, scalarArgs2); - ArrayVersion(ofs, lp, numtype, scalarArgs, scalarArgs2); - ValarrayVersion(ofs, lp, numtype, scalarArgs, scalarArgs2); - F77Version(ofs, lp, numtype, scalarArgs, scalarArgs2); - F90Version(ofs, lp, numtype, scalarArgs, scalarArgs2); -} - -void writeFortranDecl(ofstream& ofs, const char* version, loop& lp, - const char* numtype) -{ - ofs << " void " << lp.loopName() << version - << "(const int& N"; - - for (int i=0; i < lp.numArrays(); ++i) - ofs << ", " << numtype << "* " << lp.arrayName(i); - for (int i=0; i < lp.numScalars(); ++i) - ofs << ", const " << numtype << "& " << lp.scalarName(i); - ofs << ");" << endl << endl; -} - -void VectorVersion(ostream& os, loop& lp, const char* numtype, - const char* scalarArgs, const char* scalarArgs2) -{ - os << "void VectorVersion(BenchmarkExt& bench" - << scalarArgs 
<< ")\n" - << - "{\n" - " bench.beginImplementation(\"Vector\");\n" - "\n" - " while (!bench.doneImplementationBenchmark())\n" - " {\n" - " int N = bench.getParameter();\n" - " cout << \"Vector: N = \" << N << endl;\n" - " cout.flush();\n" - "\n" - " long iters = bench.getIterations();\n" - "\n"; - - for (int i=0; i < lp.numArrays(); ++i) { - os << " Vector<" << numtype << "> " << lp.arrayName(i) - << "(N);" << endl - << " initializeRandomDouble(" << lp.arrayName(i) << ".data(), N);" << endl; - } - - os << endl << - " bench.start();\n" - " for (long i=0; i < iters; ++i)\n" - " {\n" - " "; - - const char* loopBuffer = lp.loopBuffer(); - - for (int i=0; loopBuffer[i]; ++i) { - if (loopBuffer[i] != '$') - os << loopBuffer[i]; - } - - os << ";" << endl << - " sink();\n"; - - os << - " }\n" - " bench.stop();\n\n" - " bench.startOverhead();\n" - " for (long i=0; i < iters; ++i)\n" - " sink();\n" - " bench.stopOverhead();\n" - - " }\n" - "\n" - " bench.endImplementation();\n" - "}" << endl << endl; -} - -void ArrayVersion(ostream& os, loop& lp, const char* numtype, - const char* scalarArgs, const char* scalarArgs2) -{ - os << "void ArrayVersion(BenchmarkExt& bench" - << scalarArgs << ")\n" - << - "{\n" - " bench.beginImplementation(\"Array\");\n" - "\n" - " while (!bench.doneImplementationBenchmark())\n" - " {\n" - " int N = bench.getParameter();\n" - " cout << \"Array: N = \" << N << endl;\n" - " cout.flush();\n" - "\n" - " long iters = bench.getIterations();\n" - "\n"; - - for (int i=0; i < lp.numArrays(); ++i) { - os << " Array<" << numtype << ", 1> " << lp.arrayName(i) - << "(N);" << endl - << " initializeRandomDouble(" << lp.arrayName(i) << ".dataFirst(), N);" << endl; - } - - os << endl << - " bench.start();\n" - " for (long i=0; i < iters; ++i)\n" - " {\n" - " "; - - const char* loopBuffer = lp.loopBuffer(); - - for (int i=0; loopBuffer[i]; ++i) { - if (loopBuffer[i] != '$') - os << loopBuffer[i]; - } - - os << ";" << endl << - " sink();\n"; - - os << - " }\n" - " 
bench.stop();\n\n" - " bench.startOverhead();\n" - " for (long i=0; i < iters; ++i)\n" - " sink();\n" - " bench.stopOverhead();\n"; - os << - " }\n" - "\n" - " bench.endImplementation();\n" - "}" << endl << endl; -} - -void F77Version(ostream& os, loop& lp, const char* numtype, - const char* scalarArgs, const char* scalarArgs2) -{ - os << "void F77Version(BenchmarkExt& bench" - << scalarArgs << ")\n" - "{\n" - " bench.beginImplementation(\"Fortran 77\");\n" - "\n" - " while (!bench.doneImplementationBenchmark())\n" - " {\n" - " int N = bench.getParameter();\n\n" - " cout << \"Fortran 77: N = \" << N << endl;\n" - " cout.flush();\n\n" - " int iters = bench.getIterations();\n" - "\n"; - - for (int i=0; i < lp.numArrays(); ++i) { - os << " " << numtype << "* " << lp.arrayName(i) - << " = new " << numtype << "[N];" << endl - << " initializeRandomDouble(" << lp.arrayName(i) - << ", N);" << endl; - } - - os << endl << - " bench.start();\n" - " for (int iter=0; iter < iters; ++iter)\n" - " " << lp.loopName() << "_f77(N"; - - for (int i=0; i < lp.numArrays(); ++i) - os << ", " << lp.arrayName(i); - os << scalarArgs2 << ");\n" - " bench.stop();\n\n" - " bench.startOverhead();\n" - " for (int iter=0; iter < iters; ++iter)\n" - " " << lp.loopName() << "_f77overhead(N"; - for (int i=0; i < lp.numArrays(); ++i) - os << ", " << lp.arrayName(i); - os << scalarArgs2 << ");\n"; - - os << endl << - " bench.stopOverhead();\n"; - - for (int i=0; i < lp.numArrays(); ++i) { - os << " delete [] " << lp.arrayName(i) << ";" << endl; - } - - os << " }\n" - "\n" - " bench.endImplementation();\n" - "}\n" << endl; - -} - -void F90Version(ostream& os, loop& lp, const char* numtype, - const char* scalarArgs, const char* scalarArgs2) -{ - os << "#ifdef FORTRAN_90" << endl - << "void F90Version(BenchmarkExt& bench" - << scalarArgs << ")\n" - "{\n" - " bench.beginImplementation(\"Fortran 90\");\n" - "\n" - " while (!bench.doneImplementationBenchmark())\n" - " {\n" - " int N = 
bench.getParameter();\n\n" - " cout << \"Fortran 90: N = \" << N << endl;\n" - " cout.flush();\n\n" - " int iters = bench.getIterations();\n" - "\n"; - - for (int i=0; i < lp.numArrays(); ++i) { - os << " " << numtype << "* " << lp.arrayName(i) - << " = new " << numtype << "[N];" << endl - << " initializeRandomDouble(" << lp.arrayName(i) - << ", N);" << endl; - } - - os << endl << - " bench.start();\n" - " for (int iter=0; iter < iters; ++iter)\n" - " " << lp.loopName() << "_f90(N"; - - for (int i=0; i < lp.numArrays(); ++i) - os << ", " << lp.arrayName(i); - os << scalarArgs2 << ");\n" - " bench.stop();\n\n" - " bench.startOverhead();\n" - " for (int iter=0; iter < iters; ++iter)\n" - " " << lp.loopName() << "_f90overhead(N"; - for (int i=0; i < lp.numArrays(); ++i) - os << ", " << lp.arrayName(i); - os << scalarArgs2 << ");\n"; - - os << endl << - " bench.stopOverhead();\n"; - - for (int i=0; i < lp.numArrays(); ++i) { - os << " delete [] " << lp.arrayName(i) << ";" << endl; - } - - os << " }\n" - "\n" - " bench.endImplementation();\n" - "}\n" - << "#endif\n" << endl; - -} - -void ValarrayVersion(ostream& os, loop& lp, const char* numtype, - const char* scalarArgs, const char* scalarArgs2) -{ - os << "#ifdef BENCHMARK_VALARRAY" << endl; - os << "void ValarrayVersion(BenchmarkExt& bench" - << scalarArgs << ")\n" - << - "{\n" - " bench.beginImplementation(\"valarray\");\n" - "\n" - " while (!bench.doneImplementationBenchmark())\n" - " {\n" - " int N = bench.getParameter();\n" - " cout << \"valarray: N = \" << N << endl;\n" - " cout.flush();\n" - "\n" - " long iters = bench.getIterations();\n" - "\n"; - - for (int i=0; i < lp.numArrays(); ++i) { - os << " valarray<" << numtype << "> " << lp.arrayName(i) - << "(N);" << endl - << " initializeArray(" << lp.arrayName(i) << ", N);" << endl; - } - - os << endl << - " bench.start();\n" - " for (long i=0; i < iters; ++i)\n" - " {\n" - " "; - - const char* loopBuffer = lp.loopBuffer(); - - for (int i=0; loopBuffer[i]; ++i) { 
- if (loopBuffer[i] != '$') - os << loopBuffer[i]; - } - - os << ";" << endl << - " sink();\n"; - - os << - " }\n" - " bench.stop();\n\n" - " bench.startOverhead();\n" - " for (long i=0; i < iters; ++i)\n" - " sink();\n" - " bench.stopOverhead();\n" - - " }\n" - "\n" - " bench.endImplementation();\n" - "}" << endl << endl << "#endif" << endl; -} - -int main() -{ - ifstream ifs("loops.data"); - - //ofstream ofs("makefile.inc"); - - loop lp; - - while (!ifs.eof()) { - lp.parseLoop(ifs); - - if (ifs.eof()) - break; - - /* - ofs -#ifdef FORTRAN_90 - << lp.loopName() << "f90.o:\t" << lp.loopName() << "f90.f" - << endl << "\t$(F90) $(F90FLAGS) -c " << lp.loopName() << "f90.f" - << endl << endl -#endif - << lp.loopName() << ":\t" << lp.loopName() << ".o " - << lp.loopName() << "f.o " -#ifdef FORTRAN_90 - << lp.loopName() << "f90.o" -#endif - << endl - << "\t$(CXX) $(CXXFLAGS) $(LDFLAGS) -o " << lp.loopName() << " " - << lp.loopName() << ".o " << lp.loopName() << "f.o " -#ifdef FORTRAN_90 - << lp.loopName() << "f90.o " -#endif - << "$(LIBS)" << endl << endl; - */ - - fortranVersion(lp); -//#ifdef FORTRAN_90 - fortran90Version(lp); -//#endif - cppVersion(lp); - } - return 0; -} - - - diff --git a/benchmarks/plot_benchmarks.m.in b/benchmarks/plot_benchmarks.m.in deleted file mode 100644 index 99e2ef76..00000000 --- a/benchmarks/plot_benchmarks.m.in +++ /dev/null @@ -1,57 +0,0 @@ -function plot_benchmarks(hostname) -% function plot_benchmarks(hostname) - -% -% $Id$ -% -% Copyright (c) 2001 Patrick Guio -% -% All Rights Reserved. -% -% This program is free software; you can redistribute it and/or modify it -% under the terms of the GNU General Public License as published by the -% Free Software Foundation; either version 2. of the License, or (at your -% option) any later version. -% -% This program is distributed in the hope that it will be useful, but -% WITHOUT ANY WARRANTY; without even the implied warranty of -% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General -% Public License for more details. -% - -close all - -bench={'daxpy','haney','loop1','loop2','loop3','loop5','loop6','loop8',... - 'loop9','loop10','loop11','loop12','loop13','loop14','loop15','loop16',... - 'loop17','loop18','loop19','loop21','loop22','loop23','loop24','loop25',... - 'loop36','stencil'}; - -if nargin == 0, - str=sprintf('@PACKAGE@-@VERSION@ benchmark on a %s', computer); -else - str=sprintf('@PACKAGE@-@VERSION@ benchmark on %s (%s)', hostname, computer); -end -str=sprintf('%s\nCXX=@CXX@ CXXFLAGS=@CXXFLAGS@ @CXX_OPTIMIZE_FLAGS@', str); -str=sprintf('%s\nF77=@F77@ FFLAGS=@FFLAGS@ @F77_OPTIMIZE_FLAGS@', str); -if length('@FC@') - str=sprintf('%s\nFC=@FC@ FCFLAGS=@FCFLAGS@ @FC_OPTIMIZE_FLAGS@', str); -end -str=strrep(str,'_','\_'); -h=text(0.5,0.5,str); -set(h,'HorizontalAlignment','center') -set(h,'FontSize',18) -set(h,'FontWeight','demi') -set(gca,'visible','off') -orient landscape -print -dpsc benchmarks.ps - -for i=1:length(bench), - eval(bench{i}) - hs=get(gca,'children')'; - for h=hs, set(h,'linewidth',1.5) , end - legend - orient landscape - print -dpsc -append benchmarks.ps -end - - diff --git a/benchmarks/qcd.cpp b/benchmarks/qcd.cpp deleted file mode 100644 index 040beb9b..00000000 --- a/benchmarks/qcd.cpp +++ /dev/null @@ -1,244 +0,0 @@ -#include -#include -#include -#include - -#ifdef BZ_HAVE_COMPLEX - -using namespace blitz; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define qcdf qcdf_ - #define qcdf2 qcdf2_ -#elif defined( BZ_FORTRAN_SYMBOLS_CAPS) - #define qcdf QCDF - #define qcdf2 QCDF2 -#endif - -extern "C" { - void qcdf(const void* M, void* res, const void* src, const int& N, - const int& iters); - void qcdf2(const void* M, void* res, const void* src, const int& N, - const int& iters); -} - -int QCDBlitzVersion(BenchmarkExt& bench); -int QCDBlitzTunedVersion(BenchmarkExt& bench); -int QCDFortran77Version(BenchmarkExt& bench); -int QCDFortran77TunedVersion(BenchmarkExt& bench); - -void 
initializeRandomDouble(double* data, int numElements); - -int main() -{ - cout << "Blitz++ QCD Benchmark" << endl - << "Working... (this may take a while) "; - cout.flush(); - - BenchmarkExt bench("Lattice QCD Benchmark", 4); - - bench.setRateDescription("Millions of operations/s"); - bench.beginBenchmarking(); - - QCDBlitzVersion(bench); - QCDBlitzTunedVersion(bench); - QCDFortran77Version(bench); - QCDFortran77TunedVersion(bench); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("qcd.m"); - - cout << "Done." << endl; - - return 0; -} - -int QCDBlitzVersion(BenchmarkExt& bench) -{ - typedef TinyMatrix, 3, 2> spinor; - typedef TinyMatrix, 3, 3> SU3Gauge; - - bench.beginImplementation("Blitz++"); - - while (!bench.doneImplementationBenchmark()) - { - int length = bench.getParameter(); - int iters = (int)bench.getIterations(); - -cout << "length = " << length << endl; - - Vector res(length), src(length); - Vector M(length); - - initializeRandomDouble((double*)src.data(), - length * sizeof(spinor) / sizeof(double)); - initializeRandomDouble((double*)M.data(), - length * sizeof(SU3Gauge) / sizeof(double)); - - bench.start(); - for (long i=0; i < iters; ++i) - { - for (int i=0; i < length; ++i) - res[i] = product(M[i], src[i]); - } - bench.stop(); - - // Time overhead - bench.startOverhead(); - for (long i=0; i < iters; ++i) - { - } - bench.stopOverhead(); - } - - bench.endImplementation(); - - return 0; -} - - typedef TinyMatrix, 3, 2> spinor; - typedef TinyMatrix, 3, 3> gaugeFieldElement; - - struct latticeUnit { - spinor one; - gaugeFieldElement gauge; - spinor two; - }; - -int QCDBlitzTunedVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Blitz++ (tuned)"); - - while (!bench.doneImplementationBenchmark()) - { - int length = bench.getParameter(); - int iters = (int)bench.getIterations(); - - Vector lattice(length); - - initializeRandomDouble((double*)lattice.data(), - length * sizeof(latticeUnit) / sizeof(double)); - - bench.start(); - for (long i=0; 
i < iters; ++i) - { - for (int i=0; i < length; ++i) - lattice[i].two = product(lattice[i].gauge, lattice[i].one); - } - bench.stop(); - - // Time overhead - bench.startOverhead(); - for (long i=0; i < iters; ++i) - { - } - bench.stopOverhead(); - } - - bench.endImplementation(); - - return 0; -} - -int QCDFortran77Version(BenchmarkExt& bench) -{ - // Use Blitz++ library only to allocate space for the - // arrays. - typedef TinyMatrix, 3, 2> spinor; - typedef TinyMatrix, 3, 3> SU3Gauge; - - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int length = bench.getParameter(); - int iters = (int)bench.getIterations(); - - Vector res(length), src(length); - Vector M(length); - - initializeRandomDouble((double*)src.data(), - length * sizeof(spinor) / sizeof(double)); - initializeRandomDouble((double*)M.data(), - length * sizeof(SU3Gauge) / sizeof(double)); - - bench.start(); - qcdf(M.data(), res.data(), src.data(), length, iters); - bench.stop(); - - // Time overhead - bench.startOverhead(); - for (long i=0; i < iters; ++i) - { - } - bench.stopOverhead(); - } - - bench.endImplementation(); - - return 0; -} - -int QCDFortran77TunedVersion(BenchmarkExt& bench) -{ - // Use Blitz++ library only to allocate space for the - // arrays. 
- typedef TinyMatrix, 3, 2> spinor; - typedef TinyMatrix, 3, 3> SU3Gauge; - - bench.beginImplementation("Fortran 77 Hand-tuned"); - - while (!bench.doneImplementationBenchmark()) - { - int length = bench.getParameter(); - int iters = (int)bench.getIterations(); - - Vector res(length), src(length); - Vector M(length); - - initializeRandomDouble((double*)src.data(), - length * sizeof(spinor) / sizeof(double)); - initializeRandomDouble((double*)M.data(), - length * sizeof(SU3Gauge) / sizeof(double)); - - bench.start(); - qcdf2(M.data(), res.data(), src.data(), length, iters); - bench.stop(); - - // Time overhead - bench.startOverhead(); - for (long i=0; i < iters; ++i) - { - } - bench.stopOverhead(); - } - - bench.endImplementation(); - - return 0; -} - -void initializeRandomDouble(double* data, int numElements) -{ - // This is a temporary kludge until I implement random complex - // numbers. - - static Random rnd; - - for (int i=0; i < numElements; ++i) - data[i] = rnd.random(); -} - -#else // BZ_HAVE_COMPLEX - -#include - -int main() -{ - cout << "This benchmark requires from the ISO/ANSI C++ standard." 
- << endl; - return 0; -} - -#endif // BZ_HAVE_COMPLEX diff --git a/benchmarks/qcd.txt b/benchmarks/qcd.txt deleted file mode 100644 index c244da47..00000000 --- a/benchmarks/qcd.txt +++ /dev/null @@ -1,19 +0,0 @@ - -The most expensive routines in terms of CPU time were a family of routines -that multiplied a 2-spinors by SU(3) gauge elements: The core of these -routines is basically the same as the following code: - -COMPLEX M(V,3,3) res(V,3,2), src(V,3,2) - DO spin=1,2 - DO col=1,3 - DO site=1,V - res(site,col,spin)= - M(site,col,1) * src(site,1,spin) - + M(site,col,2) * src(site,2,spin) - + M(site,col,3) * src(site,3,spin) - END DO - END DO - END DO - -http://www.epcc.ed.ac.uk/t3d/documents/techreports/EPCC-TR96-03/EPCC-TR96-03.book_1.html - diff --git a/benchmarks/qcdf.f b/benchmarks/qcdf.f deleted file mode 100644 index f65f0c02..00000000 --- a/benchmarks/qcdf.f +++ /dev/null @@ -1,77 +0,0 @@ -C Initial Fortran 77 version of the Lattice QCD benchmark. - - subroutine qcdf(M, res, src, V, iters) - integer V, iters, i, site, spin, col - complex*16 M(3,3,V), res(3,2,V), src(3,2,V) - - DO i=1,iters - DO site=1,V - DO spin=1,2 - DO col=1,3 - res(col,spin,site) = M(col,1,site) * src(1,spin,site) - . + M(col,2,site) * src(2,spin,site) - . + M(col,3,site) * src(3,spin,site) - ENDDO - ENDDO - ENDDO - ENDDO - - return - end - -C Hand-tuned version -C Changes: -C o Ordering of array altered to improve layout of data in memory -C o col and spin loops unwound; it was found that unwinding the -C col loop inside the spin loop was marginally faster (by 1.1%) -C o Unwinding both loops was faster than unwinding just one. - - subroutine qcdf2(M, res, src, V, iters) - integer V, iters, i, site - complex*16 M(3,3,V), res(3,2,V), src(3,2,V) - - DO i=1,iters - DO site=1,V - -C col=1, spin=1 - - res(1,1,site) = M(1,1,site) * src(1,1,site) - . + M(1,2,site) * src(2,1,site) - . + M(1,3,site) * src(3,1,site) - -C col=2, spin=1 - - res(2,1,site) = M(2,1,site) * src(1,1,site) - . 
+ M(2,2,site) * src(2,1,site) - . + M(2,3,site) * src(3,1,site) - -C col=3, spin=1 - - res(3,1,site) = M(3,1,site) * src(1,1,site) - . + M(3,2,site) * src(2,1,site) - . + M(3,3,site) * src(3,1,site) - -C col=1, spin=2 - - res(1,2,site) = M(1,1,site) * src(1,2,site) - . + M(1,2,site) * src(2,2,site) - . + M(1,3,site) * src(3,2,site) - -C col=2, spin=2 - - res(2,2,site) = M(2,1,site) * src(1,2,site) - . + M(2,2,site) * src(2,2,site) - . + M(2,3,site) * src(3,2,site) - -C col=3, spin=2 - - res(3,2,site) = M(3,1,site) * src(1,2,site) - . + M(3,2,site) * src(2,2,site) - . + M(3,3,site) * src(3,2,site) - - ENDDO - ENDDO - - return - end - diff --git a/benchmarks/quinlan.cpp b/benchmarks/quinlan.cpp deleted file mode 100644 index 3d246a63..00000000 --- a/benchmarks/quinlan.cpp +++ /dev/null @@ -1,17 +0,0 @@ -#include - -using namespace blitz; - -template -void sink(T&) { } - -void foo() -{ - Vector A(100), B(100); - - A = B + B; - sink(A); - sink(B); -} - - diff --git a/benchmarks/stencil.cpp b/benchmarks/stencil.cpp deleted file mode 100644 index b923373f..00000000 --- a/benchmarks/stencil.cpp +++ /dev/null @@ -1,453 +0,0 @@ -// Array stencil benchmark - -#include -#include -#include -#include -#include - -using namespace blitz; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define stencilf stencilf_ - #define stencilftiled stencilftiled_ - #define stencilf90 stencilf90_ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define stencilf STENCILF - #define stencilftiled STENCILFTILED - #define stencilf90 STENCILF90 -#endif - -extern "C" { - void stencilf(double* A, double* B, int& N, int& iters); - void stencilftiled(double* A, double* B, int& N, int& iters); - void stencilf90(double* A, double* B, int& N, int& iters); -} - -#ifdef FORTRAN_90 -void stencilFortran90Version(BenchmarkExt& bench); -#endif -void stencilFortran77Version(BenchmarkExt& bench); -void stencilFortran77VersionTiled(BenchmarkExt& bench); -void stencilBlitzVersion(BenchmarkExt& bench); -void 
stencilBlitzExpressionVersion(BenchmarkExt& bench); -void stencilBlitzProductVersion(BenchmarkExt& bench); -void stencilBlitzProductVersion2(BenchmarkExt& bench); -void stencilBlitzProductVersion3(BenchmarkExt& bench); -void stencilBlitzStencilVersion(BenchmarkExt& bench); -void stencilBlitzIndexVersion(BenchmarkExt& bench); - -int main() -{ - int numBenchmarks = 10; -#ifndef FORTRAN_90 - numBenchmarks--; // No fortran 90 -#endif - - BenchmarkExt bench("Array stencil", numBenchmarks); - - const int numSizes = 28; - - bench.setNumParameters(numSizes); - bench.setRateDescription("Mflops/s"); - - Vector parameters(numSizes); - Vector iters(numSizes); - Vector flops(numSizes); - - for (int i=0; i < numSizes; ++i) - { - parameters[i] = (i+1) * 8; - iters[i] = 32*8*8*8/(i+1)/(i+1)/(i+1)/4; - if (iters[i] < 2) - iters[i] = 2; - int npoints = parameters[i] - 2; - flops[i] = npoints * npoints * npoints * 7 * 2; - } - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - - bench.beginBenchmarking(); -#ifdef FORTRAN_90 - stencilFortran90Version(bench); -#endif - stencilBlitzVersion(bench); - stencilBlitzStencilVersion(bench); - stencilBlitzExpressionVersion(bench); - stencilBlitzProductVersion(bench); - stencilBlitzProductVersion2(bench); - stencilBlitzProductVersion3(bench); - stencilBlitzIndexVersion(bench); - stencilFortran77Version(bench); - stencilFortran77VersionTiled(bench); - bench.endBenchmarking(); - - bench.saveMatlabGraph("stencil.m","plot"); - - return 0; -} - -void initializeRandomDouble(double* data, int numElements, int stride = 1) -{ - static Random rnd; - - for (int i=0; i < numElements; ++i) - data[i*stride] = rnd.random(); -} - -void stencilBlitzVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Blitz++ Range Expr"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - - cout << "Blitz++: N = " << N << endl; - cout.flush(); - - long iters = 
bench.getIterations(); - - Array A(N,N,N), B(N,N,N); - initializeRandomDouble(A.data(), N*N*N, A.stride(thirdDim)); - initializeRandomDouble(B.data(), N*N*N, B.stride(thirdDim)); - TinyVector size = N-2; - generateFastTraversalOrder(size); - double c = 1/7.; - - bench.start(); - for (long i=0; i < iters; ++i) - { - Range I(1,N-2), J(1,N-2), K(1,N-2); - - A(I,J,K) = c * (B(I,J,K) + B(I+1,J,K) + B(I-1,J,K) + B(I,J+1,K) - + B(I,J-1,K) + B(I,J,K+1) + B(I,J,K-1)); - - B(I,J,K) = c * (A(I,J,K) + A(I+1,J,K) + A(I-1,J,K) + A(I,J+1,K) - + A(I,J-1,K) + A(I,J,K+1) + A(I,J,K-1)); - } - bench.stop(); - } - - bench.endImplementation(); -} - -BZ_DECLARE_STENCIL_OPERATOR1(test1,B) -return (1./7) * ( (*B) + B.shift(1,0) + B.shift(-1,0) + B.shift(1,1) - + B.shift(-1,1) + B.shift(1,2) + B.shift(-1,2)); -BZ_END_STENCIL_OPERATOR - -BZ_ET_STENCIL(test1, double, double,shape(-1,-1,-1),shape(1,1,1)) - -BZ_DECLARE_STENCIL2(test1stencil,A,B) - A=test1(B); -BZ_END_STENCIL_WITH_SHAPE(shape(-1,-1,-1),shape(1,1,1)) - -void stencilBlitzExpressionVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Blitz++ StencilOp"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - - cout << "Blitz++ Stencil Operator: N = " << N << endl; - cout.flush(); - - long iters = bench.getIterations(); - - Array A(N,N,N), B(N,N,N); - initializeRandomDouble(A.data(), N*N*N, A.stride(thirdDim)); - initializeRandomDouble(B.data(), N*N*N, B.stride(thirdDim)); - TinyVector size = N-2; - generateFastTraversalOrder(size); - double c = 1/7.; - - bench.start(); - for (long i=0; i < iters; ++i) - { - Range I(1,N-2), J(1,N-2), K(1,N-2); - A(I,J,K) = test1(B); - - B(I,J,K) = test1(A); - } - bench.stop(); - } - - bench.endImplementation(); -} - -void stencilBlitzProductVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Blitz++ product"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - - cout << "Blitz++ Stencil Operator on product: N = " << 
N << endl; - cout.flush(); - - long iters = bench.getIterations(); - - Array A(N,N,N), B(N,N,N),C(N,N,N); - initializeRandomDouble(A.data(), N*N*N, A.stride(thirdDim)); - initializeRandomDouble(B.data(), N*N*N, B.stride(thirdDim)); - TinyVector size = N-2; - generateFastTraversalOrder(size); - double c = 1/7.; - - bench.start(); - for (long i=0; i < iters; ++i) - { - Range I(1,N-2), J(1,N-2), K(1,N-2); - C=B*B; - A(I,J,K) = test1(C); - C=A*A; - B(I,J,K) = test1(C); - } - bench.stop(); - } - - bench.endImplementation(); -} - -void stencilBlitzProductVersion2(BenchmarkExt& bench) -{ - bench.beginImplementation("Blitz++ product w alloc"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - - cout << "Blitz++ Stencil Operator on product: N = " << N << endl; - cout.flush(); - - long iters = bench.getIterations(); - - Array A(N,N,N), B(N,N,N); - initializeRandomDouble(A.data(), N*N*N, A.stride(thirdDim)); - initializeRandomDouble(B.data(), N*N*N, B.stride(thirdDim)); - TinyVector size = N-2; - generateFastTraversalOrder(size); - double c = 1/7.; - - bench.start(); - for (long i=0; i < iters; ++i) - { - Range I(1,N-2), J(1,N-2), K(1,N-2); - { - ArrayC(B*B); - A(I,J,K) = test1(C); - } - { - ArrayC(A*A); - B(I,J,K) = test1(C); - } - } - bench.stop(); - } - - bench.endImplementation(); -} - -void stencilBlitzProductVersion3(BenchmarkExt& bench) -{ - bench.beginImplementation("Blitz++ product expr"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - - cout << "Blitz++ Stencil Operator on product expr: N = " << N << endl; - cout.flush(); - - long iters = bench.getIterations(); - - Array A(N,N,N), B(N,N,N); - initializeRandomDouble(A.data(), N*N*N, A.stride(thirdDim)); - initializeRandomDouble(B.data(), N*N*N, B.stride(thirdDim)); - TinyVector size = N-2; - generateFastTraversalOrder(size); - double c = 1/7.; - - bench.start(); - for (long i=0; i < iters; ++i) - { - Range I(1,N-2), J(1,N-2), K(1,N-2); - 
A(I,J,K) = test1(B*B); - - B(I,J,K) = test1(A*A); - } - bench.stop(); - } - - bench.endImplementation(); -} - -void stencilBlitzIndexVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Blitz++ Indexed StencilOp"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - - cout << "Blitz++ Indexed Stencil Operator: N = " << N << endl; - cout.flush(); - - long iters = bench.getIterations(); - - Array A(N,N,N), B(N,N,N); - initializeRandomDouble(A.data(), N*N*N, A.stride(thirdDim)); - initializeRandomDouble(B.data(), N*N*N, B.stride(thirdDim)); - TinyVector size = N-2; - generateFastTraversalOrder(size); - double c = 1/7.; - - bench.start(); - for (long i=0; i < iters; ++i) - { - Range I(1,N-2), J(1,N-2), K(1,N-2); - A(I,J,K) = test1(B(tensor::i, tensor::j, tensor::k)); - - B(I,J,K) = test1(A(tensor::i, tensor::j, tensor::k)); - } - bench.stop(); - } - - bench.endImplementation(); -} - -void stencilBlitzStencilVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Blitz++ Stencil"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - - cout << "Blitz++ Stencil: N = " << N << endl; - cout.flush(); - - long iters = bench.getIterations(); - - Array A(N,N,N), B(N,N,N); - initializeRandomDouble(A.data(), N*N*N, A.stride(thirdDim)); - initializeRandomDouble(B.data(), N*N*N, B.stride(thirdDim)); - TinyVector size = N-2; - generateFastTraversalOrder(size); - double c = 1/7.; - - ; bench.start(); - for (long i=0; i < iters; ++i) - { - Range I(1,N-2), J(1,N-2), K(1,N-2); - applyStencil(test1stencil(),A,B); - applyStencil(test1stencil(),B,A); - } - bench.stop(); - } - - bench.endImplementation(); -} - -void stencilFortran77Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - - cout << "Fortran 77: N = " << N << endl; - cout.flush(); - - int iters = (int)bench.getIterations(); - - size_t 
arraySize = size_t(N) * size_t(N) * N; - - double* A = new double[arraySize]; - double* B = new double[arraySize]; - - initializeRandomDouble(A, arraySize); - initializeRandomDouble(B, arraySize); - - bench.start(); - stencilf(A, B, N, iters); - bench.stop(); - - delete [] A; - delete [] B; - } - - bench.endImplementation(); -} - -void stencilFortran77VersionTiled(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77 (tiled)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - - cout << "Fortran 77: N = " << N << endl; - cout.flush(); - - int iters = (int)bench.getIterations(); - - size_t arraySize = size_t(N) * size_t(N) * N; - - double* A = new double[arraySize]; - double* B = new double[arraySize]; - - initializeRandomDouble(A, arraySize); - initializeRandomDouble(B, arraySize); - - bench.start(); - stencilftiled(A, B, N, iters); - bench.stop(); - - delete [] A; - delete [] B; - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void stencilFortran90Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - - cout << "Fortran 90: N = " << N << endl; - cout.flush(); - - int iters = (int)bench.getIterations(); - - size_t arraySize = size_t(N) * size_t(N) * N; - - double* A = new double[arraySize]; - double* B = new double[arraySize]; - - initializeRandomDouble(A, arraySize); - initializeRandomDouble(B, arraySize); - - bench.start(); - stencilf90(A, B, N, iters); - bench.stop(); - - delete [] A; - delete [] B; - } - - bench.endImplementation(); -} -#endif diff --git a/benchmarks/stencilf.f b/benchmarks/stencilf.f deleted file mode 100644 index 44f4ea04..00000000 --- a/benchmarks/stencilf.f +++ /dev/null @@ -1,32 +0,0 @@ - subroutine stencilf(A, B, N, iters) - integer N, iters - double precision A(N,N,N), B(N,N,N) - integer i,j,k,z - double precision c - c = 1 / 7. 
- - do z=1,iters - - do k=2,N-1 - do j=2,N-1 - do i=2,N-1 - A(i,j,k) = c * (B(i,j,k) + B(i+1,j,k) + B(i-1,j,k) - . + B(i,j+1,k) + B(i,j-1,k) + B(i,j,k+1) + B(i,j,k-1)) - enddo - enddo - enddo - - do k=2,N-1 - do j=2,N-1 - do i=2,N-1 - B(i,j,k) = c * (A(i,j,k) + A(i+1,j,k) + A(i-1,j,k) - . + A(i,j+1,k) + A(i,j-1,k) + A(i,j,k+1) + A(i,j,k-1)) - enddo - enddo - enddo - - enddo - - return - end - diff --git a/benchmarks/stencilf2.f b/benchmarks/stencilf2.f deleted file mode 100644 index 400dadb1..00000000 --- a/benchmarks/stencilf2.f +++ /dev/null @@ -1,48 +0,0 @@ -! Tiled fortran stencil - - SUBROUTINE stencilftiled(A, B, N, iters) - INTEGER N, iters - DOUBLE PRECISION A(N,N,N), B(N,N,N) - INTEGER z - DOUBLE PRECISION c - c = 1 / 7. - - DO z=1,iters - CALL tiledStencil(A, B, N, c) - CALL tiledStencil(B, A, N, c) - ENDDO - - RETURN - END - - SUBROUTINE tiledStencil(A, B, N, c) - INTEGER N - DOUBLE PRECISION A(N,N,N), B(N,N,N) - DOUBLE PRECISION c - INTEGER i,j,k,bi,bj,bk,ni,nj,nk - INTEGER blockSize - - blockSize = 16 - - DO bi=2,N-1,blockSize - DO bj=2,N-1,blockSize - DO bk=2,N-1,blockSize - ni = min(bi+blockSize-1,N-1) - nj = min(bj+blockSize-1,N-1) - nk = min(bk+blockSize-1,N-1) - - DO k=bk,nk - DO j=bj,nj - DO i=bi,ni - A(i,j,k) = c * (B(i,j,k) + B(i+1,j,k) + B(i-1,j,k) - . + B(i,j+1,k) + B(i,j-1,k) + B(i,j,k+1) + B(i,j,k-1)) - ENDDO - ENDDO - ENDDO - ENDDO - ENDDO - ENDDO - - RETURN - END - diff --git a/benchmarks/stencilf90.f90 b/benchmarks/stencilf90.f90 deleted file mode 100644 index 08d787b4..00000000 --- a/benchmarks/stencilf90.f90 +++ /dev/null @@ -1,19 +0,0 @@ -SUBROUTINE stencilf90(A, B, n, iters) - IMPLICIT NONE - INTEGER, INTENT( IN ) :: n, iters - DOUBLE PRECISION, DIMENSION (n,n,n) :: A, B - DOUBLE PRECISION :: c - INTEGER :: count - - c = 1 / 7. 
- - DO count=1,iters - A(2:N-1,2:N-1,2:N-1) = c * (B(2:N-1,2:N-1,2:N-1) + B(3:N,2:N-1,2:N-1) & - + B(1:N-2,2:N-1,2:N-1) + B(2:N-1,3:N,2:N-1) + B(2:N-1,1:N-2,2:N-1) & - + B(2:N-1,2:N-1,3:N) + B(2:N-1,2:N-1,1:N-2)) - - B(2:N-1,2:N-1,2:N-1) = c * (A(2:N-1,2:N-1,2:N-1) + A(3:N,2:N-1,2:N-1) & - + A(1:N-2,2:N-1,2:N-1) + A(2:N-1,3:N,2:N-1) + A(2:N-1,1:N-2,2:N-1) & - + A(2:N-1,2:N-1,3:N) + A(2:N-1,2:N-1,1:N-2)) - END DO -END SUBROUTINE diff --git a/benchmarks/stenciln.cpp b/benchmarks/stenciln.cpp deleted file mode 100644 index 0a9ad655..00000000 --- a/benchmarks/stenciln.cpp +++ /dev/null @@ -1,61 +0,0 @@ -#include -#include - -using namespace blitz; - -BZ_DECLARE_STENCIL4(acoustic2D_stencil,P1,P2,P3,c) - P3 = 2 * P2 + c * Laplacian2D(P2) - P1; -BZ_END_STENCIL - -int benchmark(int N, int nIterations, int blockSize) -{ - Array P1, P2, P3, c; - allocateArrays(shape(N,N), P1, P2, P3, c); - - // Initial conditions: obviously in a real application these - // wouldn't be zeroed... - Range I(0,blockSize-1), J(0,blockSize-1); - - P1(I,J) = 0; - P2(I,J) = 0; - P3(I,J) = 0; - c(I,J) = 0; - - for (int i=0; i < nIterations; ++i) - { - // Apply the stencil object to the arrays - applyStencil(acoustic2D_stencil(), P1(I,J), P2(I,J), P3(I,J), c(I,J)); - - // Set [P1,P2,P3] <- [P2,P3,P1] to set up for the next - // time step - cycleArrays(P1,P2,P3); - } - - return 0; -} - -int main() -{ - Timer timer; - - cout << "N\tMflops" << endl; - - const int blockSize = 27; - - for (int N=2000; N < 2100; ++N) - { - double stencilPoints = pow(blockSize-2,2.0); - int nIterations = 5000; - - timer.start(); - benchmark(N, nIterations, blockSize); - timer.stop(); - - double flops = (4 + 7) * stencilPoints * nIterations; - double Mflops = flops / timer.elapsedSeconds() / 1.0E+6; - cout << N << "\t" << Mflops << endl; - } - - return 0; -} - diff --git a/benchmarks/tiny3.cpp b/benchmarks/tiny3.cpp deleted file mode 100644 index d87c35de..00000000 --- a/benchmarks/tiny3.cpp +++ /dev/null @@ -1,128 +0,0 @@ 
-#include -#include -#include - -using namespace blitz; - -template -void optimizationSink(T&); - -int main() -{ - TinyMatrix A1, A2; - TinyVector b1, b2, c1, c2; - Timer timer; - - const int iters = 9000000; - - for (int i=0; i < 3; ++i) - { - for (int j=0; j < 3; ++j) - A1(i,j) = 1.0; - b1(i) = 1.0; - b2(i) = 1.0; - } - - optimizationSink(A1); - optimizationSink(A2); - optimizationSink(b1); - optimizationSink(b2); - - timer.start(); - for (int i=0; i < iters; ++i) - { - c1 = product(A1,b1); - c2 = product(A1,b2); - b1 = product(A1,c1); - c2 = product(A1,c2); - c1 = product(A1,b1); - c2 = product(A1,b2); - b1 = product(A1,c1); - c2 = product(A1,c2); - c1 = product(A1,b1); - c2 = product(A1,b2); - b1 = product(A1,c1); - c2 = product(A1,c2); - c1 = product(A1,b1); - c2 = product(A1,b2); - b1 = product(A1,c1); - c2 = product(A1,c2); - c1 = product(A1,b1); - c2 = product(A1,b2); - b1 = product(A1,c1); - c2 = product(A1,c2); - c1 = product(A1,b1); - c2 = product(A1,b2); - b1 = product(A1,c1); - c2 = product(A1,c2); - c1 = product(A1,b1); - c2 = product(A1,b2); - b1 = product(A1,c1); - c2 = product(A1,c2); - c1 = product(A1,b1); - c2 = product(A1,b2); - b1 = product(A1,c1); - c2 = product(A1,c2); - c1 = product(A1,b1); - c2 = product(A1,b2); - b1 = product(A1,c1); - c2 = product(A1,c2); - c1 = product(A1,b1); - c2 = product(A1,b2); - b1 = product(A1,c1); - c2 = product(A1,c2); - c1 = product(A1,b1); - c2 = product(A1,b2); - b1 = product(A1,c1); - c2 = product(A1,c2); - c1 = product(A1,b1); - c2 = product(A1,b2); - b1 = product(A1,c1); - c2 = product(A1,c2); - c1 = product(A1,b1); - c2 = product(A1,b2); - b1 = product(A1,c1); - c2 = product(A1,c2); - c1 = product(A1,b1); - c2 = product(A1,b2); - b1 = product(A1,c1); - c2 = product(A1,c2); - c1 = product(A1,b1); - c2 = product(A1,b2); - b1 = product(A1,c1); - c2 = product(A1,c2); - c1 = product(A1,b1); - c2 = product(A1,b2); - b1 = product(A1,c1); - c2 = product(A1,c2); - } - - timer.stop(); - - double ops = 64.0 * iters; 
- double flops = ops * 15; - double seconds = timer.elapsedSeconds(); - - double timePerOp = seconds / ops; - double cycles = timePerOp * (100.0 * 1e+6); - - cout << "ops = " << ops << endl - << "seconds = " << seconds << endl; - - cout << "timePerOp = " << timePerOp << endl - << "cycles = " << cycles << endl; - - double Mflops = flops / seconds / 1.0e+6; - cout << "Mflops = " << Mflops << endl; - - optimizationSink(c1); - optimizationSink(c2); - - return 0; -} - -template -void optimizationSink(T&) -{ -} - diff --git a/benchmarks/tinydaxpy.cpp b/benchmarks/tinydaxpy.cpp deleted file mode 100644 index 40499c39..00000000 --- a/benchmarks/tinydaxpy.cpp +++ /dev/null @@ -1,124 +0,0 @@ -// TinyVector DAXPY benchmark - -//#define BZ_DISABLE_KCC_COPY_PROPAGATION_KLUDGE - -#include -#include -#include - -using namespace blitz; - -ranlib::Uniform rnd; - -template -void optimizationSink(T&); - -template -void tinyDAXPYBenchmark(TinyVector, int iters, double a) -{ - Timer timer; - - TinyVector ta, tb, tc, td, te, tf, tg, th, ti, tj; - for (int i=0; i < N_rank; ++i) - { - ta[i] = rnd.random()+1; - tb[i] = rnd.random()+1; - tc[i] = rnd.random()+1; - td[i] = rnd.random()+1; - te[i] = rnd.random()+1; - tf[i] = rnd.random()+1; - tg[i] = rnd.random()+1; - th[i] = rnd.random()+1; - ti[i] = rnd.random()+1; - tj[i] = rnd.random()+1; - } - - double b = -a; - - double numFlops = 0; - - if (N_rank < 20) - { - timer.start(); - for (int i=0; i < iters; ++i) - { - ta += a * tb; - tc += a * td; - te += a * tf; - tg += a * th; - ti += a * tj; - tb += b * ta; - td += b * tc; - tf += b * te; - th += b * tg; - tj += b * ti; - ta += a * tb; - tc += a * td; - te += a * tf; - tg += a * th; - ti += a * tj; - tb += b * ta; - td += b * tc; - tf += b * te; - th += b * tg; - tj += b * ti; - } - timer.stop(); - numFlops = 40.0 * N_rank * double(iters); - } - else { - timer.start(); - for (int i=0; i < iters; ++i) - { - ta += a * tb; - tb += b * ta; - } - timer.stop(); - numFlops = 4.0 * N_rank * 
double(iters); - } - - optimizationSink(ta); - optimizationSink(tb); - optimizationSink(tc); - optimizationSink(td); - optimizationSink(te); - optimizationSink(tf); - optimizationSink(tg); - optimizationSink(th); - optimizationSink(ti); - optimizationSink(tj); - - timer.stop(); - float Gflops = numFlops / (1e9*timer.elapsed()); - - if (iters > 1) - { - cout << setw(5) << N_rank << '\t' << Gflops << endl; - } -} - -double a = 0.3429843; - -template -void optimizationSink(T&) -{ -} - -int main() -{ - cout << "TinyVector DAXPY benchmark" << endl - << setw(5) << "N" << '\t' << "Gflops/" << Timer::indep_var() << endl; - tinyDAXPYBenchmark(TinyVector(), 800000, a); - tinyDAXPYBenchmark(TinyVector(), 800000, a); - tinyDAXPYBenchmark(TinyVector(), 800000, a); - tinyDAXPYBenchmark(TinyVector(), 700000, a); - tinyDAXPYBenchmark(TinyVector(), 600000, a); - tinyDAXPYBenchmark(TinyVector(), 500000, a); - tinyDAXPYBenchmark(TinyVector(), 500000, a); - tinyDAXPYBenchmark(TinyVector(), 500000, a); - tinyDAXPYBenchmark(TinyVector(), 500000, a); - tinyDAXPYBenchmark(TinyVector(), 500000, a); - - return 0; -} - diff --git a/blitz/CMakeLists.txt b/blitz/CMakeLists.txt index 1524030e..ff3f792a 100644 --- a/blitz/CMakeLists.txt +++ b/blitz/CMakeLists.txt @@ -1,4 +1,15 @@ -include(CheckCXXFeatures) +# Ubuntu 20.04LTS versions for compilers + +set(GCC_MINIMAL 9.3) +set(CLANG_MINIMAL 10.0) + +if (${CMAKE_CXX_COMPILER_ID} STREQUAL GNU AND ${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS ${GCC_MINIMAL}) + message(FATAL_ERROR "GCC g++ version >= ${GCC_MINIMAL} needed.") +endif () + +if (${CMAKE_CXX_COMPILER_ID} STREQUAL Clang AND ${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS ${CLANG_MINIMAL}) + message(FATAL_ERROR "Clang version >= ${CLANG_MINIMAL} needed.") +endif () option(BZ_FULLY64BIT "Enable 64 dimensions with > 2^31 elements (NOT IMPLEMENTED)" OFF) option(BZ_THREADSAFE "Enable Blitz thread-safety features" OFF) @@ -55,16 +66,7 @@ endif() set(BLITZ_EXTRA_LIBRARIES ${EXTRA_LIBS} PARENT_SCOPE) 
set(DEPS ${DEP_PKGS} LIBRARIES ${DEP_LIBS} PARENT_SCOPE) -if (NOT CHECKED_COMPILER_CXX_FEATURES) - CHECK_ALL_CXX_FEATURES(BZ_) - CHECK_HEADER(HAVE_INTTYPES_H inttypes.h) - set(BZ_MATH_ABSINT_IN_NAMESPACE_STD ${BZ_HAVE_MATH_ABSINT_IN_NAMESPACE_STD}) - set(BZ_MATH_FN_IN_NAMESPACE_STD ${BZ_HAVE_MATH_FN_IN_NAMESPACE_STD}) - set(BZ_ISNAN_IN_NAMESPACE_STD ${BZ_HAVE_ISNAN_IN_NAMESPACE_STD}) - set(BZ_HAVE_NCEG_RESTRICT_EGCS ${BZ_HAVE_RESTRICT_EGCS}) - configure_file(${CMAKE_CURRENT_SOURCE_DIR}/config.cmake.h.in ${BLITZ_CONFIG_FILE}) - set(CHECKED_COMPILER_CXX_FEATURES TRUE CACHE INTERNAL "Gating variable to avoid checking compiler features multiple times") -endif() +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/config.cmake.h.in ${BLITZ_CONFIG_FILE}) macro(GENERATE_BLITZ_HEADER header prog var) add_custom_target(${header} ALL) diff --git a/blitz/config.cmake.h.in b/blitz/config.cmake.h.in index 3f1a255f..6b87ecb3 100644 --- a/blitz/config.cmake.h.in +++ b/blitz/config.cmake.h.in @@ -5,13 +5,13 @@ // from the file config.cmake.h.in. // Macro for declaring aligned variables. -#cmakedefine BZ_HAVE_ALIGNEMENT_DIRECTIVE_WINDOWS_STYLE +/* #undef BZ_HAVE_ALIGNEMENT_DIRECTIVE_WINDOWS_STYLE */ #ifdef BZ_HAVE_ALIGNEMENT_DIRECTIVE_WINDOWS_STYLE #define BZ_ALIGN_VARIABLE(vartype,varname,alignment) __declspec(align(alignment)) vartype varname; #undef BZ_HAVE_ALIGNEMENT_DIRECTIVE_WINDOWS_STYLE #endif -#cmakedefine BZ_HAVE_ALIGNEMENT_DIRECTIVE_GCC_STYLE +/* #undef BZ_HAVE_ALIGNEMENT_DIRECTIVE_GCC_STYLE */ #ifdef BZ_HAVE_ALIGNEMENT_DIRECTIVE_GCC_STYLE #define BZ_ALIGN_VARIABLE(vartype,varname,alignment) vartype __attribute__ ((aligned (alignment))) varname; #undef BZ_HAVE_ALIGNEMENT_DIRECTIVE_GCC_STYLE @@ -22,259 +22,259 @@ #endif // Enable dimensions with > 2^31 elements (NOT IMPLEMENTED). 
-#cmakedefine BZ_FULLY64BIT +/* #undef BZ_FULLY64BIT */ // define if bool is a built-in type -#cmakedefine BZ_HAVE_BOOL +#define BZ_HAVE_BOOL // define if the Boost library is available #cmakedefine BZ_HAVE_BOOST // Define to 1 if you have the header file. -#cmakedefine BZ_HAVE_BOOST_MPI +/* #undef BZ_HAVE_BOOST_MPI */ // define if the Boost::Serialization library is available #cmakedefine BZ_HAVE_BOOST_SERIALIZATION // define if the compiler has header -#cmakedefine BZ_HAVE_CLIMITS +#define BZ_HAVE_CLIMITS // define if the compiler has complex -#cmakedefine BZ_HAVE_COMPLEX +#define BZ_HAVE_COMPLEX // define if the compiler has standard complex functions -#cmakedefine BZ_HAVE_COMPLEX_FCNS +#define BZ_HAVE_COMPLEX_FCNS // define if the compiler has complex math functions -#cmakedefine BZ_HAVE_COMPLEX_MATH1 +#define BZ_HAVE_COMPLEX_MATH1 // define if the compiler has more complex math functions -#cmakedefine BZ_HAVE_COMPLEX_MATH2 +/* #undef BZ_HAVE_COMPLEX_MATH2 */ // define if complex math functions are in namespace std -#cmakedefine BZ_HAVE_COMPLEX_MATH_IN_NAMESPACE_STD +#define BZ_HAVE_COMPLEX_MATH_IN_NAMESPACE_STD // define if the compiler supports const_cast<> -#cmakedefine BZ_HAVE_CONST_CAST +#define BZ_HAVE_CONST_CAST // Define to 1 if you have the header file. -#cmakedefine BZ_HAVE_CSTRING +#define BZ_HAVE_CSTRING // define if the compiler supports default template parameters -#cmakedefine BZ_HAVE_DEFAULT_TEMPLATE_PARAMETERS +#define BZ_HAVE_DEFAULT_TEMPLATE_PARAMETERS // Obsolete ? // Define to 1 if you have the header file. 
-//#cmakedefine BZ_HAVE_DLFCN_H +/* #undef BZ_HAVE_DLFCN_H */ // define if the compiler supports dynamic_cast<> -#cmakedefine BZ_HAVE_DYNAMIC_CAST +#define BZ_HAVE_DYNAMIC_CAST // define if the compiler handle computations inside an enum -#cmakedefine BZ_HAVE_ENUM_COMPUTATIONS +#define BZ_HAVE_ENUM_COMPUTATIONS // define if the compiler handles (int) casts in enum computations -#cmakedefine BZ_HAVE_ENUM_COMPUTATIONS_WITH_CAST +#define BZ_HAVE_ENUM_COMPUTATIONS_WITH_CAST // define if the compiler supports exceptions -#cmakedefine BZ_HAVE_EXCEPTIONS +#define BZ_HAVE_EXCEPTIONS // define if the compiler supports the explicit keyword -#cmakedefine BZ_HAVE_EXPLICIT +#define BZ_HAVE_EXPLICIT // define if the compiler supports explicit template function qualification -#cmakedefine BZ_HAVE_EXPLICIT_TEMPLATE_FUNCTION_QUALIFICATION +#define BZ_HAVE_EXPLICIT_TEMPLATE_FUNCTION_QUALIFICATION // define if the compiler recognizes the full specialization syntax -#cmakedefine BZ_HAVE_FULL_SPECIALIZATION_SYNTAX +#define BZ_HAVE_FULL_SPECIALIZATION_SYNTAX // define if the compiler supports function templates with non-type parameters -#cmakedefine BZ_HAVE_FUNCTION_NONTYPE_PARAMETERS +#define BZ_HAVE_FUNCTION_NONTYPE_PARAMETERS // define if the compiler supports IEEE math library -#cmakedefine BZ_HAVE_IEEE_MATH +#define BZ_HAVE_IEEE_MATH // Define to 1 if you have the header file. -#cmakedefine BZ_HAVE_INTTYPES_H +#define BZ_HAVE_INTTYPES_H // Obsolete ? // Define to 1 if you have the `m' library (-lm). -#cmakedefine BZ_HAVE_LIBM +#define BZ_HAVE_LIBM // Define to 1 if you have the `papi' library (-lpapi). 
-#cmakedefine BZ_HAVE_LIBPAPI +/* #undef BZ_HAVE_LIBPAPI */ // define if the compiler supports member constants -#cmakedefine BZ_HAVE_MEMBER_CONSTANTS +#define BZ_HAVE_MEMBER_CONSTANTS // define if the compiler supports member templates -#cmakedefine BZ_HAVE_MEMBER_TEMPLATES +#define BZ_HAVE_MEMBER_TEMPLATES // define if the compiler supports member templates outside the class // declaration -#cmakedefine BZ_HAVE_MEMBER_TEMPLATES_OUTSIDE_CLASS +#define BZ_HAVE_MEMBER_TEMPLATES_OUTSIDE_CLASS // define if the compiler supports the mutable keyword -#cmakedefine BZ_HAVE_MUTABLE +#define BZ_HAVE_MUTABLE // define if the compiler supports the Numerical C Extensions Group restrict // keyword -#cmakedefine BZ_HAVE_NCEG_RESTRICT +/* #undef BZ_HAVE_NCEG_RESTRICT */ // define if the compiler supports the __restrict__ keyword -#cmakedefine BZ_HAVE_NCEG_RESTRICT_EGCS +#define BZ_HAVE_NCEG_RESTRICT_EGCS // define if the compiler has numeric_limits -#cmakedefine BZ_HAVE_NUMERIC_LIMITS +#define BZ_HAVE_NUMERIC_LIMITS // define if the compiler accepts the old for scoping rules -#cmakedefine BZ_HAVE_OLD_FOR_SCOPING +/* #undef BZ_HAVE_OLD_FOR_SCOPING */ // define if the compiler supports partial ordering -#cmakedefine BZ_HAVE_PARTIAL_ORDERING +#define BZ_HAVE_PARTIAL_ORDERING // define if the compiler supports partial specialization -#cmakedefine BZ_HAVE_PARTIAL_SPECIALIZATION +#define BZ_HAVE_PARTIAL_SPECIALIZATION // define if the compiler supports reinterpret_cast<> -#cmakedefine BZ_HAVE_REINTERPRET_CAST +#define BZ_HAVE_REINTERPRET_CAST // define if the compiler supports Run-Time Type Identification -#cmakedefine BZ_HAVE_RTTI +#define BZ_HAVE_RTTI // define if the compiler has getrusage() function -#cmakedefine BZ_HAVE_RUSAGE +#define BZ_HAVE_RUSAGE // define if the compiler supports static_cast<> -#cmakedefine BZ_HAVE_STATIC_CAST +#define BZ_HAVE_STATIC_CAST // define if the compiler supports ISO C++ standard library -#cmakedefine BZ_HAVE_STD +#define BZ_HAVE_STD // Obsolete ? 
// Define to 1 if you have the header file. -//#cmakedefine BZ_HAVE_STDINT_H +/* #undef BZ_HAVE_STDINT_H */ // Define to 1 if you have the header file. -#cmakedefine BZ_HAVE_STDLIB_H +/* #undef BZ_HAVE_STDLIB_H */ // define if the compiler supports Standard Template Library -#cmakedefine BZ_HAVE_STL +#define BZ_HAVE_STL // Define to 1 if you have the header file. -#cmakedefine BZ_HAVE_STRINGS_H +/* #undef BZ_HAVE_STRINGS_H */ // Define to 1 if you have the header file. -#cmakedefine BZ_HAVE_STRING_H +/* #undef BZ_HAVE_STRING_H */ // define if the compiler supports System V math library -#cmakedefine BZ_HAVE_SYSTEM_V_MATH +/* #undef BZ_HAVE_SYSTEM_V_MATH */ // Define to 1 if you have the header file. -#cmakedefine BZ_HAVE_SYS_STAT_H +/* #undef BZ_HAVE_SYS_STAT_H */ // Define to 1 if you have the header file. -#cmakedefine BZ_HAVE_SYS_TYPES_H +/* #undef BZ_HAVE_SYS_TYPES_H */ // Define to 1 if you have the header file. -#cmakedefine BZ_HAVE_TBB_ATOMIC_H +/* #undef BZ_HAVE_TBB_ATOMIC_H */ // define if the compiler supports basic templates -#cmakedefine BZ_HAVE_TEMPLATES +#define BZ_HAVE_TEMPLATES // define if the compiler supports templates as template arguments -#cmakedefine BZ_HAVE_TEMPLATES_AS_TEMPLATE_ARGUMENTS +#define BZ_HAVE_TEMPLATES_AS_TEMPLATE_ARGUMENTS // define if the compiler supports use of the template keyword as a qualifier -#cmakedefine BZ_HAVE_TEMPLATE_KEYWORD_QUALIFIER +#define BZ_HAVE_TEMPLATE_KEYWORD_QUALIFIER // define if the compiler supports template-qualified base class specifiers -#cmakedefine BZ_HAVE_TEMPLATE_QUALIFIED_BASE_CLASS +#define BZ_HAVE_TEMPLATE_QUALIFIED_BASE_CLASS // define if the compiler supports template-qualified return types -#cmakedefine BZ_HAVE_TEMPLATE_QUALIFIED_RETURN_TYPE +#define BZ_HAVE_TEMPLATE_QUALIFIED_RETURN_TYPE // define if the compiler supports function matching with argument types which // are template scope-qualified -#cmakedefine BZ_HAVE_TEMPLATE_SCOPED_ARGUMENT_MATCHING +#define 
BZ_HAVE_TEMPLATE_SCOPED_ARGUMENT_MATCHING // define if the compiler recognizes typename -#cmakedefine BZ_HAVE_TYPENAME +#define BZ_HAVE_TYPENAME // define if the compiler supports the vector type promotion mechanism -#cmakedefine BZ_HAVE_TYPE_PROMOTION +#define BZ_HAVE_TYPE_PROMOTION // Define to 1 if you have the header file. -#cmakedefine BZ_HAVE_UNISTD_H +/* #undef BZ_HAVE_UNISTD_H */ // define if the compiler supports numeric traits promotions -#cmakedefine BZ_HAVE_USE_NUMTRAIT +#define BZ_HAVE_USE_NUMTRAIT // define if the compiler has valarray -#cmakedefine BZ_HAVE_VALARRAY +#define BZ_HAVE_VALARRAY // define if the compiler has isnan function in namespace std -#cmakedefine BZ_ISNAN_IN_NAMESPACE_STD +#define BZ_ISNAN_IN_NAMESPACE_STD // define if the compiler has C math abs(integer types) in namespace std -#cmakedefine BZ_MATH_ABSINT_IN_NAMESPACE_STD +#define BZ_MATH_ABSINT_IN_NAMESPACE_STD // define if the compiler has C math functions in namespace std -#cmakedefine BZ_MATH_FN_IN_NAMESPACE_STD +#define BZ_MATH_FN_IN_NAMESPACE_STD // Name of package -#cmakedefine BZ_PACKAGE @BZ_PACKAGE@ +/* #undef BZ_PACKAGE */ // Define to the address where bug reports for this package should be sent. -#cmakedefine BZ_PACKAGE_BUGREPORT @BZ_PACKAGE_BUGREPORT@ +/* #undef BZ_PACKAGE_BUGREPORT */ // Define to the full name of this package. -#cmakedefine BZ_PACKAGE_NAME @BZ_PACKAGE_NAME@ +/* #undef BZ_PACKAGE_NAME */ // Define to the full name and version of this package. -#cmakedefine BZ_PACKAGE_STRING "@BZ_PACKAGE_STRING@" +#define BZ_PACKAGE_STRING " 1.0" // Define to the one symbol short name of this package. -#cmakedefine BZ_PACKAGE_TARNAME @BZ_PACKAGE_TARNAME@ +/* #undef BZ_PACKAGE_TARNAME */ // Define to the home page for this package. -#cmakedefine BZ_PACKAGE_URL @BZ_PACKAGE_URL@ +/* #undef BZ_PACKAGE_URL */ // Define to the version of this package. -#cmakedefine BZ_PACKAGE_VERSION @BZ_PACKAGE_VERSION@ +/* #undef BZ_PACKAGE_VERSION */ // Pad array lengths to SIMD width. 
-#cmakedefine BZ_PAD_ARRAYS +/* #undef BZ_PAD_ARRAYS */ // Set SIMD instruction width in bytes. -#cmakedefine BZ_SIMD_WIDTH @BZ_SIMD_WIDTH@ +#define BZ_SIMD_WIDTH 1 // Define to 1 if you have the ANSI C header files. -#cmakedefine BZ_STDC_HEADERS +/* #undef BZ_STDC_HEADERS */ // Enable Blitz thread-safety features -#cmakedefine BZ_THREADSAFE +/* #undef BZ_THREADSAFE */ // Use TBB atomic types. -#cmakedefine BZ_THREADSAFE_USE_TBB +/* #undef BZ_THREADSAFE_USE_TBB */ // Specifies whether compiler alignment pragmas should be used. -#cmakedefine BZ_USE_ALIGNMENT_PRAGMAS +/* #undef BZ_USE_ALIGNMENT_PRAGMAS */ // Version number of package -#cmakedefine BZ_VERSION +/* #undef BZ_VERSION */ // CXX -#cmakedefine BZ__compiler_name +/* #undef BZ__compiler_name */ // CXXFLAGS -#cmakedefine BZ__compiler_options +/* #undef BZ__compiler_options */ // date -#cmakedefine BZ__config_date +/* #undef BZ__config_date */ // uname -a -#cmakedefine BZ__os_name +/* #undef BZ__os_name */ // target -#cmakedefine BZ__platform +/* #undef BZ__platform */ diff --git a/cmake/CheckCXXFeatures.cmake b/cmake/CheckCXXFeatures.cmake deleted file mode 100644 index ef8342bc..00000000 --- a/cmake/CheckCXXFeatures.cmake +++ /dev/null @@ -1,116 +0,0 @@ -set(CHECK_CXX_FEATURE_PREFIX "") -MARK_AS_ADVANCED(CHECK_CXX_FEATURE_PREFIX) - -macro(CHECK_CXX_FEATURE feature file message) - message(STATUS "Check whether the compiler ${message}") - try_compile(RESULT ${CMAKE_BINARY_DIR} - ${CMAKE_SOURCE_DIR}/cmake/cxx_tests/${file} - COMPILE_DEFINITIONS "${CHECK_CXX_FEATURE_DEFINITIONS}" - OUTPUT_VARIABLE OUTPUT) - - if (RESULT) - set (FOUND "found") - set (STATUS "passed") - set(CHECK_CXX_FEATURE_DEFINITIONS "${CHECK_CXX_FEATURE_DEFINITIONS} -D${feature}") - else() - set (FOUND "not found") - set (STATUS "failed") - endif() - message(STATUS "Check whether the compiler ${message} - ${FOUND}") - set("${CHECK_CXX_FEATURE_PREFIX}${feature}" ${RESULT} CACHE INTERNAL "Does the compiler ${message}") - file(APPEND 
${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeOutput.log - "Determining if the CXX compiler ${message} ${STATUS} with " - "the following output:\n${OUTPUT}\n\n") -endmacro() - -macro(CHECK_CXX_GENERAL) - CHECK_CXX_FEATURE(HAVE_EXCEPTIONS exceptions.cpp "supports exceptions") - CHECK_CXX_FEATURE(HAVE_RTTI rtti.cpp "supports Run-Time Type Identification") - CHECK_CXX_FEATURE(HAVE_MEMBER_CONSTANTS member_constants.cpp "supports member constants") - CHECK_CXX_FEATURE(HAVE_OLD_FOR_SCOPING old_for_scoping.cpp "accepts the old for scoping rules") -endmacro() - -macro(CHECK_CXX_KEYWORDS) - CHECK_CXX_FEATURE(HAVE_EXPLICIT explicit.cpp "supports the explicit keyword") - CHECK_CXX_FEATURE(HAVE_MUTABLE mutable.cpp "supports the mutable keyword") - CHECK_CXX_FEATURE(HAVE_TYPENAME typename.cpp "recognizes typename") - CHECK_CXX_FEATURE(HAVE_NCEG_RESTRICT nceg_restrict.cpp "supports the Numerical C Extensions Group restrict keyword") - CHECK_CXX_FEATURE(HAVE_RESTRICT_EGCS restrict_egcs.cpp "recognizes the '__restrict__' keyword") - CHECK_CXX_FEATURE(HAVE_BOOL bool.cpp "recognizes bool as a built-in type") -endmacro() - -macro(CHECK_CXX_TYPE_CASTS) - CHECK_CXX_FEATURE(HAVE_CONST_CAST const_cast.cpp "supports const_cast<>") - CHECK_CXX_FEATURE(HAVE_STATIC_CAST static_cast.cpp "supports static_cast<>") - CHECK_CXX_FEATURE(HAVE_REINTERPRET_CAST reinterpret_cast.cpp "supports reinterpret_cast<>") - CHECK_CXX_FEATURE(HAVE_DYNAMIC_CAST dynamic_cast.cpp "supports dynamic_cast<>") -endmacro() - -macro(CHECK_CXX_TEMPLATES_FEATURES) - CHECK_CXX_FEATURE(HAVE_TEMPLATES templates.cpp "supports basic templates") - CHECK_CXX_FEATURE(HAVE_PARTIAL_SPECIALIZATION partial_specialization.cpp "supports partial specialization") - CHECK_CXX_FEATURE(HAVE_PARTIAL_ORDERING partial_ordering.cpp "supports partial ordering") - CHECK_CXX_FEATURE(HAVE_DEFAULT_TEMPLATE_PARAMETERS default_template_parameters.cpp "supports default template parameters") - CHECK_CXX_FEATURE(HAVE_MEMBER_TEMPLATES 
member_templates.cpp "supports member templates") - CHECK_CXX_FEATURE(HAVE_MEMBER_TEMPLATES_OUTSIDE_CLASS member_templates_outside_class.cpp "supports member templates outside the class declaration") - CHECK_CXX_FEATURE(HAVE_FULL_SPECIALIZATION_SYNTAX full_specialization_syntax.cpp "recognizes the full specialization syntax") - CHECK_CXX_FEATURE(HAVE_FUNCTION_NONTYPE_PARAMETERS function_nontype_parameters.cpp "supports function templates with non-type parameters") - CHECK_CXX_FEATURE(HAVE_TEMPLATE_QUALIFIED_BASE_CLASS template_qualified_base_class.cpp "supports template-qualified base class specifiers") - CHECK_CXX_FEATURE(HAVE_TEMPLATE_QUALIFIED_RETURN_TYPE template_qualified_return_type.cpp "supports template-qualified return types") - CHECK_CXX_FEATURE(HAVE_EXPLICIT_TEMPLATE_FUNCTION_QUALIFICATION explicit_template_function_qualification.cpp "supports explicit template function qualification") - CHECK_CXX_FEATURE(HAVE_TEMPLATES_AS_TEMPLATE_ARGUMENTS templates_as_template_arguments.cpp "supports templates as template arguments") - CHECK_CXX_FEATURE(HAVE_TEMPLATE_KEYWORD_QUALIFIER template_keyword_qualifier.cpp "supports use of the template keyword as a qualifier") - CHECK_CXX_FEATURE(HAVE_TEMPLATE_SCOPED_ARGUMENT_MATCHING template_scoped_argument_matching.cpp "supports function matching with argument types which are template scope-qualified") - CHECK_CXX_FEATURE(HAVE_TYPE_PROMOTION type_promotion.cpp "support the vector type promotion mechanism") - CHECK_CXX_FEATURE(HAVE_USE_NUMTRAIT use_numtrait.cpp "supports numeric traits promotions") - CHECK_CXX_FEATURE(HAVE_ENUM_COMPUTATIONS enum_computations.cpp "handles computations inside an enum") - CHECK_CXX_FEATURE(HAVE_ENUM_COMPUTATIONS_WITH_CAST enum_computations_with_cast.cpp "handles (int) casts in enum computations") -endmacro() - -macro(CHECK_HEADER variable name) - find_path(FOUND ${name} "/usr/include;/usr/include/c++") - if (FOUND) - set(value TRUE) - else() - set(value FALSE) - endif() - 
set("${CHECK_CXX_FEATURE_PREFIX}${variable}" ${value} CACHE INTERNAL "Whether the header ${name} exists.") -endmacro() - -macro(CHECK_CXX_STANDARD_LIBRARY) - CHECK_CXX_FEATURE(HAVE_COMPLEX have_complex.cpp "has complex") - CHECK_CXX_FEATURE(HAVE_COMPLEX_FCNS have_complex_fcns.cpp "has standard complex functions") - CHECK_HEADER(HAVE_CSTRING cstring) - CHECK_CXX_FEATURE(HAVE_NUMERIC_LIMITS have_numeric_limits.cpp "has numeric_limits") - CHECK_CXX_FEATURE(HAVE_CLIMITS have_climits.cpp "has header") - CHECK_CXX_FEATURE(HAVE_VALARRAY have_valarray.cpp "has valarray") - CHECK_CXX_FEATURE(HAVE_COMPLEX_MATH1 have_complex_math1.cpp "has complex math functions") - CHECK_CXX_FEATURE(HAVE_COMPLEX_MATH2 have_complex_math2.cpp "has more complex math functions") - CHECK_CXX_FEATURE(HAVE_IEEE_MATH have_ieee_math.cpp "supports IEEE math library") - CHECK_CXX_FEATURE(HAVE_SYSTEM_V_MATH have_system_v_math.cpp "supports System V math library") - CHECK_CXX_FEATURE(HAVE_MATH_FN_IN_NAMESPACE_STD math_fn_in_namespace_std.cpp "has C math functions in namespace std") - CHECK_CXX_FEATURE(HAVE_MATH_ABSINT_IN_NAMESPACE_STD math_absint_in_namespace_std.cpp "has C math abs(integer type) in namespace std") - CHECK_CXX_FEATURE(HAVE_COMPLEX_MATH_IN_NAMESPACE_STD complex_math_in_namespace_std.cpp "supports complex math functions are in namespace std") - CHECK_CXX_FEATURE(HAVE_ISNAN_IN_NAMESPACE_STD isnan_in_namespace_std.cpp "has isnan function in namespace std") - CHECK_CXX_FEATURE(HAVE_ISNORMAL_IN_NAMESPACE_STD isnormal_in_namespace_std.cpp "has isnormal function in namespace std") - CHECK_CXX_FEATURE(HAVE_STD have_std.cpp "supports ISO C++ standard library") - CHECK_CXX_FEATURE(HAVE_STL have_stl.cpp "supports Standard Template Library") - CHECK_CXX_FEATURE(HAVE_RUSAGE have_rusage.cpp "has getrusage() function") -endmacro() - -macro(CHECK_ALL_CXX_FEATURES) - set(CHECK_CXX_FEATURE_PREFIX ${ARGN}) - CHECK_CXX_GENERAL() - CHECK_CXX_KEYWORDS() - CHECK_CXX_TYPE_CASTS() - CHECK_CXX_TEMPLATES_FEATURES() 
- CHECK_CXX_STANDARD_LIBRARY() -endmacro() - -macro(CHECK_ALIGNMENT_DIRECTIVE) - set(CHECK_CXX_FEATURE_PREFIX ${ARGN}) - CHECK_CXX_FEATURE(HAVE_ALIGNEMENT_DIRECTIVE_WINDOWS_STYLE alignment_directive_win.cpp "has windows style alignment directives") - if (NOT ${CHECK_CXX_FEATURE_PREFIX}HAVE_ALIGNEMENT_DIRECTIVE_WINDOWS_STYLE STREQUAL "TRUE") - CHECK_CXX_FEATURE(HAVE_ALIGNEMENT_DIRECTIVE_GCC_STYLE alignment_directive_gcc.cpp "has gcc style alignment directives") - endif() -endmacro() diff --git a/compiler/LEGAL b/compiler/LEGAL deleted file mode 100644 index eae295cf..00000000 --- a/compiler/LEGAL +++ /dev/null @@ -1,8 +0,0 @@ -This compiler test suite is (C) 1997 Todd Veldhuizen. Permission is -granted to use this test suite for non-commercial purposes only. This -suite may be redistributed so long as no fee is charged, and all the -files in the original distribution are included intact. - -If you wish to use this suite for a commercial project (i.e. testing -ISO/ANSI C++ standard compliance for a compiler), please contact me -for licensing information at . diff --git a/compiler/README b/compiler/README deleted file mode 100644 index a276f1dc..00000000 --- a/compiler/README +++ /dev/null @@ -1,22 +0,0 @@ - Blitz++ Compiler Feature Tests - - http://monet.uwaterloo.ca/blitz/compilers/ - -August 1997 - -This tar file (bzconfig.tar.gz) contains a set of small programs to -test your compiler's support of new C++ language features. Some of -these features are necessary for the Blitz++ library; others aren't. - -Run the script bzconfig by invoking bash (or sh) on it: -bash ./bzconfig - -After asking a few questions about invoking your compiler, the script -will compile and run a series of small C++ programs. The results of -the tests are written to a file config.h. - -* If you have problems getting the script to work, try setting - "verbose=1" or "set -x" at the beginning of the script file. 
- -Thanks, -Todd diff --git a/compiler/bool.cpp b/compiler/bool.cpp deleted file mode 100644 index 81274155..00000000 --- a/compiler/bool.cpp +++ /dev/null @@ -1,24 +0,0 @@ -// bool treated as distinct type -// BZ_BOOL - -int foo(int x) -{ - return 1; -} - -int foo(char x) -{ - return 1; -} - -int foo(bool x) -{ - return 0; -} - -int main() -{ - bool c = true; - return foo(c); -} - diff --git a/compiler/bzconfig b/compiler/bzconfig deleted file mode 100755 index cecb1022..00000000 --- a/compiler/bzconfig +++ /dev/null @@ -1,377 +0,0 @@ -#! /bin/sh -# -# Evaluate C++ compiler implementation, to determine which kludges the -# Blitz++ library should use. -# -# With thanks to Tom Keffer (Rogue Wave Software) and Larry Wall -# -# $Id$ - -# If you need to debug this script, try uncommenting the -# next line -# set -x - -# Alternately, try setting verbose=1 on the next line -verbose=0 - -# If your platform generates .OBJ instead of .o files, you'll need -# to edit the next line -objextension='o' - -if test ! -t 0; then - echo "Use 'sh bzconfig', not 'sh < bzconfig'" - exit 1 -fi - -# clean="rm -f a.out core bztemp bztest bzjunk.cpp bzjunk.o bzjunk.ii bzjunk.int.c bzjunk.s" -clean="rm -f core" -trap '$clean; exit 1' 1 2 3 15 - -# Information about the system: -link='ln -s' - -# Information about invoking the compiler -cppinvoke='' -extension='cpp' -special='' -srcdir='.' -install=0 -interactive=1 - -# Don't bother using symbolic links, just copy - -# Does the system support symbolic links? 
-# echo "foo" >test.1 -# ln -s test.1 test.2 >>bztemp 2>&1 -# if grep foo test.2 >>bztemp 2>&1; then -# copy='ln -s' -# else -# copy='cp' -# fi -# rm -f test.1 test.2 -copy='cp' - -# Determine if we're running in interactive mode, or if the -# arguments were passed on the command line - -ac_prev= -for ac_option -do - case "$ac_option" in - -*=*) ac_optarg=`echo "$ac_option" | sed 's/[-_a-zA-Z0-9]*=//'` ;; - *) ac_optarg= ;; - esac - - case "$ac_option" in - --compiler=*) - cppinvoke="$ac_optarg" - interactive=0 ;; - --flags=*) - special="$ac_optarg" ;; - --srcdir=*) - srcdir="$ac_optarg" ;; - --extension=*) - extension="$ac_optarg" ;; - --install) - install=1 ;; - --help) - cat << EOF -Usage: bzconfig [options] -Options: - --compiler=PROGRAM Compiler invokation (cc, KCC, g++, vacbld, etc.) - You will be prompted if this option is missing. - --flags=FLAGS Flags for the compiler. Multiple flags can be - indicated using quotes, e.g. --flags="-x -g" - --extension=EXT Extension for C++ programs (cpp, C, cxx) - Defaults to cpp - --install Automatically install the resulting config.h - file to ../blitz/config.h (default is not to) -EOF - exit 0;; - -*) { echo "bzconfig: error: $ac_option: invalid option; use --help to show usage" 1>&2; exit 1; } - ;; - esac -done - -cat << 'EOH' - -Blitz++ compiler evaluation - -This script will test your compiler to determine which language -features it supports. - -EOH - -if test $interactive -eq 1; then - -echo "Running in interactive mode (the --compiler option was not specified)" - -echo "What is the command to invoke your C++ compiler? 
" -read cppinvoke - -echo " " -echo "Suggested flags:" -echo "KCC: -x --restrict" -echo "KCC under Linux: -x --restrict -D__signed__=" -echo "SGi: -n32 -experimental" -echo " " -echo "* If your compiler does not recognize the new ISO C++ keyword" -echo " \"typename,\" you should compile with -DBZ_NO_TYPENAME" -echo "* If your compiler does not implement namespaces, you should" -echo " compile with -DBZ_NO_NAMESPACES" -echo "* If your compiler needs special flags for exceptions and RTTI, don't " -echo " bother -- Blitz++ doesn't use these features, although this suite does " -echo " test for them." -echo " " -echo "Any special compile flags? (ENTER for none) " -read special - -echo " " -echo "I am assuming your compiler recognizes .cpp extensions. If not," -echo "start bzconfig again and use the --extension=EXT option." -fi - - -case "$cppinvoke" in - vacbld) - vacbld=1 - echo Using special setup for vacbld. - ;; - *) - vacbld=0 - ;; -esac - -echo " " -echo "I am now going to try a simple program." -cat <<'EOP' >bzjunk.$extension -int main() { return 0; } -EOP - -if test $vacbld -eq 1; then - echo Checking vacbld... - rm -f bzjunk - if vacbld $special $srcdir/vac.icc >>bztemp 2>&1 && - test -x bzjunk - then - echo vacbld ran successfully. - else - echo "Hmmm.. I was unable to compile a simple program." - echo "The command used was:" - echo "vacbld $special $srcdir/vac.icc" - $clean - exit 1 - fi -else -if test $verbose -eq 1; then - echo $cppinvoke $special -c bzjunk.$extension - echo test -f bzjunk.$objextension -fi - -if $cppinvoke $special -c bzjunk.$extension && # >>bztemp 2>&1 && - test -f bzjunk.$objextension -then - echo "Okay, it compiled. But will it link?" -else - echo "Hmmm.. I was unable to compile a simple program." - echo "The command line I used was:" - echo "$cppinvoke $special -c bzjunk.$extension" - echo "If your platform uses .OBJ instead of .o files, you'll need" - echo "to edit the bzconfig script and set objextension correctly." 
- $clean - exit 1 -fi - -if test $verbose -eq 1; then - echo $cppinvoke $special bzjunk.$objextension -o bzjunk - echo test -x bzjunk - echo sh -c ./bzjunk -fi - -if $cppinvoke $special bzjunk.$objextension -o bzjunk -lm >>bztemp 2>&1 && - sh -c ./bzjunk >>bztemp 2>&1 -then - echo "Yes, it linked too. Great." - echo " " - rm -f bzjunk.$extension bzjunk.o bzjunk -else - echo "No, I could compile, but couldn't link (or couldn't execute" - echo "the resulting file." - echo "The command line I used was:" - echo "$cppinvoke $special bzjunk.$objextension -o bzjunk -lm" - $clean - exit 1 -fi -fi - -######################################################################## - -echo " " -echo "Okay, now the fun begins." -echo " " - -rm -f config.h logfile -cat << 'EOH' >config.h -/****************************************************************************** - * config.h Compiler language support flags - * - * This file was generated automatically by the script bzconfig. - * You should rerun bzconfig each time you switch compilers, install new - * standard libraries, or change compiler versions. 
- * - */ - -EOH - -echo " " >>config.h -echo "#ifndef BZ_CONFIG_H" >>config.h -echo "#define BZ_CONFIG_H" >>config.h -echo " " >>config.h -echo "#define BZ_COMPILER_NAME \"$cppinvoke\"" >>config.h -echo "#define BZ_COMPILER_OPTIONS \"$special\"" >>config.h -echo "#define BZ_OS_NAME \"`uname -s -r`\"" >>config.h -echo "#define BZ_BZCONFIG_DATE \"`date`\"" >>config.h -echo "#define BZ_PLATFORM \"`$srcdir/../config.guess`\"" >>config.h -echo " " >>config.h - -# Set up a little script to make this easier -echo \#\!/bin/sh > bztest -echo cppinvoke=\"$cppinvoke\" >> bztest -echo special=\"$special\" >> bztest -echo copy=\"$copy\" >> bztest -echo extension=\"$extension\" >> bztest -echo verbose=\"$verbose\" >> bztest -echo srcdir=\"$srcdir\" >> bztest -cat << 'EOSC' >>bztest -# set -x -echo " " -echo $3 -echo " " >>logfile -echo " " >>logfile -echo " " >>logfile -echo $3 >>logfile -echo $cppinvoke $special $srcdir/$2 >>logfile - -rm -f bzjunk bzjunk.$extension bzjunk.o -if test $verbose -eq 1; then echo $copy $srcdir/$2 bzjunk.$extension; fi - -$copy $srcdir/$2 bzjunk.$extension - -if test $verbose -eq 1; then - echo $cppinvoke $special bzjunk.$extension -o bzjunk -fi - -case $cppinvoke in - vacbld) - build="vacbld $special vac.icc" - ;; - *) - build="$cppinvoke $special bzjunk.$extension -o bzjunk -lm" - ;; -esac - -if $build >>logfile 2>&1 && - test -x bzjunk && - sh -c ./bzjunk >>bztemp 2>&1 -then - echo "Yes." - echo "#define $1" >>config.h - echo "Success: $1" >>logfile -else - echo "Nope." - echo "#undef $1" >>config.h - echo "Failed: $1" >>logfile -fi -EOSC -chmod +x bztest - -# Major language features -./bztest BZ_NAMESPACES namespac.cpp "Does your compiler implement namespaces?" -./bztest BZ_EXCEPTIONS except.cpp "What about exceptions?" -./bztest BZ_RTTI rtti.cpp "Run-Time Type Identification?" -./bztest BZ_MEMBER_CONSTANTS membcnst.cpp "Member constants?" -./bztest BZ_OLD_FOR_SCOPING oldfor.cpp "Does your compiler cling to the old 'for' scoping rules?" 
- -# New keywords -echo " " -echo "Now for some of the new keywords." -./bztest BZ_EXPLICIT explicit.cpp "How about the 'explicit' keyword?" -./bztest BZ_MUTABLE mutable.cpp "What about the 'mutable' keyword?" -./bztest BZ_TYPENAME typename.cpp "Does your compiler recognize 'typename'?" -./bztest BZ_NCEG_RESTRICT restrict.cpp "Just on the off chance... the NCEG 'restrict' keyword?" -./bztest BZ_NCEG_RESTRICT_EGCS restric2.cpp "Maybe it recognizes __restrict__?" -./bztest BZ_BOOL bool.cpp "Does it recognize bool as a built-in type?" - -# Typecasting -echo " " -echo "Does your compiler understand the newfangled casting syntax?" -./bztest BZ_CONST_CAST constcst.cpp "What about const_cast<>?" -./bztest BZ_STATIC_CAST statcast.cpp "static_cast<>?" -./bztest BZ_REINTERPRET_CAST reinterp.cpp "reinterpret_cast<>?" -./bztest BZ_DYNAMIC_CAST dynamic.cpp "dynamic_cast<>?" - -# Templates (most important) -echo " " -echo "Okay, now the important stuff -- templates." - -./bztest BZ_TEMPLATES template.cpp "Will it handle basic templates? (If not, just give up now.)" -./bztest BZ_PARTIAL_SPECIALIZATION partial.cpp "Partial specialization?" -./bztest BZ_PARTIAL_ORDERING porder.cpp "Partial ordering?" -./bztest BZ_DEFAULT_TEMPLATE_PARAMETERS default.cpp "Default template parameters?" -./bztest BZ_MEMBER_TEMPLATES membtmpl.cpp "Member templates?" -./bztest BZ_MEMBER_TEMPLATES_OUTSIDE_CLASS membtmp2.cpp "Member templates outside the class declaration?" -./bztest BZ_FULL_SPECIALIZATION_SYNTAX fullspec.cpp "Does it recognize the full specialization syntax?" -./bztest BZ_FUNCTION_NONTYPE_PARAMETERS nontype.cpp "Function templates with non-type parameters?" -./bztest BZ_TEMPLATE_QUALIFIED_BASE_CLASS elabbase.cpp "Template-qualified base class specifiers?" -./bztest BZ_TEMPLATE_QUALIFIED_RETURN_TYPE elabret.cpp "Template-qualified return types (necessary for vector type promotion)?" 
-./bztest BZ_EXPLICIT_TEMPLATE_FUNCTION_QUALIFICATION tempqual.cpp "Explicit template function qualification?" -./bztest BZ_TEMPLATES_AS_TEMPLATE_ARGUMENTS temptemp.cpp "Templates as template arguments?" -./bztest BZ_TEMPLATE_KEYWORD_QUALIFIER tempkey.cpp "Use of the template keyword as a qualifier?" -./bztest BZ_TEMPLATE_SCOPED_ARGUMENT_MATCHING tempqmt.cpp "Function matching with argument types which are template scope-qualified?" -./bztest BZ_TYPE_PROMOTION promote.cpp "Will it support the vector type promotion mechanism?" -./bztest BZ_USE_NUMTRAIT numtrait.cpp "Numeric traits promotions (sum type, etc.)?" -./bztest BZ_ENUM_COMPUTATIONS enumcomp.cpp "Can your compiler handle computations inside an enum?" -./bztest BZ_ENUM_COMPUTATIONS_WITH_CAST enumcmp2.cpp "Does it handle (int) casts in enum computations?" - -# Standard library -echo " " -echo "Which library features does your compiler provide?" -./bztest BZ_HAVE_COMPLEX complex.cpp "Does it have complex?" -./bztest BZ_HAVE_NUMERIC_LIMITS numlimit.cpp "Does it have numeric_limits?" -./bztest BZ_HAVE_CLIMITS climits.cpp "Does it have ?" -./bztest BZ_HAVE_VALARRAY valarray.cpp "Does it have valarray?" -./bztest BZ_HAVE_COMPLEX_MATH compmath.cpp "Complex math functions?" -./bztest BZ_HAVE_IEEE_MATH ieeemath.cpp "IEEE Math library?" -./bztest BZ_HAVE_SYSTEM_V_MATH sysvmath.cpp "System V Math library?" -./bztest BZ_MATH_FN_IN_NAMESPACE_STD mathscop.cpp "Are C math functions in and std::?" -./bztest BZ_COMPLEX_MATH_IN_NAMESPACE_STD cmthscop.cpp "Are complex math functions in std::?" -./bztest BZ_HAVE_STD std.cpp "ISO C++ Standard library?" -./bztest BZ_HAVE_STL stl.cpp "Standard template library?" -./bztest BZ_HAVE_RUSAGE getruse.cpp "What about getrusage()?" -# Clean up -$clean - -echo " " >>config.h -echo "#endif // BZ_CONFIG_H" >>config.h - -echo " " -echo The results have been written to the file config.h. 
- -if test $interactive -eq 1; then - -echo You should now copy this file to the location of the Blitz++ header -echo files, overwriting the current version of "". -echo " " -echo If you have installed this library in the usual fashion, the command -echo you should run is: -echo " " -echo cp config.h ../blitz -echo " " - -fi - -echo If you\'re curious about which tests passed and failed and why, see -echo this file: -ls -l logfile - - diff --git a/compiler/climits.cpp b/compiler/climits.cpp deleted file mode 100644 index e96d28a2..00000000 --- a/compiler/climits.cpp +++ /dev/null @@ -1,10 +0,0 @@ -// has the header? - -#include - -int main() -{ - int i = INT_MIN; - return 0; -} - diff --git a/compiler/cmthscop.cpp b/compiler/cmthscop.cpp deleted file mode 100644 index 63c7626f..00000000 --- a/compiler/cmthscop.cpp +++ /dev/null @@ -1,20 +0,0 @@ -// Where are those pesky math functions? -// BZ_COMPLEX_MATH_IN_NAMESPACE_STD - -#include - -namespace blitz { - using namespace std; - - complex pow(complex x, complex y) - { return std::pow(x,y); } -}; - -int main() -{ - using namespace blitz; - complex x = 1.0, y = 1.0; - blitz::pow(x,y); - return 0; -} - diff --git a/compiler/complex.cpp b/compiler/complex.cpp deleted file mode 100644 index 7f050f56..00000000 --- a/compiler/complex.cpp +++ /dev/null @@ -1,15 +0,0 @@ -// complex class - -#include - -#ifndef BZ_NO_NAMESPACES -using namespace std; -#endif - -int main() -{ - complex a; - complex b; - return 0; -} - diff --git a/compiler/compmath.cpp b/compiler/compmath.cpp deleted file mode 100644 index 678e5652..00000000 --- a/compiler/compmath.cpp +++ /dev/null @@ -1,38 +0,0 @@ -// BZ_HAVE_COMPLEX_MATH1 -// Complex math functions, as per 26.2.7 of the Jan'96 draft standard -#include - -#ifndef BZ_NO_NAMESPACES -using namespace std; -#endif - -int main() -{ - complex x(1.0, 1.0), y(1.0, 1.0); - - real(x); - imag(x); - abs(x); - arg(x); - norm(x); - conj(x); - polar(1.0,1.0); - - cos(x); - cosh(x); - exp(x); - log(x); - 
log10(x); - pow(x,1); - pow(x,double(2.0)); - pow(x, y); - pow(double(2.0), x); - sin(x); - sinh(x); - sqrt(x); - tan(x); - tanh(x); - - return 0; -} - diff --git a/compiler/constcst.cpp b/compiler/constcst.cpp deleted file mode 100644 index 737c4cef..00000000 --- a/compiler/constcst.cpp +++ /dev/null @@ -1,16 +0,0 @@ -// const_cast - - -int main() -{ - int x = 0; - const int& y = x; - - int& z = const_cast(y); - z = 3; - if (x == 3) - return 0; - - return 1; -} - diff --git a/compiler/cstd.cpp b/compiler/cstd.cpp deleted file mode 100644 index 1b12e2ab..00000000 --- a/compiler/cstd.cpp +++ /dev/null @@ -1,7 +0,0 @@ -#include - -int main() -{ - return 0; -} - diff --git a/compiler/default.cpp b/compiler/default.cpp deleted file mode 100644 index 6e9acb1f..00000000 --- a/compiler/default.cpp +++ /dev/null @@ -1,16 +0,0 @@ -// Default template parameters -// BZ_DEFAULT_TEMPLATE_PARAMETERS - -template -class foo { -public: - int bar() const - { return 0; } -}; - -int main() -{ - foo z; - return z.bar(); -} - diff --git a/compiler/dynamic.cpp b/compiler/dynamic.cpp deleted file mode 100644 index c8eb1488..00000000 --- a/compiler/dynamic.cpp +++ /dev/null @@ -1,31 +0,0 @@ -#include - -class Dog { -public: - Dog() { } - virtual void fetch() = 0; -}; - -class Dalmation : public Dog { -public: - Dalmation() { } - virtual void fetch(); -}; - -void Dalmation::fetch() -{ -} - -int main() -{ - Dalmation cairo; - Dog& doggie = cairo; - - if (dynamic_cast(&doggie)) - { - return 0; - } - - return 1; -} - diff --git a/compiler/elabbase.cpp b/compiler/elabbase.cpp deleted file mode 100644 index 776d1e22..00000000 --- a/compiler/elabbase.cpp +++ /dev/null @@ -1,45 +0,0 @@ -#ifdef BZ_NO_TYPENAME - #define typename -#endif - - -// Template-qualified base class specifier -// BZ_TEMPLATE_QUALIFIED_BASE_CLASS - -class base1 { -public: - int bar() const - { return 1; } -}; - -class base2 { -public: - int bar() const - { return 0; } -}; - -template -struct base_trait { - typedef base1 
base; -}; - -template<> -struct base_trait { - typedef base2 base; -}; - -template -class weird : public base_trait::base { -public: - typedef typename base_trait::base base; - - int zowee() const - { return this->bar(); } -}; - -int main() -{ - weird z; - return z.zowee(); -} - diff --git a/compiler/elabret.cpp b/compiler/elabret.cpp deleted file mode 100644 index d5a7824f..00000000 --- a/compiler/elabret.cpp +++ /dev/null @@ -1,39 +0,0 @@ -#ifdef BZ_NO_TYPENAME - #define typename -#endif - - -// Template-qualified return type, necessary for type promotion on vectors -// BZ_TEMPLATE_QUALIFIED_RETURN_TYPE - -template -struct promote_trait { - typedef X T; -}; - - -template<>struct promote_trait { - typedef float T; -}; - -template -class Vector { -public: - Vector() { } -}; - -template -Vector::T> operator+(const Vector&, - const Vector&) -{ - return Vector::T>(); -} - -int main() -{ - Vector x; - Vector y; - Vector z = x + y; - return 0; -} - diff --git a/compiler/enumcmp2.cpp b/compiler/enumcmp2.cpp deleted file mode 100644 index eaf7bb44..00000000 --- a/compiler/enumcmp2.cpp +++ /dev/null @@ -1,27 +0,0 @@ -// BZ_ENUM_COMPUTATIONS_WITH_CAST - -struct foo { - enum { a = 5, b = 7, c = 2 }; -}; - -struct bar { - enum { a = 1, b = 6, c = 9 }; -}; - -template -struct Z { - enum { a = ((int)T1::a > (int)T2::a) ? (int)T1::a : (int)T2::b, - b = (int)T1::b + (int)T2::b, - c = ((int)T1::c * (int)T2::c + (int)T2::a + (int)T1::a) - }; -}; - -int main() -{ - if (((int)Z::a == 5) && ((int)Z::b == 13) - && ((int)Z::c == 24)) - return 0; - else - return 1; -} - diff --git a/compiler/enumcomp.cpp b/compiler/enumcomp.cpp deleted file mode 100644 index dc34b9f4..00000000 --- a/compiler/enumcomp.cpp +++ /dev/null @@ -1,27 +0,0 @@ -// BZ_ENUM_COMPUTATIONS - -struct foo { - enum { a = 5, b = 7, c = 2 }; -}; - -struct bar { - enum { a = 1, b = 6, c = 9 }; -}; - -template -struct Z { - enum { a = (T1::a > T2::a) ? 
T1::a : T2::b, - b = T1::b + T2::b, - c = (T1::c * T2::c + T2::a + T1::a) - }; -}; - -int main() -{ - if (((int)Z::a == 5) && ((int)Z::b == 13) - && ((int)Z::c == 24)) - return 0; - else - return 1; -} - diff --git a/compiler/except.cpp b/compiler/except.cpp deleted file mode 100644 index 09ba54a3..00000000 --- a/compiler/except.cpp +++ /dev/null @@ -1,27 +0,0 @@ -// Exceptions -// BZ_EXCEPTIONS - -#include - -class foo { }; - -int divide(int a, int b) -{ - if (b == 0) - throw foo(); - - return a / b; -} - -int main() -{ - try { - divide(5,0); - } - catch(foo x) { - return 0; - } - - return 1; -} - diff --git a/compiler/explicit.cpp b/compiler/explicit.cpp deleted file mode 100644 index 7b90d4f5..00000000 --- a/compiler/explicit.cpp +++ /dev/null @@ -1,16 +0,0 @@ -// 'explicit' keyword -// BZ_EXPLICIT - -class vector { - public: - explicit vector(double) - { } -}; - -int main() -{ - double c = 5.0; - vector x(c); - return 0; -} - diff --git a/compiler/fullspec.cpp b/compiler/fullspec.cpp deleted file mode 100644 index b852ae5a..00000000 --- a/compiler/fullspec.cpp +++ /dev/null @@ -1,22 +0,0 @@ -// Special syntax for full specialization - -template -class foo { -public: - int bar() const - { return 1; } -}; - -template<> -class foo { -public: - int bar() const - { return 0; } -}; - -int main() -{ - foo z; - return z.bar(); -} - diff --git a/compiler/getruse.cpp b/compiler/getruse.cpp deleted file mode 100644 index 8fed74dc..00000000 --- a/compiler/getruse.cpp +++ /dev/null @@ -1,8 +0,0 @@ -#include - -int main() -{ - struct rusage resUsage; - getrusage(RUSAGE_SELF, &resUsage); - return 0; -} diff --git a/compiler/ieeemath.cpp b/compiler/ieeemath.cpp deleted file mode 100644 index dd7415f2..00000000 --- a/compiler/ieeemath.cpp +++ /dev/null @@ -1,52 +0,0 @@ -// BZ_HAVE_IEEE_MATH - -#if !defined(__GNUC__) - #ifndef _ALL_SOURCE - #define _ALL_SOURCE - #endif - - #ifndef _XOPEN_SOURCE - #define _XOPEN_SOURCE - #endif - - #ifndef _XOPEN_SOURCE_EXTENDED - #define 
_XOPEN_SOURCE_EXTENDED 1 - #endif -#endif - -#include - -// finite and trunc have been removed: -// blitz-bugs/archive/0189.html - -int main() -{ - double x = 1.0; - // double y = 1.0; - - acosh(x); - asinh(x); - atanh(x); - cbrt(x); - erf(x); - erfc(x); - expm1(x); - // finite(x); - ilogb(x); - isnan(x); - j0(x); - j1(x); -// lgamma function has different interface under AIX in threaded mode -#if !(defined(_AIX) && defined(_THREAD_SAFE)) - lgamma(x); -#endif - logb(x); - log1p(x); - rint(x); - // trunc(x); - y0(x); - y1(x); - - return 0; -} - diff --git a/compiler/instant.cpp b/compiler/instant.cpp deleted file mode 100644 index a037fb2b..00000000 --- a/compiler/instant.cpp +++ /dev/null @@ -1,16 +0,0 @@ -// Explicit template instantiation -// BZ_EXPLICIT_TEMPLATE_INSTANTIATION - -template -class Vector { -public: - Vector() { } -}; - -template class Vector; - -int main() -{ - return 0; -} - diff --git a/compiler/mathscop.cpp b/compiler/mathscop.cpp deleted file mode 100644 index 9bda92bc..00000000 --- a/compiler/mathscop.cpp +++ /dev/null @@ -1,19 +0,0 @@ -// Where are those pesky math functions? -// BZ_MATH_FN_IN_NAMESPACE_STD - -#include - -namespace blitz { - double pow(double x, double y) - { return std::pow(x,y); } -}; - -int main() -{ - using namespace blitz; - double x = 1.0, y = 1.0; - blitz::pow(x,y); - return 0; -} - - diff --git a/compiler/membcnst.cpp b/compiler/membcnst.cpp deleted file mode 100644 index c92c64ff..00000000 --- a/compiler/membcnst.cpp +++ /dev/null @@ -1,14 +0,0 @@ -// Member constants - -class Foo { -public: - static const int value = 0; -}; - -const int Foo::value; - -int main() -{ - return Foo::value; -} - diff --git a/compiler/membtmp2.cpp b/compiler/membtmp2.cpp deleted file mode 100644 index 708db734..00000000 --- a/compiler/membtmp2.cpp +++ /dev/null @@ -1,27 +0,0 @@ -// Test member function templates #2: declaration of member templates outside -// the class. 
-// BZ_MEMBER_TEMPLATES_OUTSIDE_CLASS - -template -class Foo { - -public: - template - Foo operator=(const Foo& z); -}; - -template template -Foo Foo::operator=(const Foo& z) -{ - return Foo(); -} - -int main() -{ - Foo x; - Foo y; - x = y; - - return 0; -} - diff --git a/compiler/membtmpl.cpp b/compiler/membtmpl.cpp deleted file mode 100644 index 9ccfb5de..00000000 --- a/compiler/membtmpl.cpp +++ /dev/null @@ -1,26 +0,0 @@ -// Test member function templates -// BZ_MEMBER_TEMPLATES - -template -class Foo { - -public: - template - Foo operator=(const Foo&); -}; - -template template -Foo Foo::operator=(const Foo& z) -{ - return Foo(); -} - -int main() -{ - Foo x; - Foo y; - x = y; - - return 0; -} - diff --git a/compiler/mutable.cpp b/compiler/mutable.cpp deleted file mode 100644 index fe357844..00000000 --- a/compiler/mutable.cpp +++ /dev/null @@ -1,35 +0,0 @@ -// 'mutable' keyword -// BZ_MUTABLE - -class num { - -public: - num(int z) - { - x_ = z; - numReads_ = 0; - } - - void set(int z) - { x_ = z; } - - int get() const - { - ++numReads_; - return x_; - } - -private: - int x_; - mutable int numReads_; -}; - -int main() -{ - num q(4); - q.set(5); - int k = q.get(); - q.get(); - return 0; -} - diff --git a/compiler/namespac.cpp b/compiler/namespac.cpp deleted file mode 100644 index fc66161c..00000000 --- a/compiler/namespac.cpp +++ /dev/null @@ -1,53 +0,0 @@ -// BZ_NAMESPACES - -namespace computers { - -class keyboard { - public: - int getkey() const; -}; - -int keyboard::getkey() const -{ - return 0; -} - -} - -namespace music { - -class keyboard { - public: - void playNote(int note); -}; - -} - -namespace music { - -void keyboard::playNote(int note) -{ -} - -namespace foo { - template void Xeg(T) { } -} - -} - -using namespace computers; - -int main() -{ - keyboard x; - int z = x.getkey(); - - music::keyboard y; - y.playNote(z); - - using namespace music::foo; - Xeg(z); - - return 0; -} - diff --git a/compiler/nontype.cpp b/compiler/nontype.cpp deleted file mode 
100644 index 8f5d800c..00000000 --- a/compiler/nontype.cpp +++ /dev/null @@ -1,20 +0,0 @@ -// Test function templates with non-type parameters -// BZ_FUNCTION_NONTYPE_PARAMETERS - -template -class Foo { -}; - -template -void showFoo(const Foo& x) -{ -} - -int main() -{ - Foo z; - showFoo(z); - return 0; -} - - diff --git a/compiler/numlimit.cpp b/compiler/numlimit.cpp deleted file mode 100644 index ab8ea1dc..00000000 --- a/compiler/numlimit.cpp +++ /dev/null @@ -1,14 +0,0 @@ -// numeric_limits class - -#include - -#ifndef BZ_NO_NAMESPACES -using namespace std; -#endif - -int main() -{ - double e = numeric_limits::epsilon(); - return 0; -} - diff --git a/compiler/numtrait.cpp b/compiler/numtrait.cpp deleted file mode 100644 index b0a8db0e..00000000 --- a/compiler/numtrait.cpp +++ /dev/null @@ -1,37 +0,0 @@ -#ifdef BZ_NO_TYPENAME - #define typename -#endif - - -// BZ_USE_NUMTRAIT - -template -class SumType { -public: - typedef T_numtype T_sumtype; -}; - -template<> -class SumType { -public: - typedef int T_sumtype; -}; - -template -class Vector { -}; - -template -Vector::T_sumtype> -sum(Vector) -{ - return Vector::T_sumtype>(); -} - -int main() -{ - Vector x; - sum(x); - return 0; -} - diff --git a/compiler/oldfor.cpp b/compiler/oldfor.cpp deleted file mode 100644 index ef3c34a8..00000000 --- a/compiler/oldfor.cpp +++ /dev/null @@ -1,16 +0,0 @@ -// Old 'for' scoping rules - -int main() -{ - long z = 0; - - for (int i=0; i < 10; ++i) - { - z = z + i; - } - - z = i; - - return 0; -} - diff --git a/compiler/partial.cpp b/compiler/partial.cpp deleted file mode 100644 index 35fbc38c..00000000 --- a/compiler/partial.cpp +++ /dev/null @@ -1,30 +0,0 @@ -// Partial specialization -// BZ_PARTIAL_SPECIALIZATION - -template -class foo { -public: - enum bar { z = 0 }; -}; - -template -class foo { -public: - enum bar { z = 1 }; -}; - -template -class foo { -public: - enum bar { z = 2 }; -}; - -int main() -{ - if ((foo::z == 0) && (foo::z == 1) - && (foo::z == 2)) - return 0; - 
else - return 1; -} - diff --git a/compiler/porder.cpp b/compiler/porder.cpp deleted file mode 100644 index 848d401f..00000000 --- a/compiler/porder.cpp +++ /dev/null @@ -1,38 +0,0 @@ -// Partial ordering of member templates -// BZ_PARTIAL_ORDERING - -template -struct I { -}; - -template -struct A { - - int r; - - template - void operator()(T1, T2) - { r = 0; } - - template - void operator()(I, I) - { r = 1; } -}; - -int main() -{ - A x; - I<0> a; - I<1> b; - - x(a,b); - if (x.r != 1) - return 1; - - x(float(), double()); - if (x.r != 0) - return 1; - - return 0; -} - diff --git a/compiler/promote.cpp b/compiler/promote.cpp deleted file mode 100644 index 291d1d0b..00000000 --- a/compiler/promote.cpp +++ /dev/null @@ -1,30 +0,0 @@ -#ifndef HAVE_TYPENAME - #define typename -#endif - -template -struct vec3 { T data_[3]; }; - -template -struct promote_trait { typedef T1 T_promote; }; -template <> -struct promote_trait { typedef double T_promote; }; - -template -vec3::T_promote> -operator+(const vec3& a, const vec3& b) { - vec3::T_promote> c; - c.data_[0] = a.data_[0] + b.data_[0]; - c.data_[1] = a.data_[1] + b.data_[1]; - c.data_[2] = a.data_[2] + b.data_[2]; - return c; -} - -int main() { - vec3 a,b; - vec3 c,d,e; - b=a+a; - d=c+c; - e=b+d; - return 0; -} diff --git a/compiler/reinterp.cpp b/compiler/reinterp.cpp deleted file mode 100644 index 3ecd46db..00000000 --- a/compiler/reinterp.cpp +++ /dev/null @@ -1,38 +0,0 @@ -// Reinterpret cast - -#include - -class Dog { -public: - Dog() { } - virtual void fetch() = 0; -}; - -class Dalmation : public Dog { -public: - Dalmation() { } - virtual void fetch(); -}; - -void Dalmation::fetch() -{ -} - -class Unrelated { -public: - Unrelated() { } - -}; - -void foo(Unrelated&) -{ } - -int main() -{ - Dalmation cairo; - Dog& dog = cairo; - Unrelated& eek = reinterpret_cast(dog); - foo(eek); - return 0; -} - diff --git a/compiler/restric2.cpp b/compiler/restric2.cpp deleted file mode 100644 index a177638a..00000000 --- 
a/compiler/restric2.cpp +++ /dev/null @@ -1,24 +0,0 @@ -// egcs support for restrict, but as "__restrict__" -// BZ_NCEG_RESTRIC2 - -void add(int length, double * __restrict__ a, const double * __restrict__ b, - const double * __restrict__ c) -{ - for (int i=0; i < length; ++i) - a[i] = b[i] + c[i]; -} - -int main() -{ - double a[10], b[10], c[10]; - for (int i=0; i < 10; ++i) - { - a[i] = 0.; - b[i] = 0.; - c[i] = 0.; - } - - add(10,a,b,c); - return 0; -} - diff --git a/compiler/restrict.cpp b/compiler/restrict.cpp deleted file mode 100644 index ba566241..00000000 --- a/compiler/restrict.cpp +++ /dev/null @@ -1,24 +0,0 @@ -// Numerical C Extensions Group (NCEG) keyword 'restrict' -// BZ_NCEG_RESTRICT - -void add(int length, double * restrict a, const double * restrict b, - const double * restrict c) -{ - for (int i=0; i < length; ++i) - a[i] = b[i] + c[i]; -} - -int main() -{ - double a[10], b[10], c[10]; - for (int i=0; i < 10; ++i) - { - a[i] = 0.; - b[i] = 0.; - c[i] = 0.; - } - - add(10,a,b,c); - return 0; -} - diff --git a/compiler/rtti.cpp b/compiler/rtti.cpp deleted file mode 100644 index feb77b5b..00000000 --- a/compiler/rtti.cpp +++ /dev/null @@ -1,31 +0,0 @@ -#include - -class Dog { -public: - Dog() { } - virtual void fetch() = 0; -}; - -class Dalmation : public Dog { -public: - Dalmation() { } - virtual void fetch(); -}; - -void Dalmation::fetch() -{ -} - -int main() -{ - Dalmation z; - Dog* y = &z; - - if (typeid(*y) == typeid(Dalmation)) - { - return 0; - } - - return 1; -} - diff --git a/compiler/statcast.cpp b/compiler/statcast.cpp deleted file mode 100644 index 407b2593..00000000 --- a/compiler/statcast.cpp +++ /dev/null @@ -1,34 +0,0 @@ -// static_cast - -#include - -class Dog { -public: - Dog() { } - virtual void fetch() = 0; -}; - -class Dalmation : public Dog { -public: - Dalmation() { } - virtual void fetch(); -}; - -void Dalmation::fetch() -{ -} - -void foo(Dalmation&) -{ } - -int main() -{ - Dalmation cairo; - Dog& dog = cairo; - - Dalmation& 
spotted = static_cast(dog); - foo(spotted); - - return 0; -} - diff --git a/compiler/std.cpp b/compiler/std.cpp deleted file mode 100644 index 0b6af372..00000000 --- a/compiler/std.cpp +++ /dev/null @@ -1,13 +0,0 @@ -#include -#include -#include - -#ifndef BZ_NO_NAMESPACES -using namespace std; -#endif - -int main() -{ - return 0; -} - diff --git a/compiler/stl.cpp b/compiler/stl.cpp deleted file mode 100644 index d4f1ffbf..00000000 --- a/compiler/stl.cpp +++ /dev/null @@ -1,27 +0,0 @@ -#include -#include - -#ifndef BZ_NO_NAMESPACES -using namespace std; -#endif - -int main() -{ - list x; - x.push_back(5); - x.push_back(10); - - int sum = 0; - - for (list::iterator iter = x.begin(); - iter != x.end(); ++iter) - { - sum += *iter; - } - - if (sum != 15) - return 1; - - return 0; -} - diff --git a/compiler/sysvmath.cpp b/compiler/sysvmath.cpp deleted file mode 100644 index 0faab481..00000000 --- a/compiler/sysvmath.cpp +++ /dev/null @@ -1,38 +0,0 @@ -// BZ_HAVE_SYSTEM_V_MATH - -#ifndef _ALL_SOURCE - #define _ALL_SOURCE -#endif - -#ifndef _XOPEN_SOURCE - #define _XOPEN_SOURCE -#endif - -#ifndef _XOPEN_SOURCE_EXTENDED - #define _XOPEN_SOURCE_EXTENDED 1 -#endif - -#include - -int main() -{ - double x = 1.0; - double y = 1.0; - - _class(x); - itrunc(x); - nearest(x); - rsqrt(x); - uitrunc(x); - - copysign(x,y); - drem(x,y); - hypot(x,y); - nextafter(x,y); - remainder(x,y); - scalb(x,y); - unordered(x,y); - - return 0; -} - diff --git a/compiler/tempkey.cpp b/compiler/tempkey.cpp deleted file mode 100644 index 2444676d..00000000 --- a/compiler/tempkey.cpp +++ /dev/null @@ -1,20 +0,0 @@ -// 'template' keyword qualifier -// BZ_TEMPLATE_KEYWORD_QUALIFIER - -class Foo { -public: - Foo() { }; - template static T convert() { return T(); } -}; - -template -double f() { - return Foo::template convert(); -} - -int main() -{ - double z = f(); - return 0; -} - diff --git a/compiler/template.cpp b/compiler/template.cpp deleted file mode 100644 index 9c958ce0..00000000 --- 
a/compiler/template.cpp +++ /dev/null @@ -1,21 +0,0 @@ -// Basic templates - -template -class Vector { -public: - Vector() { } -}; - -template -void foo(const Vector& ) -{ } - -int main() -{ - Vector x; - Vector z; - foo(x); - foo(z); - return 0; -} - diff --git a/compiler/tempqmt.cpp b/compiler/tempqmt.cpp deleted file mode 100644 index 5f17ffaf..00000000 --- a/compiler/tempqmt.cpp +++ /dev/null @@ -1,29 +0,0 @@ -#ifdef BZ_NO_TYPENAME - #define typename -#endif - -// BZ_TEMPLATE_SCOPED_ARGUMENT_MATCHING -template -class A { -public: - typedef X W; -}; - -template -class B { - -}; - -template -void operator+(B d1, typename Y::W d2) -{ -} - -int main() -{ - B > z; - z + 0.5f; // match +(B>, A::W) - // +(B>, float) ...? - return 0; -} - diff --git a/compiler/tempqual.cpp b/compiler/tempqual.cpp deleted file mode 100644 index 123b5ee9..00000000 --- a/compiler/tempqual.cpp +++ /dev/null @@ -1,23 +0,0 @@ -// Explicit template function qualification -// BZ_EXPLICIT_TEMPLATE_FUNCTION_QUALIFICATION - -template -class Vector { -public: - Vector() { } -}; - - -template -Vector to(const Vector&) -{ - return Vector(); -} - -int main() -{ - Vector x; - Vector y = to(x); - return 0; -} - diff --git a/compiler/temptemp.cpp b/compiler/temptemp.cpp deleted file mode 100644 index 303db27e..00000000 --- a/compiler/temptemp.cpp +++ /dev/null @@ -1,24 +0,0 @@ -// Templates as template arguments -// BZ_TEMPLATES_AS_TEMPLATE_ARGUMENTS - -template -class allocator { -public: - allocator() { }; -}; - -template class T_alloc> -class foo { -public: - foo() { } - -private: - T_alloc alloc_; -}; - -int main() -{ - foo x; - return 0; -} - diff --git a/compiler/typename.cpp b/compiler/typename.cpp deleted file mode 100644 index bb1046a0..00000000 --- a/compiler/typename.cpp +++ /dev/null @@ -1,12 +0,0 @@ -template -class X { -public: - X() { } -}; - -int main() -{ - X z; - return 0; -} - diff --git a/compiler/vac.icc b/compiler/vac.icc deleted file mode 100644 index 56280593..00000000 --- 
a/compiler/vac.icc +++ /dev/null @@ -1,8 +0,0 @@ -option -link(debug) -{ - target "bzjunk" - { - source type(cpp) "bzjunk.cpp" - } -} diff --git a/compiler/valarray.cpp b/compiler/valarray.cpp deleted file mode 100644 index b6b9970d..00000000 --- a/compiler/valarray.cpp +++ /dev/null @@ -1,14 +0,0 @@ -// valarray class - -#include - -#ifndef BZ_NO_NAMESPACES -using namespace std; -#endif - -int main() -{ - valarray x(100); - return 0; -} - diff --git a/doc/CMakeLists.txt b/doc/CMakeLists.txt deleted file mode 100644 index 7085c54c..00000000 --- a/doc/CMakeLists.txt +++ /dev/null @@ -1,91 +0,0 @@ -add_subdirectory(examples) -add_subdirectory(stencils) - -add_custom_target(blitz-doc-prep DEPENDS stencils doc-examples) -add_custom_target(blitz-doc) - -add_subdirectory(doxygen) - -set(STENCILS - backward11.texi backward12.texi backward21.texi backward22.texi backward31.texi backward32.texi backward41.texi backward42.texi - central12.texi central14.texi central22.texi central24.texi central32.texi central34.texi central42.texi central44.texi - forward11.texi forward12.texi forward21.texi forward22.texi forward31.texi forward32.texi forward41.texi forward42.texi - Laplacian2D4.texi Laplacian2D.texi) - -foreach(i ${STENCILS}) - set(TEXINFOS ${TEXINFOS} stencil/${i}) -endforeach() - -set(EXAMPLES - cast.texi debug.texi dump.texi fixed-point.texi fixed.texi io.texi outer.texi output.texi range.texi simple.texi slicing.texi - storage.texi strideslice.texi xor.texi) - -set(OUTPUTS - cast.out debug.out dump.out fixed.out io.out outer.out output.out - range.out simple.out slicing.out storage.out strideslice.out xor.out) - -foreach(i ${EXAMPLES} ${OUTPUTS}) - set(TEXINFOS ${TEXINFOS} examples/${i}) -endforeach() - -set(TEXINFOS - about.texi arrays-ctors.texi arrays-debug.texi arrays-expr.texi arrays-globals.texi arrays-indirect.texi arrays-intro.texi - arrays-io.texi arrays-members.texi arrays-multi.texi arrays-slicing.texi arrays-stencils.texi arrays-storage.texi 
arrays-types.texi - arrays-usertype.texi compiling.texi constants.texi download.texi faq.texi help.texi install.texi legal.texi numinquire.texi - parallel.texi platforms.texi random.texi tau.texi tinymatrix.texi tinyvector.texi tuning.texi copyright.texi) - -set(TEXI_SRC ${CMAKE_CURRENT_SOURCE_DIR}/blitz.texi) -set(TEXI_PROG_ARGS -I ${CMAKE_CURRENT_SOURCE_DIR} -I ${CMAKE_CURRENT_BINARY_DIR}) - -find_program(MAKEINFO makeinfo) -mark_as_advanced(MAKEINFO) -if (MAKEINFO) - add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/blitz.info - COMMAND ${MAKEINFO} --no-split ${TEXI_PROG_ARGS} ${TEXI_SRC} - DEPENDS blitz-doc-prep ${TEXI_SRC}) - add_custom_target(info DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/blitz.info) - add_dependencies(blitz-doc info) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/blitz.info DESTINATION ${CMAKE_INSTALL_INFODIR}) -endif() - -find_program(TEXI2HTML texi2html - ${CYGWIN_INSTALL_PATH}/bin /bin /usr/bin /usr/local/bin /sbin) -mark_as_advanced(TEXI2HTML) - -if (TEXI2HTML) - set(HTML_DIR ${CMAKE_CURRENT_BINARY_DIR}/html) - file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/html) - add_custom_target(html ${TEXI2HTML} ${TEXI_PROG_ARGS} --split=chapter ${TEXI_SRC} - WORKING_DIRECTORY ${HTML_DIR} - DEPENDS blitz-doc-prep - SOURCES ${TEXI_SRC}) - add_dependencies(blitz-doc html) - install(DIRECTORY ${HTML_DIR} DESTINATION ${CMAKE_INSTALL_DOCDIR}) -endif() - -find_program(TEXI2PDF texi2pdf - ${CYGWIN_INSTALL_PATH}/bin /bin /usr/bin /usr/local/bin /sbin) -mark_as_advanced(TEXI2PDF) -find_program(PDFLATEX pdflatex - ${CYGWIN_INSTALL_PATH}/bin /bin /usr/bin /usr/local/bin /sbin) - -if (TEXI2PDF AND PDFLATEX) - find_path(TEXINFO_TEX_DIR "texinfo.tex" HINTS "/usr/share/texmf/tex/texinfo/") - add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/blitz.pdf - COMMAND TEXINPUTS=:${TEXINFO_TEX_DIR}:; ${TEXI2PDF} ${TEXI_PROG_ARGS} ${TEXI_SRC} - DEPENDS blitz-doc-prep ${TEXI_SRC}) - add_custom_target(pdf DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/blitz.pdf) - 
add_dependencies(blitz-doc pdf) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/blitz.pdf DESTINATION ${CMAKE_INSTALL_DOCDIR}) -endif() - -set_directory_properties(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES - "blitz.html;blitz.pdf;blitz.aux;blitz.cp;blitz.cps;blitz.fn;blitz.fns;blitz.ky;blitz.log;blitz.pg;blitz.toc;blitz.tp;blitz.vr") - -foreach(i AUTHORS COPYING COPYING.LESSER COPYRIGHT LEGAL LICENSE NEWS README.md) - set(BLITZ_INFORMATION ${BLITZ_INFORMATION} ${CMAKE_SOURCE_DIR}/${i}) -endforeach() - -# Install files - -install(FILES ${BLITZ_INFORMATION} DESTINATION ${CMAKE_INSTALL_DOCDIR}) diff --git a/doc/about.texi b/doc/about.texi deleted file mode 100644 index 950c28da..00000000 --- a/doc/about.texi +++ /dev/null @@ -1,15 +0,0 @@ - -@node about, platforms, , Introduction -@section About this document - -To use the Blitz++ library, you will need a compiler with near-ISO/ANSI C++ -syntax support (see the following section for possible compilers). -Information on what platforms are supported is available from -@uref{http://oonumerics.org/blitz/platforms/}. To download Blitz++, please -go to the download page at @uref{http://oonumerics.org/blitz/download/}. - -If you need to do something that Blitz++ doesn't support, see a possible -improvement, or notice an error in the documentation, please send a note to -one of the Blitz++ mailing lists (described later). - - diff --git a/doc/arrays-ctors.texi b/doc/arrays-ctors.texi deleted file mode 100644 index 66925cf6..00000000 --- a/doc/arrays-ctors.texi +++ /dev/null @@ -1,302 +0,0 @@ - -@node Array ctors, Array slicing, Array types, Arrays -@section Constructors - -@subsection Default constructor -@cindex Array default ctor - -@example -Array(); -Array(GeneralArrayStorage storage) -@end example - -The default constructor creates a C-style array of zero size. Any attempt -to access data in the array may result in a run-time error, because there -isn't any data to access! 
- -An optional argument specifies a storage order for the array. - -Arrays created using the default constructor can subsequently be given data -by the @code{resize()}, @code{resizeAndPreserve()}, or @code{reference()} -member functions. - -@subsection Creating an array from an expression - -@example -Array(expression...) -@end example - -You may create an array from an array expression. For example, - -@example -Array A(4,3), B(4,3); // ... -Array C(A*2.0+B); -@end example - -This is an explicit constructor (it will not be used to perform implicit -type conversions). The newly constructed array will have the same storage -format as the arrays in the expression. If arrays with different storage -formats appear in the expression, an error will result. (In this case, you -must first construct the array, then assign the expression to it). - -@subsection Constructors which take extent parameters -@cindex Array ctors with extent parameters - -@example -Array(int extent1); -Array(int extent1, int extent2); -Array(int extent1, int extent2, int extent3); -... -Array(int extent1, int extent2, int extent3, ..., int extent11) -@end example - -These constructors take arguments which specify the size of the array to be -constructed. You should provide as many arguments as there are dimensions -in the array.@footnote{If you provide fewer than @code{N_rank} arguments, -the missing arguments will be filled in using the last provided argument. -However, for code clarity, it makes sense to provide all @code{N_rank} -parameters.} - -An optional last parameter specifies a storage format: - -@example -Array(int extent1, GeneralArrayStorage storage); -Array(int extent1, int extent2, GeneralArrayStorage storage); -... 
-@end example - -For high-rank arrays, it may be convenient to use this constructor: -@cindex Array high-rank - -@example -Array(const TinyVector& extent); -Array(const TinyVector& extent, - GeneralArrayStorage storage); -@end example - -The argument @code{extent} is a vector containing the extent (length) of the -array in each dimension. The optional second parameter indicates a storage -format. Note that you can construct @code{TinyVector} objects on the -fly with the @code{shape(i1,i2,...)} global function. For example, -@code{Array A(shape(3,5))} will create a 3x5 array. - -A similar constructor lets you provide both a vector of base index values -(lbounds) and extents: - -@example -Array(const TinyVector& lbound, - const TinyVector& extent); -Array(const TinyVector& lbound, - const TinyVector& extent, - GeneralArrayStorage storage); -@end example - -The argument @code{lbound} is a vector containing the base index value (or -lbound) of the array in each dimension. The argument @code{extent} is a -vector containing the extent (length) of the array in each dimension. The -optional third parameter indicates a storage format. As with the above -constructor, you can use the @code{shape(i1,i2,...)} global function to -create the @code{lbound} and @code{extent} parameters. - -@subsection Constructors with Range arguments -@cindex Array ctor with Range args - -These constructors allow arbitrary bases (starting indices) to be set: - -@example -Array(Range r1); -Array(Range r1, Range r2); -Array(Range r1, Range r2, Range r3); -... -Array(Range r1, Range r2, Range r3, ..., Range r11); -@end example - -For example, this code: - -@example -Array A(Range(10,20), Range(20,30)); -@end example - -will create an 11x11 array whose indices are 10..20 and 20..30. An optional -last parameter provides a storage order: - -@example -Array(Range r1, GeneralArrayStorage storage); -Array(Range r1, Range r2, GeneralArrayStorage storage); -... 
-@end example - -@subsection Referencing another array -@cindex Array referencing another array - -This constructor makes a shared view of another array's data: -@cindex Array creating a reference of another array - -@example -Array(Array& array); -@end example - -After this constructor is used, both @code{Array} objects refer to the -@emph{same data}. Any changes made to one array will appear in the other -array. If you want to make a duplicate copy of an array, use the -@code{copy()} member function. - -@subsection Constructing an array from an expression - -Arrays may be constructed from expressions, which are described in -@ref{Array Expressions}. The syntax is: - -@example -Array(...array expression...); -@end example - -For example, this code creates an array B which contains the square roots of -the elements in A: - -@example -Array A(N,N); // ... -Array B(sqrt(A)); -@end example - -@subsection Creating an array from pre-existing data -@cindex Array creating from pre-existing data - -When creating an array using a pointer to already existing data, you have -three choices for how Blitz++ will handle the data. These choices are -enumerated by the enum type @code{preexistingMemoryPolicy}: -@cindex Array creating a reference of another array - -@example -enum preexistingMemoryPolicy @{ - duplicateData, - deleteDataWhenDone, - neverDeleteData -@}; -@end example -@findex preexistingMemoryPolicy -@findex duplicateData -@findex deleteDataWhenDone -@findex neverDeleteData - -If you choose @code{duplicateData}, Blitz++ will create an array object -using a copy of the data you provide. If you choose -@code{deleteDataWhenDone}, Blitz++ will not create a copy of the data; and -when no array objects refer to the data anymore, it will deallocate the data -using @code{delete []}. 
Note that to use @code{deleteDataWhenDone}, your -array data must have been allocated using the C++ @code{new} operator -- for -example, you cannot allocate array data using Fortran or @code{malloc}, then -create a Blitz++ array from it using the @code{deleteDataWhenDone} flag. -The third option is @code{neverDeleteData}, which means that Blitz++ will -not never deallocate the array data. This means it is your responsibility -to determine when the array data is no longer needed, and deallocate it. -You should use this option for memory which has not been allocated using the -C++ @code{new} operator. - -These constructors create array objects from pre-existing data: - -@example -Array(T_numtype* dataFirst, TinyVector shape, - preexistingMemoryPolicy deletePolicy); -Array(T_numtype* dataFirst, TinyVector shape, - preexistingMemoryPolicy deletePolicy, - GeneralArrayStorage storage); -@end example - -The first argument is a pointer to the array data. It should point to the -element of the array which is stored first in memory. The second argument -indicates the shape of the array. You can create this argument using the -@code{shape()} function. For example: - -@example -double data[] = @{ 1, 2, 3, 4 @}; -Array A(data, shape(2,2), neverDeleteData); // Make a 2x2 array -@end example - -@findex shape() - -The @code{shape()} function takes N integer arguments and returns a -@code{TinyVector}. - -By default, Blitz++ arrays are row-major. If you want to work with data -which is stored in column-major order (e.g. 
a Fortran array), use the second -version of the constructor: - -@cindex Array creating from Fortran arrays - -@example -Array B(data, shape(2,2), neverDeleteData, - FortranArray<2>()); -@end example - -This is a tad awkward, so Blitz++ provides the global object -@code{fortranArray} which will convert to an instance of -@code{GeneralArrayStorage}: - -@example -Array B(data, shape(2,2), neverDeleteData, fortranArray); -@end example - -Another version of this constructor allows you to pass an arbitrary -vector of strides: - -@example -Array(T_numtype* _bz_restrict dataFirst, TinyVector shape, - TinyVector stride, - preexistingMemoryPolicy deletePolicy, - GeneralArrayStorage storage = GeneralArrayStorage()) -@end example - -@subsection Interlacing arrays -@cindex Array interlacing -@findex interlaceArrays() -@findex allocateArrays() - -For some platforms, it can be advantageous to store a set of arrays -interlaced together in memory. Blitz++ provides support for this through -the routines @code{interlaceArrays()} and @code{allocateArrays()}. An -example: - -@example -Array A, B; -interlaceArrays(shape(10,10), A, B); -@end example - -The first parameter of @code{interlaceArrays()} is the shape for the arrays -(10x10). The subsequent arguments are the set of arrays to be interlaced -together. Up to 11 arrays may be interlaced. All arrays must store the -same data type and be of the same rank. In the above example, storage is -allocated so that @code{A(0,0)} is followed immediately by @code{B(0,0)} in -memory, which is folloed by @code{A(0,1)} and @code{B(0,1)}, and so on. - -A related routine is @code{allocateArrays()}, which has identical syntax: - -@example -Array A, B; -allocateArrays(shape(10,10), A, B); -@end example - -Unlike @code{interlaceArrays()}, which always interlaces the arrays, the -routine @code{allocateArrays()} may or may not interlace them, depending on -whether interlacing is considered advantageous for your platform. 
If the -tuning flag @code{BZ_INTERLACE_ARRAYS} is defined in -@code{}, then the arrays are interlaced. - -Note that the performance effects of interlacing are unpredictable: in some -situations it can be a benefit, and in most others it can slow your code -down substantially. You should only use @code{interlaceArrays()} after -running some benchmarks to determine whether interlacing is beneficial for -your particular algorithm and architecture. - -@subsection A note about reference counting -@cindex Array reference counting -@cindex reference counting - -Blitz++ arrays use reference counting. When you create a new array, a -memory block is allocated. The @code{Array} object acts like a handle for -this memory block. A memory block can be shared among multiple @code{Array} -objects -- for example, when you take subarrays and slices. The memory -block keeps track of how many @code{Array} objects are referring to it. -When a memory block is orphaned -- when no @code{Array} objects are -referring to it -- it automatically deletes itself and frees the allocated -memory. - diff --git a/doc/arrays-debug.texi b/doc/arrays-debug.texi deleted file mode 100644 index 70b69385..00000000 --- a/doc/arrays-debug.texi +++ /dev/null @@ -1,34 +0,0 @@ - -@node Array debug, Array members, Array slicing, Arrays -@section Debug mode -@cindex debugging mode -@cindex bounds checking -@cindex Array bounds checking - -The Blitz++ library has a debugging mode which is enabled by defining the -preprocessor symbol @code{BZ_DEBUG}. For most compilers, the command line -argument @code{-DBZ_DEBUG} should work. - -In debugging mode, your programs will run @emph{very slowly}. This is -because Blitz++ is doing lots of precondition checking and bounds checking. -When it detects something fishy, it will likely halt your program and -display an error message. 
- -For example, this program attempts to access an element of a 4x4 array which -doesn't exist: - -@smallexample -@include examples/debug.texi -@end smallexample - -When compiled with @code{-DBZ_DEBUG}, the out of bounds indices are detected -and an error message results: - -@smallexample -@include examples/debug.out -@end smallexample - -Precondition failures send their error messages to the standard error stream -(@code{cerr}). After displaying the error message, @code{assert(0)} is -invoked. - diff --git a/doc/arrays-expr.texi b/doc/arrays-expr.texi deleted file mode 100644 index 01de9440..00000000 --- a/doc/arrays-expr.texi +++ /dev/null @@ -1,1506 +0,0 @@ - -@cindex expression templates -@cindex Array expressions -@cindex Array no temporaries -@cindex temporaries -@cindex Array temporaries - -Array expressions in Blitz++ are implemented using the @emph{expression -templates} technique. Unless otherwise noted, expression evaluation will -never generate temporaries or multiple loops; an expression such as - -@example -Array A, B, C, D; // ... - -A = B + C + D; -@end example - -will result in code similar to - -@example -for (int i=A.lbound(firstDim); i <= A.ubound(firstDim); ++i) - A[i] = B[i] + C[i] + D[i]; -@end example - -@node Expression evaluation, Index placeholders, , Array Expressions -@section Expression evaluation order -@cindex Array expression evaluation order -@cindex expression evaluation order -@cindex order of expression evaluation -@cindex traversal order - -A commonly asked question about Blitz++ is what order it uses to evaluate -array expressions. For example, in code such as - -@example -A(Range(2,10)) = A(Range(1,9)) -@end example - -does the expression get evaluated at indices 1, 2, ..., 9 or at 9, 8, ..., -1? This makes a big difference to the result: in one case, the array will -be shifted to the right by one element; in the other case, most of the array -elements will be set to the value in @code{A(1)}. 
- -Blitz always selects the traversal order it thinks will be fastest. For 1D -arrays, this means it will go from beginning to the end of the array in -memory (see notes below). For multidimensional arrays, it will do one of -two things: - -@itemize @bullet - -@item try to go through the destination array in the order it is laid out -in memory (i.e.@: row-major for row-major arrays, column-major for -column-major arrays). - -@item if the expression is a stencil, Blitz will do tiling to improve cache -use. Under some circumstances blitz will even use a traversal based on a -hilbert curve (a fractal) for 3D arrays. - -@end itemize - -Because the traversal order is not always predictable, it is safest to put -the result in a new array if you are doing a stencil-style expression. -Blitz guarantees this will always work correctly. If you try to put the -result in one of the operands, you have to guess correctly which traversal -order blitz will choose. This is easy for the 1D case, but hard for the -multidimensional case. - -Some special notes about 1D array traversals: - -@itemize @bullet - -@item if your array is stored in reverse order, i.e.@: because of a -A.reverse(firstDim) or funny storage order, blitz will go through the array -from end to beginning in array coordinates, but from beginning to end in -memory locations. - -@item many compilers/architecture combinations are equally fast at reverse -order. But blitz has a specialized version for stride = +1, and it would be -wasteful to also specialize for the case stride = -1. So 1D arrays are -traversed from beginning to end (in memory storage order). - -@end itemize - -@section Expression operands -@cindex Array expression operands - -An expression can contain any mix of these operands: - -@itemize @bullet -@item An array of any type, so long as it is of the same rank. -Expressions which contain a mixture of array types are handled through the -type promotion mechanism described below. 
- -@item Scalars of type @code{int}, @code{float}, @code{double}, -@code{long double}, or @code{complex} - -@item Index placeholders, described below - -@item Other expressions (e.g. @code{A+(B+C)}) -@end itemize - -@section Array operands - -@unnumberedsubsec Using subarrays in an expression - -@cindex Array using subarrays in expressions - -Subarrays may be used in an expression. For example, this code example -performs a 5-point average on a two-dimensional array: - -@example -Array A(64,64), B(64,64); // ... -Range I(1,62), J(1,62); - -A(I,J) = (B(I,J) + B(I+1,J) + B(I-1,J) - + B(I,J+1) + B(I,J-1)) / 5; -@end example - -@unnumberedsubsec Mixing arrays with different storage formats - -@cindex Array expressions which mix arrays of different storage formats - -Arrays with different storage formats (for example, C-style and -Fortran-style) can be mixed in the same expression. Blitz++ will handle the -different storage formats automatically. However: - -@itemize @bullet - -@item Evaluation may be slower, since a different traversal order may be -used. - -@item If you are using index placeholders (see below) or reductions in -the expression, you may @strong{not} mix array objects with different -starting bases. - -@end itemize - -@section Expression operators -@cindex operators, array expressions -@cindex Array operators -@cindex Array expression operators - -These binary operators are supported: - -@example -+ - * / % > < >= <= == != && || ^ & | -@end example - -@strong{Caution:} operator @code{<<} and @code{>>} are reserved for use in input/output. -If you need a bit-shift operation on arrays, you may define one yourself; -see @ref{User et}. - -These unary operators are supported: - -@example -- ~ ! -@end example - -The operators @code{> < >= <= == != && || !} result in a bool-valued -expression. - -@cindex Array operators applied elementwise -All operators are applied @emph{elementwise}. 
- -@cindex Array requirements for using operators -You can only use operators which are well-defined for the number type stored -in the arrays. For example, bitwise XOR (@code{^}) is meaningful for -integers, so this code is all right: - -@example -Array A, B, C; // ... -A = B ^ C; -@end example - -Bitwise XOR is @emph{not} meaningful on floating point types, so this code -will generate a compiler error: - -@example -Array A, B, C; // ... -C = B ^ C; -@end example - -Here's the compiler error generated by KAI C++ for the above code: - -@example -"../../blitz/ops.h", line 85: error: expression must have integral or enum type - BZ_DEFINE_OP(BitwiseXor,^); - ^ - detected during: - instantiation of "blitz::BitwiseXor::T_numtype - blitz::BitwiseXor::apply(float, float)" at - line 210 of "../../blitz/arrayexpr.h" - instantiation of ... - . - . -@end example - -@cindex Array arrays of user type -If you are creating arrays using a type you have created yourself, you will -need to overload whatever operators you want to use on arrays. For example, -if I create a class @code{Polynomial}, and want to write code such as: - -@example -Array A, B, C; // ... -C = A * B; -@end example - -I would have to provide @code{operator*} for @code{Polynomial} by -implementing - -@example -Polynomial Polynomial::operator*(Polynomial);) -@end example - -or - -@example -Polynomial operator*(Polynomial, Polynomial);) -@end example - -@section Assignment operators - -@cindex Array assignment operators -These assignment operators are supported: - -@example -= += -= *= /= %= ^= &= |= >>= <<= -@end example - -An array object should appear on the left side of the operator. 
The right -side can be: - -@itemize @bullet - -@item A constant (or literal) of type @code{T_numtype} - -@item An array of appropriate rank, possibly of a different numeric type - -@item An array expression, with appropriate rank and shape - -@end itemize - -@node Index placeholders, Math functions 1, Expression evaluation, Array Expressions -@section Index placeholders -@cindex Array index placeholders -@cindex index placeholders - -Blitz++ provides objects called @emph{index placeholders} which represent -array indices. They can be used directly in expressions. - -There is a distinct index placeholder type associated with each dimension of -an array. The types are called @code{firstIndex}, @code{secondIndex}, -@code{thirdIndex}, ..., @code{tenthIndex}, @code{eleventhIndex}. -@findex firstIndex -@findex secondIndex -@findex thirdIndex -@findex fourthIndex -Here's an example of using an index placeholder: - -@example -Array A(10); -firstIndex i; -A = i; -@end example - -This generates code which is similar to: - -@example -for (int i=0; i < A.length(); ++i) - A(i) = i; -@end example - -Here's an example which fills an array with a sampled sine wave: - -@example -Array A(16); -firstIndex i; - -A = sin(2 * M_PI * i / 16.); -@end example - -If your destination array has rank greater than 1, you may use -multiple index placeholders: - -@cindex index placeholders multiple - -@example -// Fill a two-dimensional array with a radially -// symmetric, decaying sinusoid - -// Create the array -int N = 64; -Array F(N,N); - -// Some parameters -float midpoint = (N-1)/2.; -int cycles = 3; -float omega = 2.0 * M_PI * cycles / double(N); -float tau = - 10.0 / N; - -// Index placeholders -firstIndex i; -secondIndex j; - -// Fill the array -F = cos(omega * sqrt(pow2(i-midpoint) + pow2(j-midpoint))) - * exp(tau * sqrt(pow2(i-midpoint) + pow2(j-midpoint))); -@end example - -Here's a plot of the resulting array: - -@center @image{sinsoid} -@center Array filled using an index 
placeholder expression. - -You can use index placeholder expressions in up to 11 dimensions. -Here's a three dimensional example: - -@example -// Fill a three-dimensional array with a Gaussian function -Array<float,3> A(16,16,16); -firstIndex i; -secondIndex j; -thirdIndex k; -float midpoint = 15/2.; -float c = - 1/3.0; -A = exp(c * (sqr(i-midpoint) + sqr(j-midpoint) - + sqr(k-midpoint))); -@end example - -You can mix array operands and index placeholders: - -@example -Array<int,1> A(5), B(5); -firstIndex i; - -A = 0, 1, 1, 0, 2; -B = i * A; // Results in [ 0, 1, 2, 0, 8 ] -@end example - -For your convenience, there is a namespace within blitz -called @code{tensor} which declares all the index placeholders: - -@cindex tensor namespace -@cindex @code{i} (index placeholder) -@cindex @code{j} (index placeholder) -@cindex @code{k} (index placeholder) -@cindex @code{l} (index placeholder) -@cindex @code{m} (index placeholder) -@cindex @code{n} (index placeholder) - -@example -namespace blitz @{ - namespace tensor @{ - firstIndex i; - secondIndex j; - thirdIndex k; - ... - eleventhIndex t; - @} -@} -@end example - -So instead of declaring your own index placeholder objects, -you can just say - -@findex blitz::tensor namespace - -@example -using namespace blitz::tensor; -@end example - -when you would like to use them. Alternately, you can just preface all the -index placeholders with @code{tensor::}, for example: - -@example -A = sin(2 * M_PI * tensor::i / 16.); -@end example - -This will make your code more readable, since it is immediately clear that -@code{i} is an index placeholder, rather than a scalar value. - -@section Type promotion -@cindex type promotion -@cindex Array type promotion - -When operands of different numeric types are used in an expression, the -result gets promoted according to the usual C-style type promotion. For -example, the result of adding an @code{Array<int>} to an -@code{Array<float>} will be promoted to @code{float}. 
Generally, the -result is promoted to whichever type has greater precision. - -@unnumberedsubsec Type promotion for user-defined types - -@cindex type promotion for user-defined types -@cindex Array type promotion for user-defined types - -The rules for type promotion of user-defined types (or types from another -library) are a bit complicated. Here's how a pair of operand types is -promoted: - -@itemize @bullet - -@item If both types are intrinsic (e.g. bool, int, float) then type -promotion follows the standard C rules. This generally means that the -result will be promoted to whichever type has greater precision. In -Blitz++, these rules have been extended to incorporate -@code{complex<float>}, @code{complex<double>}, and @code{complex<long double>}. - -@item If one of the types is intrinsic (or complex), and the other is a -user-defined type, then the result is promoted to the user-defined type. - -@item If both types are user-defined, then the result is promoted to -whichever type requires more storage space (as determined by -@code{sizeof()}). The rationale is that more storage space probably -indicates more precision. - -@end itemize - -If you wish to alter the default type promotion rules above, you have two -choices: - -@itemize @bullet - -@findex promote_trait - -@item If the type promotion behaviour isn't dependent on the type of -operation performed, then you can provide appropriate specializations for -the class @code{promote_trait} which is declared in -@code{<blitz/promote.h>}. - -@item If type promotion does depend on the type of operation, then you -will need to specialize the appropriate function objects in -@code{<blitz/ops.h>}. - -@end itemize - -Note that you can do these specializations in your own header files (you -don't have to edit @file{promote.h} or @file{ops.h}). - -@unnumberedsubsec Manual casts - -@cindex casts -@cindex Array casts - -There are some inconvenient aspects of C-style type promotion. For example, -when you divide two integers in C, the result gets truncated. 
The same -problem occurs when dividing two integer arrays in Blitz++: - -@example -Array A(4), B(4); -Array C(4); - -A = 1, 2, 3, 5; -B = 2, 2, 2, 7; - -C = A / B; // Result: [ 0 1 1 0 ] -@end example - -The usual solution to this problem is to cast one of the operands to a -floating type. For this purpose, Blitz++ provides a function -@code{cast(expr,type)} which will cast the result of @emph{expr} as -@emph{type}: - -@findex cast() - -@example -C = A / cast(B, float()); // Result: [ 0.5 1 1.5 0.714 ] -@end example - -The first argument to @code{cast()} is an array or expression. The second -argument is a dummy object of the type to which you want to cast. Once -compilers support templates more thoroughly, it will be possible to use this -cast syntax: - -@example -C = A / cast(B); -@end example - -But this is not yet supported. - -@node Math functions 1, Math functions 2, Index placeholders, Array Expressions -@section Single-argument math functions - -All of the functions described in this section are @emph{element-wise}. For -example, this code-- - -@example -Array A, B; // -A = sin(B); -@end example - -results in @code{A(i,j) = sin(B(i,j))} for all (i,j). - -@unnumberedsubsec ANSI C++ math functions - -These math functions are available on all platforms: - -@cindex math functions -@cindex complex math functions - -@table @code -@item abs() -@findex abs() -Absolute value - -@item acos() -@findex acos() -Inverse cosine. For real arguments, the return value is in the range -@math{[0, \pi]}. - -@item arg() -@findex arg() -Argument of a complex number (@code{atan2(Im,Re)}). - -@item asin() -@findex asin() -Inverse sine. For real arguments, the return value is in the range -@math{[-\pi/2, \pi/2]}. - -@item atan() -@findex atan() -Inverse tangent. For real arguments, the return value is in the range -@math{[-\pi/2, \pi/2]}. See also @code{atan2()} in section -@ref{Math functions 2}. 
- -@item ceil() -@findex ceil() -Ceiling function: smallest floating-point integer value not less than the -argument. - -@item cexp() -@findex cexp() -Complex exponential; same as @code{exp()}. - -@item conj() -@findex conj() -Conjugate of a complex number. - -@item cos() -@findex cos() -Cosine. Works for @code{complex}. - -@item cosh() -@findex cosh() -Hyperbolic cosine. Works for @code{complex}. - -@item csqrt() -@findex csqrt() -Complex square root; same as @code{sqrt()}. - -@item exp() -@findex exp() -Exponential. Works for @code{complex}. - -@item fabs() -@findex fabs() -Same as @code{abs()}. - -@item floor() -@findex floor() -Floor function: largest floating-point integer value not greater than the -argument. - -@item log() -@findex log() -Natural logarithm. Works for @code{complex}. - -@item log10() -@findex log10() -Base 10 logarithm. Works for @code{complex}. - -@item pow2(), pow3(), pow4(), pow5(), pow6(), pow7(), pow8() -@findex pow2() -@findex pow3() -@findex pow?() -These functions compute an integer power. They expand to a series of -multiplications, so they can be used on any type for which multiplication is -well-defined. - -@item sin() -@findex sin() -Sine. Works for @code{complex}. - -@item sinh() -@findex sinh() -Hyperbolic sine. Works for @code{complex}. - -@item sqr() -@findex sqr() -Same as @code{pow2()}. Computes @code{x*x}. Works for @code{complex}. - -@item sqrt() -@findex sqrt() -Square root. Works for @code{complex}. - -@item tan() -@findex tan() -Tangent. Works for @code{complex}. - -@item tanh() -@findex tanh() -Hyperbolic tangent. Works for @code{complex}. -@end table - -@unnumberedsubsec IEEE/System V math functions - -@cindex IEEE math functions -@cindex System V math functions -@findex libm.a -@findex libmsaa.a - -These functions are only available on platforms which provide the IEEE Math -library (libm.a) and/or System V Math Library (libmsaa.a). 
Apparently not -all platforms provide all of these functions, so what you can use on your -platform may be a subset of these. If you choose to use one of these -functions, be aware that you may be limiting the portability of your code. - -@findex XOPEN_SOURCE -@findex XOPEN_SOURCE_EXTENDED - -On some platforms, the preprocessor symbols @code{_XOPEN_SOURCE} and/or -@code{_XOPEN_SOURCE_EXTENDED} need to be defined to use these functions. -These symbols can be enabled by compiling with -@code{-DBZ_ENABLE_XOPEN_SOURCE}. (In previous version of Blitz++, -@code{_XOPEN_SOURCE} and @code{_XOPEN_SOURCE_EXTENDED} were declared by -default. This was found to cause too many problems, so users must manually -enable them with @code{-DBZ_ENABLE_XOPEN_SOURCE}.). - -In the current version, Blitz++ divides these functions into two groups: -IEEE and System V. This distinction is probably artificial. If one of the -functions in a group is missing, Blitz++ won't allow you to use any of them. -You can see the division of these functions in the files -@file{Blitz++/compiler/ieeemath.cpp} and -@file{Blitz++/compiler/sysvmath.cpp}. This arrangement is unsatisfactory -and will probably change in a future version. - -You may have to link with @code{-lm} and/or @code{-lmsaa} to use these -functions. - -None of these functions are available for @code{complex}. - -@table @code -@item acosh() -@findex acosh() -Inverse hyperbolic cosine - -@item asinh() -@findex asinh() -Inverse hyperbolic sine - -@item atanh() -@findex atanh() -Inverse hyperbolic tangent - -@item _class() -@findex _class() -Classification of floating point values. 
The return type is integer and -will be one of: - - @table @code - @item FP_PLUS_NORM -@findex FP_PLUS_NORM - Positive normalized, nonzero - - @item FP_MINUS_NORM -@findex FP_MINUS_NORM - Negative normalized, nonzero - - @item FP_PLUS_DENORM -@findex FP_PLUS_DENORM - Positive denormalized, nonzero - - @item FP_MINUS_DENORM -@findex FP_MINUS_DENORM - Negative denormalized, nonzero - - @item FP_PLUS_ZERO -@findex FP_PLUS_ZERO - +0.0 - - @item FP_MINUS_ZERO -@findex FP_MINUS_ZERO - -0.0 - - @item FP_PLUS_INF -@findex FP_PLUS_INF - Positive infinity - - @item FP_MINUS_INF -@findex FP_MINUS_INF - Negative infinity - - @item FP_NANS -@findex FP_NANS - Signalling Not a Number (NaNS) - - @item FP_NANQ -@findex FP_NANQ - Quiet Not a Number (NaNQ) - @end table - -@item cbrt() -@findex cbrt() -Cubic root - -@item expm1() -@findex expm1() -Computes exp(x)-1 - -@item erf() -@findex erf() -Computes the error function: -@tex -$$ {\rm erf}(x) = {2\over\sqrt\pi}\int_{0}^{x} e^{-t^2} dt $$ -@end tex -@html -erf(x) = 2/sqrt(Pi) * integral(exp(-t^2), t=0..x) -@end html -@ifnottex -@ifnothtml -@math{@r{erf}(x) = 2/@r{sqrt}(Pi) * @r{integral}(@r{exp}(-t^2), t=0..x)} -@end ifnothtml -@end ifnottex - -Note that for large values of the parameter, calculating can result in -extreme loss of accuracy. Instead, use @code{erfc()}. - -@item erfc() -@findex erfc() -Computes the complementary error function @math{@r{erfc}(x) = 1 - @r{erf}(x)}. - -@item finite() -@findex finite() -Returns a nonzero integer if the parameter is a finite number (i.e.@: not -+INF, -INF, NaNQ or NaNS). - -@item ilogb() -@findex ilogb() -Returns an integer which is equal to the unbiased exponent of -the parameter. - -@item blitz_isnan() -@findex blitz_isnan() -@findex isnan() -Returns a nonzero integer if the parameter is NaNQ or -NaNS (quiet or signalling Not a Number). - -@item itrunc() -@findex itrunc() -Round a floating-point number to a signed integer. 
Returns -the nearest signed integer to the parameter in the direction of 0. - -@item j0() -@findex j0() -@cindex Bessel functions -Bessel function of the first kind, order 0. - -@item j1() -@findex j1() -Bessel function of the first kind, order 1. - -@item lgamma() -@findex lgamma() -@cindex Gamma function -Natural logarithm of the gamma function. The gamma function -is defined as: -@tex -$$ {\rm Gamma}(x) = \int_0^\infty e^{-t}t^{x-1} dt $$ -@end tex -@html -Gamma(x) = integral(e^(-t) * t^(x-1), t=0..infinity)) -@end html -@ifnottex -@ifnothtml -@math{@r{Gamma}(x) = @r{integral}(e^(-t) * t^(x-1), t=0..@r{infinity}))} -@end ifnothtml -@end ifnottex - -@item logb() -@findex logb() -Returns a floating-point double that is equal to the unbiased -exponent of the parameter. - -@item log1p() -@findex log1p() -Calculates log(1+x), where x is the parameter. - -@item nearest() -@findex nearest() -Returns the nearest floating-point integer value to the -parameter. If the parameter is exactly halfway between two integer values, -an even value is returned. - -@item rint() -@findex rint() -@cindex rounding -Rounds the parameter and returns a floating-point integer value. Whether -@code{rint()} rounds up or down or to the nearest integer depends on the -current floating-point rounding mode. If you haven't altered the rounding -mode, @code{rint()} should be equivalent to @code{nearest()}. If rounding -mode is set to round towards +INF, @code{rint()} is equivalent to -@code{ceil()}. If the mode is round toward -INF, @code{rint()} is -equivalent to @code{floor()}. If the mode is round toward zero, -@code{rint()} is equivalent to @code{trunc()}. - -@item rsqrt() -@findex rsqrt() -Reciprocal square root. - -@item uitrunc() -@findex uitrunc() -Returns the nearest unsigned integer to the parameter in the -direction of zero. - -@item y0() -@findex y0() -Bessel function of the second kind, order 0. - -@item y1() -@findex y1() -Bessel function of the second kind, order 1. 
-@end table - -There may be better descriptions of these functions in your -system man pages. - -@node Math functions 2, User et, Math functions 1, Array Expressions -@section Two-argument math functions - -The math functions described in this section take two arguments. -Most combinations of these types may be used as arguments: - -@itemize @bullet -@item An Array object -@item An Array expression -@item An index placeholder -@item A scalar of type @code{float}, @code{double}, @code{long double}, -or @code{complex} -@end itemize - -@unnumberedsubsec ANSI C++ math functions - -These math functions are available on all platforms, and work for -complex numbers. - -@cindex math functions -@cindex complex math functions - -@table @code -@item atan2(x,y) -@findex atan2() -Inverse tangent of (y/x). The signs of both parameters -are used to determine the quadrant of the return value, which is in the -range @math{[-\pi, \pi]}. Works for @code{complex}. - -@item blitz::polar(r,t) -@findex polar() -Computes ; i.e.@: converts polar-form to -Cartesian form complex numbers. The @code{blitz::} scope qualifier is -needed to disambiguate the ANSI C++ function template @code{polar(T,T)}. -This qualifier will hopefully disappear in a future version. - -@item pow(x,y) -@findex pow() -Computes x to the exponent y. Works for @code{complex}. -@end table - -@unnumberedsubsec IEEE/System V math functions - -See the notes about IEEE/System V math functions in the previous section. -None of these functions work for complex numbers. They will all cast their -arguments to double precision. - -@table @code -@item copysign(x,y) -@findex copysign() -Returns the x parameter with the same sign as the y parameter. - -@item drem(x,y) -@findex drem() -@cindex remainder, floating point @code{drem()} -Computes a floating point remainder. The return value r is equal to r = x - -n * y, where n is equal to @code{nearest(x/y)} (the nearest integer to x/y). 
-The return value will lie in the range [ -y/2, +y/2 ]. If y is zero or x is -+INF or -INF, NaNQ is returned. - -@item fmod(x,y) -@findex fmod() -@cindex modulo, floating point @code{fmod()} -Computes a floating point modulo remainder. The return value r is equal to -r = x - n * y, where n is selected so that r has the same sign as x and -magnitude less than abs(y). In other words, if x > 0, r is in the range [0, -|y|], and if x < 0, r is in the range [-|y|, 0]. - -@item hypot(x,y) -@findex hypot() -Computes @code{sqrt(x*x + y*y)} so that underflow does not occur and overflow occurs only if the -final result warrants it. - -@item nextafter(x,y) -@findex nextafter() -Returns the next representable number after x in the direction of y. - -@item remainder(x,y) -@findex remainder() -Equivalent to drem(x,y). - -@item scalb(x,y) -@findex scalb() -Calculates @code{x} multiplied by 2 raised to the power @code{y}. - -@item unordered(x,y) -@findex unordered() -Returns a nonzero value if a floating-point comparison between x and y would -be unordered. Otherwise, it returns zero. -@end table - -@node User et, Where expr, Math functions 2, Array Expressions -@section Declaring your own math functions on arrays - -@cindex math functions declaring your own -@cindex Array declaring your own math functions on - -There are four macros which make it easy to turn your own scalar functions -into functions defined on arrays. They are: - -@findex BZ_DECLARE_FUNCTION - -@example -BZ_DECLARE_FUNCTION(f) // 1 -BZ_DECLARE_FUNCTION_RET(f,return_type) // 2 -BZ_DECLARE_FUNCTION2(f) // 3 -BZ_DECLARE_FUNCTION2_RET(f,return_type) // 4 -@end example - -Use version 1 when you have a function which takes one argument and returns -a result of the same type. For example: - -@example -#include <blitz/array.h> - -using namespace blitz; - -double myFunction(double x) -@{ - return 1.0 / (1 + x); -@} - -BZ_DECLARE_FUNCTION(myFunction) - -int main() -@{ - Array<double,2> A(4,4), B(4,4); // ... 
- B = myFunction(A); -@} -@end example - -Use version 2 when you have a one argument function whose return type is -different than the argument type, such as - -@example -int g(double x); -@end example - -Use version 3 for a function which takes two arguments and returns a result -of the same type, such as: - -@example -double g(double x, double y); -@end example - -Use version 4 for a function of two arguments which returns a different -type, such as: - -@example -int g(double x, double y); -@end example - -@section Tensor notation - -@cindex tensor notation -@cindex Array tensor notation - -Blitz++ arrays support a tensor-like notation. Here's an example of -real-world tensor notation: -@tex -$$ A^{ijk} = B^{ij} C^k $$ -@end tex -@html -
- ijk    ij k
-A    = B  C
-
-@end html -@ifnottex -@ifnothtml -@example - ijk ij k -A = B C -@end example -@end ifnothtml -@end ifnottex - -@math{A} is a rank 3 tensor (a three dimensional array), @math{B} is a rank -2 tensor (a two dimensional array), and @math{C} is a rank 1 tensor (a one -dimensional array). The above expression sets -@code{A(i,j,k) = B(i,j) * C(k)}. - -To implement this product using Blitz++, we'll need the arrays and some -index placeholders: - -@cindex index placeholders used for tensor notation - -@example -Array A(4,4,4); -Array B(4,4); -Array C(4); - -firstIndex i; // Alternately, could just say -secondIndex j; // using namespace blitz::tensor; -thirdIndex k; -@end example - -Here's the Blitz++ code which is equivalent to the tensor expression: - -@example -A = B(i,j) * C(k); -@end example - -The index placeholder arguments tell an array how to map its dimensions onto -the dimensions of the destination array. For example, here's some -real-world tensor notation: -@tex -$$ C^{ijk} = A^{ij} x^{k} - A^{jk} y^{i} $$ -@end tex -@html -
- ijk    ij k    jk i
-C    = A  x  - A  y
-
-@end html -@ifnottex -@ifnothtml -@example - ijk ij k jk i -C = A x - A y -@end example -@end ifnothtml -@end ifnottex - -In Blitz++, this would be coded as: - -@example -using namespace blitz::tensor; - -C = A(i,j) * x(k) - A(j,k) * y(i); -@end example - -This tensor expression can be visualized in the following way: - -@center @image{tensor1} -@center Examples of array indexing, subarrays, and slicing. - -Here's an example which computes an outer product of two one-dimensional -arrays: -@cindex outer product -@cindex kronecker product -@cindex tensor product - -@smallexample -@include examples/outer.texi -@end smallexample - -And the output: - -@smallexample -@include examples/outer.out -@end smallexample - -Index placeholders can @emph{not} be used on the left-hand side of an -expression. If you need to reorder the indices, you must do this on the -right-hand side. - -In real-world tensor notation, repeated indices imply a contraction (or -summation). For example, this tensor expression computes a matrix-matrix -product: -@tex -$$ C^{ij} = A^{ik} B^{kj} $$ -@end tex -@html -
- ij    ik  kj
-C   = A   B
-
-@end html -@ifnottex -@ifnothtml -@example - ij ik kj -C = A B -@end example -@end ifnothtml -@end ifnottex - -The repeated k index is interpreted as meaning -@tex -$$ c_{ij} = \sum_{k} a_{ik} b_{kj} $$ -@end tex -@html -
-c    = sum of (a   * b  ) over k
- ij             ik    kj
-
-@end html -@ifnottex -@ifnothtml -@example -c = sum of @{a * b @} over k - ij ik kj -@end example -@end ifnothtml -@end ifnottex - -@cindex contraction -@cindex tensor contraction - -In Blitz++, repeated indices do @emph{not} imply contraction. If you want -to contract (sum along) an index, you must use the @code{sum()} function: - -@example -Array A, B, C; // ... -firstIndex i; -secondIndex j; -thirdIndex k; - -C = sum(A(i,k) * B(k,j), k); -@end example - -The @code{sum()} function is an example of an @emph{array reduction}, -described in the next section. - -Index placeholders can be used in any order in an expression. This example -computes a kronecker product of a pair of two-dimensional arrays, and -permutes the indices along the way: - -@example -Array A, B; // ... -Array C; // ... -fourthIndex l; - -C = A(l,j) * B(k,i); -@end example - -This is equivalent to the tensor notation -@tex -$$ C^{ijkl} = A^{lj} B^{ki} $$ -@end tex -@html -
- ijkl    lj ki
-C     = A  B
- 
-@end html -@ifnottex -@ifnothtml -@example - ijkl lj ki -C = A B -@end example -@end ifnothtml -@end ifnottex - -Tensor-like notation can be mixed with other array notations: - -@example -Array<float,2> A, B; // ... -Array<float,4> C; // ... - -C = cos(A(l,j)) * sin(B(k,i)) + 1./(i+j+k+l); -@end example - -@cindex tensor notation efficiency issues -An important efficiency note about tensor-like notation: the right-hand side -of an expression is @emph{completely evaluated} for @emph{every} element in -the destination array. For example, in this code: - -@example -Array<float,1> x(4), y(4); -Array<float,2> A(4,4); - -A = cos(x(i)) * sin(y(j)); -@end example - -The resulting implementation will look something like this: - -@example -for (int n=0; n < 4; ++n) - for (int m=0; m < 4; ++m) - A(n,m) = cos(x(n)) * sin(y(m)); -@end example - -The functions @code{cos} and @code{sin} will be invoked sixteen times each. -It's possible that a good optimizing compiler could hoist the @code{cos} -evaluation out of the inner loop, but don't hold your breath -- there's a -lot of complicated machinery behind the scenes to handle tensor notation, -and most optimizing compilers are easily confused. In a situation like the -above, you are probably best off manually creating temporaries for -@code{cos(x)} and @code{sin(y)} first. - -@section Array reductions -@cindex Array reductions -@cindex reductions - -Currently, Blitz++ arrays support two forms of reduction: - -@itemize @bullet - -@item Reductions which transform an array into a scalar (for example, -summing the elements). These are referred to as @strong{complete -reductions}. - -@item Reducing an N dimensional array (or array expression) to an N-1 -dimensional array expression. These are called @strong{partial reductions}. - -@end itemize - -@cindex Array reductions complete -@cindex complete reductions -@cindex reductions complete - -@section Complete reductions - -Complete reductions transform an array (or array expression) into -a scalar. 
Here are some examples: - -@example -Array A(3,3); -A = 0, 1, 2, - 3, 4, 5, - 6, 7, 8; -cout << sum(A) << endl // 36 - << min(A) << endl // 0 - << count(A >= 4) << endl; // 5 -@end example - -Here are the available complete reductions: - -@table @code -@item sum() -@cindex @code{sum()} reduction -Summation (may be promoted to a higher-precision type) - -@item product() -@cindex @code{product()} reduction -Product - -@item mean() -@cindex @code{mean()} reduction -Arithmetic mean (promoted to floating-point type if necessary) - -@item min() -@cindex @code{min()} reduction -Minimum value - -@item max() -@cindex @code{max()} reduction -Maximum value - -@item minmax() -@cindex @code{minmax()} reduction -Simultaneous minimum and maximum value (returns a value of type MinMaxValue) - -@item minIndex() -@cindex @code{minIndex()} reduction -Index of the minimum value (@code{TinyVector}) - -@item maxIndex() -@cindex @code{maxIndex()} reduction -Index of the maximum value (@code{TinyVector}) - -@item count() -@cindex @code{count()} reduction -Counts the number of times the expression is logical true (@code{int}) - -@item any() -@cindex @code{any()} reduction -True if the expression is true anywhere (@code{bool}) - -@item all() -@cindex @code{all()} reduction -True if the expression is true everywhere (@code{bool}) -@end table - -@strong{Caution:} @code{minIndex()} and @code{maxIndex()} return TinyVectors, -even when the rank of the array (or array expression) is 1. - -Reductions can be combined with @code{where} expressions (@ref{Where expr}) -to reduce over some part of an array. For example, @code{sum(where(A > 0, -A, 0))} sums only the positive elements in an array. - -@section Partial Reductions - -@cindex Array reductions partial -@cindex partial reductions -@cindex reductions partial - -Here's an example which computes the sum of each row of a two-dimensional -array: - -@example -Array A; // ... -Array rs; // ... 
-firstIndex i; -secondIndex j; - -rs = sum(A, j); -@end example - -The reduction @code{sum()} takes two arguments: - -@itemize @bullet - -@item The first argument is an array or array expression. - -@item The second argument is an index placeholder indicating the -dimension over which the reduction is to occur. - -@end itemize - -Reductions have an @strong{important restriction}: It is currently only -possible to reduce over the @emph{last} dimension of an array or array -expression. Reducing a dimension other than the last would require Blitz++ -to reorder the dimensions to fill the hole left behind. For example, in -order for this reduction to work: - -@example -Array A; // ... -Array B; // ... -secondIndex j; - -// Reduce over dimension 2 of a 3-D array? -B = sum(A, j); -@end example - -Blitz++ would have to remap the dimensions so that the third dimension -became the second. It's not currently smart enough to do this. - -However, there is a simple workaround which solves some of the problems -created by this limitation: you can do the reordering manually, prior to the -reduction: - -@example -B = sum(A(i,k,j), k); -@end example - -Writing @code{A(i,k,j)} interchanges the second and third dimensions, -permitting you to reduce over the second dimension. 
Here's a list of the -reduction operations currently supported: - -@table @code -@item sum() -Summation - -@item product() -Product - -@item mean() -Arithmetic mean (promoted to floating-point type if necessary) - -@item min() -Minimum value - -@item max() -Maximum value - -@item minIndex() -Index of the minimum value (int) - -@item maxIndex() -Index of the maximum value (int) - -@item count() -Counts the number of times the expression is logical true (int) - -@item any() -True if the expression is true anywhere (bool) - -@item all() -True if the expression is true everywhere (bool) - -@item first() -First index at which the expression is logical true (int); if the expression -is logical true nowhere, then @code{tiny(int())} (INT_MIN) is returned. - -@item last() -Last index at which the expression is logical true (int); if the expression -is logical true nowhere, then @code{huge(int())} (INT_MAX) is returned. -@end table - -The reductions @code{any()}, @code{all()}, and @code{first()} have -short-circuit semantics: the reduction will halt as soon as the answer is -known. For example, if you use @code{any()}, scanning of the expression -will stop as soon as the first true value is encountered. 
- -To illustrate, here's an example: - -@example -Array A(4,4); - -A = 3, 8, 0, 1, - 1, -1, 9, 3, - 2, -5, -1, 1, - 4, 3, 4, 2; - -Array z(4); -firstIndex i; -secondIndex j; - -z = sum(A(j,i), j); -@end example - -The array @code{z} now contains the sum of @code{A} along each column: - -@example -[ 10 5 12 7 ] -@end example - -This table shows what the result stored in @code{z} would be if -@code{sum()} were replaced with other reductions: - -@example -sum [ 10 5 12 7 ] -mean [ 2.5 1.25 3 1.75 ] -min [ 1 -5 -1 1 ] -minIndex [ 1 2 2 0 ] -max [ 4 8 9 3 ] -maxIndex [ 3 0 1 1 ] -first((A < 0), j) [ -2147483648 1 2 -2147483648 ] -product [ 24 120 0 6 ] -count((A(j,i) > 0), j) [ 4 2 2 4 ] -any(abs(A(j,i)) > 4, j) [ 0 1 1 0 ] -all(A(j,i) > 0, j) [ 1 0 0 1 ] -@end example - -Note: the odd numbers for first() are @code{tiny(int())} i.e.@: the smallest -number representable by an int. The exact value is machine-dependent. - -@cindex Array reductions chaining -@cindex partial reductions chaining -@cindex reductions chaining - -The result of a reduction is an array expression, so reductions -can be used as operands in an array expression: - -@example -Array A; -Array B; -Array C; // ... - -secondIndex j; -thirdIndex k; - -B = sqrt(sum(sqr(A), k)); - -// Do two reductions in a row -C = sum(sum(A, k), j); -@end example - -Note that this is not allowed: - -@example -Array A; -firstIndex i; -secondIndex j; - -// Completely sum the array? -int result = sum(sum(A, j), i); -@end example - -You cannot reduce an array to zero dimensions! Instead, use one of the -global functions described in the previous section. - - -@node Where expr, , User et, Array Expressions -@section where statements -@cindex @code{where} statements -@cindex functional if (@code{where}) -@cindex @code{if} (@code{where}) - -Blitz++ provides the @code{where} function as an array expression version of the -@code{( ? : )} operator. 
The syntax is: - -@example -where(array-expr1, array-expr2, array-expr3) -@end example - -Wherever @code{array-expr1} is true, @code{array-expr2} is returned. Where -@code{array-expr1} is false, @code{array-expr3} is returned. For example, -suppose we wanted to sum the squares of only the positive elements of an -array. This can be implemented using a where function: - -@example -double posSquareSum = sum(where(A > 0, pow2(A), 0)); -@end example - diff --git a/doc/arrays-globals.texi b/doc/arrays-globals.texi deleted file mode 100644 index c3b4a425..00000000 --- a/doc/arrays-globals.texi +++ /dev/null @@ -1,166 +0,0 @@ - -@node Array globals, Array I/O, Array members, Arrays -@section Global functions - -@example -void allocateArrays(TinyVector& shape, - Array& A, - Array& B, ...); -@end example -@findex allocateArrays() - -This function will allocate interlaced arrays, but only if interlacing is -desirable for your architecture. This is controlled by the -@code{BZ_INTERLACE_ARRAYS} flag in @file{blitz/tuning.h}. You can provide up to -11 arrays as parameters. Any views currently associated with the array -objects are lost. Here is a typical use: - -@example -Array A, B, C; -allocateArrays(shape(64,64),A,B,C); -@end example - -@cindex interlacing -@cindex Array interlacing - -If array interlacing is enabled, then the arrays are stored in memory like -this: @code{A(0,0)}, @code{B(0,0)}, @code{C(0,0)}, @code{A(0,1)}, -@code{B(0,1)}, ... If interlacing is disabled, then the arrays are -allocated in the normal fashion: each array has its own block of memory. -Once interlaced arrays are allocated, they can be used just like regular -arrays. 
- -@cindex convolution, 1-D -@cindex Array convolution -@findex convolve() - -@example -#include <blitz/array/convolve.h> -Array<T,1> convolve(const Array<T,1>& B, - const Array<T,1>& C); -@end example - -This function computes the 1-D convolution of the arrays B and C: -@tex -$$ A[i] = \sum_j B[j] C[i-j] $$ -@end tex -@html -A[i] = sum(B[j] * C[i-j], j) -@end html -@ifnottex -@ifnothtml -@example -A[i] = sum(B[j] * C[i-j], j) -@end example -@end ifnothtml -@end ifnottex -If the array @math{B} has domain @math{b_l \ldots b_h}, and array @math{C} -has domain @math{c_l \ldots c_h}, then the resulting array has domain -@math{a_l \ldots a_h}, with @math{a_l = b_l + c_l} and @math{a_h = b_h + c_h}. - -A new array is allocated to contain the result. To avoid copying the result -array, you should use it as a constructor argument. For example: -@code{Array<float,1> A = convolve(B,C);} The convolution is computed in the -spatial domain. Frequency-domain transforms are not used. If you are -convolving two large arrays, then this will be slower than using a Fourier -transform. - -@cindex correlation -@cindex Array correlation - -Note that if you need a cross-correlation, you can use the convolve function -with one of the arrays reversed. For example: - -@example -Array<float,1> A = convolve(B,C.reverse()); -@end example - -Autocorrelation can be performed using the same approach. - -@example -void cycleArrays(Array<T,N>& A, Array<T,N>& B); -void cycleArrays(Array<T,N>& A, Array<T,N>& B, - Array<T,N>& C); -void cycleArrays(Array<T,N>& A, Array<T,N>& B, - Array<T,N>& C, Array<T,N>& D); -void cycleArrays(Array<T,N>& A, Array<T,N>& B, - Array<T,N>& C, Array<T,N>& D, - Array<T,N>& E); -@end example - -@findex cycleArrays() -@cindex time-stepping - -These routines are useful for time-stepping PDEs. They take a set of arrays -such as [@code{A,B,C,D}] and cyclically rotate them to [@code{B,C,D,A}]; -i.e.@: the @code{A} array then refers to what was @code{B}'s data, the -@code{B} array refers to what was @code{C}'s data, the @code{C} array refers to what was @code{D}'s data, and the @code{D} array -refers to what was @code{A}'s data. 
These functions operate in constant -time, since only the handles change (i.e.@: no data is copied; only pointers -change). - -@example -void find(Array,1>& indices, - const _bz_ArrayExpr& expr); -void find(Array,1>& indices, - const Array& exprVals); -@end example - -This is an analogue to the Matlab @code{find()} method, which takes a -boolean array expression or an array of bools and returns a 1d array -of indices for all locations where the array or expression is true. - -@findex find() - -@example -Array imag(Array,N>&); -@end example - -This method returns a view of the imaginary portion of the array. - -@findex imag() - -@example -void interlaceArrays(TinyVector& shape, - Array& A, - Array& B, ...); -@end example - -This function is similar to @code{allocateArrays()} above, except that the -arrays are @strong{always} interlaced, regardless of the setting of the -@code{BZ_INTERLACE_ARRAYS} flag. - -@findex interlaceArrays() - -@example -Array real(Array,N>&); -@end example - -This method returns a view of the real portion of the array. - -@findex real() - -@example -TinyVector shape(int L); -TinyVector shape(int L, int M); -TinyVector shape(int L, int M, int N); -TinyVector shape(int L, int M, int N, int O); -... [up to 11 dimensions] -@end example - -@findex shape() - -These functions may be used to create shape parameters. They package the -set of integer arguments as a @code{TinyVector} of appropriate length. For -an example use, see @code{allocateArrays()} above. - -@example -void swap(Array& A, Array& B); -@end example - -This function swaps the storage of two arrays, just like the @code{std::swap()} -function does for STL container types. This is a synonym for the -two-argument version of @code{cycleArrays()} above. 
- -@findex swap() - diff --git a/doc/arrays-indirect.texi b/doc/arrays-indirect.texi deleted file mode 100644 index 7e254f89..00000000 --- a/doc/arrays-indirect.texi +++ /dev/null @@ -1,258 +0,0 @@ - -@cindex indirection -@cindex Array indirection - -@strong{Indirection} is the ability to modify or access an array at a set of -selected index values. Blitz++ provides several forms of indirection: - -@itemize @bullet - -@item @strong{Using a list of array positions}: this approach is useful -if you need to modify an array at a set of scattered points. - -@item @strong{Cartesian-product indirection}: as an example, for a -two-dimensional array you might have a list @code{I} of rows and a list -@code{J} of columns, and you want to modify the array at all (i,j) positions -where i is in @code{I} and j is in @code{J}. This is a @strong{cartesian -product} of the index sets @code{I} and @code{J}. - -@item @strong{Over a set of strips}: for efficiency, you can represent an -arbitrarily-shaped subset of an array as a list of one-dimensional strips. -This is a useful way of handling @strong{Regions Of Interest} (ROIs). - -@end itemize - -@center @image{indirect} -@center Three styles of indirection. @footnote{From top to bottom: (1) using a list of array positions; (2) Cartesian-product indirection; (3) using a set of strips to represent an arbitrarily-shaped subset of an array.} - -@cindex STL, for indirection - -In all cases, Blitz++ expects a Standard Template Library container. Some -useful STL containers are @code{list<>}, @code{vector<>}, @code{deque<>} and -@code{set<>}. Documentation of these classes is often provided with your -compiler, or see also the good documentation at -@uref{http://www.sgi.com/Technology/STL/}. STL containers are used because -they are widely available and provide easier manipulation of ``sets'' than -Blitz++ arrays. 
For example, you can easily expand and merge sets which are -stored in STL containers; doing this is not so easy with Blitz++ arrays, -which are designed for numerical work. - -STL containers are generally included by writing - -@example -#include <list> // for list<> -#include <vector> // for vector<> -#include <deque> // for deque<> -#include <set> // for set<> -@end example - -@cindex [] operator, for indirection - -The @code{[]} operator is overloaded on arrays so that the syntax -@code{array[container]} provides an indirect view of the array. So far, -this indirect view may only be used as an lvalue (i.e.@: on the left-hand side -of an assignment statement). - -The examples in the next sections are available in the Blitz++ distribution -in @file{}. - -@node Indirection position list, Indirection Cartesian product, , Indirection -@section Indirection using lists of array positions - -@cindex Array indirection list of positions -@cindex indirection list of positions - -The simplest kind of indirection uses a list of points. For one-dimensional -arrays, you can just use an STL container of integers. Example: - -@example - Array<int,1> A(5), B(5); - A = 0; - B = 1, 2, 3, 4, 5; - - vector<int> I; - I.push_back(2); - I.push_back(4); - I.push_back(1); - - A[I] = B; -@end example - -After this code, the array A contains @code{[ 0 2 3 0 5 ]}. - -Note that arrays on the right-hand-side of the assignment must have the same -shape as the array on the left-hand-side (before indirection). In the -statement @code{A[I] = B}, A and B must have the same shape, not I and B. - -For multidimensional arrays, you can use an STL container of -@code{TinyVector<int,N>} objects. 
Example: - -@example - Array A(4,4), B(4,4); - A = 0; - B = 10*tensor::i + tensor::j; - - typedef TinyVector coord; - - list I; - I.push_back(coord(1,1)); - I.push_back(coord(2,2)); - - A[I] = B; -@end example - -After this code, the array A contains: - -@example - 0 0 0 0 - 0 11 0 0 - 0 0 22 0 - 0 0 0 0 -@end example - -(The @code{tensor::i} notation is explained in the section on index -placeholders @ref{Index placeholders}). - -@node Indirection Cartesian product, Indirection strip list, Indirection position list, Indirection -@section Cartesian-product indirection - -@cindex Array indirection Cartesian-product -@cindex indirection Cartesian-product - -The Cartesian product of the sets I, J and K is the set of (i,j,k) tuples -for which i is in I, j is in J, and k is in K. - -Blitz++ implements cartesian-product indirection using an @strong{adaptor} -which takes a set of STL containers and iterates through their Cartesian -product. Note that the cartesian product is never explicitly created. You -create the Cartesian-product adaptor by calling the function: - -@example -template -indexSet(T_container& c1, T_container& c2, ...) -@end example - -The returned adaptor can then be used in the @code{[]} operator of an array -object. - -Here is a two-dimensional example: - -@cindex rank-1 update - -@example - Array A(6,6), B(6,6); - A = 0; - B = 10*tensor::i + tensor::j; - - vector I, J; - I.push_back(1); - I.push_back(2); - I.push_back(4); - - J.push_back(0); - J.push_back(2); - J.push_back(5); - - A[indexSet(I,J)] = B; -@end example - -After this code, the A array contains: - -@example - 0 0 0 0 0 0 -10 0 12 0 0 15 -20 0 22 0 0 25 - 0 0 0 0 0 0 -40 0 42 0 0 45 - 0 0 0 0 0 0 -@end example - -All the containers used in a cartesian product must be the same type (e.g. -all @code{vector} or all @code{set >}), but they may -be different sizes. Singleton containers (containers containing a single -value) are fine. 
- -@node Indirection strip list, , Indirection Cartesian product, Indirection -@section Indirection with lists of strips - -@cindex Array indirection list of strips -@cindex indirection list of strips - -You can also do indirection with a container of one-dimensional -@strong{strips}. This is useful when you want to manipulate some -arbitrarily-shaped, well-connected subdomain of an array. By representing -the subdomain as a list of strips, you allow Blitz++ to operate on vectors, -rather than scattered points; this is much more efficient. - -@findex RectDomain - -Strips are represented by objects of type @code{RectDomain}, where -@code{N} is the dimensionality of the array. The @code{RectDomain} class -can be used to represent any rectangular subdomain, but for indirection it -is only used to represent strips. - -You create a strip by using this function: - -@findex strip() - -@example -RectDomain strip(TinyVector start, - int stripDimension, int ubound); -@end example - -The @code{start} parameter is where the strip starts; @code{stripDimension} -is the dimension in which the strip runs; @code{ubound} is the last index -value for the strip. For example, to create a 2-dimensional strip from -(2,5) to (2,9), one would write: - -@example -TinyVector start(2,5); -RectDomain<2> myStrip = strip(start,secondDim,9); -@end example - -Here is a more substantial example which creates a list of strips -representing a circle subset of an array: - -@example - const int N = 7; - Array A(N,N), B(N,N); - typedef TinyVector coord; - - A = 0; - B = 1; - - double centre_i = (N-1)/2.0; - double centre_j = (N-1)/2.0; - double radius = 0.8 * N/2.0; - - // circle will contain a list of strips which represent a circular - // subdomain. 
- - list > circle; - for (int i=0; i < N; ++i) - @{ - double jdist2 = pow2(radius) - pow2(i-centre_i); - if (jdist2 < 0.0) - continue; - - int jdist = int(sqrt(jdist2)); - coord startPos(i, int(centre_j - jdist)); - circle.push_back(strip(startPos, secondDim, int(centre_j + jdist))); - @} - - // Set only those points in the circle subdomain to 1 - A[circle] = B; -@end example - -After this code, the A array contains: - -@example - 0 0 0 0 0 0 0 - 0 0 1 1 1 0 0 - 0 1 1 1 1 1 0 - 0 1 1 1 1 1 0 - 0 1 1 1 1 1 0 - 0 0 1 1 1 0 0 - 0 0 0 0 0 0 0 -@end example - diff --git a/doc/arrays-intro.texi b/doc/arrays-intro.texi deleted file mode 100644 index a528cde2..00000000 --- a/doc/arrays-intro.texi +++ /dev/null @@ -1,166 +0,0 @@ - -@node Array intro, Array types, , Arrays -@section Getting started -@cindex Array overview - -Currently, Blitz++ provides a single array class, called -@code{Array}. This array class provides a dynamically -allocated N-dimensional array, with reference counting, arbitrary storage -ordering, subarrays and slicing, flexible expression handling, and many -other useful features. - -@subsection Template parameters -@cindex Array template parameters - -The @code{Array} class takes two template parameters: - -@itemize @bullet -@item @code{T_numtype} -is the numeric type to be stored in the array. @code{T_numtype} can be an -integral type (@code{bool}, @code{char}, @code{unsigned char}, @code{short -int}, @code{short unsigned int}, @code{int}, @code{unsigned int}, -@code{long}, @code{unsigned long}), floating point type (@code{float}, -@code{double}, @code{long double}), complex type (@code{complex}, -@code{complex}, @code{complex}) or any user-defined -type with appropriate numeric semantics. - -@item @code{N_rank} -@cindex Array rank parameter -@cindex rank parameter of arrays -is the @strong{rank} (or dimensionality) of the array. This should be a -positive integer. 
- -@end itemize - -To use the @code{Array} class, include the header @code{<blitz/array.h>} and -use the namespace @code{blitz}: - -@findex using namespace blitz -@findex namespace blitz -@cindex blitz namespace - -@example -#include <blitz/array.h> - -using namespace blitz; - -Array<int,1> x; // A one-dimensional array of int -Array<double,2> y; // A two-dimensional array of double -. -. -Array, 12> z; // A twelve-dimensional array of complex -@end example - -When no constructor arguments are provided, the array is empty, and no -memory is allocated. To create an array which contains some data, provide -the size of the array as constructor arguments: - -@example -Array<double,2> y(4,4); // A 4x4 array of double -@end example - -The contents of a newly-created array are garbage. To initialize -the array, you can write: - -@example -y = 0; -@end example - -and all the elements of the array will be set to zero. If the contents of -the array are known, you can initialize it using a comma-delimited list of -values. For example, this code excerpt sets @code{y} equal to a 4x4 -identity matrix: - -@example -y = 1, 0, 0, 0, - 0, 1, 0, 0, - 0, 0, 1, 0, - 0, 0, 0, 1; -@end example - -@subsection Array types -@cindex Array types - -The @code{Array} class supports a variety of arrays: - -@itemize @bullet - -@cindex Array scalar arrays -@item Arrays of scalar types, such as @code{Array} and -@code{Array} - -@cindex Array complex arrays -@cindex complex arrays -@item Complex arrays, such as @code{Array,2>} - -@cindex Array of user-defined types -@cindex Array of TinyVector -@cindex vector field -@cindex Array of TinyMatrix -@cindex Array nested -@cindex Array nested homogeneous -@cindex nested arrays -@cindex nested arrays homogeneous -@item Arrays of user-defined types. If you have a class called -@code{Polynomial}, then @code{Array} is an array of -@code{Polynomial} objects. 
- -@cindex Array of Array -@cindex Array nested heterogeneous -@cindex nested arrays heterogeneous -@item Nested homogeneous arrays using @code{TinyVector} and -@code{TinyMatrix}, in which each element is a fixed-size vector or array. -For example, @code{Array,3>} is a three-dimensional -vector field. - -@item Nested heterogeneous arrays, such as @code{Array,1>}, in -which each element is a variable-length array. -@end itemize - -@subsection A simple example - -Here's an example program which creates two 3x3 arrays, initializes -them, and adds them: - -@smallexample -@include examples/simple.texi -@end smallexample - -and the output: - -@smallexample -@include examples/simple.out -@end smallexample - -@subsection Storage orders -@cindex Array storage order -@cindex storage orders for arrays -@cindex row major -@cindex column major -@findex fortranArray -@cindex Array row major -@cindex Array column major -@cindex Array fortran-style - -Blitz++ is very flexible about the way arrays are stored in memory. - -The default storage format is row-major, C-style arrays whose indices start -at zero. - -Fortran-style arrays can also be created. Fortran arrays are stored in -column-major order, and have indices which start at one. To create a -Fortran-style array, use this syntax: @code{Array A(3, 3, -fortranArray);} The last parameter, @code{fortranArray}, tells the -@code{Array} constructor to use a fortran-style array format. - -@code{fortranArray} is a global object which has an automatic conversion to -type @code{GeneralArrayStorage}. @code{GeneralArrayStorage} -encapsulates information about how an array is laid out in memory. By -altering the contents of a @code{GeneralArrayStorage} object, you can lay -out your arrays any way you want: the dimensions can be ordered arbitrarily -and stored in ascending or descending order, and the starting indices can be -arbitrary. - -Creating custom array storage formats is described in a later section -(@ref{Array storage}). 
- diff --git a/doc/arrays-io.texi b/doc/arrays-io.texi deleted file mode 100644 index 35b0b474..00000000 --- a/doc/arrays-io.texi +++ /dev/null @@ -1,79 +0,0 @@ - -@node Array I/O, Array storage, Array globals, Arrays -@section Inputting and Outputting Arrays - -@subsection Output formatting - -@cindex persistence -@cindex Array persistence -@cindex output formatting -@cindex Array output formatting -@cindex saving arrays -@cindex writing arrays to output streams -@cindex Array saving to output stream -@cindex Array writing to output stream - -The current version of Blitz++ includes rudimentary output formatting for -arrays. Here's an example: - -@smallexample -@include examples/output.texi -@end smallexample - -And the output: - -@smallexample -@include examples/output.out -@end smallexample - -@subsection Inputting arrays - -@cindex inputting arrays from an input stream -@cindex Array inputting from istream -@cindex restoring arrays from an input stream -@cindex Array restoring from istream - -Arrays may be restored from an istream using the @code{>>} operator. - -@strong{Caution:} you must know the dimensionality of the array being restored -from the stream. The @code{>>} operator expects an array in the same input -format as generated by the @code{<<} operator, namely: - -@cindex Array persistence format - -@itemize @bullet - -@item The size of the array, for example ``32'' for a 1-dimensional array of -32 elements, ``12 x 64 x 128'' for a 3-dimensional array of size 12x64x128. - -@item The symbol @code{'['} indicating the start of the array data - -@item The array elements, listed in memory storage order - -@item The symbol @code{']'} indicating the end of the array data - -@end itemize - -The operator prototype is: - -@example -template -istream& operator>>(istream&, Array&); -@end example - -Here is an example of saving and restoring arrays from files. You can find -this example in the Blitz++ distribution as @file{examples/io.cpp}. 
- -@smallexample -@include examples/io.texi -@end smallexample - -@strong{Caution:} The storage order and starting indices are not restored from -the input stream. If you are restoring (for example) a Fortran-style array, -you must create a Fortran-style array, and then restore it. For example, -this code restores a Fortran-style array from the standard input stream: - -@example -Array B(fortranArray); -cin >> B; -@end example diff --git a/doc/arrays-members.texi b/doc/arrays-members.texi deleted file mode 100644 index 8327e079..00000000 --- a/doc/arrays-members.texi +++ /dev/null @@ -1,555 +0,0 @@ - -@node Array members, Array globals, Array debug, Arrays -@section Member functions - -@subsection A note about dimension parameters -@cindex dimension parameters -@cindex Array dimension parameters - -Several of the member functions take a @emph{dimension parameter} which is -an integer in the range 0 .. @code{N_rank}-1. For example, the method -@code{extent(int n)} returns the extent (or length) of the array in -dimension @code{n}. - -These parameters are problematic: - -@itemize @bullet - -@item They make the code cryptic. Someone unfamiliar with the -@code{reverse()} member function won't stand a chance of understanding what -@code{A.reverse(2)} does. - -@item Some users are used to dimensions being 1 .. @code{N_rank}, rather -than 0 .. @code{N_rank}-1. This makes dimension numbers inherently -error-prone. Even though I'm an experienced C/C++ programmer, I @emph{still} -want to think of the first dimension as 1 -- it doesn't make sense to talk -about the ``zeroth'' dimension. - -@end itemize - -As a solution to this problem, Blitz++ provides a series of symbolic -constants which you can use to refer to dimensions: - -@findex firstDim -@findex secondDim -@findex thirdDim -@findex fourthDim - -@example -const int firstDim = 0; -const int secondDim = 1; -const int thirdDim = 2; - . - . 
-const int eleventhDim = 10; -@end example - -These symbols should be used in place of the numerals 0, 1, ... @code{N_rank}-1. -For example: - -@example -A.reverse(thirdDim); -@end example - -This code is clearer: you can see that the parameter refers to a dimension, -and it isn't much of a leap to realize that it's reversing the element -ordering in the third dimension. - -If you find @code{firstDim}, @code{secondDim}, ... aesthetically unpleasing, -there are equivalent symbols @code{firstRank}, @code{secondRank}, -@code{thirdRank}, ..., @code{eleventhRank}. - -@cindex eleven, end of the universe at - -@unnumberedsubsubsec Why stop at eleven? - -The symbols had to stop somewhere, and eleven seemed an appropriate place to -stop. Besides, if you're working in more than eleven dimensions your code -is going to be confusing no matter what help Blitz++ provides. - -@cindex Array member functions -@subsection Member function descriptions -@cindex Array member functions @code{base()} -@findex base() -@example -const TinyVector& base() const; -int base(int dimension) const; -@end example - -The @emph{base} of a dimension is the first valid index value. A typical -C-style array will have base of zero; a Fortran-style array will have base -of one. The base can be different for each dimension, but only if you -deliberately use a Range-argument constructor or design a custom storage -ordering. - -The first version returns a reference to the vector of base values. -The second version returns the base for just one dimension; it's -equivalent to the @code{lbound()} member function. See the -note on dimension parameters such as @code{firstDim} above. 
- -@cindex iterators for arrays -@findex const_iterator -@cindex Array iterators -@cindex Array member functions @code{begin()} -@cindex STL iterators for arrays -@findex begin() - -@example -Array::iterator begin(); -Array::const_iterator begin() const; -@end example - -These functions return STL-style forward and input iterators, respectively, -positioned at the first element of the array. Note that the array data is -traversed in memory order (i.e.@: by rows for C-style arrays, and by columns -for Fortran-style arrays). The @code{Array::const_iterator} has these -methods: - -@example -const_iterator(const Array&); -T operator*() const; -const T* [restrict] operator->() const; -const_iterator& operator++(); -void operator++(int); -bool operator==(const const_iterator&) const; -bool operator!=(const const_iterator&) const; -const TinyVector& position() const; -@end example - -Note that postfix ++ returns void (this is not STL-compliant, but is done -for efficiency). The method @code{position()} returns a vector containing the -current index positions of the iterator. The @code{Array::iterator} -has the same methods as @code{const_iterator}, with these exceptions: -@code{iterator& operator++(); T& operator*(); T* [restrict] operator->();} -The @code{iterator} type may be used to modify array elements. To obtain an -iterator positioned at the end of the array, use the @code{end()} methods. - -@cindex Array member functions @code{cols()} -@cindex Array member functions @code{columns()} -@findex cols() -@findex columns() -@example -int cols() const; -int columns() const; -@end example - -Both of these functions return the extent of the array in the -second dimension. Equivalent to @code{extent(secondDim)}. -See also @code{rows()} and @code{depth()}. 
- -@cindex Array member functions @code{copy()} -@cindex Array copying -@findex copy() -@example -Array copy() const; -@end example - -This method creates a copy of the array's data, using the same storage -ordering as the current array. The returned array is guaranteed to be -stored contiguously in memory, and to be the only object referring to its -memory block (i.e.@: the data isn't shared with any other array object). - -@cindex Array getting pointer to array data -@findex data() -@cindex Array member functions @code{data()} -@findex dataZero() -@cindex Array member functions @code{dataZero()} -@findex dataFirst() -@cindex Array member functions @code{dataFirst()} -@example -const T_numtype* [restrict] data() const; - T_numtype* [restrict] data(); -const T_numtype* [restrict] dataZero() const; - T_numtype* [restrict] dataZero(); -const T_numtype* [restrict] dataFirst() const; - T_numtype* [restrict] dataFirst(); -@end example - -These member functions all return pointers to the array data. The NCEG -@code{restrict} qualifier is used only if your compiler supports it. If -you're working with the default storage order (C-style arrays with base -zero), you'll only need to use @code{data()}. Otherwise, things get -complicated: - -@code{data()} returns a pointer to the element whose indices are equal to -the array base. With a C-style array, this means the element (0,0,...,0); -with a Fortran-style array, this means the element (1,1,...,1). If @code{A} -is an array object, @code{A.data()} is equivalent to (&A(A.base(firstDim), -A.base(secondDim), ...)). If any of the dimensions are stored in reverse -order, @code{data()} will not refer to the element which comes first in -memory. - -@code{dataZero()} returns a pointer to the element (0,0,...,0), even if such -an element does not exist in the array. What's the point of having such a -pointer? Say you want to access the element (i,j,k). 
If you add to the -pointer the dot product of (i,j,k) with the stride vector -(@code{A.stride()}), you get a pointer to the element (i,j,k). - -@code{dataFirst()} returns a pointer to the element of the array which comes -first in memory. Note however, that under some circumstances (e.g. -subarrays), the data will not be stored contiguously in memory. You have to -be very careful when meddling directly with an array's data. - -Other relevant functions are: @code{isStorageContiguous()} and -@code{zeroOffset()}. - -@cindex Array member functions @code{depth()} -@findex depth() -@example -int depth() const; -@end example - -Returns the extent of the array in the third dimension. This function is -equivalent to @code{extent(thirdDim)}. See also @code{rows()} and -@code{columns()}. - -@findex dimensions() -@cindex Array member functions @code{dimensions()} -@example -int dimensions() const; -@end example - -Returns the number of dimensions (rank) of the array. The return value is -the second template parameter (@code{N_rank}) of the @code{Array} object. -Same as @code{rank()}. - -@findex domain() -@cindex Array member functions @code{domain()} -@cindex Array obtaining domain of -@example -RectDomain domain() const; -@end example - -Returns the domain of the array. The domain consists of a vector of lower -bounds and a vector of upper bounds for the indices. NEEDS_WORK-- need a -section to explain methods of @code{RectDomain}. - -@findex end() -@cindex Array member functions @code{end()} -@example -Array::iterator end(); -Array::const_iterator end() const; -@end example - -Returns STL-style forward and input iterators (respectively) for the array, -positioned at the end of the array. - -@findex extent() -@cindex Array member functions @code{extent()} -@example -int extent(int dimension) const; -@end example - -Returns the extent (length) of the array in the specified -dimension. 
See the note about dimension parameters such as @code{firstDim} -in the previous section. - -@findex extractComponent() -@cindex Array member functions @code{extractComponent()} -@cindex Array extracting components -@example -Array extractComponent(T_numtype2, - int componentNumber, int numComponents); -@end example - -This method returns an array view of a single component of a multicomponent -array. In a multicomponent array, each element is a tuple of fixed size. -The components are numbered 0, 1, ..., @code{numComponents-1}. Example: - -@example -Array,2> A(128,128); // A 128x128 array of int[3] - -Array B = A.extractComponent(int(), 1, 3); -@end example - -Now the B array refers to the 2nd component of every element in A. Note: -for complex arrays, special global functions @code{real(A)} and -@code{imag(A)} are provided to obtain real and imaginary components of an -array. See the @strong{Global Functions} section. - -@findex free() -@cindex Array member functions @code{free()} -@cindex Array freeing an -@example -void free(); -@end example - -This method resizes an array to zero size. If the array data is not being -shared with another array object, then it is freed. - -@findex isMajorRank() -@cindex Array member functions @code{isMajorRank()} -@example -bool isMajorRank(int dimension) const; -@end example - -Returns true if the dimension has the largest stride. For C-style arrays -(the default), the first dimension always has the largest stride. For -Fortran-style arrays, the last dimension has the largest stride. See also -@code{isMinorRank()} below and the note about dimension parameters such as -@code{firstDim} in the previous section. - -@findex isMinorRank() -@cindex Array member functions @code{isMinorRank()} -@example -bool isMinorRank(int dimension) const; -@end example - -Returns true if the dimension @emph{does not} have the largest stride. See -also @code{isMajorRank()}. 
- -@findex isRankStoredAscending() -@cindex Array member functions @code{isRankStoredAscending()} -@example -bool isRankStoredAscending(int dimension) const; -@end example - -Returns true if the dimension is stored in ascending order in memory. This -is the default. It will only return false if you have reversed a dimension -using @code{reverse()} or have created a custom storage order with a -descending dimension. - -@findex isStorageContiguous() -@cindex Array member functions @code{isStorageContiguous()} -@example -bool isStorageContiguous() const; -@end example - -Returns true if the array data is stored contiguously in memory. If you -slice the array or work on subarrays, there can be skips -- the array data -is interspersed with other data not part of the array. See also the various -@code{data..()} functions. If you need to ensure that the storage is -contiguous, try @code{reference(copy())}. - -@findex lbound() -@cindex Array member functions @code{lbound()} -@example -int lbound(int dimension) const; -TinyVector lbound() const; -@end example - -The first version returns the lower bound of the valid index range for a -dimension. The second version returns a vector of lower bounds for all -dimensions. The lower bound is the first valid index value. If you're -using a C-style array (the default), the lbound will be zero; Fortran-style -arrays have lbound equal to one. The lbound can be different for each -dimension, but only if you deliberately set them that way using a Range -constructor or a custom storage ordering. This function is equivalent to -@code{base(dimension)}. See the note about dimension parameters such as -@code{firstDim} in the previous section. 
- - -@findex makeUnique() -@cindex Array member functions @code{makeUnique()} -@cindex Array making unique copy -@example -void makeUnique(); -@end example - -If the array's data is being shared with another Blitz++ array object, this -member function creates a copy so the array object has a unique view of the -data. - -@findex numElements() -@cindex Array member functions @code{numElements()} -@cindex Array number of elements in -@example -int numElements() const; -@end example - -Returns the total number of elements in the array, calculated by taking the -product of the extent in each dimension. Same as @code{size()}. - -@findex ordering() -@cindex Array member functions @code{ordering()} -@cindex Array storage ordering of -@example -const TinyVector& ordering() const; -int ordering(int storageRankIndex) const; -@end example - -These member functions return information about how the data is ordered in -memory. The first version returns the complete ordering vector; the second -version returns a single element from the ordering vector. The argument for -the second version must be in the range 0 .. @code{N_rank}-1. The ordering -vector is a list of dimensions in increasing order of stride; -@code{ordering(0)} will return the dimension number with the smallest -stride, and @code{ordering(N_rank-1)} will return the dimension number with -largest stride. For a C-style array, the ordering vector contains the -elements (@code{N_rank}-1, @code{N_rank}-2, ..., 0). For a Fortran-style -array, the ordering vector is (0, 1, ..., @code{N_rank}-1). See also the -description of custom storage orders in section @ref{Array storage}. - -@findex rank() -@cindex Array member functions @code{rank()} -@example -int rank() const; -@end example - -Returns the rank (number of dimensions) of the array. The return value is -equal to @code{N_rank}. Equivalent to @code{dimensions()}. 
- -@findex reference() -@cindex Array member functions @code{reference()} -@cindex Array referencing another -@example -void reference(Array& A); -@end example - -This causes the array to adopt another array's data as its own. After this -member function is used, the array object and the array @code{A} are -indistinguishable -- they have identical sizes, index ranges, and data. The -data is shared between the two arrays. - -@findex reindex(), reindexSelf() -@cindex Array member functions @code{reindex()} -@cindex Array member functions @code{reindexSelf()} -@cindex Array reindexing -@example -void reindexSelf(const TinyVector&); -Array reindex(const TinyVector&); -@end example - -These methods reindex an array to use a new base vector. The first version -reindexes the array, and the second just returns a reindexed view of the -array, leaving the original array unmodified. - -@findex resize() -@cindex Array member functions @code{resize()} -@cindex Array resizing -@example -void resize(int extent1, ...); -void resize(const TinyVector&); -@end example - -These functions resize an array to the specified size. If the array is -already the size specified, then no memory is allocated. After resizing, -the contents of the array are garbage. See also @code{resizeAndPreserve()}. - -@findex resizeAndPreserve() -@cindex Array member functions @code{resizeAndPreserve()} -@example -void resizeAndPreserve(int extent1, ...); -void resizeAndPreserve(const TinyVector&); -@end example - -These functions resize an array to the specified size. If the array is -already the size specified, then no change occurs (the array is not -reallocated and copied). The contents of the array are preserved whenever -possible; if the new array size is smaller, then some data will be lost. -Any new elements created by resizing the array are left uninitialized. 
- -@findex reverse(), reverseSelf() -@cindex Array member functions @code{reverse()} -@cindex Array member functions @code{reverseSelf()} -@cindex Array reversing -@example -Array reverse(int dimension); -void reverseSelf(int dimension); -@end example - -This method reverses the array in the specified dimension. For example, if -@code{reverse(firstDim)} is invoked on a 2-dimensional array, then the -ordering of rows in the array will be reversed; @code{reverse(secondDim)} -would reverse the order of the columns. Note that this is implemented by -twiddling the strides of the array, and doesn't cause any data copying. The -first version returns a reversed ``view'' of the array data; the second -version applies the reversal to the array itself. - -@findex rows() -@cindex Array member functions @code{rows()} -@example -int rows() const; -@end example - -Returns the extent (length) of the array in the first dimension. This -function is equivalent to @code{extent(firstDim)}. See also -@code{columns()}, and @code{depth()}. - -@findex size() -@cindex Array member functions @code{size()} -@example -int size() const; -@end example - -Returns the total number of elements in the array, calculated by taking the -product of the extent in each dimension. Same as @code{numElements()}. - -@cindex @code{shape()} (Array method) -@cindex Array member functions @code{shape()} -@cindex Array shape of -@example -const TinyVector& shape() const; -@end example - -Returns the vector of extents (lengths) of the array. - -@findex stride() -@cindex Array member functions @code{stride()} -@cindex Array strides of -@example -const TinyVector& stride() const; -int stride(int dimension) const; -@end example - -The first version returns the stride vector; the second version returns the -stride associated with a dimension. A stride is the distance between -pointers to two array elements which are adjacent in a dimension. 
For -example, @code{A.stride(firstDim)} is equal to @code{&A(1,0,0) - &A(0,0,0)}. -The stride for the second dimension, @code{A.stride(secondDim)}, is equal to -@code{&A(0,1,0) - &A(0,0,0)}, and so on. For more information about -strides, see the description of custom storage formats in Section -@ref{Array storage}. See also the description of parameters like -@code{firstDim} and @code{secondDim} in the previous section. - -@cindex Array member functions @code{transpose()} -@cindex Array member functions @code{transposeSelf()} -@cindex Array transposing -@cindex transposing arrays -@findex transpose(), transposeSelf() -@example -Array transpose(int dimension1, - int dimension2, ...); -void transposeSelf(int dimension1, - int dimension2, ...); -@end example - -These methods permute the dimensions of the array. The dimensions of the -array are reordered so that the first dimension is @code{dimension1}, the -second is @code{dimension2}, and so on. The arguments should be a -permutation of the symbols @code{firstDim, secondDim, ...}. Note that this -is implemented by twiddling the strides of the array, and doesn't cause any -data copying. The first version returns a transposed ``view'' of the array -data; the second version transposes the array itself. - -@cindex Array member functions @code{ubound()} -@findex ubound() -@example -int ubound(int dimension) const; -TinyVector ubound() const; -@end example - -The first version returns the upper bound of the valid index range for a -dimension. The second version returns a vector of upper bounds for all -dimensions. The upper bound is the last valid index value. If you're using -a C-style array (the default), the ubound will be equal to the -@code{extent(dimension)-1}. Fortran-style arrays will have ubound equal to -@code{extent(dimension)}. The ubound can be different for each dimension. -The return value of @code{ubound(dimension)} will always be equal to -@code{lbound(dimension)+extent(dimension)-1}. 
See the note about -dimension parameters such as @code{firstDim} in the previous section. - -@findex zeroOffset() -@cindex Array member functions @code{zeroOffset()} -@example -int zeroOffset() const; -@end example - -This function has to do with the storage of arrays in memory. You may want -to refer to the description of the @code{data..()} member functions and of -custom storage orders in Section @ref{Array storage} for -clarification. The return value of @code{zeroOffset()} is the distance from -the first element in the array to the (possibly nonexistent) element -@code{(0,0,...,0)}. In this context, ``first element'' refers to the element -@code{(base(firstDim),base(secondDim),...)}. - diff --git a/doc/arrays-multi.texi b/doc/arrays-multi.texi deleted file mode 100644 index 5f085641..00000000 --- a/doc/arrays-multi.texi +++ /dev/null @@ -1,193 +0,0 @@ - -@node Array multi, Array usertype, , Customized Arrays -@section Multicomponent and complex arrays -@cindex Array multicomponent -@cindex multicomponent arrays - -Multicomponent arrays have elements which are vectors. Examples of such -arrays are vector fields, colour images (which contain, say, RGB tuples), -and multispectral images. Complex-valued arrays can also be regarded as -multicomponent arrays, since each element is a 2-tuple of real values. - -Here are some examples of multicomponent arrays: - -@cindex RGB24 example - -@example -// A 3-dimensional array; each element is a length 3 vector of float -Array,3> A; - -// A complex 2-dimensional array -Array,2> B; - -// A 2-dimensional image containing RGB tuples -struct RGB24 @{ - unsigned char r, g, b; -@}; - -Array C; -@end example - -@subsection Extracting components - -@cindex extracting components -@cindex Array extracting components - -Blitz++ provides some special support for such arrays. The most important -is the ability to extract a single component.
For example: - -@example -Array<TinyVector<float,3>,2> A(128,128); -Array<float,2> B = A.extractComponent(float(), 1, 3); -B = 0; -@end example - -The call to @code{extractComponent} returns an array of floats; this array -is a view of the second component of each element of A. The arguments of -@code{extractComponent} are: (1) the type of the component (in this example, -float); (2) the component number to extract (numbered 0, 1, ... N-1); and -(3) the number of components in each element of the array. - -This is a little bit messy, so Blitz++ provides a handy shortcut using -@code{operator[]}: - -@example -Array<TinyVector<float,3>,2> A(128,128); -A[1] = 0; -@end example - -The number inside the square brackets is the component number. However, for -this operation to work, Blitz++ has to already know how many components -there are, and what type they are. It knows this already for -@code{TinyVector} and @code{complex}. If you use your own type, though, -you will have to tell Blitz++ this information using the macro -@code{BZ_DECLARE_MULTICOMPONENT_TYPE()}. This macro has three arguments: - -@findex BZ_DECLARE_MULTICOMPONENT_TYPE - -@example -BZ_DECLARE_MULTICOMPONENT_TYPE(T_element, T_componentType, numComponents) -@end example - -@code{T_element} is the element type of the array. @code{T_componentType} -is the type of the components of that element. @code{numComponents} is the -number of components in each element. - -An example will clarify this. Suppose we wanted to make a colour image, -stored in 24-bit HSV (hue-saturation-value) format. We can make a class -@code{HSV24} which represents a single pixel: - -@cindex HSV24 example - -@example -#include <blitz/array.h> - -using namespace blitz; - -class HSV24 @{ -public: - // These constants will make the code below cleaner; we can - // refer to the components by name, rather than number.
- - static const int hue=0, saturation=1, value=2; - - HSV24() @{ @} - HSV24(int hue, int saturation, int value) - : h_(hue), s_(saturation), v_(value) - @{ @} - - // Some other stuff here, obviously - -private: - unsigned char h_, s_, v_; -@}; -@end example - -Right after the class declaration, we will invoke the macro -@code{BZ_DECLARE_MULTICOMPONENT_TYPE} to tell Blitz++ about HSV24: - -@example -// HSV24 has 3 components of type unsigned char -BZ_DECLARE_MULTICOMPONENT_TYPE(HSV24, unsigned char, 3); -@end example - -Now we can create HSV images and modify the individual components: - -@example -int main() -@{ - Array A(128,128); // A 128x128 HSV image - ... - - // Extract a greyscale version of the image - Array A_greyscale = A[HSV24::value]; - - // Bump up the saturation component to get a - // pastel effect - A[HSV24::saturation] *= 1.3; - - // Brighten up the middle of the image - Range middle(32,96); - A[HSV24::value](middle,middle) *= 1.2; -@} -@end example - -@subsection Special support for complex arrays - -@cindex Array complex -@cindex complex arrays - -Since complex arrays are used frequently, Blitz++ provides two special -methods for getting the real and imaginary components: - -@example -Array,2> A(32,32); - -real(A) = 1.0; -imag(A) = 0.0; -@end example - -The function @code{real(A)} returns an array view of the real component; -@code{imag(A)} returns a view of the imaginary component. - -Note: Blitz++ provides numerous math functions defined over complex-valued -arrays, such as @code{conj}, @code{polar}, @code{arg}, @code{abs}, -@code{cos}, @code{pow}, etc. See the section on math functions -(@ref{Math functions 1}) for details. - -@subsection Zipping together expressions -@cindex zipping expressions -@cindex Array zipping expressions - -Blitz++ provides a function @code{zip()} which lets you combine two or more -expressions into a single component. 
For example, you can combine two real -expressions into a complex expression, or three integer expressions into an -HSV24 expression. The function has this syntax: - -@example -resultexpr zip(expr1, expr2, T_element) -resultexpr zip(expr1, expr2, expr3, T_element) ** not available yet -resultexpr zip(expr1, expr2, expr3, expr4, T_element) ** not available yet -@end example - -The types @code{resultexpr}, @code{expr1} and @code{expr2} are array -expressions. The third argument is the type you want to create. For -example: - -@example -int N = 16; -Array,1> A(N); -Array theta(N); - - ... - -A = zip(cos(theta), sin(theta), complex()); -@end example - -The above line is equivalent to: - -@example -for (int i=0; i < N; ++i) - A[i] = complex(cos(theta[i]), sin(theta[i])); -@end example - diff --git a/doc/arrays-slicing.texi b/doc/arrays-slicing.texi deleted file mode 100644 index dcf8658c..00000000 --- a/doc/arrays-slicing.texi +++ /dev/null @@ -1,300 +0,0 @@ - -@node Array slicing, Array debug, Array ctors, Arrays -@section Indexing, subarrays, and slicing - -This section describes how to access the elements of an array. There are -three main ways: - -@itemize @bullet - -@item @strong{Indexing} obtains a single element - -@item Creating a @strong{subarray} which refers to a smaller portion of -an array - -@item @strong{Slicing} to produce a smaller-dimensional view of a portion -of an array - -@end itemize - -Indexing, subarrays and slicing all use the overloaded parenthesis -@code{operator()}. - -As a running example, we'll consider the three dimensional array pictured -below, which has index ranges (0..7, 0..7, 0..7). Shaded portions of the -array show regions which have been obtained by indexing, creating a -subarray, and slicing. - -@center @image{slice} -@center Examples of array indexing, subarrays, and slicing. - -@subsection Indexing -@cindex Array indexing -@cindex indexing an array - -There are two ways to get a single element from an array. 
The simplest is -to provide a set of integer operands to @code{operator()}: - -@example -A(7,0,0) = 5; -cout << "A(7,0,0) = " << A(7,0,0) << endl; -@end example - -This version of indexing is available for arrays of rank one through eleven. -If the array object isn't @code{const}, the return type of -@code{operator()} is a reference; if the array object is @code{const}, the -return type is a value. - -You can also get an element by providing an operand of type -@code{TinyVector} where @code{N_rank} is the rank of the array -object: - -@example -TinyVector index; -index = 7, 0, 0; -A(index) = 5; -cout << "A(7,0,0) = " << A(index) << endl; -@end example - -This version of @code{operator()} is also available in a const-overloaded -version. - -It's possible to use fewer than @code{N_rank} indices. However, missing -indices are @strong{assumed to be zero}, which will cause bounds errors if -the valid index range does not include zero (e.g. Fortran arrays). For this -reason, and for code clarity, it's a bad idea to omit indices. - -@subsection Subarrays -@cindex Array subarrays -@cindex subarrays -@cindex Range objects - -You can obtain a subarray by providing @code{Range} operands to -@code{operator()}. A @code{Range} object represents a set of regularly -spaced index values. For example, - -@example -Array B = A(Range(5,7), Range(5,7), Range(0,2)); -@end example - -The object B now refers to elements (5..7,5..7,0..2) of the array A. - -The returned subarray is of type @code{Array}. This means -that subarrays can be used wherever arrays can be: in expressions, as -lvalues, etc. 
Some examples: - -@example -// A three-dimensional stencil (used in solving PDEs) -Range I(1,6), J(1,6), K(1,6); -B = (A(I,J,K) + A(I+1,J,K) + A(I-1,J,K) + A(I,J+1,K) - + A(I,J-1,K) + A(I,J,K+1) + A(I,J,K-1)) / 7.0; - -// Set a subarray of A to zero -A(Range(5,7), Range(5,7), Range(5,7)) = 0.; -@end example - -The bases of the subarray are equal to the bases of the original array: - -@example -Array D(Range(1,5), Range(1,5)); // 1..5, 1..5 -Array E = D(Range(2,3), Range(2,3)); // 1..2, 1..2 -@end example - -An array can be used on both sides of an expression only if the subarrays -don't overlap. If the arrays overlap, the result may depend on the order in -which the array is traversed. - -@subsection RectDomain and StridedDomain -@cindex RectDomain -@findex RectDomain -@cindex StridedDomain -@findex StridedDomain -@cindex TinyVector of Range (use @code{RectDomain}) - -The classes @code{RectDomain} and @code{StridedDomain}, defined in -@code{blitz/domain.h}, offer a dimension-independent notation for subarrays. - -@code{RectDomain} and @code{StridedDomain} can be thought of as a -@code{TinyVector}. Both have a vector of lower- and upper-bounds; -@code{StridedDomain} additionally has a stride vector. For example, the subarray: - -@example -Array B = A(Range(4,7), Range(8,11)); // 4..7, 8..11 -@end example - -could be obtained using @code{RectDomain} this way: - -@example -TinyVector lowerBounds(4, 8); -TinyVector upperBounds(7, 11); -RectDomain<2> subdomain(lowerBounds, upperBounds); - -Array B = A(subdomain); -@end example - -Here are the prototypes of @code{RectDomain} and @code{StridedDomain}.
- -@example -template -class RectDomain @{ - -public: - RectDomain(const TinyVector& lbound, - const TinyVector& ubound); - - const TinyVector& lbound() const; - int lbound(int i) const; - const TinyVector& ubound() const; - int ubound(int i) const; - Range operator[](int rank) const; - void shrink(int amount); - void shrink(int dim, int amount); - void expand(int amount); - void expand(int dim, int amount); -@}; - -template -class StridedDomain @{ - -public: - StridedDomain(const TinyVector& lbound, - const TinyVector& ubound, - const TinyVector& stride); - - const TinyVector& lbound() const; - int lbound(int i) const; - const TinyVector& ubound() const; - int ubound(int i) const; - const TinyVector& stride() const; - int stride(int i) const; - Range operator[](int rank) const; - void shrink(int amount); - void shrink(int dim, int amount); - void expand(int amount); - void expand(int dim, int amount); -@}; -@end example - -@subsection Slicing -@cindex Array slicing -@cindex slicing arrays - -A combination of integer and Range operands produces a @strong{slice}. Each -integer operand reduces the rank of the array by one. For example: - -@example -Array F = A(Range::all(), 2, Range::all()); -Array G = A(2, 7, Range::all()); -@end example - -Range and integer operands can be used in any combination, for arrays -up to rank 11. - -@strong{Caution:} Using a combination of integer and Range operands requires a -newer language feature (partial ordering of member templates) which not all -compilers support. If your compiler does provide this feature, -@code{BZ_PARTIAL_ORDERING} will be defined in @code{}. If -not, you can use this workaround: - -@example -Array F = A(Range::all(), Range(2,2), Range::all()); -Array G = A(Range(2,2), Range(7,7), Range::all()); -@end example - -@subsection More about Range objects -@cindex Range objects - -A @code{Range} object represents an ordered set of uniformly spaced -integers. 
Here are some examples of using Range objects to obtain -subarrays: - -@smallexample -@include examples/range.texi -@end smallexample - -The optional third constructor argument specifies a stride. For example, -@code{Range(1,5,2)} refers to elements [1 3 5]. Strides can also be -negative: @code{Range(5,1,-2)} refers to elements [5 3 1]. - -Note that if you use the same Range frequently, you can just construct one -object and use it multiple times. For example: - -@example -Range all = Range::all(); -A(0,all,all) = A(N-1,all,all); -A(all,0,all) = A(all,N-1,all); -A(all,all,0) = A(all,all,N-1); -@end example - -Here's an example of using strides with a two-dimensional -array: - -@smallexample -@include examples/strideslice.texi -@end smallexample - -Here's an illustration of the @code{B} subarray: - -@center @image{strideslice} -@center Using strides to create non-contiguous subarrays. - -And the program output: - -@smallexample -@include examples/strideslice.out -@end smallexample - -@subsection A note about assignment -@cindex Array =, meaning of -@cindex =, meaning of -@cindex shallow copies, see also reference() -@cindex assignment operator - -The assignment operator (@code{=}) always results in the expression on the -right-hand side (rhs) being @emph{copied} to the lhs (i.e.@: the data on the -lhs is overwritten with the result from the rhs). This is different from -some array packages in which the assignment operator makes the lhs a -reference (or alias) to the rhs. To further confuse the issue, the copy -constructor for arrays @emph{does} have reference semantics. Here's an -example which should clarify things: - -@example -Array A(5), B(10); -A = B(Range(0,4)); // Statement 1 -Array C = B(Range(0,4)); // Statement 2 -@end example - -Statement 1 results in a portion of @code{B}'s data being copied into -@code{A}. After Statement 1, both @code{A} and @code{B} have their own -(nonoverlapping) blocks of data. 
Contrast this behaviour with that of -Statement 2, which is @strong{not} an assignment (it uses the copy -constructor). After Statement 2 is executed, the array @code{C} is a -reference (or alias) to @code{B}'s data. - -So to summarize: If you want to copy the rhs, use an assignment operator. -If you want to reference (or alias) the rhs, use the copy constructor (or -alternately, the @code{reference()} member function in @ref{Array members}). - -@strong{Very important:} whenever you have an assignment operator (@code{=}, -@code{+=}, @code{-=}, etc.) the lhs @strong{must} have the same shape as the -@strong{rhs}. If you want the array on the left hand side to be resized to -the proper shape, you must do so by calling the @code{resize} method, for -example: - -@example -A.resize(B.shape()); // Make A the same size as B -A = B; -@end example - -@subsection An example - -@smallexample -@include examples/slicing.texi -@end smallexample - -The output: - -@smallexample -@include examples/slicing.out -@end smallexample - diff --git a/doc/arrays-stencils.texi b/doc/arrays-stencils.texi deleted file mode 100644 index 32e44051..00000000 --- a/doc/arrays-stencils.texi +++ /dev/null @@ -1,569 +0,0 @@ -@cindex stencil objects -@cindex Array stencils - -Blitz++ provides an implementation of stencil objects which is currently -@strong{experimental}. This means that the exact details of how they are -declared and used may change in future releases. Use at your own risk. - -@section Motivation: a nicer notation for stencils - -Suppose we wanted to implement the 3-D acoustic wave equation using finite -differencing. Here is how a single iteration would look using subarray -syntax: - -@example -Range I(1,N-2), J(1,N-2), K(1,N-2); - -P3(I,J,K) = (2-6*c(I,J,K)) * P2(I,J,K) - + c(I,J,K)*(P2(I-1,J,K) + P2(I+1,J,K) + P2(I,J-1,K) + P2(I,J+1,K) - + P2(I,J,K-1) + P2(I,J,K+1)) - P1(I,J,K); -@end example - -This syntax is a bit klunky. 
With stencil objects, the implementation -becomes: - -@example -BZ_DECLARE_STENCIL4(acoustic3D_stencil,P1,P2,P3,c) - P3 = 2 * P2 + c * Laplacian3D(P2) - P1; -BZ_END_STENCIL - - . - . - -applyStencil(acoustic3D_stencil(), P1, P2, P3, c); -@end example - - -@node Stencil object, Stencil operator, , Stencils -@section Declaring stencil objects -@cindex stencil objects declaring - -A stencil declaration may not be inside a function. It can appear inside a -class declaration (in which case the stencil object is a nested type). - -Stencil objects are declared using the macros @code{BZ_DECLARE_STENCIL1}, -@code{BZ_DECLARE_STENCIL2}, etc. The number suffix is how many arrays are -involved in the stencil (in the above example, 4 arrays -- P1, P2, P3, c -- are -used, so the macro @code{BZ_DECLARE_STENCIL4} is invoked). - -The first argument is a name for the stencil object. Subsequent arguments -are names for the arrays on which the stencil operates. - -After the stencil declaration, the macro @code{BZ_END_STENCIL} must appear -(or the macro @code{BZ_END_STENCIL_WITH_SHAPE}, described in the next -section). - -In between the two macros, you can have multiple assignment statements, -if/else if/else constructs, function calls, loops, etc. - -Here are some simple examples: - -@findex BZ_DECLARE_STENCIL - -@example -BZ_DECLARE_STENCIL2(smooth2D,A,B) - A = (B(0,0) + B(0,1) + B(0,-1) + B(1,0) + B(-1,0)) / 5.0; -BZ_END_STENCIL - -BZ_DECLARE_STENCIL4(acoustic2D,P1,P2,P3,c) - P3 = 2 * P2 + c * (-4 * P2(0,0) + P2(0,1) + P2(0,-1) + P2(1,0) + P2(-1,0)) - - P1; -BZ_END_STENCIL - -BZ_DECLARE_STENCIL8(prop2D,E1,E2,E3,M1,M2,M3,cE,cM) - E3 = 2 * E2 + cE * Laplacian2D(E2) - E1; - M3 = 2 * M2 + cM * Laplacian2D(M2) - M1; -BZ_END_STENCIL - -BZ_DECLARE_STENCIL3(smooth2Db,A,B,c) - if ((c > 0.0) && (c < 1.0)) - A = c * (B(0,0) + B(0,1) + B(0,-1) + B(1,0) + B(-1,0)) / 5.0 - + (1-c)*B; - else - A = 0; -BZ_END_STENCIL -@end example - -Currently, a stencil can take up to 11 array parameters.
- -You can use the notation @code{A(i,j,k)} to read the element at an offset -@code{(i,j,k)} from the current element. If you omit the parentheses -(i.e.@: as in ``@code{A}'') then the current element is read. - -You can invoke @emph{stencil operators} which calculate finite differences -and laplacians. - -@section Automatic determination of stencil extent - -In stencil declarations such as - -@example -BZ_DECLARE_STENCIL2(smooth2D,A,B) - A = (B(0,0) + B(0,1) + B(0,-1) + B(1,0) + B(-1,0)) / 5.0; -BZ_END_STENCIL -@end example - -Blitz++ will try to automatically determine the spatial extent of the -stencil. This will usually work for stencils defined on integer or float -arrays. However, the mechanism does not work well for complex-valued -arrays, or arrays of user-defined types. If you get a peculiar error when -you try to use a stencil, you probably need to tell Blitz++ the spatial -extent of the stencil manually. - -You do this by ending a stencil declaration with -@code{BZ_END_STENCIL_WITH_SHAPE}: - -@example -BZ_DECLARE_STENCIL2(smooth2D,A,B) - A = (B(0,0) + B(0,1) + B(0,-1) + B(1,0) + B(-1,0)) / 5.0; -BZ_END_STENCIL_WITH_SHAPE(shape(-1,-1),shape(+1,+1)) -@end example - -The parameters of this macro are: a @code{TinyVector} (constructed by the -@code{shape()} function) containing the lower bounds of the stencil offsets, -and a @code{TinyVector} containing the upper bounds. You can determine this -by looking at the terms in the stencil and finding the minimum and -maximum value of each index: - -@example - A = (B(0, 0) - + B(0, +1) - + B(0, -1) - + B(+1, 0) - + B(-1, 0)) / 5.0; - -------- -min indices -1, -1 -max indices +1, +1 -@end example - -@node Stencil operator, Stencil customize, Stencil object, Stencils -@section Stencil operators -@cindex stencil operators - -This section lists all the stencil operators provided by Blitz++. They -assume that an array represents evenly spaced data points separated by a -distance of @code{h}.
A 2nd-order accurate operator has error term -@math{O(h^2)}; a 4th-order accurate operator has error term @math{O(h^4)}. - -All of the stencils have factors associated with them. For example, the -@code{central12} operator is a discrete first derivative which is 2nd-order -accurate. Its factor is 2h; this means that to get the first derivative of -an array A, you need to use @code{central12(A,firstDim)}@math{/(2h)}. -Typically when designing stencils, one factors out all of the @math{h} terms -for efficiency. - -The factor terms always consist of an integer multiplier (often 1) and a -power of @math{h}. For ease of use, all of the operators listed below are -provided in a second ``normalized'' version in which the integer multiplier -is 1. The normalized versions have an @code{n} appended to the name, for -example @code{central12n} is the normalized version of @code{central12}, and -has factor @math{h} instead of @math{2h}. - -These operators are defined in @code{blitz/array/stencilops.h} if you wish -to see the implementation. - -@subsection Central differences -@cindex central differences - -@table @code -@item central12(A,dimension) -1st derivative, 2nd order accurate. Factor: @math{2h} -@include stencils/central12.texi - -@item central22(A,dimension) -2nd derivative, 2nd order accurate. Factor: @math{h^2} -@include stencils/central22.texi - -@item central32(A,dimension) -3rd derivative, 2nd order accurate. Factor: @math{2h^3} -@include stencils/central32.texi - -@item central42(A,dimension) -4th derivative, 2nd order accurate. Factor: @math{h^4} -@include stencils/central42.texi - -@item central14(A,dimension) -1st derivative, 4th order accurate. Factor: @math{12h} -@include stencils/central14.texi - -@item central24(A,dimension) -2nd derivative, 4th order accurate. Factor: @math{12h^2} -@include stencils/central24.texi - -@item central34(A,dimension) -3rd derivative, 4th order accurate. 
Factor: @math{8h^3} -@include stencils/central34.texi - -@item central44(A,dimension) -4th derivative, 4th order accurate. Factor: @math{6h^4} -@include stencils/central44.texi -@end table - -Note that the above are available in normalized versions @code{central12n}, -@code{central22n}, ..., @code{central44n} which have factors of @math{h}, -@math{h^2}, @math{h^3}, or @math{h^4} as appropriate. - -These are available in multicomponent versions: for example, -@code{central12(A,component,dimension)} gives the central12 operator for the -specified component (Components are numbered 0, 1, ... N-1). - -@subsection Forward differences -@cindex forward differences - -@table @code -@item forward11(A,dimension) -1st derivative, 1st order accurate. Factor: @math{h} -@include stencils/forward11.texi - -@item forward21(A,dimension) -2nd derivative, 1st order accurate. Factor: @math{h^2} -@include stencils/forward21.texi - -@item forward31(A,dimension) -3rd derivative, 1st order accurate. Factor: @math{h^3} -@include stencils/forward31.texi - -@item forward41(A,dimension) -4th derivative, 1st order accurate. Factor: @math{h^4} -@include stencils/forward41.texi - -@item forward12(A,dimension) -1st derivative, 2nd order accurate. Factor: @math{2h} -@include stencils/forward12.texi - -@item forward22(A,dimension) -2nd derivative, 2nd order accurate. Factor: @math{h^2} -@include stencils/forward22.texi - -@item forward32(A,dimension) -3rd derivative, 2nd order accurate. Factor: @math{2h^3} -@include stencils/forward32.texi - -@item forward42(A,dimension) -4th derivative, 2nd order accurate. Factor: @math{h^4} -@include stencils/forward42.texi -@end table - -Note that the above are available in normalized versions @code{forward11n}, -@code{forward21n}, ..., @code{forward42n} which have factors of @math{h}, -@math{h^2}, @math{h^3}, or @math{h^4} as appropriate. 
- -These are available in multicomponent versions: for example, -@code{forward11(A,component,dimension)} gives the forward11 operator for the -specified component (Components are numbered 0, 1, ... N-1). - -@subsection Backward differences -@cindex backward differences - -@table @code -@item backward11(A,dimension) -1st derivative, 1st order accurate. Factor: @math{h} -@include stencils/backward11.texi - -@item backward21(A,dimension) -2nd derivative, 1st order accurate. Factor: @math{h^2} -@include stencils/backward21.texi - -@item backward31(A,dimension) -3rd derivative, 1st order accurate. Factor: @math{h^3} -@include stencils/backward31.texi - -@item backward41(A,dimension) -4th derivative, 1st order accurate. Factor: @math{h^4} -@include stencils/backward41.texi - -@item backward12(A,dimension) -1st derivative, 2nd order accurate. Factor: @math{2h} -@include stencils/backward12.texi - -@item backward22(A,dimension) -2nd derivative, 2nd order accurate. Factor: @math{h^2} -@include stencils/backward22.texi - -@item backward32(A,dimension) -3rd derivative, 2nd order accurate. Factor: @math{2h^3} -@include stencils/backward32.texi - -@item backward42(A,dimension) -4th derivative, 2nd order accurate. Factor: @math{h^4} -@include stencils/backward42.texi -@end table - -Note that the above are available in normalized versions @code{backward11n}, -@code{backward21n}, ..., @code{backward42n} which have factors of @math{h}, -@math{h^2}, @math{h^3}, or @math{h^4} as appropriate. - -These are available in multicomponent versions: for example, -@code{backward42(A,component,dimension)} gives the backward42 operator for -the specified component (Components are numbered 0, 1, ... N-1). - -@subsection Laplacian (@math{@nabla ^2}) operators -@cindex Laplacian operators - -@table @code -@item Laplacian2D(A) -2nd order accurate, 2-dimensional laplacian. Factor: @math{h^2} -@include stencils/Laplacian2D.texi - -@item Laplacian3D(A) -2nd order accurate, 3-dimensional laplacian. 
Factor: @math{h^2} - -@item Laplacian2D4(A) -4th order accurate, 2-dimensional laplacian. Factor: @math{12h^2} -@include stencils/Laplacian2D4.texi - -@item Laplacian3D4(A) -4th order accurate, 3-dimensional laplacian. Factor: @math{12h^2} -@end table - -Note that the above are available in normalized versions -@code{Laplacian2D4n}, @code{Laplacian3D4n} which have factors @math{h^2}. - -@subsection Gradient (@math{@nabla}) operators -@cindex gradient operators - -These return @code{TinyVector}s of the appropriate numeric type and length: - -@table @code - -@item grad2D(A) -2nd order, 2-dimensional gradient (vector of first derivatives), generated -using the central12 operator. Factor: @math{2h} - -@item grad2D4(A) -4th order, 2-dimensional gradient, using central14 operator. Factor: @math{12h} - -@item grad3D(A) -2nd order, 3-dimensional gradient, using central12 operator. Factor: @math{2h} - -@item grad3D4(A) -4th order, 3-dimensional gradient, using central14 operator. Factor: @math{12h} -@end table - -These are available in normalized versions @code{grad2Dn}, @code{grad2D4n}, -@code{grad3Dn} and @code{grad3D4n} which have factors @math{h}. - -@subsection Jacobian operators -@cindex Jacobian operators - -The Jacobian operators are defined over 3D vector fields only (e.g. -@code{Array,3>}). They return a -@code{TinyMatrix} where T is the numeric type of the vector field. - -@table @code -@item Jacobian3D(A) -2nd order, 3-dimensional Jacobian using the central12 operator. Factor: -@math{2h}. - -@item Jacobian3D4(A) -4th order, 3-dimensional Jacobian using the central14 operator. Factor: -@math{12h}. -@end table - -These are also available in normalized versions @code{Jacobian3Dn} and -@code{Jacobian3D4n} which have factors @math{h}.
- -@subsection Grad-squared operators -@cindex Grad-squared operators - -There are also grad-squared operators, which return @code{TinyVector}s of -second derivatives: - -@table @code -@item gradSqr2D(A) -2nd order, 2-dimensional grad-squared (vector of second derivatives), -generated using the central22 operator. Factor: @math{h^2} - -@item gradSqr2D4(A) -4th order, 2-dimensional grad-squared, using central24 operator. Factor: -@math{12h^2} - -@item gradSqr3D(A) -2nd order, 3-dimensional grad-squared, using the central22 operator. -Factor: @math{h^2} - -@item gradSqr3D4(A) -4th order, 3-dimensional grad-squared, using central24 operator. Factor: -@math{12h^2} -@end table - -Note that the above are available in normalized versions @code{gradSqr2Dn}, -@code{gradSqr2D4n}, @code{gradSqr3Dn}, @code{gradSqr3D4n} which have factors -@math{h^2}. - -@subsection Curl (@math{@nabla @times}) operators -@cindex curl operator - -These curl operators return scalar values: - -@table @code -@item curl(Vx,Vy) -2nd order curl operator using the central12 operator. Factor: @math{2h} - -@item curl4(Vx,Vy) -4th order curl operator using the central14 operator. Factor: @math{12h} - -@item curl2D(V) -2nd order curl operator on a 2D vector field (e.g.@: -@code{Array<TinyVector<float,2>,2>}), using the central12 operator. Factor: -@math{2h} - -@item curl2D4(V) -4th order curl operator on a 2D vector field, using the central14 operator. -Factor: @math{12h} -@end table - -Available in normalized forms @code{curln}, @code{curl4n}, @code{curl2Dn}, -@code{curl2D4n}. - -These curl operators return three-dimensional @code{TinyVector}s of the -appropriate numeric type: - -@table @code -@item curl(Vx,Vy,Vz) -2nd order curl operator using the central12 operator. Factor: @math{2h} - -@item curl4(Vx,Vy,Vz) -4th order curl operator using the central14 operator. Factor: @math{12h} - -@item curl(V) -2nd order curl operator on a 3D vector field (e.g.@: -@code{Array<TinyVector<float,3>,3>}), using the central12 operator. 
Factor: -@math{2h} - -@item curl4(V) -4th order curl operator on a 3D vector field, using the central14 operator. -Factor: @math{12h} -@end table - -Note that the above are available in normalized versions @code{curln} and -@code{curl4n}, which have factors of @math{h}. - -@subsection Divergence (@math{@nabla @cdot}) operators -@cindex divergence operator - -The divergence operators return a scalar value. - -@table @code -@item div(Vx,Vy) -2nd order div operator using the central12 operator. Factor: @math{2h} - -@item div4(Vx,Vy) -4th order div operator using the central14 operator. Factor: @math{12h} - -@item div(Vx,Vy,Vz) -2nd order div operator using the central12 operator. Factor: @math{2h} - -@item div4(Vx,Vy,Vz) -4th order div operator using the central14 operator. Factor: @math{12h} - -@item div2D(V) -2nd order div operator on a 2D vector field, using the central12 operator. -Factor: @math{2h} - -@item div2D4(V) -4th order div operator on a 2D vector field, using the central14 operator. -Factor: @math{12h} - -@item div3D(V) -2nd order div operator on a 3D vector field, using the central12 operator. -Factor: @math{2h} - -@item div3D4(V) -4th order div operator on a 3D vector field, using the central14 operator. -Factor: @math{12h} -@end table - -These are available in normalized versions -@code{divn}, @code{div4n}, @code{div2Dn}, @code{div2D4n}, @code{div3Dn}, and -@code{div3D4n} which have factors of @math{h}. - -@subsection Mixed partial derivatives -@cindex mixed partial operators - -@table @code -@item mixed22(A,dim1,dim2) -2nd order accurate, 2nd mixed partial derivative. Factor: @math{4h^2} - -@item mixed24(A,dim1,dim2) -4th order accurate, 2nd mixed partial derivative. Factor: @math{144h^2} -@end table - -There are also normalized versions of the above, @code{mixed22n} and -@code{mixed24n} which have factors @math{h^2}. 
- -@node Stencil customize, Stencil apply, Stencil operator, Stencils -@section Declaring your own stencil operators -@cindex stencil operators declaring your own - -You can declare your own stencil operators using the macro -@code{BZ_DECLARE_STENCIL_OPERATOR1}. For example, here is the declaration -of @code{Laplacian2D}: - -@example -BZ_DECLARE_STENCIL_OPERATOR1(Laplacian2D, A) - return -4*A(0,0) + A(-1,0) + A(1,0) + A(0,-1) + A(0,1); -BZ_END_STENCIL_OPERATOR -@end example - -To declare a stencil operator on 3 operands, use the macro -@code{BZ_DECLARE_STENCIL_OPERATOR3}. Here is the declaration of @code{div}: - -@example -BZ_DECLARE_STENCIL_OPERATOR3(div,vx,vy,vz) - return central12(vx,firstDim) + central12(vy,secondDim) - + central12(vz,thirdDim); -BZ_END_STENCIL_OPERATOR -@end example - -The macros aren't magical; they just declare an inline template function -with the names and arguments you specify. For example, the declaration of -@code{div} could also be written: - -@example -template<class T> -inline typename T::T_numtype div(T& vx, T& vy, T& vz) -@{ - return central12(vx,firstDim) + central12(vy,secondDim) - + central12(vz,thirdDim); -@} -@end example - -The template parameter @code{T} is an iterator type for arrays. - -You are encouraged to use the macros when possible, because the -implementation may change in the future. - -To declare a difference operator, use this syntax: - -@example -BZ_DECLARE_DIFF(central12,A) @{ - return A.shift(1,dim) - A.shift(-1,dim); -@} -@end example - -The method @code{shift(offset,dim)} retrieves the element at -@code{offset} in dimension @code{dim}. - -Stencil operator declarations cannot occur inside a function. If -declared inside a class, they are scoped by the class. 
- -@node Stencil apply, , Stencil customize, Stencils -@section Applying a stencil object -@cindex stencil objects applying - -The syntax for applying a stencil is: - -@example -applyStencil(stencilname(),A,B,C...,F); -@end example - -Where @code{stencilname} is the name of the stencil, and @code{A,B,C,...,F} -are the arrays on which the stencil operates. - -For examples, see @file{examples/stencil.cpp} and @file{examples/stencil2.cpp}. - -Blitz++ interrogates the stencil object to find out how large its footprint -is. It only applies the stencil over the region of the arrays where it -won't overrun the boundaries. - diff --git a/doc/arrays-storage.texi b/doc/arrays-storage.texi deleted file mode 100644 index 135ba7b8..00000000 --- a/doc/arrays-storage.texi +++ /dev/null @@ -1,345 +0,0 @@ - -@node Array storage, , Array I/O, Arrays -@section Array storage orders -@cindex Array storage formats -@cindex storage of arrays - -Blitz++ is very flexible about the way arrays are stored in memory. -Starting indices can be 0, 1, or arbitrary numbers; arrays can be stored in -row major, column major or an order based on any permutation of the -dimensions; each dimension can be stored in either ascending or descending -order. An N dimensional array can be stored in @math{N! 2^N} possible ways. - -Before getting into the messy details, a review of array storage formats is -useful. If you're already familiar with strides and bases, you might -want to skip on to the next section. - -@subsection Fortran and C-style arrays - -Suppose we want to store this two-dimensional array in memory: - -@example -[ 1 2 3 ] -[ 4 5 6 ] -[ 7 8 9 ] -@end example - -@unnumberedsubsubsec Row major vs. column major - -To lay the array out in memory, it's necessary to map the indices (i,j) into -a one-dimensional block. 
Here are two ways the array might appear in -memory: - -@example -[ 1 2 3 4 5 6 7 8 9 ] -[ 1 4 7 2 5 8 3 6 9 ] -@end example - -The first order corresponds to a C or C++ style array, and is called -@emph{row-major ordering}: the data is stored first by row, and then by -column. The second order corresponds to a Fortran style array, and is -called @emph{column-major ordering}: the data is stored first by column, and -then by row. - -The simplest way of mapping the indices (i,j) into one-dimensional memory is -to take a linear combination.@footnote{Taking a linear combination is -sufficient for dense, asymmetric arrays, such as are provided by the Blitz++ -@code{Array} class.} Here's the appropriate linear combination for row -major ordering: - -@example -memory offset = 3*i + 1*j -@end example - -And for column major ordering: - -@example -memory offset = 1*i + 3*j -@end example - -The coefficients of the (i,j) indices are called @emph{strides}. For a row -major storage of this array, the @emph{row stride} is 3 -- you have to skip -three memory locations to move down a row. The @emph{column stride} is 1 -- -you move one memory location to move to the next column. This is also known -as @emph{unit stride}. For column major ordering, the row and column -strides are 1 and 3, respectively. - -@unnumberedsubsubsec Bases - -To throw another complication into this scheme, C-style arrays have indices -which start at zero, and Fortran-style arrays have indices which start at -one. The first valid index value is called the @emph{base}. To account for -a non-zero base, it's necessary to include an offset term in addition to the -linear combination. Here's the mapping for a C-style array with i=0..2 and -j=0..2: - -@example -memory offset = 0 + 3*i + 1*j -@end example - -No offset is necessary since the indices start at zero for C-style arrays. 
-For a Fortran-style array with i=1..3 and j=1..3, the mapping would be: - -@example -memory offset = -4 + 3*i + 1*j -@end example - -By default, Blitz++ creates arrays in the C-style storage format (base zero, -row major ordering). To create a Fortran-style array, you can use this -syntax: - -@example -Array<int,2> A(3, 3, FortranArray<2>()); -@end example - -The third parameter, @code{FortranArray<2>()}, tells the @code{Array} -constructor to use a storage format appropriate for two-dimensional Fortran -arrays (base one, column major ordering). - -A similar object, @code{ColumnMajorArray<N_rank>}, tells the @code{Array} constructor -to use column major ordering, with base zero: - -@example -Array<int,2> B(3, 3, ColumnMajorArray<2>()); -@end example - -This creates a 3x3 array with indices i=0..2 and j=0..2. - -In addition to supporting the 0 and 1 conventions for C and Fortran-style -arrays, Blitz++ allows you to choose arbitrary bases, possibly different for -each dimension. For example, this declaration creates an array whose -indices have ranges i=5..8 and j=2..5: - -@example -Array<int,2> A(Range(5,8), Range(2,5)); -@end example - -@subsection Creating custom storage orders - -@cindex storage order, creating your own -@cindex Array storage order, creating your own - -All @code{Array} constructors take an optional parameter of type -@code{GeneralArrayStorage<N_rank>}. This parameter encapsulates a complete -description of the storage format. If you want a storage format other than -C or Fortran-style, you have two choices: - -@itemize @bullet - -@item You can create an object of type -@code{GeneralArrayStorage<N_rank>}, customize the storage format, and use -the object as an argument for the @code{Array} constructor. - -@item You can create your own storage format object which inherits from -@code{GeneralArrayStorage<N_rank>}. This is useful if you will be using the -storage format many times. This approach (inheriting from -@code{GeneralArrayStorage<N_rank>}) was used to create the -@code{FortranArray<N_rank>} objects. 
If you want to take this approach, you -can use the declaration of @code{FortranArray<N_rank>} in -@code{<blitz/array.h>} as a guide. - -@end itemize - -The next sections describe how to modify a -@code{GeneralArrayStorage<N_rank>} object to suit your needs. - -@unnumberedsubsubsec In higher dimensions - -In more than two dimensions, the choice of storage order becomes more -complicated. Suppose we had a 3x3x3 array. To map the indices (i,j,k) into -memory, we might choose one of these mappings: - -@example -memory offset = 9*i + 3*j + 1*k -memory offset = 1*i + 3*j + 9*k -@end example - -The first corresponds to a C-style array, and the second to a Fortran-style -array. But there are other choices; we can permute the strides (1,3,9) any -which way: - -@example -memory offset = 1*i + 9*j + 3*k -memory offset = 3*i + 1*j + 9*k -memory offset = 3*i + 9*j + 1*k -memory offset = 9*i + 1*j + 3*k -@end example - -For an N dimensional array, there are N! such permutations. Blitz++ allows -you to select any permutation of the dimensions as a storage order. First -you need to create an object of type @code{GeneralArrayStorage<N_rank>}: - -@example -GeneralArrayStorage<3> storage; -@end example - -@code{GeneralArrayStorage<N_rank>} contains a vector called @code{ordering} -which controls the order in which dimensions are stored in memory. The -@code{ordering} vector will contain a permutation of the numbers 0, 1, ..., -N_rank-1. Since some people are used to the first dimension being 1 rather -than 0, a set of symbols (firstDim, secondDim, ..., eleventhDim) is -provided which makes the code more legible. - -The @code{ordering} vector lists the dimensions in increasing order of -stride. You can access this vector using the member function -@code{ordering()}. A C-style array, the default, would have: - -@example -storage.ordering() = thirdDim, secondDim, firstDim; -@end example - -meaning that the third index (k) is associated with the smallest stride, and -the first index (i) is associated with the largest stride. 
A Fortran-style -array would have: - -@example -storage.ordering() = firstDim, secondDim, thirdDim; -@end example - -@unnumberedsubsubsec Reversed dimensions - -To add yet another wrinkle, there are some applications where the rows or -columns need to be stored in reverse order.@footnote{For example, certain -bitmap formats store image rows from bottom to top rather than top to -bottom.} - -Blitz++ allows you to store each dimension in either ascending or descending -order. By default, arrays are always stored in ascending order. The -@code{GeneralArrayStorage} object contains a vector called -@code{ascendingFlag} which indicates whether each dimension is stored -ascending (@code{true}) or descending (@code{false}). To alter the contents -of this vector, use the @code{ascendingFlag()} method: - -@example -// Store the third dimension in descending order -storage.ascendingFlag() = true, true, false; - -// Store all the dimensions in descending order -storage.ascendingFlag() = false, false, false; -@end example - -@unnumberedsubsubsec Setting the base vector - -@code{GeneralArrayStorage} also has a @code{base} vector which -contains the base index value for each dimension. By default, the base -vector is set to zero. @code{FortranArray} sets the base vector to -one. - -To set your own set of bases, you have two choices: - -@itemize @bullet - -@item You can modify the @code{base} vector inside the -@code{GeneralArrayStorage} object. The method @code{base()} returns -a mutable reference to the @code{base} vector which you can use to set the -bases. - -@item You can provide a set of @code{Range} arguments to the -@code{Array} constructor. 
- -@end itemize - -Here are some examples of the first approach: - -@example -// Set all bases equal to 5 -storage.base() = 5; - -// Set the bases to [ 1 0 1 ] -storage.base() = 1, 0, 1; -@end example - -And of the second approach: - -@example -// Have bases of 5, but otherwise C-style storage -Array A(Range(5,7), Range(5,7), Range(5,7)); - -// Have bases of [ 1 0 1 ] and use a custom storage -Array B(Range(1,4), Range(0,3), Range(1,4), storage); -@end example - -@unnumberedsubsubsec Working simultaneously with different storage orders - -Once you have created an array object, you will probably never have to worry -about its storage order. Blitz++ should handle arrays of different storage -orders transparently. It's possible to mix arrays of different storage -orders in one expression, and still get the correct result. - -Note however, that mixing different storage orders in an expression may -incur a performance penalty, since Blitz++ will have to pay more attention -to differences in indexing than it normally would. - -You may not mix arrays with different domains in the same expression. For -example, adding a base zero to a base one array is a no-no. The reason for -this restriction is that certain expressions become ambiguous, for example: - -@example -Array A(Range(0,5)), B(Range(1,6)); -A=0; -B=0; -using namespace blitz::tensor; -int result = sum(A+B+i); -@end example - -Should the index @code{i} take its domain from array @code{A} or array -@code{B}? To avoid such ambiguities, users are forbidden from mixing arrays -with different domains in an expression. - -@unnumberedsubsubsec Debug dumps of storage order information - -In debug mode (@code{-DBZ_DEBUG}), class @code{Array} provides a member -function @code{dumpStructureInformation()} which displays information about -the array storage: - -@example -Array A(3,7,8,2,FortranArray<4>()); -A.dumpStructureInformation(cerr); -@end example - -The optional argument is an @code{ostream} to dump information to. 
It -defaults to @code{cout}. Here's the output: - -@smallexample -@include examples/dump.out -@end smallexample - -@unnumberedsubsubsec A note about storage orders and initialization - -When initializing arrays with comma delimited lists, note that the array is -filled in storage order: from the first memory location to the last memory -location. This won't cause any problems if you stick with C-style arrays, -but it can be confusing for Fortran-style arrays: - -@example -Array<int,2> A(3, 3, FortranArray<2>()); -A = 1, 2, 3, - 4, 5, 6, - 7, 8, 9; -cout << A << endl; -@end example - -The output from this code excerpt will be: - -@example -A = 3 x 3 - 1 4 7 - 2 5 8 - 3 6 9 -@end example - -This is because Fortran-style arrays are stored in column -major order. - -@subsection Storage orders example - -@smallexample -@include examples/storage.texi -@end smallexample - -And the output: - -@smallexample -@include examples/storage.out -@end smallexample - diff --git a/doc/arrays-types.texi b/doc/arrays-types.texi deleted file mode 100644 index d80f5d26..00000000 --- a/doc/arrays-types.texi +++ /dev/null @@ -1,22 +0,0 @@ - -@node Array types, Array ctors, Array intro, Arrays -@section Public types - -The @code{Array} class declares these public types: - -@itemize @bullet - -@item @code{T_numtype} is the element type stored in the array. For -example, the type @code{Array<double,2>::T_numtype} would be @code{double}. - -@item @code{T_index} is a vector index into the array. The class -@code{TinyVector<int,N_rank>} is used for this purpose. - -@item @code{T_array} is the array type itself -(@code{Array<T_numtype,N_rank>}) - -@item @code{T_iterator} is an iterator type. NB: this iterator is not -yet fully implemented, and is NOT STL compatible at the present time. 
- -@end itemize - diff --git a/doc/arrays-usertype.texi b/doc/arrays-usertype.texi deleted file mode 100644 index afb372c8..00000000 --- a/doc/arrays-usertype.texi +++ /dev/null @@ -1,42 +0,0 @@ - -@node Array usertype, , Array multi, Customized Arrays -@section Creating arrays of a user type -@cindex Array of your own types - -You can use the @code{Array} class with types you have created yourself, or -types from another library. If you want to do arithmetic on the array, -whatever operators you use on the arrays have to be defined on the -underlying type. - -For example, here's a simple class for doing fixed point computations in the -interval [0,1]: - -@smallexample -@include examples/fixed-point.texi -@end smallexample - -The function @code{huge(T)} returns the largest representable value for type -T; in the example above, it's equal to @code{UINT_MAX}. - -The @code{FixedPoint} class declares three useful operations: conversion -from @code{double}, addition, and outputting to an @code{ostream}. We can -use all of these operations on an @code{Array} object: - -@smallexample -@include examples/fixed.texi -@end smallexample - -Note that the array @code{A} is initialized using a comma-delimited list of -@code{double}; this makes use of the constructor @code{FixedPoint(double)}. -The assignment @code{B = A + 0.05} uses -@code{FixedPoint::operator+(FixedPoint)}, with an implicit conversion from -@code{double} to @code{FixedPoint}. Formatting the array @code{B} onto the -standard output stream is done using the output operator defined for -@code{FixedPoint}. 
- -Here's the program output: - -@smallexample -@include examples/fixed.out -@end smallexample - diff --git a/doc/blitz.gif b/doc/blitz.gif deleted file mode 100644 index d59a78fdec3f8e729d896de67c7b283f8df80244..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3875 zcmV+;58UuaNk%w1VdVjJ0OJ4v|KI@szySZC0RMOZ|4;z`C;M)j$~<` zXsWJk>%MR-&vY#V2#(u)@BhFRA~z%=g2<%OV{Go0(5Q4OBI>Nzt2WEsda=~7t=kow z&uHLxO=i2>@VA_shQ;Icync@Y==^|xfr5cKgM@&FgNlWRh<=QTj**a)gM1Z>mWPyt zn4h3wI73ewbEkKxs;R7}XREMBJXfK$wiI+2j=Q|QzQ4f1!o$5%#K(TKySL0^TP9j9 zvD2;9)NV`G+5k0g&EGMeCy6tSoST~G>gnv}?UL)}N$BqN@POU+kl_3*U@1CiARCx$ z1i^y^jUh}3V_3h3eHc#ol>wi?0Dv(1(FlN}$Bh0UH->Z(vSZ1QB2$_a83yIbmmkZi zbcqq7O%g;hl-ZCY-@;%welDXzP^Pe(NI^)n5UIn`hA>Y`omtWb)vHmjPRYtuqfDAr zd3a5_mD`3u9-h7sN>LeGi%8N=n> zvQEuLwki_iN6R2FSB@G$jY-X)Ig9SRWbz_$~c&g zK=K%5jzaQ?M3F=$X=8s%2H9Mb&n;;Ii~(-g-F`e~spXbjc4?%2UWVx6eze&~^Zo(S`RJk#p`w=Z<@UY3H805lLpBd!`uVoJrN`VrmwqSLUK!+Q8_ejp_+$ zm}EkFCZ$hqxn`CF_UP%5o;vy|lA|_R;}WH&s$+wowpw1RU#6I3c&BPG>7=6GYALF^ zjyV{vHZ6LWnzwPfo~g*H>SKq!{yqvtvyu7$Y_Mpav8)Kh&KPT_&|*r100DqoBe>-9 zSgyF?ri<>m*J{V^xayi4FTCLX`fj}F_E#>w>8fyVxg+$8Lb~v-N$!ivCJSnm-In=m z!`q5A?L!Wyuq(o~o{%jJY;tfg3-$WN?#TF-OmfIcoNV&SAG^ykzCf@GgUT&q0B{C0 zznrtbFsF9&1{tpqw6+lYs$kLW63wV~Mk|N3tw?t~u%^uR`XyTeCg()=;}G_t|h;+%?cBSMc`UbDJwc(j@?%Le7NitVH33H;8z+ zGb}E`;*j@@fzFhZY(nJzCr^&~%Mu7+OSl4%8#?HsmrnZWp_^`B>Zp&-`s=QH9&_ua zryc|BwZ|@d2f6o7`|r44t2^++2Os(d#V7B(@yB04{PM|vvpM7nP9JX&k$1Sc2iE^= zLgnDsjC1(pdw*f~B|rBZ`ZQNxe)@Ph-h%ZT=Iwm@8hcOuxxBxR`33Bg+|^#NI~fE} zWk+J);lih!?lq8QgJWFa_}88Ta;^dj)Ib42wm#)O?|DdI9`wZb0}-~)Umi5!22YT} z6+TXaBsg9DS~x=D0kCuclpzQNhbQpd$Edhv^3Y+et-NJcM4p^Rp{92h%?MlLe3j3^M}|@rxAr;~T^1L=yC|j%l>O8U;y6NDdN>HMry+$EZm$ zC~^do#NZ|u8Ojnqa%Pc4WhG}gMIdHUiBxi zs7W~Dv6#lB6gbu)C|6hJt~>CJM2Q#yfI!Ynrqh7&EI~KBi9CX8aG>-|0YVqbPKG+P zp%C4t3;rHTQE`4Eq8LRe4Jx`&ka4tx9kqc&XAshPW)q6Q)MhEII7nMG6QGzRB_gM3 zI~%O@nl&YXK?|Bt3G6fm9L=augG$t)8dajrDC$7JRa6!P)dvyf!9!)hP!>WFsT{!S z3-q~8uEz78T@CA3y$aT`_EZXHO=}Iv`qROAb&U9&<~R*FR}*LzXKmGBTzVh|2 
zevPUp`MS}*&hMkM)Tj?edb^SWQwo5MY+xlzS&TwqvKFPPUp;En#AX(x5Y_BbrJC0e z4&b!nMD1w@s@m4F_O+>n?P_C7+u7RIwYA0VZDp(5yY^tWxg9N4dHdQH0C%>DJuUWb z{%aHAE}^oNjU#k7`vbu4)vzmg>RKC$gsL`IxlvfHc*jfL@|qXC8$GXj*Sp%qvRAw$ zeXnl?%HFlMx4Y!MiF1`OUBue7zm}EiWa*0C6&#hR>^z)fAA(x69=Er|&24od9NY>c zce@WRu4pMdU!6u6y)bC)2pS9<73^079X7FiR}9}Euvoq#W$|#|YuxqD7{nmBZ;4Ut z0Uc*m1_UT?kjXpb@fKOULr#K_mrUgR#&@+xhO&<*I)Esj_Q*9xvXfs>WcsFf$3E>b zj(H5!7r*$qWXRYns-uhII^ZP3so$8p<_R^#Oie>sg07(U#71h>M+TVmG_dq;2^4&86NS6%evthulQCguJK(b zJl9#~c!xp0fr)?Ix6*l??{Hrk;zma?(Yw8LqhGt~cAhrSKhW-Nt9-;h*Eyj-cA)@y)o7Z9NCHLvpB*S&Xi_x;>|$2%1G&hEhLz1$P^d*KmZ^t&(K zx9bLh;T=!-b8|iIbKkb(H!pY+be;vA554F|PkNb?{`8#ZGU`+B?$zJj0<4$3>toOL z%=6RV+rBy4e@^e+8yxh$?>5{A?t6iYVCFY5fbwm!`8~}3`LtgG^h61J9C7*ui zOYnT8DF6Eq8N2f*PyWaIo$QoXeB~LR^YjDW_!c;P`SCXa$(W0pSOdoB zi_M6NwdjAd_=*=0g0VP_)tHK^XaUt|jo0{%v?z|l^09gU$7>NH!i#`~S?0AYVu#gJriAdm(>IjdY zSdSWDi}uKh+qjPgNs#53kqCJJ1=*1v8ImN)j~~f>68Vr5nTag#y78y+mBcuYQOS!nX_KFT zlL9G}LHAi$Vw*^qE)m#KM|cKL{_`I?t`3z=D#T^X1_8IIiunxWa3BtV)UV48;5 zn3Xx4mPrD{Ih?WC46~V-wON>gX_6#qlw|3gh-sT4pq9buYk2vZ+u5C}>7A?@p45uPOlGLf5 z)>)bb8U+Pl0L;jsx#*w^+Mp8ppc5LQ6ndc+nxPQ7p%uEHchR5B`JV?GpwT&?&pDbV z$^i_Tpi8i#5$d8a`l2!#qcl3BHd><$>Y=_@cE?E37xqqqyzb*2P%sZY6duZqg=YBUP_-#+N9D6qy}o6W+|YgNsVbrpn?da zQ&6Eu388VilyVBE4H~8vxt#4;rp|e$p9!GPX{Jzm0^hfw%D13N(4~s{r8$bHI;y9G z`jyierQTD|)D( zI;)jPqJ_DfUOA?k$&)}Tj(jSRp}?t#N~^@mjXNopRVt~5S)I8Gnv|NXKv|?x5UW>O ztkz1Z-`Jkn%BRp8sN9;S7|E>FdadNjpVC?a^4XaMI`Jj3 l`+@j6u-m$^Ae*PVi3B4XvL<`7D4Vh>yRt0XvKAQu06QU3vS9!K diff --git a/doc/blitz.texi b/doc/blitz.texi deleted file mode 100644 index d4e6fa8d..00000000 --- a/doc/blitz.texi +++ /dev/null @@ -1,295 +0,0 @@ -\input texinfo @c -*-texinfo-*- -@c %**start of header -@setfilename blitz.info -@include version.texi -@settitle Blitz++ -@setchapternewpage odd -@finalout -@iftex -@afourpaper -@end iftex -@c %**end of header - -@dircategory Blitz++ library -@direntry -* Blitz++: (blitz++). 
High-performance C++ numeric library -@end direntry - - -@macro faq{question} -@strong{@bullet{} \question\} -@end macro - -@c kludge workaround from Karl Berry for math in @subsection -@ifnottex -@macro nabla -\\nabla -@end macro -@macro times -\\times -@end macro -@macro cdot -\\cdot -@end macro -@end ifnottex - -@titlepage -@title Blitz++ User's Guide -@subtitle A C++ class library for scientific computing -@subtitle for version @value{VERSION}, @value{UPDATED} -@author Todd Veldhuizen -@page -@vskip 0pt plus 1filll -@include copyright.texi -@end titlepage - -@summarycontents -@contents -@page - -@ifnottex -@node Top, , , (DIR) -@top Top -@end ifnottex - -@menu -* Introduction:: Introduction -* Arrays:: Arrays -* Array Expressions:: Array Expressions -* Stencils:: Stencils -* Customized Arrays:: Multicomponent, complex, and user type Arrays -* Indirection:: Indirection -* TinyVector:: TinyVector -* Parallel Computing:: Parallel Computing with Blitz++ -* Random Number Generators:: Random Number Generators -* Numeric properties:: Numeric properties -* FAQ:: Frequently Asked Questions -* Keyword Index:: Blitz Keyword Index -* Concept Index:: Concept Index - -@detailmenu - --- The Detailed Node Listing --- - -General considerations - -* about:: About this document -* platforms:: Platform/compiler notes -* download:: How to download Blitz++ -* install:: Installation and porting -* compiling:: Compiling with Blitz++ -* legal:: Licensing terms -* help:: Mailing lists and support - -The Blitz++ Array class - -* Array intro:: Getting started -* Array types:: Public types -* Array ctors:: Constructors -* Array slicing:: Indexing, subarrays, and slicing -* Array debug:: Debug mode -* Array members:: Member functions -* Array globals:: Global functions -* Array I/O:: Inputting and Outputting Arrays -* Array storage:: Array storage orders - -Writing expressions with the Array class - -* Expression evaluation:: Expression evaluation order -* Index placeholders:: Index 
placeholders -* Math functions 1:: Single-argument math functions -* Math functions 2:: Two-argument math functions -* User et:: Declaring your own math functions on arrays -* Where expr:: where statements - -Array Stencils - -* Stencil object:: Declaring stencil objects -* Stencil operator:: Stencil operators -* Stencil customize:: Declaring your own stencil operators -* Stencil apply:: Applying a stencil object - -Customization of the Array class - -* Array multi:: Multicomponent and complex arrays -* Array usertype:: Creating arrays of a user type - -Array indirect addressing - -* Indirection position list:: Indirection using lists of array positions -* Indirection Cartesian product:: Cartesian-product indirection -* Indirection strip list:: Indirection with lists of strips - -The Blitz++ TinyVector class - -* TinyVec params:: Template parameters and types -* TinyVec ctors:: Constructors -* TinyVec members:: Member functions -* TinyVec assignment:: Assignment operators -* TinyVec exprs:: Expressions -* TinyVec globals:: Global functions -* TinyVec arrays:: Arrays of TinyVector -* TinyVec io:: Input/output - -Parallel Computing with Blitz++ - -* Thread safety:: Blitz++ and thread safety - -Random Number Generators in Blitz++ - -* RNG overview:: Overview -* RNG seeding:: Seeding a random number generator -* RNG details:: Detailed description of RNGs -* RNG params:: Template parameters -* RNG members:: Member functions -* RNG listings:: Detailed listing of RNGs - -Numeric properties functions in Blitz++ - -* Numeric limits:: Introduction -* Numeric functions:: Function descriptions - -@end detailmenu -@end menu - -@node Introduction, Arrays, , Top -@chapter Introduction -@menu -* about:: About this document -* platforms:: Platform/compiler notes -* download:: How to download Blitz++ -* install:: Installation and porting -* compiling:: Compiling with Blitz++ -* legal:: Licensing terms -* help:: Mailing lists and support -@end menu -@include about.texi -@include 
platforms.texi -@include download.texi -@include install.texi -@include compiling.texi -@include legal.texi -@include help.texi - -@node Arrays, Array Expressions, Introduction, Top -@chapter Arrays -@cindex Array -@findex Array -@menu -* Array intro:: Getting started -* Array types:: Public types -* Array ctors:: Constructors -* Array slicing:: Indexing, subarrays, and slicing -* Array debug:: Debug mode -* Array members:: Member functions -* Array globals:: Global functions -* Array I/O:: Inputting and Outputting Arrays -* Array storage:: Array storage orders -@end menu -@include arrays-intro.texi -@include arrays-types.texi -@include arrays-ctors.texi -@include arrays-slicing.texi -@include arrays-debug.texi -@include arrays-members.texi -@include arrays-globals.texi -@include arrays-io.texi -@include arrays-storage.texi - -@node Array Expressions, Stencils, Arrays, Top -@chapter Array Expressions -@menu -* Expression evaluation:: Expression evaluation order -* Index placeholders:: Index placeholders -* Math functions 1:: Single-argument math functions -* Math functions 2:: Two-argument math functions -* User et:: Declaring your own math functions on arrays -* Where expr:: where statements -@end menu -@include arrays-expr.texi - -@node Stencils, Customized Arrays, Array Expressions, Top -@chapter Stencils -@menu -* Stencil object:: Declaring stencil objects -* Stencil operator:: Stencil operators -* Stencil customize:: Declaring your own stencil operators -* Stencil apply:: Applying a stencil object -@end menu -@include arrays-stencils.texi - -@node Customized Arrays, Indirection, Stencils, Top -@chapter Multicomponent, complex, and user type Arrays -@menu -* Array multi:: Multicomponent and complex arrays -* Array usertype:: Creating arrays of a user type -@end menu -@include arrays-multi.texi -@include arrays-usertype.texi - -@node Indirection, TinyVector, Customized Arrays, Top -@chapter Indirection -@menu -* Indirection position list:: Indirection using 
lists of array positions -* Indirection Cartesian product:: Cartesian-product indirection -* Indirection strip list:: Indirection with lists of strips -@end menu -@include arrays-indirect.texi - -@node TinyVector, Parallel Computing, Indirection, Top -@chapter TinyVector -@menu -* TinyVec params:: Template parameters and types -* TinyVec ctors:: Constructors -* TinyVec members:: Member functions -* TinyVec assignment:: Assignment operators -* TinyVec exprs:: Expressions -* TinyVec globals:: Global functions -* TinyVec arrays:: Arrays of TinyVector -* TinyVec io:: Input/output -@end menu -@include tinyvector.texi - -@node Parallel Computing, Random Number Generators, TinyVector, Top -@chapter Parallel Computing with Blitz++ -@menu -* Thread safety:: Blitz++ and thread safety -@end menu -@include parallel.texi - -@node Random Number Generators, Numeric properties, Parallel Computing, Top -@chapter Random Number Generators -@menu -* RNG overview:: Overview -* RNG seeding:: Seeding a random number generator -* RNG details:: Detailed description of RNGs -* RNG params:: Template parameters -* RNG members:: Member functions -* RNG listings:: Detailed listing of RNGs -@end menu -@include random.texi - -@node Numeric properties, FAQ, Random Number Generators, Top -@chapter Numeric properties -@menu -* Numeric limits:: Introduction -* Numeric functions:: Function descriptions -@end menu -@include numinquire.texi - -@node FAQ, Keyword Index, Numeric properties, Top -@chapter Frequently Asked Questions -@include faq.texi - -@node Keyword Index, Concept Index, FAQ, Top -@unnumbered Blitz Keyword Index -@printindex fn - -@node Concept Index, , Keyword Index, Top -@unnumbered Concept Index -@printindex cp - -@c --------------------------------------------------------------------- -@c Epilogue -@c --------------------------------------------------------------------- - -@bye diff --git a/doc/blitztiny.jpg b/doc/blitztiny.jpg deleted file mode 100644 index 
dd726835f5ac79d0714f47ac058e1f6be89951d0..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6006 zcmbW&cTf{fp9kR3yGjS8_bN5?A}U?rM{fb7NeR7oL_v@$T>=E@gceF5bfkqUh=Cv= zT{=N}FXwxAb2oE;-Q8zrcV}n6vop`^&g}iq`(?l*Ej3Lw01gfSfb+Kj?(YB^q@*tl zR8@@h4CL6%OxR2ueSBR!J=i3kii)za88`;Wu}O$Xh>FNcu(9d;+q${<1+yu#Ndtx0 z4Bb6F{hVDqoc^vwN$=+YF95`ZghYe{#6(0yBqYS7;vUFh6Eu zV&i(s%f=zZ$;8Ab%`YS>CMhY&$}6iVBd+jNLQ?!+hv1NqkdTv+Gf+@4h_f@Xi~pZ< z{}n(<4A2Iw;Nfrpa4B){C~@xl0jz(;1UUa5z<&k@7Z0C+kcgOsl@e4=-rKDwK*4x)VfEpYc9{GWun#RohoSj=)UHiSh zvAMOqb9i)oa(Z@tae4JG7Y+dLKUjaq|A74u7v*0UE zm5M%*tv5A?coZ?s^VGun9uiIog9BPSpGi_WF3A<{!++8KUG~3&z59R3{tNcst~mfX z9?sv(!=nT!1CBqI`UU)6ZE(aaFlHKmWGQRn_2LoSH7@;*xpEuG^xa489p~iD{A}gHWYwhk70@9msl{ zoy&EN%jBweA?MMj3dr7^?(X;49TVnR+a1B8nGWR*FsYxPQ}ZXhm^Tg%-S&1!pn8ksYub)Y)ekS zsBGiM>~=&+Ko^75h4v7M$oEvsx*!~U>(_A_HCLv!!?xfGi}GAgq|ZA%a`D^n?W{RF z^Rll#@24$PEc;%CK>2Zq0wk1Rw#9UF7ydxsEzMa8mty1f6^O&M28@_W-j;rt;v>m0eo< z3M++bB3$6+X)W&A&g$yQ*#0%rkGItLTR2dtc`Acj`bgF~TtM{9XmtV69+DqXzZyz! 
z$dc=haEkS4nxL>dmpx8dH*&7M3Uc*Z8_8Y_{+u{NZw(rE4{>%Ks0G27aAIq{z-mh- z@&8EhptlBT$0lFa9b{|MzmOR)2pP=@&JS)CKDSJ94KtM56I-5PX^>;862GO;V?rj2 zr%2%ZqBiT`Xx=Q>i+hs!i}@yJR!Uu#H6PTRHODZj0kwy|J*f@VM2q|x*@Hgs%a4W3 zxJw%#-4qdtL|(15JtmLzQb!H94lag`QzXBv=1Aui91lAmr{4t&tFL#TKTZ+82eeG* zr*2ziSWpXp2l(jGE(JyQN*ng`B#UdD)re5sD5V>qv8M_EMm#|1p*4Kvniz2{hKJMM zjp?|$Kw4LOo#ex(_rhwGrOIOxGl_z23}a&T(&nZtL31iz)w8n-w!P4PT)AUwr&Sx$ z>Im6*)X2CYN6ax}#A=%KZ-+Go+d1a%&@e8c_)1&4SS5Q-%LdQ zgTl*_hf}_bknkn9awh zW=NJb)96s7mzlq2-&3;KAAhLy$AWSYJVsey6QAnY@AQ)sY=?xu%RPYj#594AQfqAe z^IGBjtw4Vl*N@_+)&gw38`5e#@PWMjjxOLFcw(gZuCEu!RV)SHg!_(Er3{9@F?gSu zK5x>6P2s!kQYBMH5 z^ULHwOCw>?nJUZF!!i@h6JH2B%jSr&?smvHJDE=$*c`j#n@{P^_asfRN4P@K#{5tp%8o z9LQCpoS>%|iAPSWiSE-Xo^|g!L+%9R3+5iKx{3#h=v(n~r5(FwXiSWKmCM4LStm-q zm7mcx`U4b{7b45aWJv0fYgY7<51nbXLP-`#Xa+;ANVI_Vd{u})o-als7*ZIlE~3h& z{pNGYh&ksBg62ejJ;L40I+a~ZDGjA$SJTrfECJI&7EANAo=&K3Apx}2pG zdwA9qX4EdHX;t=zEJHK%N&R33x6^4MmrquAru$9K*Ia~!-6Nb>59Rw=4^o1{%i>9x z*G(=bd-w40IN5&xP9zVxQ?glBj#E9&vCvi(8|$GBtr+G?|8=A+@JGS(LCWfUdDDk& z9)~1@7f-yNamP(WcHRZoa_WA+0j_AQK#vakD(LKmtY5}F`xUVAZuzDKo-@$FRcawX zpMMn*8GIr`Dj>;8Qu8>_B=G2x<c@wWFbj(d#D>t0+rR3v=Wu_HA3*6Iw_Rnlbwgj?6?vq!0E?Lf7{$i>8 zeRMi3A#L8+@p3=-e3p0f+&yaX%eIws!dN0L=)Kuo8RRV2QNS3qmKuNM>OWOFG9mpW zVbUs9q3Vs$1&#xOQm1 z@w>1A)%b`d*BmNp&{IcK9?6}+2NYF2G zS5yq~>?I3w8BQ`Pp&CxLpVW15Lc7(rUJ5PvGP?dU_iRmOqdH8XF4Ts^k`eu26bep1 z?5e<=8}`GGD2L!_1K+C*ykNW$Z?IbhJHbC{44FU6>4&JE963RQKct9Giy%_8lzu1* z&irbGF+8GSybo)37y2zf^5kJ9qH|w4&32mI1;{XZdj@%@k6eA7q(|Kx~4|a za|tb6)}El)#vF~7jj^nt!QVLFymB~vpH#d>=p{E3Tzrv61{w}g5ma-hyDg(q59c61Fs=f=hrOW08o%ibI8h*-1$n1tss{3ImoH9OT+FNj z>RjetQmF;DwtU9R0(ligjYcj3BbU2#q|#xo)F6-JH)bZ{O8Z~U@~$sG4-(lj7h=SQwYr>mLrAnVna2l@GJi<&DdeB=BR!GPj^9T?tZ%r1W}Q{Yq!bHc%L=`3k5fRj>c&VV5rmp3HL{ zP(hHC6c@;*?RUVb(Z_&Vl|bWG&Y3Zs{3Pzo!vBaXWcv|-6NPDLN_s9YC|gdid3 z-xq6MDo{1AQLlpew7d5lbEyx=s70H&Q%xm120K&e{urn|9L8V05UFLw!u%x=mW5 zzGpyLkjY16z+Gh8WqwPduHeH_Ekr9Eyj9)Nu@vBC&2X?8(;-|k&(kQJX7!51q|M*z z=Lr#-^SCVNT=n4m|jY(~? 
zVUq&Mwyt-s>ID@-G#o5`bmz&ev>9b@p={9MNUi(8Tl79J3S{=G7sy}f!fTAI={9sz zu#c;GoCXtI0*7`VpgISm!iYS-m!`pIFAoGs=sTMX2{%gC?l#Gbmyj3yKCUtmAiR2);*!K5Cq$hc7VQ9w0 zI^_F>8}=2&|?iCvkoQN1LL>-DY6x7 zZ3V7S33QB@ISwHhF$?c5={9)w`yS4AVd3 zxx1+92rV!jzhE{A`R0r=`>SCGS*|$i%Sbk6(%Kl*G$$ z)Bk!=vU(mZT27_bwW}EkJF|r<5nF%Xcf#W~Vtea#))p9o`v*XJxg5rEo+u?(rwyS3QSnEZXIEbj| z>@tQv&eLY^jgsgd(dk{Lg(T7@V;yi(;m@Z!!WMHb4#lYrG2-v7!3?TuJ(c7g$(GnN z{^7ww0yXLp_+sCExr%i6w~Wn5TJh1Gd#LaL42{*!_~h{V{dX3yuilYxX68SHikg4^ zN^}6B-uzJ2f& z=l#$tc|D_UAQDUO?^qwZQC9F!$K!UC6u27>v*%)OGpD@@Jurm zA|An_B=*C>eix|VsCO<4ecqS`AhW!IgS6|DQH7+krXF-vY`Uqwodf=A~O|j=h#>xvTSvxg}j| zdrkFv+xYl+ljgCU=i*`CTovjVNJaU`OKfAr;~kI1ekS@9L-6u8a#e9c-LzgkRr1^X z`NNLuIzEzGe^NEZKXhK-#}{JQ)XE8Qry&@0z!{K==K~NCmZ|Hl+6+xe{xglrRb6js zPRWc;-*#g#f7G+ xIh)HTfX4t#8b2=M+_NYQ&y~G{X`MG-FMJg9{QH2rE&g*#j^C2=+l#!P{WoGOys-cP diff --git a/doc/compiling.texi b/doc/compiling.texi deleted file mode 100644 index 3a09e310..00000000 --- a/doc/compiling.texi +++ /dev/null @@ -1,75 +0,0 @@ - -@node compiling, legal, install, Introduction -@section Compiling with Blitz++ - -@subsection Header files -@cindex header files, convention -@cindex @file{blitz} header files - -Blitz++ follows an X-windows style convention for header files. All headers -are referred to with a prefix of @file{blitz}. For example, to use the -@code{Array} class, one needs to include @code{} instead -of just @code{}. To make this work, the main Blitz++ directory -must be in your include path. For example, if Blitz++ was installed in -@file{/software/Blitz++}, you will need to compile with @code{-I -/software/Blitz++}. - -If you have root privileges, you may want to put in a symbolic link from the -standard include path (e.g. @file{/usr/include/blitz/}) to the @code{blitz} -directory of the distribution. This will allow you to omit the @code{-I -...} option when compiling. 
- -@subsection Linking to the Blitz++ library -@findex libblitz.a -@cindex library (@file{libblitz.a}) - -The Blitz++ library file @file{libblitz.a} contains a few pieces of global -data. You should ensure that the @file{lib} subdirectory of the Blitz++ -distribution is in your library path (e.g. -@code{-L/usr/local/blitz-0.5/lib}) and include @code{-lblitz} on your -command line. If you use math functions, you should also compile with -@code{-lm}. - -@subsection An example Makefile -@cindex makefile, example - -Here is a typical skeletal Makefile for compiling with Blitz++ under gcc: - -@smallexample -@include examples/makefile.example -@end smallexample - -There are more example makefiles in the examples, testsuite, and benchmarks -directories of the distribution. - -@subsection Explicit instantiation -@cindex explicit instantiation -@cindex Array explicit instantiation - -It is not possible to do explicit instantiation of Blitz++ arrays. If you -aren't familiar with explicit instantiation of templates, then this fact -will never bother you. - -The reason is that explicit instantiation results in all members of a class -template being instantiated. This is @strong{not} the case for implicit -instantiation, in which only required members are instantiated. The -@code{Array} class contains members which are not valid for all types -@code{T}: for example, the binary AND operation @code{&=} is nonsensical if -@code{T=float}. If you attempt to explicitly instantiate an array class, -e.g. - -@code{template class Array;} - -then you will be rewarded with many compile errors, due to methods such as -@code{&=} which are nonsensical for @code{float}. - -As some consolation, explicit instantiation would not be much help with -Blitz++ arrays. The typical use for explicit instantiation is to -instantiate all the templates you need in one compilation unit, and turn off -implicit instantiation in the others -- to avoid duplicate instantiations -and reduce compile times. 
This is only possible if you can predict ahead of -time what needs instantiation. Easy for simple templates, but impossible -for classes like @code{Array}. Almost every line of code you write using -@code{Array} will cause a different set of things to be implicitly -instantiated. - diff --git a/doc/constants.texi b/doc/constants.texi deleted file mode 100644 index e69de29b..00000000 diff --git a/doc/copyright.texi b/doc/copyright.texi deleted file mode 100644 index a25daf48..00000000 --- a/doc/copyright.texi +++ /dev/null @@ -1,14 +0,0 @@ - - -The Blitz++ library is licensed under both the GPL and the more permissive -``Blitz++ Artistic License''. Take your pick. They are detailed in GPL -and LICENSE, respectively. The artistic license is more appropriate for -commercial use, since it lacks the ``viral'' properties of the GPL. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -Copyright @copyright{} 1996--2003 Free Software Foundation, Inc. - diff --git a/doc/download.texi b/doc/download.texi deleted file mode 100644 index 0d7c73ce..00000000 --- a/doc/download.texi +++ /dev/null @@ -1,12 +0,0 @@ - -@node download, install, platforms, Introduction -@section How to download Blitz++ - -The Blitz++ project is now being served via SourceForge. -To download the Blitz++ library, go to the blitz project web page, at -@uref{http://sourceforge.net/projects/blitz}. - -More information about supported platforms and C++ compilers is available -in this document or on the official Blitz++ home page, at -@uref{http://oonumerics.org/blitz}. 
- diff --git a/doc/doxygen/CMakeLists.txt b/doc/doxygen/CMakeLists.txt deleted file mode 100644 index b08fd7f9..00000000 --- a/doc/doxygen/CMakeLists.txt +++ /dev/null @@ -1,38 +0,0 @@ -# Configure the script and the Doxyfile, then add target - -find_package(Doxygen) - -if (DOXYGEN_FOUND) - if (NOT DOXYGEN_DOT_PATH) - get_filename_component(DOT_PATH ${DOT} PATH) - endif() - - option(DISABLE_REFMAN_PDF "Disable generation of refman.pdf" OFF) - - set(top_srcdir ${CMAKE_SOURCE_DIR}) - set(top_builddir ${CMAKE_BINARY_DIR}) - set(enable_latex_docs YES) - set(enable_html_docs YES) - set(enable_dot YES) - set(PACKAGE_NAME ${CMAKE_PROJECT_NAME}) - set(PACKAGE_VERSION ${blitz_VERSION}) - configure_file(${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile.in ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile @ONLY) - - add_custom_target(doxygen ${DOXYGEN_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile - DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile) - install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/html/ DESTINATION ${CMAKE_INSTALL_DOCDIR}/reference) - add_dependencies(blitz-doc doxygen) - - if (NOT DISABLE_REFMAN_PDF) - add_custom_command(OUTPUT latex/refman.pdf - COMMAND make - WORKING_DIRECTORY latex - DEPENDS doxygen) - - add_custom_target(doxygen-latex DEPENDS latex/refman.pdf) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/latex/refman.pdf DESTINATION ${CMAKE_INSTALL_DOCDIR}) - add_dependencies(blitz-doc doxygen-latex) - endif() - - set_directory_properties(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES "html;latex;doxygen-warning") -endif() diff --git a/doc/doxygen/Doxyfile.in b/doc/doxygen/Doxyfile.in deleted file mode 100644 index 8584609b..00000000 --- a/doc/doxygen/Doxyfile.in +++ /dev/null @@ -1,2513 +0,0 @@ -# Doxyfile 1.8.15 - -# This file describes the settings to be used by the documentation system -# doxygen (www.doxygen.org) for a project. -# -# All text after a double hash (##) is considered a comment and is placed in -# front of the TAG it is preceding. 
-# -# All text after a single hash (#) is considered a comment and will be ignored. -# The format is: -# TAG = value [value, ...] -# For lists, items can also be appended using: -# TAG += value [value, ...] -# Values that contain spaces should be placed between quotes (\" \"). - -#--------------------------------------------------------------------------- -# Project related configuration options -#--------------------------------------------------------------------------- - -# This tag specifies the encoding used for all characters in the configuration -# file that follow. The default is UTF-8 which is also the encoding used for all -# text before the first occurrence of this tag. Doxygen uses libiconv (or the -# iconv built into libc) for the transcoding. See -# https://www.gnu.org/software/libiconv/ for the list of possible encodings. -# The default value is: UTF-8. - -DOXYFILE_ENCODING = UTF-8 - -# The PROJECT_NAME tag is a single word (or a sequence of words surrounded by -# double-quotes, unless you are using Doxywizard) that should identify the -# project for which the documentation is generated. This name is used in the -# title of most generated pages and in a few other places. -# The default value is: My Project. - -PROJECT_NAME = @PACKAGE_NAME@ - -# The PROJECT_NUMBER tag can be used to enter a project or revision number. This -# could be handy for archiving the generated documentation or if some version -# control system is used. - -PROJECT_NUMBER = "Version @PACKAGE_VERSION@" - -# Using the PROJECT_BRIEF tag one can provide an optional one line description -# for a project that appears at the top of each page and should give viewer a -# quick idea about the purpose of the project. Keep the description short. - -PROJECT_BRIEF = - -# With the PROJECT_LOGO tag one can specify a logo or an icon that is included -# in the documentation. The maximum height of the logo should not exceed 55 -# pixels and the maximum width should not exceed 200 pixels. 
Doxygen will copy -# the logo to the output directory. - -PROJECT_LOGO = - -# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path -# into which the generated documentation will be written. If a relative path is -# entered, it will be relative to the location where doxygen was started. If -# left blank the current directory will be used. - -OUTPUT_DIRECTORY = - -# If the CREATE_SUBDIRS tag is set to YES then doxygen will create 4096 sub- -# directories (in 2 levels) under the output directory of each output format and -# will distribute the generated files over these directories. Enabling this -# option can be useful when feeding doxygen a huge amount of source files, where -# putting all generated files in the same directory would otherwise causes -# performance problems for the file system. -# The default value is: NO. - -CREATE_SUBDIRS = NO - -# If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII -# characters to appear in the names of generated files. If set to NO, non-ASCII -# characters will be escaped, for example _xE3_x81_x84 will be used for Unicode -# U+3044. -# The default value is: NO. - -ALLOW_UNICODE_NAMES = NO - -# The OUTPUT_LANGUAGE tag is used to specify the language in which all -# documentation generated by doxygen is written. Doxygen will use this -# information to generate all constant output in the proper language. -# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese, -# Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States), -# Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian, -# Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages), -# Korean, Korean-en (Korean with English messages), Latvian, Lithuanian, -# Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian, -# Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish, -# Ukrainian and Vietnamese. 
-# The default value is: English. - -OUTPUT_LANGUAGE = English - -# The OUTPUT_TEXT_DIRECTION tag is used to specify the direction in which all -# documentation generated by doxygen is written. Doxygen will use this -# information to generate all generated output in the proper direction. -# Possible values are: None, LTR, RTL and Context. -# The default value is: None. - -OUTPUT_TEXT_DIRECTION = None - -# If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member -# descriptions after the members that are listed in the file and class -# documentation (similar to Javadoc). Set to NO to disable this. -# The default value is: YES. - -BRIEF_MEMBER_DESC = YES - -# If the REPEAT_BRIEF tag is set to YES, doxygen will prepend the brief -# description of a member or function before the detailed description -# -# Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the -# brief descriptions will be completely suppressed. -# The default value is: YES. - -REPEAT_BRIEF = YES - -# This tag implements a quasi-intelligent brief description abbreviator that is -# used to form the text in various listings. Each string in this list, if found -# as the leading text of the brief description, will be stripped from the text -# and the result, after processing the whole list, is used as the annotated -# text. Otherwise, the brief description is used as-is. If left blank, the -# following values are used ($name is automatically replaced with the name of -# the entity):The $name class, The $name widget, The $name file, is, provides, -# specifies, contains, represents, a, an and the. - -ABBREVIATE_BRIEF = YES - -# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then -# doxygen will generate a detailed section even if there is only a brief -# description. -# The default value is: NO. 
- -ALWAYS_DETAILED_SEC = NO - -# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all -# inherited members of a class in the documentation of that class as if those -# members were ordinary class members. Constructors, destructors and assignment -# operators of the base classes will not be shown. -# The default value is: NO. - -INLINE_INHERITED_MEMB = NO - -# If the FULL_PATH_NAMES tag is set to YES, doxygen will prepend the full path -# before files name in the file list and in the header files. If set to NO the -# shortest path that makes the file name unique will be used -# The default value is: YES. - -FULL_PATH_NAMES = YES - -# The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path. -# Stripping is only done if one of the specified strings matches the left-hand -# part of the path. The tag can be used to show relative paths in the file list. -# If left blank the directory from which doxygen is run is used as the path to -# strip. -# -# Note that you can specify absolute paths here, but also relative paths, which -# will be relative from the directory where doxygen is started. -# This tag requires that the tag FULL_PATH_NAMES is set to YES. - -STRIP_FROM_PATH = @top_srcdir@/ \ - @top_builddir@/ - -# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the -# path mentioned in the documentation of a class, which tells the reader which -# header file to include in order to use a class. If left blank only the name of -# the header file containing the class definition is used. Otherwise one should -# specify the list of include paths that are normally passed to the compiler -# using the -I flag. - -STRIP_FROM_INC_PATH = - -# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but -# less readable) file names. This can be useful is your file systems doesn't -# support long names like on DOS, Mac, or CD-ROM. -# The default value is: NO. 
- -SHORT_NAMES = NO - -# If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the -# first line (until the first dot) of a Javadoc-style comment as the brief -# description. If set to NO, the Javadoc-style will behave just like regular Qt- -# style comments (thus requiring an explicit @brief command for a brief -# description.) -# The default value is: NO. - -JAVADOC_AUTOBRIEF = YES - -# If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first -# line (until the first dot) of a Qt-style comment as the brief description. If -# set to NO, the Qt-style will behave just like regular Qt-style comments (thus -# requiring an explicit \brief command for a brief description.) -# The default value is: NO. - -QT_AUTOBRIEF = NO - -# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a -# multi-line C++ special comment block (i.e. a block of //! or /// comments) as -# a brief description. This used to be the default behavior. The new default is -# to treat a multi-line C++ comment block as a detailed description. Set this -# tag to YES if you prefer the old behavior instead. -# -# Note that setting this tag to YES also means that rational rose comments are -# not recognized any more. -# The default value is: NO. - -MULTILINE_CPP_IS_BRIEF = NO - -# If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the -# documentation from any documented member that it re-implements. -# The default value is: YES. - -INHERIT_DOCS = YES - -# If the SEPARATE_MEMBER_PAGES tag is set to YES then doxygen will produce a new -# page for each member. If set to NO, the documentation of a member will be part -# of the file/class/namespace that contains it. -# The default value is: NO. - -SEPARATE_MEMBER_PAGES = NO - -# The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen -# uses this value to replace tabs by spaces in code fragments. -# Minimum value: 1, maximum value: 16, default value: 4. 
- -TAB_SIZE = 2 - -# This tag can be used to specify a number of aliases that act as commands in -# the documentation. An alias has the form: -# name=value -# For example adding -# "sideeffect=@par Side Effects:\n" -# will allow you to put the command \sideeffect (or @sideeffect) in the -# documentation, which will result in a user-defined paragraph with heading -# "Side Effects:". You can put \n's in the value part of an alias to insert -# newlines (in the resulting output). You can put ^^ in the value part of an -# alias to insert a newline as if a physical newline was in the original file. -# When you need a literal { or } or , in the value part of an alias you have to -# escape them by means of a backslash (\), this can lead to conflicts with the -# commands \{ and \} for these it is advised to use the version @{ and @} or use -# a double escape (\\{ and \\}) - -ALIASES = "docme=\todo\nDoc me!" - -# This tag can be used to specify a number of word-keyword mappings (TCL only). -# A mapping has the form "name=value". For example adding "class=itcl::class" -# will allow you to use the command class in the itcl::class meaning. - -TCL_SUBST = - -# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources -# only. Doxygen will then generate output that is more tailored for C. For -# instance, some of the names that are used will be different. The list of all -# members will be omitted, etc. -# The default value is: NO. - -OPTIMIZE_OUTPUT_FOR_C = NO - -# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or -# Python sources only. Doxygen will then generate output that is more tailored -# for that language. For instance, namespaces will be presented as packages, -# qualified scopes will look different, etc. -# The default value is: NO. - -OPTIMIZE_OUTPUT_JAVA = NO - -# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran -# sources. Doxygen will then generate output that is tailored for Fortran. 
-# The default value is: NO. - -OPTIMIZE_FOR_FORTRAN = NO - -# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL -# sources. Doxygen will then generate output that is tailored for VHDL. -# The default value is: NO. - -OPTIMIZE_OUTPUT_VHDL = NO - -# Set the OPTIMIZE_OUTPUT_SLICE tag to YES if your project consists of Slice -# sources only. Doxygen will then generate output that is more tailored for that -# language. For instance, namespaces will be presented as modules, types will be -# separated into more groups, etc. -# The default value is: NO. - -OPTIMIZE_OUTPUT_SLICE = NO - -# Doxygen selects the parser to use depending on the extension of the files it -# parses. With this tag you can assign which parser to use for a given -# extension. Doxygen has a built-in mapping, but you can override or extend it -# using this tag. The format is ext=language, where ext is a file extension, and -# language is one of the parsers supported by doxygen: IDL, Java, Javascript, -# Csharp (C#), C, C++, D, PHP, md (Markdown), Objective-C, Python, Slice, -# Fortran (fixed format Fortran: FortranFixed, free formatted Fortran: -# FortranFree, unknown formatted Fortran: Fortran. In the later case the parser -# tries to guess whether the code is fixed or free formatted code, this is the -# default for Fortran type files), VHDL, tcl. For instance to make doxygen treat -# .inc files as Fortran files (default is PHP), and .f files as C (default is -# Fortran), use: inc=Fortran f=C. -# -# Note: For files without extension you can use no_extension as a placeholder. -# -# Note that for custom extensions you also need to set FILE_PATTERNS otherwise -# the files are not read by doxygen. - -EXTENSION_MAPPING = - -# If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments -# according to the Markdown format, which allows for more readable -# documentation. See https://daringfireball.net/projects/markdown/ for details. 
-# The output of markdown processing is further processed by doxygen, so you can -# mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in -# case of backward compatibilities issues. -# The default value is: YES. - -MARKDOWN_SUPPORT = YES - -# When the TOC_INCLUDE_HEADINGS tag is set to a non-zero value, all headings up -# to that level are automatically included in the table of contents, even if -# they do not have an id attribute. -# Note: This feature currently applies only to Markdown headings. -# Minimum value: 0, maximum value: 99, default value: 0. -# This tag requires that the tag MARKDOWN_SUPPORT is set to YES. - -TOC_INCLUDE_HEADINGS = 0 - -# When enabled doxygen tries to link words that correspond to documented -# classes, or namespaces to their corresponding documentation. Such a link can -# be prevented in individual cases by putting a % sign in front of the word or -# globally by setting AUTOLINK_SUPPORT to NO. -# The default value is: YES. - -AUTOLINK_SUPPORT = YES - -# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want -# to include (a tag file for) the STL sources as input, then you should set this -# tag to YES in order to let doxygen match functions declarations and -# definitions whose arguments contain STL classes (e.g. func(std::string); -# versus func(std::string) {}). This also make the inheritance and collaboration -# diagrams that involve STL classes more complete and accurate. -# The default value is: NO. - -BUILTIN_STL_SUPPORT = NO - -# If you use Microsoft's C++/CLI language, you should set this option to YES to -# enable parsing support. -# The default value is: NO. - -CPP_CLI_SUPPORT = NO - -# Set the SIP_SUPPORT tag to YES if your project consists of sip (see: -# https://www.riverbankcomputing.com/software/sip/intro) sources only. 
Doxygen -# will parse them like normal C++ but will assume all classes use public instead -# of private inheritance when no explicit protection keyword is present. -# The default value is: NO. - -SIP_SUPPORT = NO - -# For Microsoft's IDL there are propget and propput attributes to indicate -# getter and setter methods for a property. Setting this option to YES will make -# doxygen to replace the get and set methods by a property in the documentation. -# This will only work if the methods are indeed getting or setting a simple -# type. If this is not the case, or you want to show the methods anyway, you -# should set this option to NO. -# The default value is: YES. - -IDL_PROPERTY_SUPPORT = YES - -# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC -# tag is set to YES then doxygen will reuse the documentation of the first -# member in the group (if any) for the other members of the group. By default -# all members of a group must be documented explicitly. -# The default value is: NO. - -DISTRIBUTE_GROUP_DOC = YES - -# If one adds a struct or class to a group and this option is enabled, then also -# any nested class or struct is added to the same group. By default this option -# is disabled and one has to add nested compounds explicitly via \ingroup. -# The default value is: NO. - -GROUP_NESTED_COMPOUNDS = NO - -# Set the SUBGROUPING tag to YES to allow class member groups of the same type -# (for instance a group of public functions) to be put as a subgroup of that -# type (e.g. under the Public Functions section). Set it to NO to prevent -# subgrouping. Alternatively, this can be done per class using the -# \nosubgrouping command. -# The default value is: YES. - -SUBGROUPING = YES - -# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions -# are shown inside the group in which they are included (e.g. using \ingroup) -# instead of on a separate page (for HTML and Man pages) or section (for LaTeX -# and RTF). 
-# -# Note that this feature does not work in combination with -# SEPARATE_MEMBER_PAGES. -# The default value is: NO. - -INLINE_GROUPED_CLASSES = NO - -# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions -# with only public data fields or simple typedef fields will be shown inline in -# the documentation of the scope in which they are defined (i.e. file, -# namespace, or group documentation), provided this scope is documented. If set -# to NO, structs, classes, and unions are shown on a separate page (for HTML and -# Man pages) or section (for LaTeX and RTF). -# The default value is: NO. - -INLINE_SIMPLE_STRUCTS = NO - -# When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or -# enum is documented as struct, union, or enum with the name of the typedef. So -# typedef struct TypeS {} TypeT, will appear in the documentation as a struct -# with name TypeT. When disabled the typedef will appear as a member of a file, -# namespace, or class. And the struct will be named TypeS. This can typically be -# useful for C code in case the coding convention dictates that all compound -# types are typedef'ed and only the typedef is referenced, never the tag name. -# The default value is: NO. - -TYPEDEF_HIDES_STRUCT = NO - -# The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This -# cache is used to resolve symbols given their name and scope. Since this can be -# an expensive process and often the same symbol appears multiple times in the -# code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small -# doxygen will become slower. If the cache is too large, memory is wasted. The -# cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range -# is 0..9, the default is 0, corresponding to a cache size of 2^16=65536 -# symbols. At the end of a run doxygen will report the cache usage and suggest -# the optimal cache size from a speed point of view. 
-# Minimum value: 0, maximum value: 9, default value: 0. - -LOOKUP_CACHE_SIZE = 0 - -#--------------------------------------------------------------------------- -# Build related configuration options -#--------------------------------------------------------------------------- - -# If the EXTRACT_ALL tag is set to YES, doxygen will assume all entities in -# documentation are documented, even if no documentation was available. Private -# class members and static file members will be hidden unless the -# EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES. -# Note: This will also disable the warnings about undocumented members that are -# normally produced when WARNINGS is set to YES. -# The default value is: NO. - -EXTRACT_ALL = YES - -# If the EXTRACT_PRIVATE tag is set to YES, all private members of a class will -# be included in the documentation. -# The default value is: NO. - -EXTRACT_PRIVATE = YES - -# If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal -# scope will be included in the documentation. -# The default value is: NO. - -EXTRACT_PACKAGE = NO - -# If the EXTRACT_STATIC tag is set to YES, all static members of a file will be -# included in the documentation. -# The default value is: NO. - -EXTRACT_STATIC = YES - -# If the EXTRACT_LOCAL_CLASSES tag is set to YES, classes (and structs) defined -# locally in source files will be included in the documentation. If set to NO, -# only classes defined in header files are included. Does not have any effect -# for Java sources. -# The default value is: YES. - -EXTRACT_LOCAL_CLASSES = YES - -# This flag is only useful for Objective-C code. If set to YES, local methods, -# which are defined in the implementation section but not in the interface are -# included in the documentation. If set to NO, only methods in the interface are -# included. -# The default value is: NO. 
- -EXTRACT_LOCAL_METHODS = NO - -# If this flag is set to YES, the members of anonymous namespaces will be -# extracted and appear in the documentation as a namespace called -# 'anonymous_namespace{file}', where file will be replaced with the base name of -# the file that contains the anonymous namespace. By default anonymous namespace -# are hidden. -# The default value is: NO. - -EXTRACT_ANON_NSPACES = NO - -# If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all -# undocumented members inside documented classes or files. If set to NO these -# members will be included in the various overviews, but no documentation -# section is generated. This option has no effect if EXTRACT_ALL is enabled. -# The default value is: NO. - -HIDE_UNDOC_MEMBERS = NO - -# If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all -# undocumented classes that are normally visible in the class hierarchy. If set -# to NO, these classes will be included in the various overviews. This option -# has no effect if EXTRACT_ALL is enabled. -# The default value is: NO. - -HIDE_UNDOC_CLASSES = NO - -# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend -# (class|struct|union) declarations. If set to NO, these declarations will be -# included in the documentation. -# The default value is: NO. - -HIDE_FRIEND_COMPOUNDS = NO - -# If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any -# documentation blocks found inside the body of a function. If set to NO, these -# blocks will be appended to the function's detailed documentation block. -# The default value is: NO. - -HIDE_IN_BODY_DOCS = NO - -# The INTERNAL_DOCS tag determines if documentation that is typed after a -# \internal command is included. If the tag is set to NO then the documentation -# will be excluded. Set it to YES to include the internal documentation. -# The default value is: NO. 
- -INTERNAL_DOCS = YES - -# If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file -# names in lower-case letters. If set to YES, upper-case letters are also -# allowed. This is useful if you have classes or files whose names only differ -# in case and if your file system supports case sensitive file names. Windows -# and Mac users are advised to set this option to NO. -# The default value is: system dependent. - -CASE_SENSE_NAMES = YES - -# If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with -# their full class and namespace scopes in the documentation. If set to YES, the -# scope will be hidden. -# The default value is: NO. - -HIDE_SCOPE_NAMES = NO - -# If the HIDE_COMPOUND_REFERENCE tag is set to NO (default) then doxygen will -# append additional text to a page's title, such as Class Reference. If set to -# YES the compound reference will be hidden. -# The default value is: NO. - -HIDE_COMPOUND_REFERENCE= NO - -# If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of -# the files that are included by a file in the documentation of that file. -# The default value is: YES. - -SHOW_INCLUDE_FILES = YES - -# If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each -# grouped member an include statement to the documentation, telling the reader -# which file to include in order to use the member. -# The default value is: NO. - -SHOW_GROUPED_MEMB_INC = NO - -# If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include -# files with double quotes in the documentation rather than with sharp brackets. -# The default value is: NO. - -FORCE_LOCAL_INCLUDES = NO - -# If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the -# documentation for inline members. -# The default value is: YES. 
- -INLINE_INFO = YES - -# If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the -# (detailed) documentation of file and class members alphabetically by member -# name. If set to NO, the members will appear in declaration order. -# The default value is: YES. - -SORT_MEMBER_DOCS = YES - -# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief -# descriptions of file, namespace and class members alphabetically by member -# name. If set to NO, the members will appear in declaration order. Note that -# this will also influence the order of the classes in the class list. -# The default value is: NO. - -SORT_BRIEF_DOCS = NO - -# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the -# (brief and detailed) documentation of class members so that constructors and -# destructors are listed first. If set to NO the constructors will appear in the -# respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS. -# Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief -# member documentation. -# Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting -# detailed member documentation. -# The default value is: NO. - -SORT_MEMBERS_CTORS_1ST = NO - -# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy -# of group names into alphabetical order. If set to NO the group names will -# appear in their defined order. -# The default value is: NO. - -SORT_GROUP_NAMES = NO - -# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by -# fully-qualified names, including namespaces. If set to NO, the class list will -# be sorted only by class name, not including the namespace part. -# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. -# Note: This option applies only to the class list, not to the alphabetical -# list. -# The default value is: NO. 
- -SORT_BY_SCOPE_NAME = NO - -# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper -# type resolution of all parameters of a function it will reject a match between -# the prototype and the implementation of a member function even if there is -# only one candidate or it is obvious which candidate to choose by doing a -# simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still -# accept a match between prototype and implementation in such cases. -# The default value is: NO. - -STRICT_PROTO_MATCHING = NO - -# The GENERATE_TODOLIST tag can be used to enable (YES) or disable (NO) the todo -# list. This list is created by putting \todo commands in the documentation. -# The default value is: YES. - -GENERATE_TODOLIST = YES - -# The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test -# list. This list is created by putting \test commands in the documentation. -# The default value is: YES. - -GENERATE_TESTLIST = YES - -# The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug -# list. This list is created by putting \bug commands in the documentation. -# The default value is: YES. - -GENERATE_BUGLIST = YES - -# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or disable (NO) -# the deprecated list. This list is created by putting \deprecated commands in -# the documentation. -# The default value is: YES. - -GENERATE_DEPRECATEDLIST= YES - -# The ENABLED_SECTIONS tag can be used to enable conditional documentation -# sections, marked by \if ... \endif and \cond -# ... \endcond blocks. - -ENABLED_SECTIONS = - -# The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the -# initial value of a variable or macro / define can have for it to appear in the -# documentation. If the initializer consists of more lines than specified here -# it will be hidden. Use a value of 0 to hide initializers completely. 
The -# appearance of the value of individual variables and macros / defines can be -# controlled using \showinitializer or \hideinitializer command in the -# documentation regardless of this setting. -# Minimum value: 0, maximum value: 10000, default value: 30. - -MAX_INITIALIZER_LINES = 30 - -# Set the SHOW_USED_FILES tag to NO to disable the list of files generated at -# the bottom of the documentation of classes and structs. If set to YES, the -# list will mention the files that were used to generate the documentation. -# The default value is: YES. - -SHOW_USED_FILES = YES - -# Set the SHOW_FILES tag to NO to disable the generation of the Files page. This -# will remove the Files entry from the Quick Index and from the Folder Tree View -# (if specified). -# The default value is: YES. - -SHOW_FILES = YES - -# Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces -# page. This will remove the Namespaces entry from the Quick Index and from the -# Folder Tree View (if specified). -# The default value is: YES. - -SHOW_NAMESPACES = YES - -# The FILE_VERSION_FILTER tag can be used to specify a program or script that -# doxygen should invoke to get the current version for each file (typically from -# the version control system). Doxygen will invoke the program by executing (via -# popen()) the command command input-file, where command is the value of the -# FILE_VERSION_FILTER tag, and input-file is the name of an input file provided -# by doxygen. Whatever the program writes to standard output is used as the file -# version. For an example see the documentation. - -FILE_VERSION_FILTER = - -# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed -# by doxygen. The layout file controls the global structure of the generated -# output files in an output format independent way. To create the layout file -# that represents doxygen's defaults, run doxygen with the -l option. 
You can -# optionally specify a file name after the option, if omitted DoxygenLayout.xml -# will be used as the name of the layout file. -# -# Note that if you run doxygen from a directory containing a file called -# DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE -# tag is left empty. - -LAYOUT_FILE = - -# The CITE_BIB_FILES tag can be used to specify one or more bib files containing -# the reference definitions. This must be a list of .bib files. The .bib -# extension is automatically appended if omitted. This requires the bibtex tool -# to be installed. See also https://en.wikipedia.org/wiki/BibTeX for more info. -# For LaTeX the style of the bibliography can be controlled using -# LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the -# search path. See also \cite for info how to create references. - -CITE_BIB_FILES = - -#--------------------------------------------------------------------------- -# Configuration options related to warning and progress messages -#--------------------------------------------------------------------------- - -# The QUIET tag can be used to turn on/off the messages that are generated to -# standard output by doxygen. If QUIET is set to YES this implies that the -# messages are off. -# The default value is: NO. - -QUIET = NO - -# The WARNINGS tag can be used to turn on/off the warning messages that are -# generated to standard error (stderr) by doxygen. If WARNINGS is set to YES -# this implies that the warnings are on. -# -# Tip: Turn warnings on while writing the documentation. -# The default value is: YES. - -WARNINGS = YES - -# If the WARN_IF_UNDOCUMENTED tag is set to YES then doxygen will generate -# warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag -# will automatically be disabled. -# The default value is: YES. 
- -WARN_IF_UNDOCUMENTED = YES - -# If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for -# potential errors in the documentation, such as not documenting some parameters -# in a documented function, or documenting parameters that don't exist or using -# markup commands wrongly. -# The default value is: YES. - -WARN_IF_DOC_ERROR = YES - -# This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that -# are documented, but have no documentation for their parameters or return -# value. If set to NO, doxygen will only warn about wrong or incomplete -# parameter documentation, but not about the absence of documentation. If -# EXTRACT_ALL is set to YES then this flag will automatically be disabled. -# The default value is: NO. - -WARN_NO_PARAMDOC = NO - -# If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when -# a warning is encountered. -# The default value is: NO. - -WARN_AS_ERROR = NO - -# The WARN_FORMAT tag determines the format of the warning messages that doxygen -# can produce. The string should contain the $file, $line, and $text tags, which -# will be replaced by the file and line number from which the warning originated -# and the warning text. Optionally the format may contain $version, which will -# be replaced by the version of the file (if it could be obtained via -# FILE_VERSION_FILTER) -# The default value is: $file:$line: $text. - -WARN_FORMAT = "$file:$line: $text" - -# The WARN_LOGFILE tag can be used to specify a file to which warning and error -# messages should be written. If left blank the output is written to standard -# error (stderr). 
- -WARN_LOGFILE = doxygen-warning - -#--------------------------------------------------------------------------- -# Configuration options related to the input files -#--------------------------------------------------------------------------- - -# The INPUT tag is used to specify the files and/or directories that contain -# documented source files. You may enter file names like myfile.cpp or -# directories like /usr/src/myproject. Separate the files or directories with -# spaces. See also FILE_PATTERNS and EXTENSION_MAPPING -# Note: If this tag is empty the current directory is searched. - -INPUT = @top_srcdir@/blitz/ \ - @top_srcdir@/random/ - -# This tag can be used to specify the character encoding of the source files -# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses -# libiconv (or the iconv built into libc) for the transcoding. See the libiconv -# documentation (see: https://www.gnu.org/software/libiconv/) for the list of -# possible encodings. -# The default value is: UTF-8. - -INPUT_ENCODING = UTF-8 - -# If the value of the INPUT tag contains directories, you can use the -# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and -# *.h) to filter out the source-files in the directories. -# -# Note that for custom extensions or not directly supported extensions you also -# need to set EXTENSION_MAPPING for the extension otherwise the files are not -# read by doxygen. -# -# If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp, -# *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, -# *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, -# *.m, *.markdown, *.md, *.mm, *.dox, *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, -# *.f, *.for, *.tcl, *.vhd, *.vhdl, *.ucf, *.qsf and *.ice. - -FILE_PATTERNS = *.h - -# The RECURSIVE tag can be used to specify whether or not subdirectories should -# be searched for input files as well. -# The default value is: NO. 
- -RECURSIVE = YES - -# The EXCLUDE tag can be used to specify files and/or directories that should be -# excluded from the INPUT source files. This way you can easily exclude a -# subdirectory from a directory tree whose root is specified with the INPUT tag. -# -# Note that relative paths are relative to the directory from which doxygen is -# run. - -EXCLUDE = @top_srcdir@/blitz/generate \ - @top_srcdir@/blitz/array-old.h \ - @top_srcdir@/blitz/bench.h \ - @top_srcdir@/blitz/benchext.h \ - @top_srcdir@/blitz/bzdebug.h \ - @top_srcdir@/blitz/tau.h \ - @top_srcdir@/blitz/timer.h \ - @top_srcdir@/blitz/limits-hack.h \ - @top_srcdir@/blitz/promote.h \ - @top_srcdir@/blitz/promote-old.h \ - @top_srcdir@/blitz/array \ - @top_srcdir@/blitz/meta \ - @top_srcdir@/blitz/applics.h \ - @top_srcdir@/blitz/funcs.h \ - @top_srcdir@/blitz/mathfunc.h \ - @top_srcdir@/blitz/mathf2.h \ - @top_srcdir@/blitz/numinquire.h \ - @top_srcdir@/blitz/update.h \ - @top_srcdir@/blitz/tiny.h \ - @top_srcdir@/blitz/tinymat.h \ - @top_srcdir@/blitz/tinymatexpr.h \ - @top_srcdir@/blitz/matbops.h \ - @top_srcdir@/blitz/matexpr.h \ - @top_srcdir@/blitz/matuops.h \ - @top_srcdir@/blitz/matref.h \ - @top_srcdir@/blitz/randref.h \ - @top_srcdir@/blitz/vecexpr.h \ - @top_srcdir@/blitz/vecexprwrap.h \ - @top_srcdir@/blitz/vecwhere.h - -# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or -# directories that are symbolic links (a Unix file system feature) are excluded -# from the input. -# The default value is: NO. - -EXCLUDE_SYMLINKS = NO - -# If the value of the INPUT tag contains directories, you can use the -# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude -# certain files from those directories. 
-# -# Note that the wildcards are matched against the file with absolute path, so to -# exclude all test directories for example use the pattern */test/* - -EXCLUDE_PATTERNS = CVS \ - *.in - -# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names -# (namespaces, classes, functions, etc.) that should be excluded from the -# output. The symbol name can be a fully qualified name, a word, or if the -# wildcard * is used, a substring. Examples: ANamespace, AClass, -# AClass::ANamespace, ANamespace::*Test -# -# Note that the wildcards are matched against the file with absolute path, so to -# exclude all test directories use the pattern */test/* - -EXCLUDE_SYMBOLS = - -# The EXAMPLE_PATH tag can be used to specify one or more files or directories -# that contain example code fragments that are included (see the \include -# command). - -EXAMPLE_PATH = @top_srcdir@/examples - -# If the value of the EXAMPLE_PATH tag contains directories, you can use the -# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and -# *.h) to filter out the source-files in the directories. If left blank all -# files are included. - -EXAMPLE_PATTERNS = *.cpp - -# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be -# searched for input files to be used with the \include or \dontinclude commands -# irrespective of the value of the RECURSIVE tag. -# The default value is: NO. - -EXAMPLE_RECURSIVE = NO - -# The IMAGE_PATH tag can be used to specify one or more files or directories -# that contain images that are to be included in the documentation (see the -# \image command). - -IMAGE_PATH = - -# The INPUT_FILTER tag can be used to specify a program that doxygen should -# invoke to filter for each input file. Doxygen will invoke the filter program -# by executing (via popen()) the command: -# -# -# -# where is the value of the INPUT_FILTER tag, and is the -# name of an input file. 
Doxygen will then use the output that the filter -# program writes to standard output. If FILTER_PATTERNS is specified, this tag -# will be ignored. -# -# Note that the filter must not add or remove lines; it is applied before the -# code is scanned, but not when the output code is generated. If lines are added -# or removed, the anchors will not be placed correctly. -# -# Note that for custom extensions or not directly supported extensions you also -# need to set EXTENSION_MAPPING for the extension otherwise the files are not -# properly processed by doxygen. - -INPUT_FILTER = - -# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern -# basis. Doxygen will compare the file name with each pattern and apply the -# filter if there is a match. The filters are a list of the form: pattern=filter -# (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how -# filters are used. If the FILTER_PATTERNS tag is empty or if none of the -# patterns match the file name, INPUT_FILTER is applied. -# -# Note that for custom extensions or not directly supported extensions you also -# need to set EXTENSION_MAPPING for the extension otherwise the files are not -# properly processed by doxygen. - -FILTER_PATTERNS = - -# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using -# INPUT_FILTER) will also be used to filter the input files that are used for -# producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES). -# The default value is: NO. - -FILTER_SOURCE_FILES = NO - -# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file -# pattern. A pattern will override the setting for FILTER_PATTERN (if any) and -# it is also possible to disable source filtering for a specific pattern using -# *.ext= (so without naming a filter). -# This tag requires that the tag FILTER_SOURCE_FILES is set to YES. 
- -FILTER_SOURCE_PATTERNS = - -# If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that -# is part of the input, its contents will be placed on the main page -# (index.html). This can be useful if you have a project on for instance GitHub -# and want to reuse the introduction page also for the doxygen output. - -USE_MDFILE_AS_MAINPAGE = - -#--------------------------------------------------------------------------- -# Configuration options related to source browsing -#--------------------------------------------------------------------------- - -# If the SOURCE_BROWSER tag is set to YES then a list of source files will be -# generated. Documented entities will be cross-referenced with these sources. -# -# Note: To get rid of all source code in the generated output, make sure that -# also VERBATIM_HEADERS is set to NO. -# The default value is: NO. - -SOURCE_BROWSER = NO - -# Setting the INLINE_SOURCES tag to YES will include the body of functions, -# classes and enums directly into the documentation. -# The default value is: NO. - -INLINE_SOURCES = NO - -# Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any -# special comment blocks from generated source code fragments. Normal C, C++ and -# Fortran comments will always remain visible. -# The default value is: YES. - -STRIP_CODE_COMMENTS = YES - -# If the REFERENCED_BY_RELATION tag is set to YES then for each documented -# entity all documented functions referencing it will be listed. -# The default value is: NO. - -REFERENCED_BY_RELATION = YES - -# If the REFERENCES_RELATION tag is set to YES then for each documented function -# all documented entities called/used by that function will be listed. -# The default value is: NO. - -REFERENCES_RELATION = YES - -# If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set -# to YES then the hyperlinks from functions in REFERENCES_RELATION and -# REFERENCED_BY_RELATION lists will link to the source code. 
Otherwise they will -# link to the documentation. -# The default value is: YES. - -REFERENCES_LINK_SOURCE = YES - -# If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the -# source code will show a tooltip with additional information such as prototype, -# brief description and links to the definition and documentation. Since this -# will make the HTML file larger and loading of large files a bit slower, you -# can opt to disable this feature. -# The default value is: YES. -# This tag requires that the tag SOURCE_BROWSER is set to YES. - -SOURCE_TOOLTIPS = YES - -# If the USE_HTAGS tag is set to YES then the references to source code will -# point to the HTML generated by the htags(1) tool instead of doxygen built-in -# source browser. The htags tool is part of GNU's global source tagging system -# (see https://www.gnu.org/software/global/global.html). You will need version -# 4.8.6 or higher. -# -# To use it do the following: -# - Install the latest version of global -# - Enable SOURCE_BROWSER and USE_HTAGS in the configuration file -# - Make sure the INPUT points to the root of the source tree -# - Run doxygen as normal -# -# Doxygen will invoke htags (and that will in turn invoke gtags), so these -# tools must be available from the command line (i.e. in the search path). -# -# The result: instead of the source browser generated by doxygen, the links to -# source code will now point to the output of htags. -# The default value is: NO. -# This tag requires that the tag SOURCE_BROWSER is set to YES. - -USE_HTAGS = NO - -# If the VERBATIM_HEADERS tag is set the YES then doxygen will generate a -# verbatim copy of the header file for each class for which an include is -# specified. Set to NO to disable this. -# See also: Section \class. -# The default value is: YES. 
- -VERBATIM_HEADERS = YES - -#--------------------------------------------------------------------------- -# Configuration options related to the alphabetical class index -#--------------------------------------------------------------------------- - -# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all -# compounds will be generated. Enable this if the project contains a lot of -# classes, structs, unions or interfaces. -# The default value is: YES. - -ALPHABETICAL_INDEX = YES - -# The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in -# which the alphabetical index list will be split. -# Minimum value: 1, maximum value: 20, default value: 5. -# This tag requires that the tag ALPHABETICAL_INDEX is set to YES. - -COLS_IN_ALPHA_INDEX = 2 - -# In case all classes in a project start with a common prefix, all classes will -# be put under the same header in the alphabetical index. The IGNORE_PREFIX tag -# can be used to specify a prefix (or a list of prefixes) that should be ignored -# while generating the index headers. -# This tag requires that the tag ALPHABETICAL_INDEX is set to YES. - -IGNORE_PREFIX = - -#--------------------------------------------------------------------------- -# Configuration options related to the HTML output -#--------------------------------------------------------------------------- - -# If the GENERATE_HTML tag is set to YES, doxygen will generate HTML output -# The default value is: YES. - -GENERATE_HTML = @enable_html_docs@ - -# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a -# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of -# it. -# The default directory is: html. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_OUTPUT = html - -# The HTML_FILE_EXTENSION tag can be used to specify the file extension for each -# generated HTML page (for example: .htm, .php, .asp). -# The default value is: .html. 
-# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_FILE_EXTENSION = .html - -# The HTML_HEADER tag can be used to specify a user-defined HTML header file for -# each generated HTML page. If the tag is left blank doxygen will generate a -# standard header. -# -# To get valid HTML the header file that includes any scripts and style sheets -# that doxygen needs, which is dependent on the configuration options used (e.g. -# the setting GENERATE_TREEVIEW). It is highly recommended to start with a -# default header using -# doxygen -w html new_header.html new_footer.html new_stylesheet.css -# YourConfigFile -# and then modify the file new_header.html. See also section "Doxygen usage" -# for information on how to generate the default header that doxygen normally -# uses. -# Note: The header is subject to change so you typically have to regenerate the -# default header when upgrading to a newer version of doxygen. For a description -# of the possible markers and block names see the documentation. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_HEADER = - -# The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each -# generated HTML page. If the tag is left blank doxygen will generate a standard -# footer. See HTML_HEADER for more information on how to generate a default -# footer and what special commands can be used inside the footer. See also -# section "Doxygen usage" for information on how to generate the default footer -# that doxygen normally uses. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_FOOTER = - -# The HTML_STYLESHEET tag can be used to specify a user-defined cascading style -# sheet that is used by each HTML page. It can be used to fine-tune the look of -# the HTML output. If left blank doxygen will generate a default style sheet. -# See also section "Doxygen usage" for information on how to generate the style -# sheet that doxygen normally uses. 
-# Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as -# it is more robust and this tag (HTML_STYLESHEET) will in the future become -# obsolete. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_STYLESHEET = - -# The HTML_EXTRA_STYLESHEET tag can be used to specify additional user-defined -# cascading style sheets that are included after the standard style sheets -# created by doxygen. Using this option one can overrule certain style aspects. -# This is preferred over using HTML_STYLESHEET since it does not replace the -# standard style sheet and is therefore more robust against future updates. -# Doxygen will copy the style sheet files to the output directory. -# Note: The order of the extra style sheet files is of importance (e.g. the last -# style sheet in the list overrules the setting of the previous ones in the -# list). For an example see the documentation. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_EXTRA_STYLESHEET = - -# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or -# other source files which should be copied to the HTML output directory. Note -# that these files will be copied to the base HTML output directory. Use the -# $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these -# files. In the HTML_STYLESHEET file, use the file name only. Also note that the -# files will be copied as-is; there are no commands or markers available. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_EXTRA_FILES = - -# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen -# will adjust the colors in the style sheet and background images according to -# this color. Hue is specified as an angle on a colorwheel, see -# https://en.wikipedia.org/wiki/Hue for more information. For instance the value -# 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300 -# purple, and 360 is red again. 
-# Minimum value: 0, maximum value: 359, default value: 220. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_COLORSTYLE_HUE = 220 - -# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors -# in the HTML output. For a value of 0 the output will use grayscales only. A -# value of 255 will produce the most vivid colors. -# Minimum value: 0, maximum value: 255, default value: 100. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_COLORSTYLE_SAT = 100 - -# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the -# luminance component of the colors in the HTML output. Values below 100 -# gradually make the output lighter, whereas values above 100 make the output -# darker. The value divided by 100 is the actual gamma applied, so 80 represents -# a gamma of 0.8, The value 220 represents a gamma of 2.2, and 100 does not -# change the gamma. -# Minimum value: 40, maximum value: 240, default value: 80. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_COLORSTYLE_GAMMA = 80 - -# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML -# page will contain the date and time when the page was generated. Setting this -# to YES can help to show when doxygen was last run and thus if the -# documentation is up to date. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_TIMESTAMP = NO - -# If the HTML_DYNAMIC_MENUS tag is set to YES then the generated HTML -# documentation will contain a main index with vertical navigation menus that -# are dynamically created via Javascript. If disabled, the navigation index will -# consists of multiple levels of tabs that are statically embedded in every HTML -# page. Disable this option to support browsers that do not have Javascript, -# like the Qt help browser. -# The default value is: YES. -# This tag requires that the tag GENERATE_HTML is set to YES. 
- -HTML_DYNAMIC_MENUS = YES - -# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML -# documentation will contain sections that can be hidden and shown after the -# page has loaded. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_DYNAMIC_SECTIONS = NO - -# With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries -# shown in the various tree structured indices initially; the user can expand -# and collapse entries dynamically later on. Doxygen will expand the tree to -# such a level that at most the specified number of entries are visible (unless -# a fully collapsed tree already exceeds this amount). So setting the number of -# entries 1 will produce a full collapsed tree by default. 0 is a special value -# representing an infinite number of entries and will result in a full expanded -# tree by default. -# Minimum value: 0, maximum value: 9999, default value: 100. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_INDEX_NUM_ENTRIES = 100 - -# If the GENERATE_DOCSET tag is set to YES, additional index files will be -# generated that can be used as input for Apple's Xcode 3 integrated development -# environment (see: https://developer.apple.com/xcode/), introduced with OSX -# 10.5 (Leopard). To create a documentation set, doxygen will generate a -# Makefile in the HTML output directory. Running make will produce the docset in -# that directory and running make install will install the docset in -# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at -# startup. See https://developer.apple.com/library/archive/featuredarticles/Doxy -# genXcode/_index.html for more information. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -GENERATE_DOCSET = NO - -# This tag determines the name of the docset feed. 
A documentation feed provides -# an umbrella under which multiple documentation sets from a single provider -# (such as a company or product suite) can be grouped. -# The default value is: Doxygen generated docs. -# This tag requires that the tag GENERATE_DOCSET is set to YES. - -DOCSET_FEEDNAME = "Doxygen generated docs" - -# This tag specifies a string that should uniquely identify the documentation -# set bundle. This should be a reverse domain-name style string, e.g. -# com.mycompany.MyDocSet. Doxygen will append .docset to the name. -# The default value is: org.doxygen.Project. -# This tag requires that the tag GENERATE_DOCSET is set to YES. - -DOCSET_BUNDLE_ID = org.doxygen.Project - -# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify -# the documentation publisher. This should be a reverse domain-name style -# string, e.g. com.mycompany.MyDocSet.documentation. -# The default value is: org.doxygen.Publisher. -# This tag requires that the tag GENERATE_DOCSET is set to YES. - -DOCSET_PUBLISHER_ID = org.doxygen.Publisher - -# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher. -# The default value is: Publisher. -# This tag requires that the tag GENERATE_DOCSET is set to YES. - -DOCSET_PUBLISHER_NAME = Publisher - -# If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three -# additional HTML index files: index.hhp, index.hhc, and index.hhk. The -# index.hhp is a project file that can be read by Microsoft's HTML Help Workshop -# (see: https://www.microsoft.com/en-us/download/details.aspx?id=21138) on -# Windows. -# -# The HTML Help Workshop contains a compiler that can convert all HTML output -# generated by doxygen into a single compiled HTML file (.chm). Compiled HTML -# files are now used as the Windows 98 help format, and will replace the old -# Windows help format (.hlp) on all Windows platforms in the future. 
Compressed -# HTML files also contain an index, a table of contents, and you can search for -# words in the documentation. The HTML workshop also contains a viewer for -# compressed HTML files. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -GENERATE_HTMLHELP = NO - -# The CHM_FILE tag can be used to specify the file name of the resulting .chm -# file. You can add a path in front of the file if the result should not be -# written to the html output directory. -# This tag requires that the tag GENERATE_HTMLHELP is set to YES. - -CHM_FILE = - -# The HHC_LOCATION tag can be used to specify the location (absolute path -# including file name) of the HTML help compiler (hhc.exe). If non-empty, -# doxygen will try to run the HTML help compiler on the generated index.hhp. -# The file has to be specified with full path. -# This tag requires that the tag GENERATE_HTMLHELP is set to YES. - -HHC_LOCATION = - -# The GENERATE_CHI flag controls if a separate .chi index file is generated -# (YES) or that it should be included in the master .chm file (NO). -# The default value is: NO. -# This tag requires that the tag GENERATE_HTMLHELP is set to YES. - -GENERATE_CHI = NO - -# The CHM_INDEX_ENCODING is used to encode HtmlHelp index (hhk), content (hhc) -# and project file content. -# This tag requires that the tag GENERATE_HTMLHELP is set to YES. - -CHM_INDEX_ENCODING = - -# The BINARY_TOC flag controls whether a binary table of contents is generated -# (YES) or a normal table of contents (NO) in the .chm file. Furthermore it -# enables the Previous and Next buttons. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTMLHELP is set to YES. - -BINARY_TOC = NO - -# The TOC_EXPAND flag can be set to YES to add extra items for group members to -# the table of contents of the HTML help documentation and to the tree view. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTMLHELP is set to YES. 
- -TOC_EXPAND = NO - -# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and -# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that -# can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help -# (.qch) of the generated HTML documentation. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -GENERATE_QHP = NO - -# If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify -# the file name of the resulting .qch file. The path specified is relative to -# the HTML output folder. -# This tag requires that the tag GENERATE_QHP is set to YES. - -QCH_FILE = - -# The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help -# Project output. For more information please see Qt Help Project / Namespace -# (see: http://doc.qt.io/archives/qt-4.8/qthelpproject.html#namespace). -# The default value is: org.doxygen.Project. -# This tag requires that the tag GENERATE_QHP is set to YES. - -QHP_NAMESPACE = - -# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt -# Help Project output. For more information please see Qt Help Project / Virtual -# Folders (see: http://doc.qt.io/archives/qt-4.8/qthelpproject.html#virtual- -# folders). -# The default value is: doc. -# This tag requires that the tag GENERATE_QHP is set to YES. - -QHP_VIRTUAL_FOLDER = doc - -# If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom -# filter to add. For more information please see Qt Help Project / Custom -# Filters (see: http://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom- -# filters). -# This tag requires that the tag GENERATE_QHP is set to YES. - -QHP_CUST_FILTER_NAME = - -# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the -# custom filter to add. For more information please see Qt Help Project / Custom -# Filters (see: http://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom- -# filters). 
-# This tag requires that the tag GENERATE_QHP is set to YES. - -QHP_CUST_FILTER_ATTRS = - -# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this -# project's filter section matches. Qt Help Project / Filter Attributes (see: -# http://doc.qt.io/archives/qt-4.8/qthelpproject.html#filter-attributes). -# This tag requires that the tag GENERATE_QHP is set to YES. - -QHP_SECT_FILTER_ATTRS = - -# The QHG_LOCATION tag can be used to specify the location of Qt's -# qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the -# generated .qhp file. -# This tag requires that the tag GENERATE_QHP is set to YES. - -QHG_LOCATION = - -# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be -# generated, together with the HTML files, they form an Eclipse help plugin. To -# install this plugin and make it available under the help contents menu in -# Eclipse, the contents of the directory containing the HTML and XML files needs -# to be copied into the plugins directory of eclipse. The name of the directory -# within the plugins directory should be the same as the ECLIPSE_DOC_ID value. -# After copying Eclipse needs to be restarted before the help appears. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -GENERATE_ECLIPSEHELP = NO - -# A unique identifier for the Eclipse help plugin. When installing the plugin -# the directory name containing the HTML and XML files should also have this -# name. Each documentation set should have its own identifier. -# The default value is: org.doxygen.Project. -# This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES. - -ECLIPSE_DOC_ID = org.doxygen.Project - -# If you want full control over the layout of the generated HTML pages it might -# be necessary to disable the index and replace it with your own. The -# DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at top -# of each HTML page. 
A value of NO enables the index and the value YES disables -# it. Since the tabs in the index contain the same information as the navigation -# tree, you can set this option to YES if you also set GENERATE_TREEVIEW to YES. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -DISABLE_INDEX = NO - -# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index -# structure should be generated to display hierarchical information. If the tag -# value is set to YES, a side panel will be generated containing a tree-like -# index structure (just like the one that is generated for HTML Help). For this -# to work a browser that supports JavaScript, DHTML, CSS and frames is required -# (i.e. any modern browser). Windows users are probably better off using the -# HTML help feature. Via custom style sheets (see HTML_EXTRA_STYLESHEET) one can -# further fine-tune the look of the index. As an example, the default style -# sheet generated by doxygen has an example that shows how to put an image at -# the root of the tree instead of the PROJECT_NAME. Since the tree basically has -# the same information as the tab index, you could consider setting -# DISABLE_INDEX to YES when enabling this option. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -GENERATE_TREEVIEW = YES - -# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that -# doxygen will group on one line in the generated HTML documentation. -# -# Note that a value of 0 will completely suppress the enum values from appearing -# in the overview section. -# Minimum value: 0, maximum value: 20, default value: 4. -# This tag requires that the tag GENERATE_HTML is set to YES. - -ENUM_VALUES_PER_LINE = 4 - -# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used -# to set the initial width (in pixels) of the frame in which the tree is shown. 
-# Minimum value: 0, maximum value: 1500, default value: 250. -# This tag requires that the tag GENERATE_HTML is set to YES. - -TREEVIEW_WIDTH = 250 - -# If the EXT_LINKS_IN_WINDOW option is set to YES, doxygen will open links to -# external symbols imported via tag files in a separate window. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -EXT_LINKS_IN_WINDOW = NO - -# Use this tag to change the font size of LaTeX formulas included as images in -# the HTML documentation. When you change the font size after a successful -# doxygen run you need to manually remove any form_*.png images from the HTML -# output directory to force them to be regenerated. -# Minimum value: 8, maximum value: 50, default value: 10. -# This tag requires that the tag GENERATE_HTML is set to YES. - -FORMULA_FONTSIZE = 10 - -# Use the FORMULA_TRANSPARENT tag to determine whether or not the images -# generated for formulas are transparent PNGs. Transparent PNGs are not -# supported properly for IE 6.0, but are supported on all modern browsers. -# -# Note that when changing this option you need to delete any form_*.png files in -# the HTML output directory before the changes have effect. -# The default value is: YES. -# This tag requires that the tag GENERATE_HTML is set to YES. - -FORMULA_TRANSPARENT = YES - -# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see -# https://www.mathjax.org) which uses client side Javascript for the rendering -# instead of using pre-rendered bitmaps. Use this if you do not have LaTeX -# installed or if you want formulas to look prettier in the HTML output. When -# enabled you may also need to install MathJax separately and configure the path -# to it using the MATHJAX_RELPATH option. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. 
- -USE_MATHJAX = NO - -# When MathJax is enabled you can set the default output format to be used for -# the MathJax output. See the MathJax site (see: -# http://docs.mathjax.org/en/latest/output.html) for more details. -# Possible values are: HTML-CSS (which is slower, but has the best -# compatibility), NativeMML (i.e. MathML) and SVG. -# The default value is: HTML-CSS. -# This tag requires that the tag USE_MATHJAX is set to YES. - -MATHJAX_FORMAT = HTML-CSS - -# When MathJax is enabled you need to specify the location relative to the HTML -# output directory using the MATHJAX_RELPATH option. The destination directory -# should contain the MathJax.js script. For instance, if the mathjax directory -# is located at the same level as the HTML output directory, then -# MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax -# Content Delivery Network so you can quickly see the result without installing -# MathJax. However, it is strongly recommended to install a local copy of -# MathJax from https://www.mathjax.org before deployment. -# The default value is: https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/. -# This tag requires that the tag USE_MATHJAX is set to YES. - -MATHJAX_RELPATH = https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.2/ - -# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax -# extension names that should be enabled during MathJax rendering. For example -# MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols -# This tag requires that the tag USE_MATHJAX is set to YES. - -MATHJAX_EXTENSIONS = - -# The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces -# of code that will be used on startup of the MathJax code. See the MathJax site -# (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an -# example see the documentation. -# This tag requires that the tag USE_MATHJAX is set to YES. 
- -MATHJAX_CODEFILE = - -# When the SEARCHENGINE tag is enabled doxygen will generate a search box for -# the HTML output. The underlying search engine uses javascript and DHTML and -# should work on any modern browser. Note that when using HTML help -# (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET) -# there is already a search function so this one should typically be disabled. -# For large projects the javascript based search engine can be slow, then -# enabling SERVER_BASED_SEARCH may provide a better solution. It is possible to -# search using the keyboard; to jump to the search box use + S -# (what the is depends on the OS and browser, but it is typically -# , /